// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
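/*
 * Command submission (this excerpt appears to pick up mid-function, in
 * a3xx_submit() of the upstream driver): emit one CP_INDIRECT_BUFFER_PFD
 * packet per cmdstream buffer, skipping the context-restore buffer when
 * the submitting context is still current on the ring.
 */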
        for (i = 0; i < submit->nr_cmds; i++) {
                switch (submit->cmd[i].type) {
                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
                        /* ignore IB-targets */
                        break;
                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
                        /* ignore if there has not been a ctx switch: */
                        if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
                                break;
                        fallthrough;
                case MSM_SUBMIT_CMD_BUF:
                        OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
                        OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
                        OUT_RING(ring, submit->cmd[i].size);
                        OUT_PKT2(ring);
                        break;
                }
        }
        /* Flush HLSQ lazy updates to make sure there is nothing
         * pending for indirect loads after the timestamp has
         * passed:
         */
        OUT_PKT3(ring, CP_EVENT_WRITE, 1);
        OUT_RING(ring, HLSQ_FLUSH);

        /* wait for idle before cache flush/interrupt */
        OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
        OUT_RING(ring, 0x00000000);
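/*
 * Per-SoC VBIF (GPU bus interface) tuning follows; it appears to come
 * from the hw_init path (a3xx_hw_init() in the upstream driver).  The
 * per-variant magic values presumably originate in the downstream
 * driver.
 */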
        if (adreno_is_a305(adreno_gpu)) {
                /* Set up 16 deep read/write request queues: */
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
                gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
                /* Enable WR-REQ: */
                gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
                /* Set up round robin arbitration between both AXI ports: */
                gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
                /* Set up AOOO: */
                gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
        } else if (adreno_is_a305b(adreno_gpu)) {
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x00181818);
                gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x00181818);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000018);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000018);
                gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303);
                gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
        } else if (adreno_is_a306(adreno_gpu)) {
                gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
        } else if (adreno_is_a306a(adreno_gpu)) {
                gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000010);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000010);
        } else if (adreno_is_a320(adreno_gpu)) {
                /* Set up 16 deep read/write request queues: */
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
                gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
                /* Enable WR-REQ: */
                gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
                /* Set up round robin arbitration between both AXI ports: */
                gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
                /* Set up AOOO: */
                gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
                /* Enable 1K sort: */
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
        } else if (adreno_is_a330v2(adreno_gpu)) {
                /*
                 * Most of the VBIF registers on 8974v2 have the correct
                 * values at power on, so we won't modify those if we don't
                 * need to
                 */
                /* Enable 1k sort: */
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
                /* Enable WR-REQ: */
                gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
                gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
                /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
                gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
        } else if (adreno_is_a330(adreno_gpu)) {
                /* Set up 16 deep read/write request queues: */
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
                gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
                /* Enable WR-REQ: */
                gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
                /* Set up round robin arbitration between both AXI ports: */
                gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
                /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
                gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
                /* Set up AOOO: */
                gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
                /* Enable 1K sort: */
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
                /* Disable VBIF clock gating. This is to enable AXI running
                 * higher frequency than GPU:
                 */
                gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
        } else {
                BUG();
        }
        /* Make all blocks contribute to the GPU BUSY perf counter: */
        gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

        /* Tune the hysteresis counters for SP and CP idle detection: */
        gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
        gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

        /* Enable the RBBM error reporting bits.  This lets us get
         * useful information on failure:
         */
        gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

        /* Turn on the power counters: */
        gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

        /* Turn on hang detection - this spews a lot of useful information
         * into the RBBM registers on a hang:
         */
        gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
        if (adreno_is_a330v2(adreno_gpu))
                gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
        else if (adreno_is_a330(adreno_gpu))
                gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

        /* Set the OCMEM base address for A330, etc */
        if (a3xx_gpu->ocmem.hdl) {
                gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
                        (unsigned int)(a3xx_gpu->ocmem.base >> 14));
        }
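        /*
         * The >> 14 above suggests RB_GMEM_BASE_ADDR takes the OCMEM base
         * in 16 KiB (1 << 14) units; the granularity is inferred from the
         * shift, not from documentation.
         */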
        /* Turn on performance counters: */
        gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

        /* Enable the perfcntrs that we use.. */
        for (i = 0; i < gpu->num_perfcntrs; i++) {
                const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
                gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
        }
        /*
         * Use the default ringbuffer size and block size but disable the RPTR
         * shadow
         */
        gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
                MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

        /* Set the ringbuffer address */
        gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
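        /*
         * With AXXX_CP_RB_CNTL_NO_UPDATE set, the CP should not write its
         * read pointer back to a memory-backed shadow, leaving the driver
         * to read CP_RB_RPTR from the register directly (an assumption
         * based on the flag name and the comment above).
         */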
        /* NOTE: PM4/micro-engine firmware registers look to be the same
         * for a2xx and a3xx.. we could possibly push that part down to
         * adreno_gpu base class.  Or push both PM4 and PFP but
         * parameterize the pfp ucode addr/data registers..
         */
        gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
        for (i = 1; i < len; i++)
                gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
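        /*
         * ptr/len here are assumed to describe the PFP firmware image
         * (adreno_gpu->fw[ADRENO_FW_PFP] upstream), written out word by
         * word; the corresponding PM4 firmware load is not shown in this
         * excerpt.
         */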
        /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
        if (adreno_is_a305(adreno_gpu) ||
                        adreno_is_a306(adreno_gpu) ||
                        adreno_is_a306a(adreno_gpu) ||
                        adreno_is_a320(adreno_gpu)) {
                gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
                                AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
                                AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
                                AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
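                /*
                 * These START() offsets are consistent with the byte sizes
                 * in the comment above if each unit is 8 bytes: RB occupies
                 * [0,2) = 16B, IB1 [2,6) = 32B, IB2 [6,14) = 64B and ST
                 * [14,16) = 16B.  The 8-byte granularity is an inference
                 * from that arithmetic, not from documentation.
                 */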
        } else if (adreno_is_a330(adreno_gpu) || adreno_is_a305b(adreno_gpu)) {
                /* NOTE: this (value taken from the downstream android
                 * driver) includes some bits outside of the known
                 * bitfields.  But A330 has this "MERCIU queue" thing too,
                 * which might explain a new bitfield or reshuffling:
                 */
                gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
        }
static bool a3xx_idle(struct msm_gpu *gpu)
{
        /* wait for ringbuffer to drain: */
        if (!adreno_idle(gpu, gpu->rb[0]))
                return false;

        /* then wait for GPU to finish: */
        if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
                        A3XX_RBBM_STATUS_GPU_BUSY))) {
                DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

                /* TODO maybe we need to reset GPU here to recover from hang? */
                return false;
        }

        return true;
}
/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
        printk("status:   %08x\n",
                        gpu_read(gpu, REG_A3XX_RBBM_STATUS));
        adreno_dump(gpu);
}
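/*
 * The remainder of the excerpt appears to come from the probe path
 * (a3xx_gpu_init() upstream): base-class init, OCMEM-backed GMEM
 * allocation for A330/A305B, and interconnect (ICC) bandwidth setup.
 */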
        ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
        if (ret)
                goto fail;

        /* if needed, allocate gmem: */
        if (adreno_is_a330(adreno_gpu) || adreno_is_a305b(adreno_gpu)) {
                ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
                                adreno_gpu, &a3xx_gpu->ocmem);
                if (ret)
                        goto fail;
        }

        icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
        if (IS_ERR(icc_path)) {
                ret = PTR_ERR(icc_path);
                goto fail;
        }

        ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
        if (IS_ERR(ocmem_icc_path)) {
                ret = PTR_ERR(ocmem_icc_path);
                /* allow -ENODATA, ocmem icc is optional */
                if (ret != -ENODATA)
                        goto fail;
                ocmem_icc_path = NULL;
        }
        /*
         * Set the ICC path to maximum speed for now by multiplying the fastest
         * frequency by the bus width (8).  We'll want to scale this later on to
         * improve battery life.
         */
        icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
        icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
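        /*
         * Worked example with a hypothetical fast_rate of 450000000
         * (450 MHz): Bps_to_icc() converts bytes/s to the ICC kBps unit,
         * so the request is (450000000 / 1000) * 8 = 3600000 kBps, i.e.
         * roughly 3.6 GB/s of peak bandwidth on each path.
         */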
        return gpu;

fail:
        if (a3xx_gpu)
                a3xx_destroy(&a3xx_gpu->base.base);

        return ERR_PTR(ret);
}