for (i = 0; i < submit->nr_cmds; i++) { switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: /* ignore IB-targets */ break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: /* ignore if there has not been a ctx switch: */ if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break;
fallthrough; case MSM_SUBMIT_CMD_BUF:
OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
OUT_RING(ring, submit->cmd[i].size);
OUT_PKT2(ring); break;
}
}
/* Flush HLSQ lazy updates to make sure there is nothing * pending for indirect loads after the timestamp has * passed:
*/
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, HLSQ_FLUSH);
/* wait for idle before cache flush/interrupt */
OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
OUT_RING(ring, 0x00000000);
/* * a4xx_enable_hwcg() - Program the clock control registers * @device: The adreno device pointer
*/ staticvoid a4xx_enable_hwcg(struct msm_gpu *gpu)
{ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); unsignedint i; for (i = 0; i < 4; i++)
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202); for (i = 0; i < 4; i++)
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222); for (i = 0; i < 4; i++)
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7); for (i = 0; i < 4; i++)
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111); for (i = 0; i < 4; i++)
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222); for (i = 0; i < 4; i++)
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222); for (i = 0; i < 4; i++)
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104); for (i = 0; i < 4; i++)
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112); for (i = 0; i < 4; i++)
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);
/* Disable L1 clocking in A420 due to CCU issues with it */ for (i = 0; i < 4; i++) { if (adreno_is_a420(adreno_gpu)) {
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
0x00002020);
} else {
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
0x00022020);
}
}
/* No CCU for A405 */ if (!adreno_is_a405(adreno_gpu)) { for (i = 0; i < 4; i++) {
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
0x00000922);
}
for (i = 0; i < 4; i++) {
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
0x00000000);
}
for (i = 0; i < 4; i++) {
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
0x00000001);
}
}
/* Enable power counters*/
gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);
/* * Turn on hang detection - this spews a lot of useful information * into the RBBM registers on a hang:
*/
gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
(1 << 30) | 0xFFFF);
/* Turn on performance counters: */
gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
/* use the first CP counter for timestamp queries.. userspace may set * this as well but it selects the same counter/countable:
*/
gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);
if (adreno_is_a430(adreno_gpu))
gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);
/* Disable L2 bypass to avoid UCHE out of bounds errors */
gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, lower_32_bits(adreno_gpu->uche_trap_base));
gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, upper_32_bits(adreno_gpu->uche_trap_base));
/* On A430 enable SP regfile sleep for power savings */ /* TODO downstream does this for !420, so maybe applies for 405 too? */ if (!adreno_is_a420(adreno_gpu)) {
gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
0x00000441);
gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
0x00000441);
}
a4xx_enable_hwcg(gpu);
/* * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2 * due to timing issue with HLSQ_TP_CLK_EN
*/ if (adreno_is_a420(adreno_gpu)) { unsignedint val;
val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
}
/* * Use the default ringbuffer size and block size but disable the RPTR * shadow
*/
gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
/* Set the ringbuffer address */
gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
/* Load PM4: */
ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
DBG("loading PM4 ucode version: %u", ptr[0]);
gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0); for (i = 1; i < len; i++)
gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);
staticbool a4xx_idle(struct msm_gpu *gpu)
{ /* wait for ringbuffer to drain: */ if (!adreno_idle(gpu, gpu->rb[0])) returnfalse;
/* then wait for GPU to finish: */ if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
A4XX_RBBM_STATUS_GPU_BUSY))) {
DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name); /* TODO maybe we need to reset GPU here to recover from hang? */ returnfalse;
}
ret = msm_gpu_pm_resume(gpu); if (ret) return ret;
if (adreno_is_a430(adreno_gpu)) { unsignedint reg; /* Set the default register values; set SW_COLLAPSE to 0 */
gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000); do {
udelay(5);
reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
} return 0;
}
ret = msm_gpu_pm_suspend(gpu); if (ret) return ret;
if (adreno_is_a430(adreno_gpu)) { /* Set the default register values; set SW_COLLAPSE to 1 */
gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
} return 0;
}
icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem"); if (IS_ERR(icc_path)) {
ret = PTR_ERR(icc_path); goto fail;
}
ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem"); if (IS_ERR(ocmem_icc_path)) {
ret = PTR_ERR(ocmem_icc_path); /* allow -ENODATA, ocmem icc is optional */ if (ret != -ENODATA) goto fail;
ocmem_icc_path = NULL;
}
/* * Set the ICC path to maximum speed for now by multiplying the fastest * frequency by the bus width (8). We'll want to scale this later on to * improve battery life.
*/
icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
return gpu;
fail: if (a4xx_gpu)
a4xx_destroy(&a4xx_gpu->base.base);
return ERR_PTR(ret);
}
Messung V0.5
¤ Dauer der Verarbeitung: 0.3 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.