/* Make sure everything is posted before making a decision */
mb();
/* Update HW if this is the current ring and we are not in preempt */ if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}
for (i = 0; i < submit->nr_cmds; i++) { switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break;
fallthrough; case MSM_SUBMIT_CMD_BUF: /* copy commands into RB: */
obj = submit->bos[submit->cmd[i].idx].obj;
dwords = submit->cmd[i].size;
ptr = msm_gem_get_vaddr(obj);
/* _get_vaddr() shouldn't fail at this point, * since we've already mapped it once in * submit_reloc()
*/ if (WARN_ON(IS_ERR_OR_NULL(ptr))) return;
/*
 * Copy the command buffer into the ringbuffer one dword at a time.
 *
 * A dedicated index is used on purpose: the enclosing loop over
 * submit->cmd[] is driven by 'i', so reusing 'i' here would overwrite
 * the outer position and make the outer loop resume from the wrong
 * command once the copy is done.
 *
 * NOTE(review): 'dwords' is declared outside this excerpt; the index
 * type assumes it is an unsigned count - confirm against the declaration.
 */
for (unsigned int dw = 0; dw < dwords; dw++) {
	/*
	 * Normally the OUT_PKTn() would wait for space for the packet.
	 * But since we just OUT_RING() the whole thing, we need to call
	 * adreno_wait_ring() ourselves:
	 */
	adreno_wait_ring(ring, 1);
	OUT_RING(ring, ptr[dw]);
}
/* we might not necessarily have a cmd from userspace to * trigger an event to know that submit has completed, so * do this manually:
*/
a5xx_idle(gpu, ring);
ring->memptrs->fence = submit->seqno;
msm_gpu_retire(gpu);
}
/* Turn off protected mode to write to special registers */
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 0);
/* Set the save preemption record for the ring/command */
OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
/* Turn back on protected mode */
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 1);
/* * Disable local preemption by default because it requires * user-space to be aware of it and provide additional handling * to restore rendering state or do various flushes on switch.
*/
OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
OUT_RING(ring, 0x0);
/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
OUT_RING(ring, 0x02);
/* Submit the commands */ for (i = 0; i < submit->nr_cmds; i++) { switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break;
fallthrough; case MSM_SUBMIT_CMD_BUF:
OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
OUT_RING(ring, submit->cmd[i].size);
ibs++; break;
}
/* * Periodically update shadow-wptr if needed, so that we * can see partial progress of submits with large # of * cmds.. otherwise we could needlessly stall waiting for * ringbuffer state, simply due to looking at a shadow * rptr value that has not been updated
*/ if ((ibs % 32) == 0)
update_shadow_rptr(gpu, ring);
}
/* * Write the render mode to NULL (0) to indicate to the CP that the IBs * are done rendering - otherwise a lucky preemption would start * replaying from the last checkpoint
*/
OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
OUT_RING(ring, 0);
OUT_RING(ring, 0);
OUT_RING(ring, 0);
OUT_RING(ring, 0);
OUT_RING(ring, 0);
/* Write the fence to the scratch register */
OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
OUT_RING(ring, submit->seqno);
a5xx_gpu->last_seqno[ring->id] = submit->seqno;
/* * Execute a CACHE_FLUSH_TS event. This will ensure that the * timestamp is written to the memory and then triggers the interrupt
*/
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
CP_EVENT_WRITE_0_IRQ);
OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, submit->seqno);
/* Yield the floor on command completion */
OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); /* * If dword[2:1] are non zero, they specify an address for the CP to * write the value of dword[3] to on preemption complete. Write 0 to * skip the write
*/
OUT_RING(ring, 0x00);
OUT_RING(ring, 0x00); /* Data value - not used if the address above is 0 */
OUT_RING(ring, 0x01); /* Set bit 0 to trigger an interrupt on preempt complete */
OUT_RING(ring, 0x01);
/* A WHERE_AM_I packet is not needed after a YIELD */
a5xx_flush(gpu, ring, false);
/* Check to see if we need to start preemption */
a5xx_preempt_trigger(gpu);
}
/*
 * Specify workarounds for various microcode issues. Exactly one dword is
 * written to the ring, selected by the GPU model.
 */
if (adreno_is_a505(adreno_gpu) || adreno_is_a506(adreno_gpu) ||
    adreno_is_a530(adreno_gpu)) {
	/*
	 * Workaround for token end syncs:
	 * force a WFI after every direct-render 3D mode draw and every
	 * 2D mode 3 draw.
	 */
	OUT_RING(ring, 0x0000000B);
} else if (adreno_is_a510(adreno_gpu)) {
	/* Workaround for token and syncs */
	OUT_RING(ring, 0x00000001);
} else {
	/* No workarounds enabled */
	OUT_RING(ring, 0x00000000);
}
/* Turn off protected mode to write to special registers */
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 0);
/* Set the save preemption record for the ring/command */
OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
/* Turn back on protected mode */
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 1);
/* * If the lowest nibble is 0xa that is an indication that this microcode * has been patched. The actual version is in dword [3] but we only care * about the patchlevel which is the lowest nibble of dword [3]
*/ if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
a5xx_gpu->has_whereami = true;
/* * Adreno 506 have CPZ Retention feature and doesn't require * to resume zap shader
*/ if (adreno_is_a506(adreno_gpu)) return 0;
ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID); if (ret)
DRM_ERROR("%s: zap-shader resume failed: %d\n",
gpu->name, ret);
return ret;
}
staticint a5xx_zap_shader_init(struct msm_gpu *gpu)
{ staticbool loaded; int ret;
/* * If the zap shader is already loaded into memory we just need to kick * the remote processor to reinitialize it
*/ if (loaded) return a5xx_zap_shader_resume(gpu);
/* * In A5x, CCU can send context_done event of a particular context to * UCHE which ultimately reaches CP even when there is valid * transaction of that context inside CCU. This can let CP to program * config registers, which will make the "valid transaction" inside * CCU to be interpreted differently. This can cause gpu fault. This * bug is fixed in latest A510 revision. To enable this bug fix - * bit[11] of RB_DBG_ECO_CNTL need to be set to 0, default is 1 * (disable). For older A510 version this bit is unused.
*/ if (adreno_is_a510(adreno_gpu))
gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0); /* * Disable the trusted memory range - we don't actually supported secure * memory rendering at this point in time and we don't want to block off * part of the virtual memory space.
*/
gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
/* Put the GPU into 64 bit by default */
gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
/* * VPC corner case with local memory load kill leads to corrupt * internal state. Normal Disable does not work for all a5x chips. * So do the following setting to disable it.
*/ if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
}
ret = adreno_hw_init(gpu); if (ret) return ret;
if (adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))
a5xx_gpmu_ucode_init(gpu);
/* Set the ringbuffer address */
gpu_write64(gpu, REG_A5XX_CP_RB_BASE, gpu->rb[0]->iova);
/* * If the microcode supports the WHERE_AM_I opcode then we can use that * in lieu of the RPTR shadow and enable preemption. Otherwise, we * can't safely use the RPTR shadow or preemption. In either case, the * RPTR shadow should be disabled in hardware.
*/
gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
/* Configure the RPTR shadow if needed: */ if (a5xx_gpu->shadow_bo) {
gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
shadowptr(a5xx_gpu, gpu->rb[0]));
}
a5xx_preempt_hw_init(gpu);
/* Disable the interrupts through the initial bringup stage */
gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
/* Clear ME_HALT to start the micro engine */
gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
ret = a5xx_me_init(gpu); if (ret) return ret;
ret = a5xx_power_init(gpu); if (ret) return ret;
/* * Send a pipeline event stat to get misbehaving counters to start * ticking correctly
*/ if (adreno_is_a530(adreno_gpu)) {
OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
a5xx_flush(gpu, gpu->rb[0], true); if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL;
}
/* * If the chip that we are using does support loading one, then * try to load a zap shader into the secure world. If successful * we can use the CP to switch out of secure mode. If not then we * have no resource but to try to switch ourselves out manually. If we * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will * be blocked and a permissions violation will soon follow.
*/
ret = a5xx_zap_shader_init(gpu); if (!ret) {
OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
OUT_RING(gpu->rb[0], 0x00000000);
a5xx_flush(gpu, gpu->rb[0], true); if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL;
} elseif (ret == -ENODEV) { /* * This device does not use zap shader (but print a warning * just in case someone got their dt wrong.. hopefully they * have a debug UART to realize the error of their ways... * if you mess this up you are about to crash horribly)
*/
dev_warn_once(gpu->dev->dev, "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
} else { return ret;
}
/* Last step - yield the ringbuffer */
a5xx_preempt_start(gpu);
return 0;
}
/*
 * a5xx_recover() - as it appears in this excerpt.
 *
 * @gpu: the GPU instance to operate on.
 *
 * Calls adreno_dump_info() and then prints the eight CP scratch registers.
 *
 * NOTE(review): the trailing 'return !(...)' yields a value from a function
 * declared void, and its comment is about watching for an interrupt rather
 * than recovery. It looks like the tail of a different routine joined on by
 * the extraction. It is left untouched here; restore the real remainder of
 * this function from the original file.
 */
static void a5xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
	}

	/*
	 * Nearly every abnormality ends up pausing the GPU and triggering a
	 * fault so we can safely just watch for this one interrupt to fire
	 */
	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}
/* * If stalled on SMMU fault, we could trip the GPU's hang detection, * but the fault handler will trigger the devcore dump, and we want * to otherwise resume normally rather than killing the submit, so * just bail.
*/ if (gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24)) return;
DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
gpu_read(gpu, REG_A5XX_RBBM_STATUS),
gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
gpu_read64(gpu, REG_A5XX_CP_IB1_BASE),
gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
gpu_read64(gpu, REG_A5XX_CP_IB2_BASE),
gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
/* Turn off the hangcheck timer to keep it from bothering us */
timer_delete(&gpu->hangcheck_timer);
/* * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it * before the source is cleared the interrupt will storm.
*/
gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
if (priv->disable_err_irq) {
status &= A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS |
A5XX_RBBM_INT_0_MASK_CP_SW;
}
/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */ if (status & RBBM_ERROR_MASK)
a5xx_rbbm_err_irq(gpu, status);
if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
a5xx_cp_err_irq(gpu);
if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
a5xx_fault_detect_irq(gpu);
if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
a5xx_uche_err_irq(gpu);
if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
a5xx_gpmu_err_irq(gpu);
if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
a5xx_preempt_trigger(gpu);
msm_gpu_retire(gpu);
}
if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
a5xx_preempt_irq(gpu);
/* A505, A506, A508, A510 have 3 XIN ports in VBIF */ if (adreno_is_a505(adreno_gpu) || adreno_is_a506(adreno_gpu) ||
adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu))
mask = 0x7;
/* Clear the VBIF pipe before shutting down */
gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
mask) == mask);
gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
/* * Reset the VBIF before power collapse to avoid issue with FIFO * entries on Adreno A510 and A530 (the others will tend to lock up)
*/ if (adreno_is_a510(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
}
ret = msm_gpu_pm_suspend(gpu); if (ret) return ret;
if (a5xx_gpu->has_whereami) for (i = 0; i < gpu->nr_rings; i++)
a5xx_gpu->shadow[i] = 0;
/* The script will be written at offset 0 */
ptr = dumper.ptr;
/* Start writing the data at offset 256k */
offset = dumper.iova + (256 * SZ_1K);
/* Count how many additional registers to get from the HLSQ aperture */ for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
count += a5xx_hlsq_aperture_regs[i].count;
a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL); if (!a5xx_state->hlsqregs) return;
/* Build the crashdump script */ for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
u32 type = a5xx_hlsq_aperture_regs[i].type;
u32 c = a5xx_hlsq_aperture_regs[i].count;
/* Write the register to select the desired bank */
*ptr++ = ((u64) type << 8);
*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
(1 << 21) | 1;
/* * Get the HLSQ regs with the help of the crashdumper, but only if * we are not stalled in an iommu fault (in which case the crashdumper * would not have access to memory)
*/ if (!stalled)
a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
/* Dump the additional a5xx HLSQ registers */ if (!a5xx_state->hlsqregs) return;
drm_printf(p, "registers-hlsq:\n");
for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
u32 c = a5xx_hlsq_aperture_regs[i].count;
for (j = 0; j < c; j++, pos++, o++) { /* * To keep the crashdump simple we pull the entire range * for each register type but not all of the registers * in the range are valid. Fortunately invalid registers * stick out like a sore thumb with a value of * 0xdeadbeef
*/ if (a5xx_state->hlsqregs[pos] == 0xdeadbeef) continue;
/* * If the OPP table specifies a opp-supported-hw property then we have * to set something with dev_pm_opp_set_supported_hw() or the table * doesn't get populated so pick an arbitrary value that should * ensure the default frequencies are selected but not conflict with any * actual bins
*/
val = 0x80;
cell = nvmem_cell_get(dev, "speed_bin");
if (!IS_ERR(cell)) { void *buf = nvmem_cell_read(cell, NULL);
/* Set up the preemption specific bits and pieces for each ringbuffer */
a5xx_preempt_init(gpu);
/* Inherit the common config and make some necessary fixups */
common_cfg = qcom_ubwc_config_get_data(); if (IS_ERR(common_cfg)) return ERR_CAST(common_cfg);
/* Copy the data into the internal struct to drop the const qualifier (temporarily) */
adreno_gpu->_ubwc_config = *common_cfg;
adreno_gpu->ubwc_config = &adreno_gpu->_ubwc_config;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.