/* Try again for another 1ms before failing */
gpu_write(gpu, offset, value);
mb();
if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
fence_status_check(gpu, offset, value, status, mask), 0, 1000)) { /* * The 'delay' warning is here because the pause to print this * warning will allow gpu to move to power collapse which * defeats the purpose of continuous polling for 2 ms
*/
dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
offset); return 0;
}
/* Copy the shadow to the actual register */
ring->cur = ring->next;
/* Make sure to wrap wptr if we need to */
wptr = get_wptr(ring);
/* Update HW if this is the current ring and we are not in preempt*/ if (!a6xx_in_preempt(a6xx_gpu)) { if (a6xx_gpu->cur_ring == ring)
a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false); else
ring->restore_wptr = true;
} else {
ring->restore_wptr = true;
}
if (!sysprof) { if (!adreno_is_a7xx(adreno_gpu)) { /* Turn off protected mode to write to special registers */
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 0);
}
/* * Write the new TTBR0 to the memstore. This is good for debugging. * Needed for preemption
*/
OUT_PKT7(ring, CP_MEM_WRITE, 5);
OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
OUT_RING(ring, lower_32_bits(ttbr));
OUT_RING(ring, upper_32_bits(ttbr));
OUT_RING(ring, ctx->seqno);
/* * Sync both threads after switching pagetables and enable BR only * to make sure BV doesn't race ahead while BR is still switching * pagetables.
*/ if (adreno_is_a7xx(&a6xx_gpu->base)) {
OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
}
/* * And finally, trigger a uche flush to be sure there isn't anything * lingering in that part of the GPU
*/
if (!sysprof) { /* * Wait for SRAM clear after the pgtable update, so the * two can happen in parallel:
*/
OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
/* * For PM4 the GMU register offsets are calculated from the base of the * GPU registers so we need to add 0x1a800 to the register value on A630 * to get the right value from PM4.
*/
get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
rbmemptr_stats(ring, index, alwayson_start));
/* Invalidate CCU depth and color */
OUT_PKT7(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));
/* Submit the commands */ for (i = 0; i < submit->nr_cmds; i++) { switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break;
fallthrough; case MSM_SUBMIT_CMD_BUF:
OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
ibs++; break;
}
/* * Periodically update shadow-wptr if needed, so that we * can see partial progress of submits with large # of * cmds.. otherwise we could needlessly stall waiting for * ringbuffer state, simply due to looking at a shadow * rptr value that has not been updated
*/ if ((ibs % 32) == 0)
update_shadow_rptr(gpu, ring);
}
/* Write the fence to the scratch register */
OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
OUT_RING(ring, submit->seqno);
/* * Execute a CACHE_FLUSH_TS event. This will ensure that the * timestamp is written to the memory and then triggers the interrupt
*/
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
CP_EVENT_WRITE_0_IRQ);
OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, submit->seqno);
/* * Toggle concurrent binning for pagetable switch and set the thread to * BR since only it can execute the pagetable switch packets.
*/
OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
a6xx_set_pagetable(a6xx_gpu, ring, submit);
/* * If preemption is enabled, then set the pseudo register for the save * sequence
*/ if (gpu->nr_rings > 1)
a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);
/* Submit the commands */ for (i = 0; i < submit->nr_cmds; i++) { switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break;
fallthrough; case MSM_SUBMIT_CMD_BUF:
OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
ibs++; break;
}
/* * Periodically update shadow-wptr if needed, so that we * can see partial progress of submits with large # of * cmds.. otherwise we could needlessly stall waiting for * ringbuffer state, simply due to looking at a shadow * rptr value that has not been updated
*/ if ((ibs % 32) == 0)
update_shadow_rptr(gpu, ring);
}
if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
OUT_PKT7(ring, CP_SET_MARKER, 1);
OUT_RING(ring, 0x00e); /* IB1LIST end */
}
/* * Make sure the timestamp is committed once BV pipe is * completely done with this submission.
*/
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CACHE_CLEAN | BIT(27));
OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
OUT_RING(ring, submit->seqno);
/* * This makes sure that BR doesn't race ahead and commit * timestamp to memstore while BV is still processing * this submission.
*/
OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
OUT_RING(ring, 0);
OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
OUT_RING(ring, submit->seqno);
/* If preemption is enabled */ if (gpu->nr_rings > 1) { /* Yield the floor on command completion */
OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
/* * If dword[2:1] are non zero, they specify an address for * the CP to write the value of dword[3] to on preemption * complete. Write 0 to skip the write
*/
OUT_RING(ring, 0x00);
OUT_RING(ring, 0x00); /* Data value - not used if the address above is 0 */
OUT_RING(ring, 0x01); /* generate interrupt on preemption completion */
OUT_RING(ring, 0x00);
}
/* * Enable access protection to privileged registers, fault on an access * protect violation and select the last span to protect from the start * address all the way to the end of the register address space
*/
gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL,
A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN |
A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN |
A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE);
for (i = 0; i < protect->count - 1; i++) { /* Intentionally skip writing to some registers */ if (protect->regs[i])
gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]);
} /* last CP_PROTECT to have "infinite" length on the last entry */
gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]);
}
/* Inherit the common config and make some necessary fixups */
common_cfg = qcom_ubwc_config_get_data(); if (IS_ERR(common_cfg)) return PTR_ERR(common_cfg);
/* Copy the data into the internal struct to drop the const qualifier (temporarily) */
*cfg = *common_cfg;
if (adreno_is_a610(gpu)) {
cfg->highest_bank_bit = 13;
cfg->ubwc_swizzle = 0x7;
}
if (adreno_is_a618(gpu))
cfg->highest_bank_bit = 14;
if (adreno_is_a619(gpu)) /* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
cfg->highest_bank_bit = 13;
if (adreno_is_a619_holi(gpu))
cfg->highest_bank_bit = 13;
if (adreno_is_a621(gpu))
cfg->highest_bank_bit = 13;
if (adreno_is_a623(gpu))
cfg->highest_bank_bit = 16;
if (adreno_is_a650(gpu) ||
adreno_is_a660(gpu) ||
adreno_is_a690(gpu) ||
adreno_is_a730(gpu) ||
adreno_is_a740_family(gpu)) { /* TODO: get ddr type from bootloader and use 15 for LPDDR4 */
cfg->highest_bank_bit = 16;
}
if (adreno_is_a663(gpu)) {
cfg->highest_bank_bit = 13;
cfg->ubwc_swizzle = 0x4;
}
if (adreno_is_7c3(gpu))
cfg->highest_bank_bit = 14;
if (adreno_is_a702(gpu))
cfg->highest_bank_bit = 14;
if (cfg->highest_bank_bit != common_cfg->highest_bank_bit)
DRM_WARN_ONCE("Inconclusive highest_bank_bit value: %u (GPU) vs %u (UBWC_CFG)\n",
cfg->highest_bank_bit, common_cfg->highest_bank_bit);
if (cfg->ubwc_swizzle != common_cfg->ubwc_swizzle)
DRM_WARN_ONCE("Inconclusive ubwc_swizzle value: %u (GPU) vs %u (UBWC_CFG)\n",
cfg->ubwc_swizzle, common_cfg->ubwc_swizzle);
gpu->ubwc_config = &gpu->_ubwc_config;
return 0;
}
staticvoid a6xx_set_ubwc_config(struct msm_gpu *gpu)
{ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); conststruct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config; /* * We subtract 13 from the highest bank bit (13 is the minimum value * allowed by hw) and write the lowest two bits of the remaining value * as hbb_lo and the one above it as hbb_hi to the hardware.
*/
BUG_ON(cfg->highest_bank_bit < 13);
u32 hbb = cfg->highest_bank_bit - 13; bool rgb565_predicator = cfg->ubwc_enc_version >= UBWC_4_0;
u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2); bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg); bool amsbc = cfg->ubwc_enc_version >= UBWC_3_0; bool min_acc_len_64b = false;
u8 uavflagprd_inv = 0;
u32 hbb_hi = hbb >> 2;
u32 hbb_lo = hbb & 3;
if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu))
uavflagprd_inv = 2;
if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
min_acc_len_64b = true;
/* * For each entry in each of the lists, write the offset and the current * register value into the GPU buffer
*/ for (i = 0; i < reglist->count; i++) {
*dest++ = reglist->regs[i];
*dest++ = gpu_read(gpu, reglist->regs[i]);
}
/* * The overall register list is composed of * 1. Static IFPC-only registers * 2. Static IFPC + preemption registers * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects) * * The first two lists are static. Size of these lists are stored as * number of pairs in ifpc_list_len and preemption_list_len * respectively. With concurrent binning, Some of the perfcounter * registers being virtualized, CP needs to know the pipe id to program * the aperture inorder to restore the same. Thus, third list is a * dynamic list with triplets as * (<aperture, shifted 12 bits> <address> <data>), and the length is * stored as number for triplets in dynamic_list_len.
*/
lock->dynamic_list_len = 0;
}
/* *Don't* send a power up reg list for concurrent binning (TODO) */ /* Lo address */
OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova)); /* Hi address */
OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova)); /* BIT(31) set => read the regs from the list */
OUT_RING(ring, BIT(31));
/*
 * Check that the microcode version is new enough to include several key
 * security fixes.
 *
 * @a6xx_gpu: the GPU whose SQE firmware is being validated
 * @obj:      GEM object holding the loaded SQE microcode image
 *
 * Return: true if the ucode is safe to run, false otherwise.
 */
static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
		struct drm_gem_object *obj)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
	u32 *buf = msm_gem_get_vaddr(obj);
	bool ret = false;

	if (IS_ERR(buf))
		return false;

	/* A7xx is safe! */
	if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu)) {
		/*
		 * NOTE(review): go through 'out' instead of returning
		 * directly so the vaddr pin taken above is dropped, matching
		 * every other exit path of this function.
		 */
		ret = true;
		goto out;
	}

	/*
	 * Targets up to a640 (a618, a630 and a640) need to check for a
	 * microcode version that is patched to support the whereami opcode or
	 * one that is new enough to include it by default.
	 *
	 * a650 tier targets don't need whereami but still need to be
	 * equal to or newer than 0.95 for other security fixes
	 *
	 * a660 targets have all the critical security fixes from the start
	 */
	if (!strcmp(sqe_name, "a630_sqe.fw")) {
		/*
		 * If the lowest nibble is 0xa that is an indication that this
		 * microcode has been patched. The actual version is in dword
		 * [3] but we only care about the patchlevel which is the lowest
		 * nibble of dword [3]
		 *
		 * Otherwise check that the firmware is greater than or equal
		 * to 1.90 which was the first version that had this fix built
		 * in
		 */
		if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
				(buf[0] & 0xfff) >= 0x190) {
			a6xx_gpu->has_whereami = true;
			ret = true;
			goto out;
		}

		DRM_DEV_ERROR(&gpu->pdev->dev,
			"a630 SQE ucode is too old. Have version %x need at least %x\n",
			buf[0] & 0xfff, 0x190);
	} else if (!strcmp(sqe_name, "a650_sqe.fw")) {
		if ((buf[0] & 0xfff) >= 0x095) {
			ret = true;
			goto out;
		}

		DRM_DEV_ERROR(&gpu->pdev->dev,
			"a650 SQE ucode is too old. Have version %x need at least %x\n",
			buf[0] & 0xfff, 0x095);
	} else if (!strcmp(sqe_name, "a660_sqe.fw")) {
		ret = true;
	} else {
		DRM_DEV_ERROR(&gpu->pdev->dev,
			"unknown GPU, add it to a6xx_ucode_check_version()!!\n");
	}
out:
	msm_gem_put_vaddr(obj);
	return ret;
}
msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw"); if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm);
drm_gem_object_put(a6xx_gpu->sqe_bo);
a6xx_gpu->sqe_bo = NULL; return -EPERM;
}
}
/* * Expanded APRIV and targets that support WHERE_AM_I both need a * privileged buffer to store the RPTR shadow
*/ if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) &&
!a6xx_gpu->shadow_bo) {
a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev, sizeof(u32) * gpu->nr_rings,
MSM_BO_WC | MSM_BO_MAP_PRIV,
gpu->vm, &a6xx_gpu->shadow_bo,
&a6xx_gpu->shadow_iova);
if (IS_ERR(a6xx_gpu->shadow)) return PTR_ERR(a6xx_gpu->shadow);
if (!adreno_has_gmu_wrapper(adreno_gpu)) { /* Make sure the GMU keeps the GPU on while we set it up */
ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); if (ret) return ret;
}
/* Clear GBIF halt in case GX domain was not collapsed */ if (adreno_is_a619_holi(adreno_gpu)) {
gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
gpu_read(gpu, REG_A6XX_GBIF_HALT);
if (adreno_is_a619_holi(adreno_gpu))
a6xx_sptprac_enable(gmu);
/* * Disable the trusted memory range - we don't actually supported secure * memory rendering at this point in time and we don't want to block off * part of the virtual memory space.
*/
gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
if (!adreno_is_a7xx(adreno_gpu)) { /* Turn on 64 bit addressing for all blocks */
gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
}
if (adreno_is_a660_family(adreno_gpu))
gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
/* Setting the mem pool size */ if (adreno_is_a610(adreno_gpu)) {
gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48);
gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47);
} elseif (adreno_is_a702(adreno_gpu)) {
gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 64);
gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 63);
} elseif (!adreno_is_a7xx(adreno_gpu))
gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);
/* Set the default primFifo threshold values */ if (adreno_gpu->info->a6xx->prim_fifo_threshold)
gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL,
adreno_gpu->info->a6xx->prim_fifo_threshold);
/* Set the AHB default slave response to "ERROR" */
gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
/* Turn on performance counters */
gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);
if (adreno_is_a7xx(adreno_gpu)) { /* Turn on the IFPC counter (countable 4 on XOCLK4) */
gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1,
FIELD_PREP(GENMASK(7, 0), 0x4));
}
/* Set the ringbuffer address */
gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
/* Targets that support extended APRIV can use the RPTR shadow from * hardware but all the other ones need to disable the feature. Targets * that support the WHERE_AM_I opcode can use that instead
*/ if (adreno_gpu->base.hw_apriv)
gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT); else
gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
/* Configure the RPTR shadow if needed: */ if (a6xx_gpu->shadow_bo) {
gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
shadowptr(a6xx_gpu, gpu->rb[0])); for (unsignedint i = 0; i < gpu->nr_rings; i++)
a6xx_gpu->shadow[i] = 0;
}
/* ..which means "always" on A7xx, also for BV shadow */ if (adreno_is_a7xx(adreno_gpu)) {
gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR,
rbmemptr(gpu->rb[0], bv_rptr));
}
a6xx_preempt_hw_init(gpu);
/* Always come up on rb 0 */
a6xx_gpu->cur_ring = gpu->rb[0];
for (i = 0; i < gpu->nr_rings; i++)
gpu->rb[i]->cur_ctx_seqno = 0;
/* Enable the SQE_to start the CP engine */
gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) {
a7xx_patch_pwrup_reglist(gpu);
a6xx_gpu->pwrup_reglist_emitted = true;
}
ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu); if (ret) goto out;
/* * Try to load a zap shader into the secure world. If successful * we can use the CP to switch out of secure mode. If not then we * have no resource but to try to switch ourselves out manually. If we * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will * be blocked and a permissions violation will soon follow.
*/
ret = a6xx_zap_shader_init(gpu); if (!ret) {
OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
OUT_RING(gpu->rb[0], 0x00000000);
a6xx_flush(gpu, gpu->rb[0]); if (!a6xx_idle(gpu, gpu->rb[0])) return -EINVAL;
} elseif (ret == -ENODEV) { /* * This device does not use zap shader (but print a warning * just in case someone got their dt wrong.. hopefully they * have a debug UART to realize the error of their ways... * if you mess this up you are about to crash horribly)
*/
dev_warn_once(gpu->dev->dev, "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
ret = 0;
} else { return ret;
}
out: if (adreno_has_gmu_wrapper(adreno_gpu)) return ret;
/* Last step - yield the ringbuffer */
a7xx_preempt_start(gpu);
/* * Tell the GMU that we are done touching the GPU and it can start power * management
*/
a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
if (a6xx_gpu->gmu.legacy) { /* Take the GMU out of its special boot mode */
a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
}
/* * The source of the data depends on the mid ID read from FSYNR1. * and the client ID read from the UCHE block
*/
val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
if (adreno_is_a7xx(adreno_gpu)) { /* Bit 3 for mid=3 indicates BR or BV */ staticconstchar *uche_clients_a7xx[16] = { "BR_VFD", "BR_SP", "BR_VSC", "BR_VPC", "BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP", "BV_VFD", "BV_SP", "BV_VSC", "BV_VPC", "BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
};
/* LPAC has the same clients as BR and BV, but because it is * compute-only some of them do not exist and there are holes * in the array.
*/ staticconstchar *uche_clients_lpac_a7xx[8] = { "-", "LPAC_SP", "-", "-", "LPAC_HLSQ", "-", "-", "LPAC_TP",
};
val &= GENMASK(6, 0);
/* mid=3 refers to BR or BV */ if (mid == 3) { if (val < ARRAY_SIZE(uche_clients_a7xx)) return uche_clients_a7xx[val]; else return"UCHE";
}
/* mid=8 refers to LPAC */ if (mid == 8) { if (val < ARRAY_SIZE(uche_clients_lpac_a7xx)) return uche_clients_lpac_a7xx[val]; else return"UCHE_LPAC";
}
/* mid=2 is a catchall for everything else in LPAC */ if (mid == 2) return"UCHE_LPAC";
/* mid=1 is a catchall for everything else in BR/BV */ return"UCHE";
} elseif (adreno_is_a660_family(adreno_gpu)) { staticconstchar *uche_clients_a660[8] = { "VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", "TP",
};
if (id == 0) return"CP"; elseif (id == 4) return"CCU"; elseif (id == 6) return"CDP Prefetch"; elseif (id == 7) return"GMU"; elseif (id == 5 && adreno_is_a7xx(adreno_gpu)) return"Flag cache";
/* * If stalled on SMMU fault, we could trip the GPU's hang detection, * but the fault handler will trigger the devcore dump, and we want * to otherwise resume normally rather than killing the submit, so * just bail.
*/ if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT) return;
/* * Force the GPU to stay on until after we finish * collecting information
*/ if (!adreno_has_gmu_wrapper(adreno_gpu))
gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
DRM_DEV_ERROR(&gpu->pdev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
gpu_read(gpu, REG_A6XX_RBBM_STATUS),
gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
/* Turn off the hangcheck timer to keep it from bothering us */
timer_delete(&gpu->hangcheck_timer);
/* Turn off interrupts to avoid triggering recovery again */
gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0);
/* * Ignore FASTBLEND violations, because the HW will silently fall back * to legacy blending.
*/ if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
timer_delete(&gpu->hangcheck_timer);
/* On A660, the SCID programming for UCHE traffic is done in * A6XX_GBIF_SCACHE_CNTL0[14:10]
*/ if (adreno_is_a660_family(adreno_gpu))
gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
(1 << 8), (gpu_scid << 10) | (1 << 8));
}
/* * For targets with a MMU500, activate the slice but don't program the * register. The XBL will take care of that.
*/ if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) { if (!a6xx_gpu->have_mmu500) {
u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
/* * Program the slice IDs for the various GPU blocks and GPU MMU * pagetables
*/ if (!a6xx_gpu->have_mmu500) {
a6xx_llc_write(a6xx_gpu,
REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
/* * Program cacheability overrides to not allocate cache * lines on a write miss
*/
a6xx_llc_rmw(a6xx_gpu,
REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); return;
}
staticvoid a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
{ /* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */ if (adreno_has_gmu_wrapper(&a6xx_gpu->base)) return;
/* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */ if (adreno_has_gmu_wrapper(&a6xx_gpu->base)) return;
/* * There is a different programming path for A6xx targets with an * mmu500 attached, so detect if that is the case
*/
phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
a6xx_gpu->have_mmu500 = (phandle &&
of_device_is_compatible(phandle, "arm,mmu-500"));
of_node_put(phandle);
if (adreno_is_a750(adreno_gpu)) { /* * Assume that if qcom scm isn't available, that whatever * replacement allows writing the fuse register ourselves. * Users of alternative firmware need to make sure this * register is writeable or indicate that it's not somehow. * Print a warning because if you mess this up you're about to * crash horribly.
*/ if (!qcom_scm_is_available()) {
dev_warn_once(gpu->dev->dev, "SCM is not available, poking fuse register\n");
a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE,
A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND |
A7XX_CX_MISC_SW_FUSE_VALUE_LPAC);
adreno_gpu->has_ray_tracing = true; return 0;
}
ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ |
QCOM_SCM_GPU_TSENSE_EN_REQ); if (ret) return ret;
/* * On a750 raytracing may be disabled by the firmware, find out * whether that's the case. The scm call above sets the fuse * register.
*/
fuse_val = a6xx_llc_read(a6xx_gpu,
REG_A7XX_CX_MISC_SW_FUSE_VALUE);
adreno_gpu->has_ray_tracing =
!!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING);
} elseif (adreno_is_a740(adreno_gpu)) { /* Raytracing is always enabled on a740 */
adreno_gpu->has_ray_tracing = true;
}
if (gx_off) { /* Halt the gx side of GBIF */
gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
}
/* Halt new client requests on GBIF */
gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
(GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
/* Halt all AXI requests on GBIF */
gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
(GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
/* The GBIF halt needs to be explicitly cleared */
gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
}
/*
 * Assert or de-assert the GPU software reset line.
 *
 * @gpu:    the GPU being reset
 * @assert: true to assert the reset line, false to release it
 */
void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	/* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */
	if (adreno_is_a610(adreno_gpu))
		return;

	gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert);

	/* Read the register back and wait briefly to guarantee ordering. */
	gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD);
	udelay(1);

	/* The reset line needs to be asserted for at least 100 us */
	if (assert)
		udelay(100);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.