/**
 * DOC: Broadcom V3D scheduling
 *
 * The shared DRM GPU scheduler is used to coordinate submitting jobs
 * to the hardware. Each DRM fd (roughly a client process) gets its
 * own scheduler entity, which will process jobs in order. The GPU
 * scheduler will schedule the clients with a FIFO scheduling algorithm.
 *
 * For simplicity, and in order to keep latency low for interactive
 * jobs when bulk background jobs are queued up, we submit a new job
 * to the HW only when it has completed the last one, instead of
 * filling up the CT[01]Q FIFOs with jobs. Similarly, we use
 * `drm_sched_job_add_dependency()` to manage the dependency between bin
 * and render, instead of having the clients submit jobs using the HW's
 * semaphores to interlock between them.
 */
/* NOTE(review): fragment — the enclosing function definitions
 * (presumably v3d_job_start_stats() and v3d_job_update_stats()) are not
 * visible in this chunk; flags, queue, file, global_stats and now come
 * from the surrounding scope — confirm against the full file.
 */
/*
 * We only need to disable local interrupts to appease lockdep who
 * otherwise would think v3d_job_start_stats vs v3d_stats_update has an
 * unsafe in-irq vs no-irq-off usage problem. This is a false positive
 * because all the locks are per queue and stats type, and all jobs are
 * completely one at a time serialised. More specifically:
 *
 * 1. Locks for GPU queues are updated from interrupt handlers under a
 *    spin lock and started here with preemption disabled.
 *
 * 2. Locks for CPU queues are updated from the worker with preemption
 *    disabled and equally started here with preemption disabled.
 *
 * Therefore both are consistent.
 *
 * 3. Because next job can only be queued after the previous one has
 *    been signaled, and locks are per queue, there is also no scope for
 *    the start part to race with the update part.
 */
if (IS_ENABLED(CONFIG_LOCKDEP))
	local_irq_save(flags);
else
	preempt_disable();

/* See comment in v3d_job_start_stats() */
if (IS_ENABLED(CONFIG_LOCKDEP))
	local_irq_save(flags);
else
	preempt_disable();

/* Don't update the local stats if the file context has already closed */
if (file)
	v3d_stats_update(&file->stats[queue], now);
else
	drm_dbg(&v3d->drm, "The file descriptor was closed before job completion\n");

/* Global stats are always updated, even without a file context. */
v3d_stats_update(global_stats, now);

if (IS_ENABLED(CONFIG_LOCKDEP))
	local_irq_restore(flags);
else
	preempt_enable();
}
/* NOTE(review): fragment of the bin-job run path (presumably
 * v3d_bin_job_run()); job, v3d, fence and irqflags are declared outside
 * this chunk — confirm against the full file.
 */
/* Lock required around bin_job update vs
 * v3d_overflow_mem_work().
 */
spin_lock_irqsave(&v3d->job_lock, irqflags);
v3d->bin_job = job;
/* Clear out the overflow allocation, so we don't
 * reuse the overflow attached to a previous job.
 */
V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
spin_unlock_irqrestore(&v3d->job_lock, irqflags);

v3d_invalidate_caches(v3d);

fence = v3d_fence_create(v3d, V3D_BIN);
if (IS_ERR(fence))
	return NULL;

/* Drop the reference to the fence from any previous run before
 * taking a reference to the new one.
 */
if (job->base.irq_fence)
	dma_fence_put(job->base.irq_fence);
job->base.irq_fence = dma_fence_get(fence);

/* Set the current and end address of the control list.
 * Writing the end register is what starts the job.
 */
if (job->qma) {
	V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
	V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
}
if (job->qts) {
	V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
		       V3D_CLE_CT0QTS_ENABLE |
		       job->qts);
}
V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);
/* NOTE(review): fragment of the render-job run path (presumably
 * v3d_render_job_run()); job, v3d and fence come from the surrounding
 * scope — confirm against the full file.
 */
/* Bail out early if the scheduler fence already carries an error
 * (e.g. a dependency failed); don't kick the HW in that case.
 */
if (unlikely(job->base.base.s_fence->finished.error)) {
	v3d->render_job = NULL;
	return NULL;
}

v3d->render_job = job;

/* Can we avoid this flush? We need to be careful of
 * scheduling, though -- imagine job0 rendering to texture and
 * job1 reading, and them being executed as bin0, bin1,
 * render0, render1, so that render1's flush at bin time
 * wasn't enough.
 */
v3d_invalidate_caches(v3d);

fence = v3d_fence_create(v3d, V3D_RENDER);
if (IS_ERR(fence))
	return NULL;

/* Drop the reference to the fence from any previous run before
 * taking a reference to the new one.
 */
if (job->base.irq_fence)
	dma_fence_put(job->base.irq_fence);
job->base.irq_fence = dma_fence_get(fence);

/* Set the current and end address of the control list.
 * Writing the end register is what starts the job.
 */
V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);
/* NOTE(review): fragment of the CSD (compute shader dispatch) run path;
 * csd_cfg0_reg, i, job and v3d are declared outside this chunk —
 * confirm against the full file.
 */
csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver);
/* Write CFG1..CFG6; the queued config registers are laid out as
 * consecutive 32-bit registers after CFG0 (hence the 4-byte stride).
 */
for (i = 1; i <= 6; i++)
	V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]);
/* Although V3D 7.1 has an eighth configuration register, we are not
 * using it. Therefore, make sure it remains unused.
 *
 * XXX: Set the CFG7 register
 */
if (v3d->ver >= V3D_GEN_71)
	V3D_CORE_WRITE(0, V3D_V7_CSD_QUEUED_CFG7, 0);
/* CFG0 write kicks off the job. */
V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]);
/* Program the number of batches in CFG4.
 *
 * V3D 7.1.6 and later don't subtract 1 from the number of batches.
 * Use the V3D_GEN_71 constant for the version comparison, consistent
 * with the other version checks in this file, instead of the magic
 * number 71.
 */
if (v3d->ver < V3D_GEN_71 || (v3d->ver == V3D_GEN_71 && v3d->rev < 6))
	args->cfg[4] = num_batches - 1;
else
	args->cfg[4] = num_batches;

/* A zero batch count would have wrapped CFG4 to ~0 above. */
WARN_ON(args->cfg[4] == ~0);
/* Patch the three workgroup-count uniforms in place in the indirect
 * buffer. An offset of 0xffffffff is the sentinel meaning that slot
 * does not need a rewrite.
 */
for (int i = 0; i < 3; i++) {
	u32 uniform_idx = indirect_csd->wg_uniform_offsets[i];

	/* 0xffffffff indicates that the uniform rewrite is not needed */
	if (uniform_idx != 0xffffffff)
		((uint32_t *)indirect->vaddr)[uniform_idx] = wg_counts[i];
}
/* NOTE(review): fragment — this copy loop over timestamp queries is not
 * closed within the visible chunk; fence, available, write_result,
 * query_addr, data, copy and timestamp come from the surrounding scope.
 */
for (i = 0; i < timestamp_query->count; i++) {
	/* A query is considered available once its syncobj fence has
	 * signaled. NOTE(review): drm_syncobj_fence_get() returns a
	 * reference; presumably it is dropped later in the (not
	 * visible) remainder of the loop — verify.
	 */
	fence = drm_syncobj_fence_get(queries[i].syncobj);
	available = fence ? dma_fence_is_signaled(fence) : false;

	/* Partial copies write a result even when unavailable. */
	write_result = available || copy->do_partial;
	if (write_result) {
		query_addr = ((u8 *)timestamp->vaddr) + queries[i].offset;
		write_to_buffer(data, 0, copy->do_64bit, *((u64 *)query_addr));
	}

	if (copy->availability_bit)
		write_to_buffer(data, 1, copy->do_64bit, available ? 1u : 0u);
/* NOTE(review): fragment — this copy loop over performance queries is
 * not closed within the visible chunk; fence, available, write_result,
 * data, copy and job come from the surrounding scope.
 */
for (int i = 0; i < performance_query->count; i++) {
	/* A query is considered available once its syncobj fence has
	 * signaled.
	 */
	fence = drm_syncobj_fence_get(performance_query->queries[i].syncobj);
	available = fence ? dma_fence_is_signaled(fence) : false;

	/* Partial copies write a result even when unavailable. */
	write_result = available || copy->do_partial;
	if (write_result)
		v3d_write_performance_query_result(job, data, i);

	if (copy->availability_bit)
		write_to_buffer(data, performance_query->ncounters,
				copy->do_64bit, available ? 1u : 0u);
/* NOTE(review): fragment — these look like pieces of two separate
 * timeout handlers (a CL one keyed on CTCA/CTRA and a CSD one keyed on
 * the batch count); neither if-body is closed within the visible chunk.
 */
/* If the current address or return address have changed, then the GPU
 * has probably made progress and we should delay the reset. This
 * could fail if the GPU got in an infinite loop in the CL, but that
 * is pretty unlikely outside of an i-g-t testcase.
 */
if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
	*timedout_ctca = ctca;
	*timedout_ctra = ctra;
/* If we've made progress, skip reset, add the job to the pending
 * list, and let the timer get rearmed.
 */
if (job->timedout_batches != batches) {
	job->timedout_batches = batches;
/* NOTE(review): the following disclaimer is unrelated extraction
 * residue (originally in German) and does not belong in this source
 * file; translated for reference:
 *
 * "The information on this website has been carefully compiled to the
 * best of our knowledge. However, no guarantee is given as to the
 * completeness, correctness, or quality of the information provided.
 * Note: the colored syntax highlighting and the measurement are still
 * experimental."
 */