/*
 * panfrost_job_get_slot() - Pick the hardware job slot for a job.
 * @job: Job to be queued.
 *
 * JS0: fragment jobs.
 * JS1: vertex/tiler jobs
 * JS2: compute jobs
 *
 * Return: the job-slot index this job must be submitted on.
 */
int panfrost_job_get_slot(struct panfrost_job *job)
{
	/* Fragment jobs always go to slot 0. */
	if (job->requirements & PANFROST_JD_REQ_FS)
		return 0;

	/* Not exposed to userspace yet */
#if 0
	if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
		if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
		    (job->pfdev->features.nr_core_groups == 2))
			return 2;
		if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
			return 2;
	}
#endif

	/* Everything else (vertex/tiler) runs on slot 1. */
	return 1;
}
/* * Use all cores for now. * Eventually we may need to support tiler only jobs and h/w with * multiple (2) coherent core groups
*/
affinity = pfdev->features.shader_present;
/* start MMU, medium priority, cache clean/flush on end, clean/flush on
* start */
cfg |= JS_CONFIG_THREAD_PRI(8) |
JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
panfrost_get_job_chain_flag(job);
if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649))
cfg |= JS_CONFIG_START_MMU;
job_write(pfdev, JS_CONFIG_NEXT(js), cfg);
if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);
/* GO ! */
spin_lock(&pfdev->js->job_lock);
subslot = panfrost_enqueue_job(pfdev, js, job); /* Don't queue the job if a reset is in progress */ if (!atomic_read(&pfdev->reset.pending)) {
job->is_profiled = pfdev->profile_mode;
if (job->requirements & PANFROST_JD_REQ_CYCLE_COUNT ||
job->is_profiled)
panfrost_cycle_counter_get(pfdev);
if (job->is_profiled) {
job->start_time = ktime_get();
job->start_cycles = panfrost_cycle_counter_read(pfdev);
}
job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
dev_dbg(pfdev->dev, "JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
job, js, subslot, jc_head, cfg & 0xf);
}
spin_unlock(&pfdev->js->job_lock);
}
/*
 * panfrost_acquire_object_fences() - Prepare BO reservations for a job.
 * @bos: Array of GEM objects attached to the job.
 * @bo_count: Number of entries in @bos.
 * @job: Scheduler job the implicit dependencies are added to.
 *
 * Reserves one fence slot on each BO's reservation object and pulls the
 * BOs' implicit fences in as scheduler dependencies.
 *
 * Return: 0 on success or a negative error code on failure.
 */
static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct drm_sched_job *job)
{
	int i, ret;

	for (i = 0; i < bo_count; i++) {
		/* Make room for the fence we will attach at completion. */
		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
		if (ret)
			return ret;

		/* panfrost always uses write mode in its current uapi */
		ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
							      true);
		if (ret)
			return ret;
	}

	return 0;
}
/*
 * panfrost_attach_object_fences() - Attach a job's done fence to its BOs.
 * @bos: Array of GEM objects attached to the job.
 * @bo_count: Number of entries in @bos.
 * @fence: Fence to install on each BO's reservation object.
 *
 * Installed with DMA_RESV_USAGE_WRITE since panfrost's uapi treats every
 * job as a writer (see panfrost_acquire_object_fences()).
 */
static void panfrost_attach_object_fences(struct drm_gem_object **bos,
					  int bo_count,
					  struct dma_fence *fence)
{
	int i;

	for (i = 0; i < bo_count; i++)
		dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
}
int panfrost_job_push(struct panfrost_job *job)
{ struct panfrost_device *pfdev = job->pfdev; struct ww_acquire_ctx acquire_ctx; int ret = 0;
ret = drm_gem_lock_reservations(job->bos, job->bo_count,
&acquire_ctx); if (ret) return ret;
if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) { /* Update the job head so we can resume */
job->jc = job_read(pfdev, JS_TAIL_LO(js)) |
((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32);
/* The job will be resumed, don't signal the fence */
signal_fence = false;
} elseif (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) { /* Job has been hard-stopped, flag it as canceled */
dma_fence_set_error(job->done_fence, -ECANCELED);
job->jc = 0;
} elseif (panfrost_exception_is_fault(js_status)) { /* We might want to provide finer-grained error code based on * the exception type, but unconditionally setting to EINVAL * is good enough for now.
*/
dma_fence_set_error(job->done_fence, -EINVAL);
job->jc = 0;
}
if (signal_fence)
dma_fence_signal_locked(job->done_fence);
pm_runtime_put_autosuspend(pfdev->dev);
if (panfrost_exception_needs_reset(pfdev, js_status)) {
atomic_set(&pfdev->reset.pending, 1);
drm_sched_fault(&pfdev->js->queue[js].sched);
}
}
staticvoid panfrost_job_handle_done(struct panfrost_device *pfdev, struct panfrost_job *job)
{ /* Set ->jc to 0 to avoid re-submitting an already finished job (can * happen when we receive the DONE interrupt while doing a GPU reset).
*/
job->jc = 0;
panfrost_mmu_as_put(pfdev, job->mmu);
panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
/* First we collect all failed/done jobs. */ while (status) {
u32 js_state_mask = 0;
for (j = 0; j < NUM_JOB_SLOTS; j++) { if (status & MK_JS_MASK(j))
js_state_mask |= MK_JS_MASK(j);
if (status & JOB_INT_MASK_DONE(j)) { if (done[j][0])
done[j][1] = panfrost_dequeue_job(pfdev, j); else
done[j][0] = panfrost_dequeue_job(pfdev, j);
}
if (status & JOB_INT_MASK_ERR(j)) { /* Cancel the next submission. Will be submitted * after we're done handling this failure if * there's no reset pending.
*/
job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
failed[j] = panfrost_dequeue_job(pfdev, j);
}
}
/* JS_STATE is sampled when JOB_INT_CLEAR is written. * For each BIT(slot) or BIT(slot + 16) bit written to * JOB_INT_CLEAR, the corresponding bits in JS_STATE * (BIT(slot) and BIT(slot + 16)) are updated, but this * is racy. If we only have one job done at the time we * read JOB_INT_RAWSTAT but the second job fails before we * clear the status, we end up with a status containing * only the DONE bit and consider both jobs as DONE since * JS_STATE reports both NEXT and CURRENT as inactive. * To prevent that, let's repeat this clear+read steps * until status is 0.
*/
job_write(pfdev, JOB_INT_CLEAR, status);
js_state &= ~js_state_mask;
js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask;
js_events |= status;
status = job_read(pfdev, JOB_INT_RAWSTAT);
}
/* Then we handle the dequeued jobs. */ for (j = 0; j < NUM_JOB_SLOTS; j++) { if (!(js_events & MK_JS_MASK(j))) continue;
if (failed[j]) {
panfrost_job_handle_err(pfdev, failed[j], j);
} elseif (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) { /* When the current job doesn't fail, the JM dequeues * the next job without waiting for an ACK, this means * we can have 2 jobs dequeued and only catch the * interrupt when the second one is done. If both slots * are inactive, but one job remains in pfdev->jobs[j], * consider it done. Of course that doesn't apply if a * failure happened since we cancelled execution of the * job in _NEXT (see above).
*/ if (WARN_ON(!done[j][0]))
done[j][0] = panfrost_dequeue_job(pfdev, j); else
done[j][1] = panfrost_dequeue_job(pfdev, j);
}
for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++)
panfrost_job_handle_done(pfdev, done[j][i]);
}
/* And finally we requeue jobs that were waiting in the second slot * and have been stopped if we detected a failure on the first slot.
*/ for (j = 0; j < NUM_JOB_SLOTS; j++) { if (!(js_events & MK_JS_MASK(j))) continue;
if (!failed[j] || !pfdev->jobs[j][0]) continue;
if (pfdev->jobs[j][0]->jc == 0) { /* The job was cancelled, signal the fence now */ struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j);
dma_fence_set_error(canceled->done_fence, -ECANCELED);
panfrost_job_handle_done(pfdev, canceled);
} elseif (!atomic_read(&pfdev->reset.pending)) { /* Requeue the job we removed if no reset is pending */
job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START);
}
}
}
/*
 * panfrost_job_handle_irqs() - Drain and process pending job interrupts.
 * @pfdev: Panfrost device.
 *
 * Repeatedly reads JOB_INT_RAWSTAT and hands any raised bits to
 * panfrost_job_handle_irq() under the job lock, looping until no
 * interrupt source remains pending.
 */
static void panfrost_job_handle_irqs(struct panfrost_device *pfdev)
{
	u32 status = job_read(pfdev, JOB_INT_RAWSTAT);

	while (status) {
		pm_runtime_mark_last_busy(pfdev->dev);

		spin_lock(&pfdev->js->job_lock);
		panfrost_job_handle_irq(pfdev, status);
		spin_unlock(&pfdev->js->job_lock);

		/* New events may have been raised while we were handling
		 * the previous batch; keep going until the line is quiet.
		 */
		status = job_read(pfdev, JOB_INT_RAWSTAT);
	}
}
rawstat = job_read(pfdev, JOB_INT_RAWSTAT); if (rawstat) { unsignedint i;
for (i = 0; i < NUM_JOB_SLOTS; i++) { if (rawstat & MK_JS_MASK(i))
*js_state_mask &= ~MK_JS_MASK(i);
}
}
return js_state & *js_state_mask;
}
staticvoid
panfrost_reset(struct panfrost_device *pfdev, struct drm_sched_job *bad)
{
u32 js_state, js_state_mask = 0xffffffff; unsignedint i, j; bool cookie; int ret;
if (!atomic_read(&pfdev->reset.pending)) return;
/* Stop the schedulers. * * FIXME: We temporarily get out of the dma_fence_signalling section * because the cleanup path generate lockdep splats when taking locks * to release job resources. We should rework the code to follow this * pattern: * * try_lock * if (locked) * release * else * schedule_work_to_release_later
*/ for (i = 0; i < NUM_JOB_SLOTS; i++)
drm_sched_stop(&pfdev->js->queue[i].sched, bad);
cookie = dma_fence_begin_signalling();
if (bad)
drm_sched_increase_karma(bad);
/* Mask job interrupts and synchronize to make sure we won't be * interrupted during our reset.
*/
job_write(pfdev, JOB_INT_MASK, 0);
synchronize_irq(pfdev->js->irq);
for (i = 0; i < NUM_JOB_SLOTS; i++) { /* Cancel the next job and soft-stop the running job. */
job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP);
job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP);
}
/* Wait at most 10ms for soft-stops to complete */
ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state,
!panfrost_active_slots(pfdev, &js_state_mask, js_state),
10, 10000);
if (ret)
dev_err(pfdev->dev, "Soft-stop failed\n");
/* Handle the remaining interrupts before we reset. */
panfrost_job_handle_irqs(pfdev);
/* Remaining interrupts have been handled, but we might still have * stuck jobs. Let's make sure the PM counters stay balanced by * manually calling pm_runtime_put_noidle() and * panfrost_devfreq_record_idle() for each stuck job. * Let's also make sure the cycle counting register's refcnt is * kept balanced to prevent it from running forever
*/
spin_lock(&pfdev->js->job_lock); for (i = 0; i < NUM_JOB_SLOTS; i++) { for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) { if (pfdev->jobs[i][j]->requirements & PANFROST_JD_REQ_CYCLE_COUNT ||
pfdev->jobs[i][j]->is_profiled)
panfrost_cycle_counter_put(pfdev->jobs[i][j]->pfdev);
pm_runtime_put_noidle(pfdev->dev);
panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
}
}
memset(pfdev->jobs, 0, sizeof(pfdev->jobs));
spin_unlock(&pfdev->js->job_lock);
/* Proceed with reset now. */
panfrost_device_reset(pfdev);
/* panfrost_device_reset() unmasks job interrupts, but we want to * keep them masked a bit longer.
*/
job_write(pfdev, JOB_INT_MASK, 0);
/* GPU has been reset, we can clear the reset pending bit. */
atomic_set(&pfdev->reset.pending, 0);
/* Now resubmit jobs that were previously queued but didn't have a * chance to finish. * FIXME: We temporarily get out of the DMA fence signalling section * while resubmitting jobs because the job submission logic will * allocate memory with the GFP_KERNEL flag which can trigger memory * reclaim and exposes a lock ordering issue.
*/
dma_fence_end_signalling(cookie); for (i = 0; i < NUM_JOB_SLOTS; i++)
drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched);
cookie = dma_fence_begin_signalling();
/* Restart the schedulers */ for (i = 0; i < NUM_JOB_SLOTS; i++)
drm_sched_start(&pfdev->js->queue[i].sched, 0);
/* Re-enable job interrupts now that everything has been restarted. */
job_write(pfdev, JOB_INT_MASK,
GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
GENMASK(NUM_JOB_SLOTS - 1, 0));
/* * If the GPU managed to complete this jobs fence, the timeout has * fired before free-job worker. The timeout is spurious, so bail out.
*/ if (dma_fence_is_signaled(job->done_fence)) return DRM_GPU_SCHED_STAT_NO_HANG;
/* * Panfrost IRQ handler may take a long time to process an interrupt * if there is another IRQ handler hogging the processing. * For example, the HDMI encoder driver might be stuck in the IRQ * handler for a significant time in a case of bad cable connection. * In order to catch such cases and not report spurious Panfrost * job timeouts, synchronize the IRQ handler and re-check the fence * status.
*/
synchronize_irq(pfdev->js->irq);
if (dma_fence_is_signaled(job->done_fence)) {
dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n"); return DRM_GPU_SCHED_STAT_NO_HANG;
}
/* Enable interrupts only if we're not about to get suspended */ if (!test_bit(PANFROST_COMP_BIT_JOB, pfdev->is_suspended))
job_write(pfdev, JOB_INT_MASK,
GENMASK(16 + NUM_JOB_SLOTS - 1, 16) |
GENMASK(NUM_JOB_SLOTS - 1, 0));
/* All GPUs have two entries per queue, but without jobchain * disambiguation stopping the right job in the close path is tricky, * so let's just advertise one entry in that case.
*/ if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION))
args.credit_limit = 1;
for (i = 0; i < NUM_JOB_SLOTS; i++)
drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);
/* Kill in-flight jobs */
spin_lock(&pfdev->js->job_lock); for (i = 0; i < NUM_JOB_SLOTS; i++) { struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i]; int j;
if (j == 1) { /* Try to cancel the job before it starts */
job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP); /* Reset the job head so it doesn't get restarted if * the job in the first slot failed.
*/
job->jc = 0;
}
/* Jobs can outlive their file context */
job->engine_usage = NULL;
}
}
spin_unlock(&pfdev->js->job_lock);
}
int panfrost_job_is_idle(struct panfrost_device *pfdev)
{ struct panfrost_job_slot *js = pfdev->js; int i;
for (i = 0; i < NUM_JOB_SLOTS; i++) { /* If there are any jobs in the HW queue, we're not idle */ if (atomic_read(&js->queue[i].sched.credit_count)) returnfalse;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.