/*
 * Rough estimate of the typical request size: performing a flush,
 * set-context and then emitting the batch.
 */
#define LEGACY_REQUEST_SIZE 200
/*
 * NOTE(review): corrupted extraction. set_hwstam() is truncated -- after
 * masking the render interrupt, the text splices into an unrelated
 * fragment that selects a per-engine hardware status page register
 * ('hwsp' is not declared in this scope, and the function never closes).
 * Also note the fused tokens 'staticvoid' / 'elseif', which cannot
 * compile as-is. Recover the original from upstream before editing.
 */
staticvoid set_hwstam(struct intel_engine_cs *engine, u32 mask)
{
/*
 * Keep the render interrupt unmasked as this papers over
 * lost interrupts following a reset.
 */
if (engine->class == RENDER_CLASS) { if (GRAPHICS_VER(engine->i915) >= 6)
mask &= ~BIT(0); else
mask &= ~I915_USER_INTERRUPT;
}
/*
 * NOTE(review): splice point -- everything below belongs to a different
 * function (status-page register selection), not to set_hwstam().
 *
 * The ring status page addresses are no longer next to the rest of
 * the ring registers as of gen7.
 */
if (GRAPHICS_VER(engine->i915) == 7) { switch (engine->id) {
/*
 * No more rings exist on Gen7. Default case is only to shut up
 * gcc switch check warning.
 */
default:
GEM_BUG_ON(engine->id);
fallthrough; case RCS0:
hwsp = RENDER_HWS_PGA_GEN7; break; case BCS0:
hwsp = BLT_HWS_PGA_GEN7; break; case VCS0:
hwsp = BSD_HWS_PGA_GEN7; break; case VECS0:
hwsp = VEBOX_HWS_PGA_GEN7; break;
}
} elseif (GRAPHICS_VER(engine->i915) == 6) {
hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
} else {
hwsp = RING_HWS_PGA(engine->mmio_base);
}
/*
 * flush_cs_tlb - force a command streamer TLB invalidation (gen6/gen7 only).
 * @engine: engine whose CS TLB should be flushed
 *
 * Requests a TLB invalidate + sync flush through RING_INSTPM and then
 * polls the register until the SYNC_FLUSH bit self-clears (2ms budget).
 * Warns if the ring is not idle beforehand; traces (but does not fail)
 * on poll timeout. A no-op outside graphics versions 6-7.
 */
static void flush_cs_tlb(struct intel_engine_cs *engine)
{
	if (!IS_GRAPHICS_VER(engine->i915, 6, 7))
		return;

	/* ring should be idle before issuing a sync flush */
	if ((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0)
		drm_warn(&engine->i915->drm, "%s not idle before sync flush!\n",
			 engine->name);

	ENGINE_WRITE_FW(engine, RING_INSTPM,
			_MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					   INSTPM_SYNC_FLUSH));
	if (__intel_wait_for_register_fw(engine->uncore,
					 RING_INSTPM(engine->mmio_base),
					 INSTPM_SYNC_FLUSH, 0,
					 2000, 0, NULL))
		ENGINE_TRACE(engine, "wait for SyncFlush to complete for TLB invalidation timed out\n");
}
/*
 * NOTE(review): orphaned fragment -- this is the tail of a function whose
 * opening is not visible in this chunk (the extra closing brace below
 * ends that enclosing function). On graphics version >= 7 it enables
 * PPGTT mode via the GEN7 ring mode register.
 */
if (GRAPHICS_VER(engine->i915) >= 7) {
ENGINE_WRITE_FW(engine,
RING_MODE_GEN7,
_MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}
}
/*
 * NOTE(review): corrupted extraction. stop_ring() is truncated -- after
 * clearing RING_HEAD/RING_TAIL the text splices into the middle of a
 * resume routine (status-page setup, RING_START programming, RING_HEAD
 * handshake, RING_VALID wait), complete with 'goto err' targets and a
 * 'return 0' that do not belong in a bool-returning stop_ring().
 * Note the fused token 'staticbool'. Recover the original before editing.
 */
staticbool stop_ring(struct intel_engine_cs *engine)
{
/* Empty the ring by skipping to the end */
ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL));
ENGINE_POSTING_READ(engine, RING_HEAD);
/* The ring must be empty before it is disabled */
ENGINE_WRITE_FW(engine, RING_CTL, 0);
ENGINE_POSTING_READ(engine, RING_CTL);
/* Then reset the disabled ring */
ENGINE_WRITE_FW(engine, RING_HEAD, 0);
ENGINE_WRITE_FW(engine, RING_TAIL, 0);
/*
 * NOTE(review): splice point -- the remainder is from a resume routine,
 * not stop_ring(). 'ring', 'kt' and the 'err' label are not declared here.
 *
 * Double check the ring is empty & disabled before we resume. Called
 * from atomic context during PCI probe, so _hardirq().
 */
intel_synchronize_hardirq(engine->i915); if (!stop_ring(engine)) goto err;
if (HWS_NEEDS_PHYSICAL(engine->i915))
ring_setup_phys_status_page(engine); else
ring_setup_status_page(engine);
intel_breadcrumbs_reset(engine->breadcrumbs);
/* Enforce ordering by reading HEAD register back */
ENGINE_POSTING_READ(engine, RING_HEAD);
/*
 * Initialize the ring. This must happen _after_ we've cleared the ring
 * registers with the above sequence (the readback of the HEAD registers
 * also enforces ordering), otherwise the hw might lose the new ring
 * register values.
 */
ENGINE_WRITE_FW(engine, RING_START, i915_ggtt_offset(ring->vma));
/* Check that the ring offsets point within the ring! */
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
intel_ring_update_space(ring);
set_pp_dir(engine);
/*
 * First wake the ring up to an empty/idle ring.
 * Use 50ms of delay to let the engine write successfully
 * for all platforms. Experimented with different values and
 * determined that 50ms works best based on testing.
 */
for ((kt) = ktime_get() + (50 * NSEC_PER_MSEC);
ktime_before(ktime_get(), (kt)); cpu_relax()) {
/*
 * In case of resets fails because engine resumes from
 * incorrect RING_HEAD and then GPU may be then fed
 * to invalid instructions, which may lead to unrecoverable
 * hang. So at first write doesn't succeed then try again.
 */
ENGINE_WRITE_FW(engine, RING_HEAD, ring->head); if (ENGINE_READ_FW(engine, RING_HEAD) == ring->head) break;
}
/* If the head is still not zero, the ring is dead */
if (__intel_wait_for_register_fw(engine->uncore,
RING_CTL(engine->mmio_base),
RING_VALID, RING_VALID,
5000, 0, NULL)) {
ENGINE_TRACE(engine, "failed to restart\n"); goto err;
}
/* Now awake, let it get started */
if (ring->tail != ring->head) {
ENGINE_WRITE_FW(engine, RING_TAIL, ring->tail);
ENGINE_POSTING_READ(engine, RING_TAIL);
}
/* Papering over lost _interrupts_ immediately following the restart */
intel_engine_signal_breadcrumbs(engine); return 0;
/*
 * xcs_sanitize - scrub engine state that may be stale after suspend/resume.
 * @engine: engine to sanitize
 *
 * Poisons the status page (debug builds only), rewrites the kernel
 * context HWSP value, flushes the corresponding cachelines, and resets
 * the engine's pinned contexts.
 */
static void xcs_sanitize(struct intel_engine_cs *engine)
{
	/*
	 * Poison residual state on resume, in case the suspend didn't!
	 *
	 * We have to assume that across suspend/resume (or other loss
	 * of control) that the contents of our pinned buffers has been
	 * lost, replaced by garbage. Since this doesn't always happen,
	 * let's poison such state so that we more quickly spot when
	 * we falsely assume it has been preserved.
	 */
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);

	/*
	 * The kernel_context HWSP is stored in the status_page. As above,
	 * that may be lost on resume/initialisation, and so we need to
	 * reset the value in the HWSP.
	 */
	sanitize_hwsp(engine);

	/* And scrub the dirty cachelines for the HWSP */
	drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);

	intel_engine_reset_pinned_contexts(engine);
}
/*
 * reset_prepare - quiesce an engine before a GPU reset is issued.
 * @engine: engine about to be reset
 *
 * We stop engines, otherwise we might get failed reset and a
 * dead gpu (on elk). Also as modern gpu as kbl can suffer
 * from system hang if batchbuffer is progressing when
 * the reset is issued, regardless of READY_TO_RESET ack.
 * Thus assume it is best to stop engines on all gens
 * where we have a gpu reset.
 *
 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
 *
 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
 * WaClearRingBufHeadRegAtInit:ctg,elk
 *
 * FIXME: Wa for more modern gens needs to be validated
 */
static void reset_prepare(struct intel_engine_cs *engine)
{
	ENGINE_TRACE(engine, "\n");
	intel_engine_stop_cs(engine);

	if (!stop_ring(engine)) {
		/* G45 ring initialization often fails to reset head to zero */
		ENGINE_TRACE(engine,
			     "HEAD not reset to zero, "
			     "{ CTL:%08x, HEAD:%08x, TAIL:%08x, START:%08x }\n",
			     ENGINE_READ_FW(engine, RING_CTL),
			     ENGINE_READ_FW(engine, RING_HEAD),
			     ENGINE_READ_FW(engine, RING_TAIL),
			     ENGINE_READ_FW(engine, RING_START));
		/*
		 * Sometimes engine head failed to set to zero even after writing into it.
		 * Use wait_for_atomic() with 20ms delay to let engine resumes from
		 * correct RING_HEAD. Experimented different values and determined
		 * that 20ms works best based on testing.
		 */
		if (wait_for_atomic((!stop_ring(engine) == 0), 20)) {
			drm_err(&engine->i915->drm,
				"failed to set %s head to zero "
				"ctl %08x head %08x tail %08x start %08x\n",
				engine->name,
				ENGINE_READ_FW(engine, RING_CTL),
				ENGINE_READ_FW(engine, RING_HEAD),
				ENGINE_READ_FW(engine, RING_TAIL),
				ENGINE_READ_FW(engine, RING_START));
		}
	}
}
/*
 * NOTE(review): orphaned fragment of an engine-reset path -- no enclosing
 * function signature is visible in this chunk. It rewinds the legacy ring
 * to the head of the hung request (or the current tail when there is
 * none), marks all submitted requests as skipped, and signals breadcrumbs.
 *
 * The guilty request will get skipped on a hung engine.
 *
 * Users of client default contexts do not rely on logical
 * state preserved between batches so it is safe to execute
 * queued requests following the hang. Non default contexts
 * rely on preserved state, so skipping a batch loses the
 * evolution of the state and it needs to be considered corrupted.
 * Executing more queued batches on top of corrupted state is
 * risky. But we take the risk by trying to advance through
 * the queued requests in order to make the client behaviour
 * more predictable around resets, by not throwing away random
 * amount of batches it has prepared for execution. Sophisticated
 * clients can use gem_reset_stats_ioctl and dma fence status
 * (exported via sync_file info ioctl on explicit fences) to observe
 * when it loses the context state and should rebuild accordingly.
 *
 * The context ban, and ultimately the client ban, mechanism are safety
 * valves if client submission ends up resulting in nothing more than
 * subsequent hangs.
 */
if (rq) {
/*
 * Try to restore the logical GPU state to match the
 * continuation of the request queue. If we skip the
 * context/PD restore, then the next request may try to execute
 * assuming that its context is valid and loaded on the GPU and
 * so may try to access invalid memory, prompting repeated GPU
 * hangs.
 *
 * If the request was guilty, we still restore the logical
 * state in case the next request requires it (e.g. the
 * aliasing ppgtt), but skip over the hung batch.
 *
 * If the request was innocent, we try to replay the request
 * with the restored context.
 */
__i915_request_reset(rq, stalled);
GEM_BUG_ON(rq->ring != engine->legacy.ring);
head = rq->head;
} else {
head = engine->legacy.ring->tail;
}
engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);
/* Mark all submitted requests as skipped. */
list_for_each_entry(request, &engine->sched_engine->requests, sched.link)
i915_request_put(i915_request_mark_eio(request));
intel_engine_signal_breadcrumbs(engine);
/* Remaining _unready_ requests will be nop'ed when submitted */
/*
 * NOTE(review): corrupted extraction. i9xx_submit_request() is truncated
 * after the wmb() -- its RING_TAIL write is missing -- and the text then
 * splices into fragments of at least two other functions: a context
 * backing-object allocation (shmem object + IVB L3/LLC cache hint) and
 * the tail of a gen6/gen7 PSMI/semaphore programming loop. Note the
 * fused tokens 'staticvoid' / 'elseif'. Recover the originals upstream.
 */
staticvoid i9xx_submit_request(struct i915_request *request)
{
i915_request_submit(request);
wmb(); /* paranoid flush writes out of the WCB before mmio */
/* NOTE(review): splice point -- context-object allocation fragment
 * ('obj', 'i915', 'engine' are not declared in this scope).
 */
obj = i915_gem_object_create_shmem(i915, engine->context_size); if (IS_ERR(obj)) return ERR_CAST(obj);
/*
 * Try to make the context utilize L3 as well as LLC.
 *
 * On VLV we don't have L3 controls in the PTEs so we
 * shouldn't touch the cache level, especially as that
 * would make the object snooped which might have a
 * negative performance impact.
 *
 * Snooping is required on non-llc platforms in execlist
 * mode, but since all GGTT accesses use PAT entry 0 we
 * get snooping anyway regardless of cache_level.
 *
 * This is only applicable for Ivy Bridge devices since
 * later platforms don't have L3 control bits in the PTE.
 */
if (IS_IVYBRIDGE(i915))
i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
/* NOTE(review): splice point -- PSMI register emission fragment. */
*cs++ = i915_mmio_reg_offset(
RING_PSMI_CTL(signaller->mmio_base));
*cs++ = _MASKED_BIT_ENABLE(
GEN6_PSMI_SLEEP_MSG_DISABLE);
}
}
} elseif (GRAPHICS_VER(i915) == 5) {
/*
 * This w/a is only listed for pre-production ilk a/b steppings,
 * but is also mentioned for programming the powerctx. To be
 * safe, just apply the workaround; we do not use SyncFlush so
 * this should never take effect and so be a no-op!
 */
*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
}
/*
 * NOTE(review): orphaned fragments of the MI_SET_CONTEXT emission path
 * and, after the gen7 branch, the tail of a per-slice L3-log remap
 * emitter ending with '#undef L3LOG_DW' (fused onto a code line -- a
 * preprocessor directive cannot follow a statement on the same line).
 * No enclosing function signatures are visible; structure is not
 * trustworthy. Recover the originals upstream before editing.
 */
if (force_restore) {
/*
 * The HW doesn't handle being told to restore the current
 * context very well. Quite often it likes goes to go off and
 * sulk, especially when it is meant to be reloading PP_DIR.
 * A very simple fix to force the reload is to simply switch
 * away from the current context and back again.
 *
 * Note that the kernel_context will contain random state
 * following the INHIBIT_RESTORE. We accept this since we
 * never use the kernel_context state; it is merely a
 * placeholder we use to flush other contexts.
 */
*cs++ = MI_SET_CONTEXT;
*cs++ = i915_ggtt_offset(engine->kernel_context->state) |
MI_MM_SPACE_GTT |
MI_RESTORE_INHIBIT;
}
*cs++ = MI_NOOP;
*cs++ = MI_SET_CONTEXT;
*cs++ = i915_ggtt_offset(ce->state) | flags;
/*
 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 * WaMiSetContext_Hang:snb,ivb,vlv
 */
*cs++ = MI_NOOP;
if (GRAPHICS_VER(i915) == 7) { if (num_engines) { struct intel_engine_cs *signaller;
i915_reg_t last_reg = INVALID_MMIO_REG; /* keep gcc quiet */
/*
 * NOTE(review): splice point -- the lines below are the tail of an L3
 * log remap emitter, not a continuation of the gen7 branch above.
 *
 * Note: We do not worry about the concurrent register cacheline hang
 * here because no other code should access these registers other than
 * at initialization time.
 */
*cs++ = MI_LOAD_REGISTER_IMM(L3LOG_DW); for (i = 0; i < L3LOG_DW; i++) {
*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
*cs++ = remap_info[i];
}
*cs++ = MI_NOOP;
intel_ring_advance(rq, cs);
return 0; #undef L3LOG_DW
}
/*
 * remap_l3 - emit L3 log remapping for every dirty slice of a context.
 * @rq: request to emit the remap commands into
 *
 * Walks the GEM context's remap_slice bitmask and calls remap_l3_slice()
 * for each set bit, clearing the mask once all slices are emitted.
 *
 * Return: 0 on success (including when there is no GEM context or no
 * dirty slice), or the error propagated from remap_l3_slice().
 */
static int remap_l3(struct i915_request *rq)
{
	struct i915_gem_context *ctx = i915_request_gem_context(rq);
	int i, err;

	if (!ctx || !ctx->remap_slice)
		return 0;

	for (i = 0; i < MAX_L3_SLICES; i++) {
		if (!(ctx->remap_slice & BIT(i)))
			continue;

		err = remap_l3_slice(rq, i);
		if (err)
			return err;
	}

	ctx->remap_slice = 0;
	return 0;
}
/*
 * NOTE(review): corrupted extraction. switch_mm() is truncated after
 * load_pd_dir() -- its final flush and closing brace are missing -- and
 * the text splices into the body of a context-switch routine (residuals
 * workaround, recursive switch_mm() call, MI_SET_CONTEXT, L3 remap).
 * 'engine', 'ce' and 'residuals' are not declared in this scope. Note
 * the fused token 'staticint'. Recover the original before editing.
 */
staticint switch_mm(struct i915_request *rq, struct i915_address_space *vm)
{
int ret;
if (!vm) return 0;
ret = rq->engine->emit_flush(rq, EMIT_FLUSH); if (ret) return ret;
/*
 * Not only do we need a full barrier (post-sync write) after
 * invalidating the TLBs, but we need to wait a little bit
 * longer. Whether this is merely delaying us, or the
 * subsequent flush is a key part of serialising with the
 * post-sync op, this extra pass appears vital before a
 * mm switch!
 */
ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G); if (ret) return ret;
/*
 * NOTE(review): splice point -- the remainder belongs to a
 * context-switch routine, not to switch_mm().
 */
if (engine->wa_ctx.vma && ce != engine->kernel_context) { if (engine->wa_ctx.vma->private != ce &&
i915_mitigate_clear_residuals()) {
ret = clear_residuals(rq); if (ret) return ret;
residuals = &engine->wa_ctx.vma->private;
}
}
ret = switch_mm(rq, vm_alias(ce->vm)); if (ret) return ret;
if (ce->state) {
u32 flags;
GEM_BUG_ON(engine->id != RCS0);
/* For resource streamer on HSW+ and power context elsewhere */
BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);
ret = mi_set_context(rq, ce, flags); if (ret) return ret;
}
ret = remap_l3(rq); if (ret) return ret;
/*
 * Now past the point of no return, this request _will_ be emitted.
 *
 * Or at least this preamble will be emitted, the request may be
 * interrupted prior to submitting the user payload. If so, we
 * still submit the "empty" request in order to preserve global
 * state tracking such as this, our tracking of the current
 * dirty context.
 */
if (residuals) {
intel_context_put(*residuals);
*residuals = intel_context_get(ce);
}
return 0;
}
/*
 * NOTE(review): corrupted extraction. ring_request_alloc() is truncated
 * after switch_context() -- the reserved_space release and return are
 * missing -- and the text splices into the body of the gen6 BSD
 * tail-update workaround ('uncore' is not declared here) and then into
 * engine setup code assigning breadcrumb/flush vfuncs. Note the fused
 * token 'staticint'. Recover the originals upstream before editing.
 */
staticint ring_request_alloc(struct i915_request *request)
{
int ret;
/*
 * Flush enough space to reduce the likelihood of waiting after
 * we start building the request - in which case we will just
 * have to repeat work.
 */
request->reserved_space += LEGACY_REQUEST_SIZE;
/* Unconditionally invalidate GPU caches and TLBs. */
ret = request->engine->emit_flush(request, EMIT_INVALIDATE); if (ret) return ret;
ret = switch_context(request); if (ret) return ret;
/*
 * NOTE(review): splice point -- gen6 BSD tail-update workaround below.
 *
 * Every tail move must follow the sequence below.
 *
 * Disable notification that the ring is IDLE. The GT
 * will then assume that it is busy and bring it out of rc6.
 */
intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
_MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
/* Clear the context id. Here be magic! */
intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);
/* Wait for the ring not to be idle, i.e. for it to wake up. */
if (__intel_wait_for_register_fw(uncore,
RING_PSMI_CTL(GEN6_BSD_RING_BASE),
GEN6_BSD_SLEEP_INDICATOR,
0,
1000, 0, NULL))
drm_err(&uncore->i915->drm, "timed out waiting for the BSD ring to wake up\n");
/* Now that the ring is fully powered up, update the tail */
i9xx_submit_request(request);
/*
 * Let the ring send IDLE messages to the GT again,
 * and so let it sleep to conserve power when idle.
 */
intel_uncore_write_fw(uncore, RING_PSMI_CTL(GEN6_BSD_RING_BASE),
_MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
/*
 * NOTE(review): splice point -- engine vfunc setup fragment below.
 *
 * Using a global execution timeline; the previous final breadcrumb is
 * equivalent to our next initial bread so we can elide
 * engine->emit_init_breadcrumb().
 */
engine->emit_fini_breadcrumb = gen2_emit_breadcrumb; if (GRAPHICS_VER(i915) == 5)
engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
if (GRAPHICS_VER(i915) >= 6) {
/* gen6 bsd needs a special wa for tail updates */
if (GRAPHICS_VER(i915) == 6)
engine->set_default_submission = gen6_bsd_set_default_submission;
engine->emit_flush = gen6_emit_flush_vcs;
engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
/*
 * NOTE(review): the following text is extraction residue from a German
 * web page (a generic content/accuracy disclaimer plus a remark that the
 * syntax colouring and measurement are still experimental). It is not
 * part of this source file; it is preserved verbatim but fenced off as a
 * comment so it cannot be parsed as code:
 *
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen
 * sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit,
 * noch Richtigkeit, noch Qualität der bereit gestellten Informationen
 * zugesichert.
 * Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind noch
 * experimentell.
 */