/*
 * NOTE(review): incomplete fragment.  This reads like the interior of a
 * wait-for-reset helper in the i915 execlists selftests: it spins until a
 * deliberately hung request has been reset (fence error -EIO) and then
 * completes.  The enclosing function signature is not visible, the opening
 * "do {" has no visible matching "while", and "bool done" is never used in
 * the visible text — all symptoms of extraction damage; confirm against
 * the upstream selftest before reuse.
 */
/* Turn the caller's relative timeout into an absolute jiffies deadline. */
timeout += jiffies; do { bool done = time_after(jiffies, timeout);
if (i915_request_completed(rq)) /* that was quick! */ return 0;
/* Wait until the HW has acknowledged the submission (or err) */
intel_engine_flush_submission(engine); if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq)) return 0;
/*
 * Busy-wait (yielding via cond_resched) until the request completes, its
 * fence carries an error, or the deadline expires.  A non-empty
 * pending[] means the submission tasklet still has work outstanding.
 */
do {
cond_resched();
intel_engine_flush_submission(engine);
if (READ_ONCE(engine->execlists.pending[0])) continue;
if (i915_request_completed(rq)) break;
if (READ_ONCE(rq->fence.error)) break;
} while (time_before(jiffies, timeout));
/* A reset of the hanging request must mark its fence with -EIO. */
if (rq->fence.error != -EIO) {
pr_err("%s: hanging request %llx:%lld not reset\n",
engine->name,
rq->fence.context,
rq->fence.seqno); return -EINVAL;
}
/* Give the request a jiffy to complete after flushing the worker */ if (i915_request_wait(rq, 0,
max(0l, (long)(timeout - jiffies)) + 1) < 0) {
pr_err("%s: hanging request %llx:%lld did not complete\n",
engine->name,
rq->fence.context,
rq->fence.seqno); return -ETIME;
}
/*
 * NOTE(review): incomplete fragment — appears to be part of a
 * live_unlite_* selftest: a second context is pinned and its ring is
 * poisoned so that an incorrect lite-restore would execute garbage; the
 * request pair is then either ordered or preempted.  The surrounding
 * setup/teardown and the branch taken when prio != 0 are not fully
 * visible here.
 */
err = intel_context_pin(tmp); if (err) {
intel_context_put(tmp); goto err_ce;
}
/*
 * Setup the pair of contexts such that if we lite-restore using the
 * RING_TAIL from ce[1] it will execute garbage from ce[0]->ring.
 */
memset(tmp->ring->vaddr,
POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
tmp->ring->vma->size);
if (!prio) {
/*
 * Ensure we do the switch to ce[1] on completion.
 *
 * rq[0] is already submitted, so this should reduce to a no-op
 * (a wait on a request on the same engine uses the submit fence,
 * not the completion fence), but it will install a dependency on
 * rq[1] for rq[0] that will prevent the pair being reordered by
 * timeslicing.
 */
i915_request_await_dma_fence(rq[1], &rq[0]->fence);
}
/* Alternatively preempt the spinner with ce[1] */
engine->sched_engine->schedule(rq[1], &attr);
}
/*
 * NOTE(review): incomplete fragment — ring-wrap setup resembling
 * live_unlite_ring.  The nop request 'tmp' in the fill loop is declared
 * and passed to i915_request_add() but never created in the visible
 * text; its i915_request_create() call was apparently dropped by the
 * extraction — confirm against upstream.
 */
/* And switch back to ce[0] for good measure */
rq[0] = i915_request_create(ce[0]); if (IS_ERR(rq[0])) {
err = PTR_ERR(rq[0]);
i915_request_put(rq[1]); goto err_ce;
}
/*
 * Setup a preemption event that will cause almost the entire ring to be
 * unwound, potentially fooling our intel_ring_direction() into emitting
 * a forward lite-restore instead of the rollback.
 */
/* Fill the ring, until we will cause a wrap */
n = 0; while (intel_ring_direction(ce[0]->ring,
rq->wa_tail,
ce[0]->ring->tail) <= 0) { struct i915_request *tmp;
i915_request_add(tmp);
intel_engine_flush_submission(engine);
n++;
}
intel_engine_flush_submission(engine);
pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
engine->name, n,
ce[0]->ring->size,
ce[0]->ring->tail,
ce[0]->ring->emit,
rq->tail);
/* Sanity check: the ring tail must now have advanced past rq->tail. */
GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
rq->tail,
ce[0]->ring->tail) <= 0);
i915_request_put(rq);
/* Create a second ring to preempt the first ring after rq[0] */
rq = intel_context_create_request(ce[1]); if (IS_ERR(rq)) {
err = PTR_ERR(rq); goto err_ce;
}
/*
 * NOTE(review): incomplete fragment — deliberate ring-state sabotage
 * test.  The enclosing loop header, the declarations of 'ring'/'rq'/'t',
 * and the error unwinding after the trailing break are not visible here.
 */
/*
 * We have to be careful not to trust intel_ring too much, for example
 * ring->head is updated upon retire which is out of sync with pinning
 * the context.  Thus we cannot use ring->head to set CTX_RING_HEAD,
 * or else we risk writing an older, stale value.
 *
 * To simulate this, let's apply a bit of deliberate sabotage.
 */
ce = intel_context_create(engine); if (IS_ERR(ce)) {
err = PTR_ERR(ce); break;
}
err = intel_context_pin(ce); if (err) {
intel_context_put(ce); break;
}
/* Keep the context awake while we play games */
err = i915_active_acquire(&ce->active); if (err) {
intel_context_unpin(ce);
intel_context_put(ce); break;
}
ring = ce->ring;
/* Poison the ring, and offset the next request from HEAD */
memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
ring->emit = ring->size / 2;
ring->tail = ring->emit;
GEM_BUG_ON(ring->head);
intel_context_unpin(ce);
/* Submit a simple nop request */
GEM_BUG_ON(intel_context_is_pinned(ce));
rq = intel_context_create_request(ce);
i915_active_release(&ce->active); /* e.g. async retire */
intel_context_put(ce); if (IS_ERR(rq)) {
err = PTR_ERR(rq); break;
}
/* The request must start half-way into the poisoned ring, not at 0. */
GEM_BUG_ON(!rq->head);
i915_request_add(rq);
/* Expect not to hang! */ if (igt_live_test_end(&t)) {
err = -EIO; break;
}
}
/*
 * NOTE(review): incomplete fragment — hold/unhold-after-reset check
 * (resembles live_hold_reset).  The reset-and-hold setup that precedes
 * this, and the enclosing engine loop, are missing from this extraction.
 */
/*
 * In order to support offline error capture for fast preempt reset, we
 * need to decouple the guilty request and ensure that it and its
 * descendents are not executed while the capture is in progress.
 */
/* Check that we do not resubmit the held request */ if (!i915_request_wait(rq, 0, HZ / 5)) {
pr_err("%s: on hold request completed!\n",
engine->name);
i915_request_put(rq);
err = -EIO; goto out;
}
GEM_BUG_ON(!i915_request_on_hold(rq));
/* But is resubmitted on release */
execlists_unhold(engine, rq); if (i915_request_wait(rq, 0, HZ / 5) < 0) {
pr_err("%s: held request did not complete!\n",
engine->name);
intel_gt_set_wedged(gt);
err = -ETIME;
}
i915_request_put(rq);
out:
st_engine_heartbeat_enable(engine);
intel_context_put(ce); if (err) break;
}
/*
 * NOTE(review): mangled fragment of live_error_interrupt.  The tokens
 * "staticint" and "staticconststruct" below are run together, and large
 * parts of the function body (the for_each_engine loop header, the
 * client[] request creation for each phase, and declarations of
 * err/i/p/client) are missing — this looks like extraction damage, not
 * intentional code; the code text is preserved byte-for-byte here.
 */
staticint live_error_interrupt(void *arg)
{ staticconststruct error_phase { enum { GOOD = 0, BAD = -EIO } error[2];
} phases[] = {
{ { BAD, GOOD } },
{ { BAD, BAD } },
{ { BAD, GOOD } },
{ { GOOD, GOOD } }, /* sentinel */
}; struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id;
/*
 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning of
 * invalid commands in user batches that will cause a GPU hang.  This is
 * a faster mechanism than using hangcheck/heartbeats, but only detects
 * problems the HW knows about -- it will not warn when we kill the HW!
 *
 * To verify our detection and reset, we throw some invalid commands at
 * the HW and wait for the interrupt.
 */
err = wait_for_submit(engine, client[0], HZ / 2); if (err) {
pr_err("%s: first request did not start within time!\n",
engine->name);
err = -ETIME; goto out;
}
/* Each client must have started and then reported the expected error. */
for (i = 0; i < ARRAY_SIZE(client); i++) { if (i915_request_wait(client[i], 0, HZ / 5) < 0)
pr_debug("%s: %s request incomplete!\n",
engine->name,
error_repr(p->error[i]));
if (!i915_request_started(client[i])) {
pr_err("%s: %s request not started!\n",
engine->name,
error_repr(p->error[i]));
err = -ETIME; goto out;
}
/* Kick the tasklet to process the error */
intel_engine_flush_submission(engine); if (client[i]->fence.error != p->error[i]) {
pr_err("%s: %s request (%s) with wrong error code: %d\n",
engine->name,
error_repr(p->error[i]),
i915_request_completed(client[i]) ? "completed" : "running",
client[i]->fence.error);
err = -EINVAL; goto out;
}
}
out: for (i = 0; i < ARRAY_SIZE(client); i++) if (client[i])
i915_request_put(client[i]); if (err) {
pr_err("%s: failed at phase[%zd] { %d, %d }\n",
engine->name, p - phases,
p->error[0], p->error[1]); break;
}
}
st_engine_heartbeat_enable(engine); if (err) {
intel_gt_set_wedged(gt); return err;
}
}
/*
 * NOTE(review): incomplete fragment — timeslice-rewind test (resembles
 * live_timeslice_rewind).  "unsignedlong" below is a run-together token
 * from extraction damage, and the setup of rq[], 'slot' and the
 * semaphore batch is missing; code text preserved byte-for-byte.
 */
/*
 * If a request takes too long, we would like to give other users a fair
 * go on the GPU.  In particular, users may create batches that wait upon
 * external input, where that input may even be supplied by another GPU
 * job.  To avoid blocking forever, we need to preempt the current task
 * and replace it with another ready task.
 */ if (!CONFIG_DRM_I915_TIMESLICE_DURATION) return 0;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj);
/*
 * The usual presumption on timeslice expiration is that we replace the
 * active context with another.  However, given a chain of dependencies
 * we may end up with replacing the context with itself, but only a few
 * of those requests, forcing us to rewind the RING_TAIL of the original
 * request.
 */ if (!CONFIG_DRM_I915_TIMESLICE_DURATION) return 0;
for_each_engine(engine, gt, id) { enum { A1, A2, B1 }; enum { X = 1, Z, Y }; struct i915_request *rq[3] = {}; struct intel_context *ce; unsignedlong timeslice; int i, err = 0;
u32 *slot;
if (!intel_engine_has_timeslices(engine)) continue;
/*
 * A:rq1 -- semaphore wait, timestamp X
 * A:rq2 -- write timestamp Y
 *
 * B:rq1 [await A:rq1] -- write timestamp Z
 *
 * Force timeslice, release semaphore.
 *
 * Expect execution/evaluation order XZY
 */
err = wait_for_submit(engine, rq[B1], HZ / 2); if (err) {
pr_err("%s: failed to submit second context\n",
engine->name); goto err;
}
/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
ENGINE_TRACE(engine, "forcing tasklet for rewind\n"); while (i915_request_is_active(rq[A2])) { /* semaphore yield! */ /* Wait for the timeslice to kick in */
timer_delete(&engine->execlists.timer);
tasklet_hi_schedule(&engine->sched_engine->tasklet);
intel_engine_flush_submission(engine);
} /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
GEM_BUG_ON(!i915_request_is_active(rq[A1]));
GEM_BUG_ON(!i915_request_is_active(rq[B1]));
GEM_BUG_ON(i915_request_is_active(rq[A2]));
/* Release the hounds! */
slot[0] = 1;
wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
/* Poll each result slot until the GPU writes it, or we time out. */
for (i = 1; i <= 3; i++) { unsignedlong timeout = jiffies + HZ / 2;
while (!READ_ONCE(slot[i]) &&
time_before(jiffies, timeout))
;
if (!time_before(jiffies, timeout)) {
pr_err("%s: rq[%d] timed out\n",
engine->name, i - 1);
err = -ETIME; goto err;
}
pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
}
/* XZY: XZ < XY */ if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
engine->name,
slot[Z] - slot[X],
slot[Y] - slot[X]);
err = -EINVAL;
}
err:
memset32(&slot[0], -1, 4);
wmb();
engine->props.timeslice_duration_ms = timeslice;
st_engine_heartbeat_enable(engine); for (i = 0; i < 3; i++)
i915_request_put(rq[i]); if (igt_flush_test(gt->i915))
err = -EIO; if (err) return err;
}
/*
 * NOTE(review): incomplete fragment — timeslice-into-queue check spliced
 * with what looks like part of a no-preempt test.  The error path after
 * intel_engine_dump() below appears truncated (no closing brace / err
 * assignment visible before the next comment) — extraction damage;
 * confirm against upstream.
 */
/*
 * Make sure that even if ELSP[0] and ELSP[1] are filled with timeslicing
 * between them disabled, we *do* enable timeslicing if the queue demands
 * it.  (Normally, we do not submit if ELSP[1] is already occupied, so
 * must rely on timeslicing to eject ELSP[0] in favour of the queue.)
 */ if (!CONFIG_DRM_I915_TIMESLICE_DURATION) return 0;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj);
/* Queue: semaphore signal, matching priority as semaphore */
err = release_queue(engine, vma, 1, effective_prio(rq)); if (err) goto err_rq;
/* Wait until we ack the release_queue and start timeslicing */ do {
cond_resched();
intel_engine_flush_submission(engine);
} while (READ_ONCE(engine->execlists.pending[0]));
/* Timeslice every jiffy, so within 2 we should signal */ if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) { struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
pr_err("%s: Failed to timeslice into queue\n",
engine->name);
intel_engine_dump(engine, &p, "%s\n", engine->name);
/*
 * Wait until the barrier is in ELSP, and we know timeslicing will have
 * been activated.
 */ if (wait_for_submit(engine, rq, HZ / 2)) {
i915_request_put(rq);
err = -ETIME; goto out_spin;
}
/*
 * Since the ELSP[0] request is unpreemptible, it should not allow the
 * maximum priority barrier through.  Wait long enough to see if it is
 * timesliced in by mistake.
 */ if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
engine->name);
err = -EINVAL;
}
i915_request_put(rq);
out_spin:
igt_spinner_end(&spin);
out_heartbeat:
xchg(&engine->props.timeslice_duration_ms, timeslice);
st_engine_heartbeat_enable(engine); if (err) break;
if (igt_flush_test(gt->i915)) {
err = -EIO; break;
}
}
/*
 * NOTE(review): incomplete fragment — pieces of several preemption
 * selftests (busywait, preempt-suppression, in-flight cancellation)
 * spliced together; the request setup between the visible sections is
 * missing from this extraction.
 */
/*
 * We create two requests.  The low priority request busywaits on a
 * semaphore (inside the ringbuffer where it should be preemptible) and
 * the high priority request uses a MI_STORE_DWORD_IMM to update the
 * semaphore value allowing the first request to complete.  If preemption
 * fails, we hang instead.
 */
lo = igt_request_alloc(ctx_lo, engine); if (IS_ERR(lo)) {
err = PTR_ERR(lo); goto err_vma;
}
/* B is much more important than A! (But A is unpreemptable.) */
GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
/* Wait long enough for preemption and timeslicing */ if (igt_wait_for_spinner(&b.spin, rq_b)) {
pr_err("Second client started too early!\n"); goto err_wedged;
}
igt_spinner_end(&a.spin);
if (!igt_wait_for_spinner(&b.spin, rq_b)) {
pr_err("Second client failed to start\n"); goto err_wedged;
}
igt_spinner_end(&b.spin);
/* No preemption event should have fired while A was unpreemptable. */
if (engine->execlists.preempt_hang.count) {
pr_err("Preemption recorded x%d; should have been suppressed!\n",
engine->execlists.preempt_hang.count);
err = -EINVAL; goto err_wedged;
}
/* Full ELSP and one in the wings */
GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); if (igt_live_test_begin(&t, arg->engine->i915,
__func__, arg->engine->name)) return -EIO;
rq[0] = spinner_create_request(&arg->a.spin,
arg->a.ctx, arg->engine,
MI_ARB_CHECK); if (IS_ERR(rq[0])) return PTR_ERR(rq[0]);
/* A cancelled in-flight request must report -EIO on its fence. */
if (rq[0]->fence.error != -EIO) {
pr_err("Cancelled inflight0 request did not report -EIO\n");
err = -EINVAL; goto out;
}
/*
 * The behavior between having semaphores and not is different.  With
 * semaphores the subsequent request is on the hardware and not cancelled
 * while without the request is held in the driver and cancelled.
 */ if (intel_engine_has_semaphores(rq[1]->engine) &&
rq[1]->fence.error != 0) {
pr_err("Normal inflight1 request did not complete\n");
err = -EINVAL; goto out;
}
if (rq[2]->fence.error != -EIO) {
pr_err("Cancelled queued request did not report -EIO\n");
err = -EINVAL; goto out;
}
/*
 * NOTE(review): incomplete fragment — preempt-to-idle skip check plus
 * the tail of what resembles live_preempt_ring.  The loop bodies between
 * the visible sections are missing.  The token ">->uc" below is mangled;
 * upstream reads "&gt->uc" (address of the gt's uc member) — code text
 * preserved byte-for-byte here, confirm against upstream before use.
 */
/*
 * Verify that if a preemption request does not cause a change in the
 * current execution order, the preempt-to-idle injection is skipped and
 * that we do not accidentally apply it after the CS completion event.
 */
if (intel_uc_uses_guc_submission(>->uc)) return 0; /* presume black box */
if (intel_vgpu_active(gt->i915)) return 0; /* GVT forces single port & request submission */
if (preempt_client_init(gt, &a)) return -ENOMEM; if (preempt_client_init(gt, &b)) goto err_client_a;
for_each_engine(engine, gt, id) { struct i915_request *rq_a, *rq_b; int depth;
if (!intel_engine_has_preemption(engine)) continue;
if (engine->execlists.preempt_hang.count) {
pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
engine->name,
engine->execlists.preempt_hang.count,
depth);
st_engine_heartbeat_enable(engine);
err = -EINVAL; goto err_client_b;
}
st_engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) goto err_wedged;
}
/*
 * Build a chain AB...BA between two contexts (A, B) and request
 * preemption of the last request.  It should then complete before the
 * previously submitted spinner in B.
 */
if (preempt_client_init(gt, &hi)) return -ENOMEM;
if (preempt_client_init(gt, &lo)) goto err_client_hi;
/*
 * Check that we rollback large chunks of a ring in order to do a
 * preemption event.  Similar to live_unlite_ring, but looking at ring
 * size rather than the impact of intel_ring_direction().
 */
if (igt_spinner_init(&spin, gt)) return -ENOMEM;
for_each_engine(engine, gt, id) { int n;
if (!intel_engine_has_preemption(engine)) continue;
if (!intel_engine_can_store_dword(engine)) continue;
st_engine_heartbeat_disable(engine);
/* Probe several split points (quarters) of the 4K ring. */
for (n = 0; n <= 3; n++) {
err = __live_preempt_ring(engine, &spin,
n * SZ_4K / 4, SZ_4K); if (err) break;
}
st_engine_heartbeat_enable(engine); if (err) break;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.