/* * Since we are waiting on a request, the GPU should be busy * and should have its own rpm reference.
*/
wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt); if (GEM_WARN_ON(!wakeref)) return;
/* * The breadcrumb irq will be disarmed on the interrupt after the * waiters are signaled. This gives us a single interrupt window in * which we can add a new waiter and avoid the cost of re-enabling * the irq.
*/
WRITE_ONCE(b->irq_armed, wakeref);
/* Requests may have completed before we could enable the interrupt. */ if (!b->irq_enabled++ && b->irq_enable(b))
irq_work_queue(&b->irq_work);
}
/*
 * Arm the breadcrumb interrupt if it is not already armed.
 *
 * Serialises against the irq worker (which disarms the interrupt) via
 * b->irq_lock; the actual arming is delegated to
 * __intel_breadcrumbs_arm_irq() under that lock.
 */
static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	/* Breadcrumbs without a physical engine never raise an interrupt. */
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}
signal = NULL; if (unlikely(!llist_empty(&b->signaled_requests)))
signal = llist_del_all(&b->signaled_requests);
/* * Keep the irq armed until the interrupt after all listeners are gone. * * Enabling/disabling the interrupt is rather costly, roughly a couple * of hundred microseconds. If we are proactive and enable/disable * the interrupt around every request that wants a breadcrumb, we * quickly drown in the extra orders of magnitude of latency imposed * on request submission. * * So we try to be lazy, and keep the interrupts enabled until no * more listeners appear within a breadcrumb interrupt interval (that * is until a request completes that no one cares about). The * observation is that listeners come in batches, and will often * listen to a bunch of requests in succession. Though note on icl+, * interrupts are always enabled due to concerns with rc6 being * dysfunctional with per-engine interrupt masking. * * We also try to avoid raising too many interrupts, as they may * be generated by userspace batches and it is unfortunately rather * too easy to drown the CPU under a flood of GPU interrupts. Thus * whenever no one appears to be listening, we turn off the interrupts. * Fewer interrupts should conserve power -- at the very least, fewer * interrupt draw less ire from other users of the system and tools * like powertop.
*/ if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
intel_breadcrumbs_disarm_irq(b);
if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
&rq->fence.flags)) break;
/* * Queue for execution after dropping the signaling * spinlock as the callback chain may end up adding * more signalers to the same context or engine.
*/
spin_lock(&ce->signal_lock);
list_del_rcu(&rq->signal_link);
release = remove_signaling_context(b, ce);
spin_unlock(&ce->signal_lock); if (release) { if (intel_timeline_is_last(ce->timeline, rq))
add_retire(b, ce->timeline);
intel_context_put(ce);
}
if (__dma_fence_signal(&rq->fence)) /* We own signal_node now, xfer to local list */
signal = slist_add(&rq->signal_node, signal); else
i915_request_put(rq);
}
}
atomic_dec(&b->signaler_active);
rcu_read_unlock();
/* Lazy irq enabling after HW submission */ if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
intel_breadcrumbs_arm_irq(b);
/* And confirm that we still want irqs enabled before we yield */ if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active))
intel_breadcrumbs_disarm_irq(b);
}
if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) return;
/* * If the request is already completed, we can transfer it * straight onto a signaled list, and queue the irq worker for * its signal completion.
*/ if (__i915_request_is_complete(rq)) {
irq_signal_request(rq, b); return;
}
if (list_empty(&ce->signals)) {
intel_context_get(ce);
add_signaling_context(b, ce);
pos = &ce->signals;
} else { /* * We keep the seqno in retirement order, so we can break * inside intel_engine_signal_breadcrumbs as soon as we've * passed the last completed request (or seen a request that * hasn't event started). We could walk the timeline->requests, * but keeping a separate signalers_list has the advantage of * hopefully being much smaller than the full list and so * provides faster iteration and detection when there are no * more interrupts required for this context. * * We typically expect to add new signalers in order, so we * start looking for our insertion point from the tail of * the list.
*/
list_for_each_prev(pos, &ce->signals) { struct i915_request *it =
list_entry(pos, typeof(*it), signal_link);
if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno)) break;
}
}
/* * Defer enabling the interrupt to after HW submission and recheck * the request as it may have completed and raised the interrupt as * we were attaching it into the lists.
*/ if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq))
irq_work_queue(&b->irq_work);
}
/* Serialises with i915_request_retire() using rq->lock */ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) returntrue;
/* * Peek at i915_request_submit()/i915_request_unsubmit() status. * * If the request is not yet active (and not signaled), we will * attach the breadcrumb later.
*/ if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) returntrue;
spin_lock(&ce->signal_lock); if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
insert_breadcrumb(rq);
spin_unlock(&ce->signal_lock);
/*
 * NOTE(review): the following German text is a web-scraping artifact
 * (a website disclaimer) accidentally appended to this source chunk;
 * it is not part of the driver. Wrapped in a comment so the file stays
 * compilable. Translation: "The information on this website has been
 * carefully compiled to the best of our knowledge. However, no
 * guarantee is given as to the completeness, correctness, or quality
 * of the information provided. Note: the coloured syntax highlighting
 * and the measurement are still experimental."
 *
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen
 * sorgfaeltig zusammengestellt. Es wird jedoch weder Vollstaendigkeit,
 * noch Richtigkeit, noch Qualitaet der bereitgestellten Informationen
 * zugesichert.
 * Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind noch
 * experimentell.
 */