/* * TLB inval depends on pending commands in the CT queue and then the real * invalidation time. Double up the time to process full CT queue * just to be on the safe side.
*/ staticlong tlb_timeout_jiffies(struct xe_gt *gt)
{ /* this reflects what HW/GuC needs to process TLB inv request */ constlong hw_tlb_timeout = HZ / 4;
/* this estimates actual delay caused by the CTB transport */ long delay = xe_guc_ct_queue_proc_time_jiffies(>->uc.guc.ct);
return hw_tlb_timeout + 2 * delay;
}
/*
 * Release the runtime-PM reference held by an initialized fence and mark it
 * finalized. Safe against double-fini: a NULL gt means fini() already ran.
 */
static void
xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
{
	if (WARN_ON_ONCE(!fence->gt))
		return;

	xe_pm_runtime_put(gt_to_xe(fence->gt));
	fence->gt = NULL; /* fini() should be called once */
}
/** * xe_gt_tlb_invalidation_init_early - Initialize GT TLB invalidation state * @gt: GT structure * * Initialize GT TLB invalidation state, purely software initialization, should * be called once during driver load. * * Return: 0 on success, negative error code on error.
*/ int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt)
{
gt->tlb_invalidation.seqno = 1;
INIT_LIST_HEAD(>->tlb_invalidation.pending_fences);
spin_lock_init(>->tlb_invalidation.pending_lock);
spin_lock_init(>->tlb_invalidation.lock);
INIT_DELAYED_WORK(>->tlb_invalidation.fence_tdr,
xe_gt_tlb_fence_timeout);
return 0;
}
/** * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset * @gt: GT structure * * Signal any pending invalidation fences, should be called during a GT reset
*/ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
{ struct xe_gt_tlb_invalidation_fence *fence, *next; int pending_seqno;
/* * we can get here before the CTs are even initialized if we're wedging * very early, in which case there are not going to be any pending * fences so we can bail immediately.
*/ if (!xe_guc_ct_initialized(>->uc.guc.ct)) return;
/* * CT channel is already disabled at this point. No new TLB requests can * appear.
*/
mutex_lock(>->uc.guc.ct.lock);
spin_lock_irq(>->tlb_invalidation.pending_lock);
cancel_delayed_work(>->tlb_invalidation.fence_tdr); /* * We might have various kworkers waiting for TLB flushes to complete * which are not tracked with an explicit TLB fence, however at this * stage that will never happen since the CT is already disabled, so * make sure we signal them here under the assumption that we have * completed a full GT reset.
*/ if (gt->tlb_invalidation.seqno == 1)
pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1; else
pending_seqno = gt->tlb_invalidation.seqno - 1;
WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
/**
 * xe_gt_tlb_invalidation_fini - Clean up GT TLB invalidation state
 * @gt: GT structure
 *
 * Cancel pending fence workers and clean up any additional
 * GT TLB invalidation state.
 */
void xe_gt_tlb_invalidation_fini(struct xe_gt *gt)
{
	xe_gt_tlb_invalidation_reset(gt);
}
staticbool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
{ int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv);
if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2)) returnfalse;
if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2)) returntrue;
return seqno_recv >= seqno;
}
/*
 * Assign the next seqno to @fence, patch it into @action[1], and send the
 * invalidation request over the GuC CT channel. On success the fence is
 * either signalled immediately (if its seqno already raced past) or queued
 * on the pending list; on CT send failure the fence is signalled with the
 * error. Returns 0 on success, negative error code on error.
 */
static int send_tlb_invalidation(struct xe_guc *guc,
				 struct xe_gt_tlb_invalidation_fence *fence,
				 u32 *action, int len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_device *xe = gt_to_xe(gt);
	int seqno;
	int ret;

	xe_gt_assert(gt, fence);

	/*
	 * XXX: The seqno algorithm relies on TLB invalidation being processed
	 * in order which they currently are, if that changes the algorithm will
	 * need to be updated.
	 */

	mutex_lock(&guc->ct.lock);
	seqno = gt->tlb_invalidation.seqno;
	fence->seqno = seqno;
	trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
	action[1] = seqno;
	ret = xe_guc_ct_send_locked(&guc->ct, action, len,
				    G2H_LEN_DW_TLB_INVALIDATE, 1);
	if (!ret) {
		spin_lock_irq(&gt->tlb_invalidation.pending_lock);
		/*
		 * We haven't actually published the TLB fence as per
		 * pending_fences, but in theory our seqno could have already
		 * been written as we acquired the pending_lock. In such a case
		 * we can just go ahead and signal the fence here.
		 */
		if (tlb_invalidation_seqno_past(gt, seqno)) {
			__invalidation_fence_signal(xe, fence);
		} else {
			fence->invalidation_time = ktime_get();
			list_add_tail(&fence->link,
				      &gt->tlb_invalidation.pending_fences);

			/*
			 * NOTE(review): the visible source was cut off
			 * mid-branch here, leaking both locks and never
			 * advancing the seqno. The remainder is reconstructed
			 * from the upstream driver — confirm against the
			 * canonical file.
			 */
			if (list_is_singular(&gt->tlb_invalidation.pending_fences))
				queue_delayed_work(system_wq,
						   &gt->tlb_invalidation.fence_tdr,
						   tlb_timeout_jiffies(gt));
		}
		spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
	} else {
		/* CT send failed: signal the fence with the error */
		__invalidation_fence_signal(xe, fence);
	}
	if (!ret) {
		/* Advance the seqno, wrapping and skipping the reserved 0 */
		gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
			TLB_INVALIDATION_SEQNO_MAX;
		if (!gt->tlb_invalidation.seqno)
			gt->tlb_invalidation.seqno = 1;
	}
	mutex_unlock(&guc->ct.lock);

	return ret;
}
/** * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC * @gt: GT structure * @fence: invalidation fence which will be signal on TLB invalidation * completion * * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and * caller can use the invalidation fence to wait for completion. * * Return: 0 on success, negative error code on error
*/ staticint xe_gt_tlb_invalidation_guc(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence)
{
u32 action[] = {
XE_GUC_ACTION_TLB_INVALIDATION,
0, /* seqno, replaced in send_tlb_invalidation */
MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
}; int ret;
ret = send_tlb_invalidation(>->uc.guc, fence, action,
ARRAY_SIZE(action)); /* * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches * should be nuked on a GT reset so this error can be ignored.
*/ if (ret == -ECANCELED) return 0;
return ret;
}
/** * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT * @gt: GT structure * * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is * synchronous. * * Return: 0 on success, negative error code on error
*/ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
{ struct xe_device *xe = gt_to_xe(gt); unsignedint fw_ref;
if (xe_guc_ct_enabled(>->uc.guc.ct) &&
gt->uc.guc.submission_state.enabled) { struct xe_gt_tlb_invalidation_fence fence; int ret;
xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
ret = xe_gt_tlb_invalidation_guc(gt, &fence); if (ret) return ret;
/** * xe_gt_tlb_invalidation_all - Invalidate all TLBs across PF and all VFs. * @gt: the &xe_gt structure * @fence: the &xe_gt_tlb_invalidation_fence to be signaled on completion * * Send a request to invalidate all TLBs across PF and all VFs. * * Return: 0 on success, negative error code on error
*/ int xe_gt_tlb_invalidation_all(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence)
{ int err;
/* * Ensure that roundup_pow_of_two(length) doesn't overflow. * Note that roundup_pow_of_two() operates on unsigned long, * not on u64.
*/ #define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))
/**
 * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
 * address range
 *
 * @gt: GT structure
 * @fence: invalidation fence which will be signal on TLB invalidation
 * completion
 * @start: start address
 * @end: end address
 * @asid: address space id
 *
 * Issue a range based TLB invalidation if supported, if not fallback to a full
 * TLB invalidation. Completion of TLB is asynchronous and caller can use
 * the invalidation fence to wait for completion.
 *
 * Return: Negative error code on error, 0 on success
 */
int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
				 struct xe_gt_tlb_invalidation_fence *fence,
				 u64 start, u64 end, u32 asid)
{
	struct xe_device *xe = gt_to_xe(gt);
#define MAX_TLB_INVALIDATION_LEN	7
	u32 action[MAX_TLB_INVALIDATION_LEN];
	u64 length = end - start;
	int len = 0;

	xe_gt_assert(gt, fence);

	/* Execlists not supported */
	if (gt_to_xe(gt)->info.force_execlist) {
		__invalidation_fence_signal(xe, fence);
		return 0;
	}

	/*
	 * NOTE(review): the action-building and send logic below was missing
	 * from the truncated source (which used 'align' and 'orig_start'
	 * without declaring them); reconstructed from the upstream driver —
	 * confirm against the canonical file.
	 */
	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
	action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
	if (!xe->info.has_range_tlb_invalidation ||
	    length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
	} else {
		u64 orig_start = start;
		u64 align;

		if (length < SZ_4K)
			length = SZ_4K;

		/*
		 * We need to invalidate a higher granularity if start address
		 * is not aligned to length. When start is not aligned with
		 * length we need to find the length large enough to create an
		 * address mask covering the required range.
		 */
		align = roundup_pow_of_two(length);
		start = ALIGN_DOWN(start, align);
		end = ALIGN(end, align);
		length = align;
		while (start + length < end) {
			length <<= 1;
			start = ALIGN_DOWN(orig_start, length);
		}

		/*
		 * Minimum invalidation size for a 2MB page that the hardware
		 * expects is 16MB
		 */
		if (length >= SZ_2M) {
			length = max_t(u64, SZ_16M, length);
			start = ALIGN_DOWN(orig_start, length);
		}

		xe_gt_assert(gt, length >= SZ_4K);
		xe_gt_assert(gt, is_power_of_2(length));
		xe_gt_assert(gt, IS_ALIGNED(start, length));

		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
		action[len++] = asid;
		action[len++] = lower_32_bits(start);
		action[len++] = upper_32_bits(start);
		action[len++] = ilog2(length) - ilog2(SZ_4K);
	}

	xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);

	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
}
/** * xe_gt_tlb_invalidation_vm - Issue a TLB invalidation on this GT for a VM * @gt: graphics tile * @vm: VM to invalidate * * Invalidate entire VM's address space
*/ void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm)
{ struct xe_gt_tlb_invalidation_fence fence;
u64 range = 1ull << vm->xe->info.va_bits; int ret;
ret = xe_gt_tlb_invalidation_range(gt, &fence, 0, range, vm->usm.asid); if (ret < 0) return;
xe_gt_tlb_invalidation_fence_wait(&fence);
}
/**
 * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
 * @guc: guc
 * @msg: message indicating TLB invalidation done
 * @len: length of message
 *
 * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any
 * invalidation fences for seqno. Algorithm for this depends on seqno being
 * received in-order and asserts this assumption.
 *
 * Return: 0 on success, -EPROTO for malformed messages.
 */
int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_gt_tlb_invalidation_fence *fence, *next;
	unsigned long flags;

	if (unlikely(len != 1))
		return -EPROTO;

	/*
	 * This can also be run both directly from the IRQ handler and also in
	 * process_g2h_msg(). Only one may process any individual CT message,
	 * however the order they are processed here could result in skipping a
	 * seqno. To handle that we just process all the seqnos from the last
	 * seqno_recv up to and including the one in msg[0]. The delta should be
	 * very small so there shouldn't be much of pending_fences we actually
	 * need to iterate over here.
	 *
	 * From GuC POV we expect the seqnos to always appear in-order, so if we
	 * see something later in the timeline we can be sure that anything
	 * appearing earlier has already signalled, just that we have yet to
	 * officially process the CT message like if racing against
	 * process_g2h_msg().
	 */
	spin_lock_irqsave(&gt->tlb_invalidation.pending_lock, flags);
	if (tlb_invalidation_seqno_past(gt, msg[0])) {
		spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
		return 0;
	}

	/*
	 * NOTE(review): the remainder of this handler was missing from the
	 * truncated source (leaking the spinlock and never signalling any
	 * fence); reconstructed from the upstream driver — confirm against
	 * the canonical file.
	 */
	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);

	list_for_each_entry_safe(fence, next,
				 &gt->tlb_invalidation.pending_fences, link) {
		trace_xe_gt_tlb_invalidation_fence_recv(xe, fence);

		/* Pending list is in seqno order; stop at the first unseen */
		if (!tlb_invalidation_seqno_past(gt, fence->seqno))
			break;

		invalidation_fence_signal(xe, fence);
	}

	/* Re-arm or cancel the timeout worker based on remaining fences */
	if (!list_empty(&gt->tlb_invalidation.pending_fences))
		mod_delayed_work(system_wq,
				 &gt->tlb_invalidation.fence_tdr,
				 tlb_timeout_jiffies(gt));
	else
		cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);

	spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);

	return 0;
}
/*
 * NOTE(review): the text below is stray web-page residue (a German site
 * disclaimer), not driver source; preserved here in English translation:
 * "The information on this website has been carefully compiled to the best
 * of our knowledge. However, neither the completeness, correctness, nor
 * quality of the information provided is guaranteed.
 * Note: the colored syntax highlighting and the measurement are still
 * experimental."
 */