/**
 * DOC: GuC CTB Blob
 *
 * We allocate single blob to hold both CTB descriptors and buffers:
 *
 *      +--------+-----------------------------------------------+------+
 *      | offset | contents                                      | size |
 *      +========+===============================================+======+
 *      | 0x0000 | H2G CTB Descriptor (send)                     |      |
 *      +--------+-----------------------------------------------+  4K  |
 *      | 0x0800 | G2H CTB Descriptor (g2h)                      |      |
 *      +--------+-----------------------------------------------+------+
 *      | 0x1000 | H2G CT Buffer (send)                          | n*4K |
 *      |        |                                               |      |
 *      +--------+-----------------------------------------------+------+
 *      | 0x1000 | G2H CT Buffer (g2h)                           | m*4K |
 *      | + n*4K |                                               |      |
 *      +--------+-----------------------------------------------+------+
 *
 * Size of each ``CT Buffer`` must be multiple of 4K.
 * We don't expect too many messages in flight at any time, unless we are
 * using the GuC submission. In that case each request requires a minimum
 * 2 dwords which gives us a maximum 256 queue'd requests. Hopefully this
 * is enough space to avoid backpressure on the driver. We increase the
 * size of the receive buffer (relative to the send) to ensure a G2H
 * response CTB has a landing spot.
 *
 * In addition to submissions, the G2H buffer needs to be able to hold
 * enough space for recoverable page fault notifications. The number of
 * page faults is interrupt driven and can be as much as the number of
 * compute resources available. However, most of the actual work for these
 * is in a separate page fault worker thread. Therefore we only need to
 * make sure the queue has enough space to handle all of the submissions
 * and responses and an extra buffer for incoming page faults.
 */
/**
 * xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full
 * CT command queue
 * @ct: the &xe_guc_ct. Unused at this moment but will be used in the future.
 *
 * Observation is that a 4KiB buffer full of commands takes a little over a
 * second to process. Use that to calculate maximum time to process a full CT
 * command queue.
 *
 * Return: Maximum time to process a full CT queue in jiffies.
 */
long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct)
{
	/*
	 * The ~1s-per-4KiB estimate below only divides evenly if the buffer
	 * is a whole number of 4KiB pages, so check 4K alignment at build
	 * time (SZ_4 would be trivially true and catch nothing).
	 */
	BUILD_BUG_ON(!IS_ALIGNED(CTB_H2G_BUFFER_SIZE, SZ_4K));
	return (CTB_H2G_BUFFER_SIZE / SZ_4K) * HZ;
}
/** * xe_guc_ct_init_post_hwconfig - Reinitialize the GuC CTB in VRAM * @ct: the &xe_guc_ct * * Allocate a new BO in VRAM and free the previous BO that was allocated * in system memory (SMEM). Applicable only for DGFX products. * * Return: 0 on success, or a negative errno on failure.
*/ int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct)
{ struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); struct xe_tile *tile = gt_to_tile(gt); int ret;
xe_assert(xe, !xe_guc_ct_enabled(ct));
if (IS_DGFX(xe)) {
ret = xe_managed_bo_reinit_in_vram(xe, tile, &ct->bo); if (ret) return ret;
}
/* make sure guc_ct_send_recv() will see g2h_fence changes */
smp_mb();
wake_up_all(&ct->g2h_fence_wq);
/* * Lockdep doesn't like this under the fast lock and he destroy only * needs to be serialized with the send path which ct lock provides.
*/
xa_destroy(&ct->fence_lookup);
err = guc_ct_ctb_h2g_register(ct); if (err) goto err_out;
err = guc_ct_ctb_g2h_register(ct); if (err) goto err_out;
err = guc_ct_control_toggle(ct, true); if (err) goto err_out;
guc_ct_change_state(ct, XE_GUC_CT_STATE_ENABLED);
smp_mb();
wake_up_all(&ct->wq);
if (ct_needs_safe_mode(ct))
ct_enter_safe_mode(ct);
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) /* * The CT has now been reset so the dumper can be re-armed * after any existing dead state has been dumped.
*/
spin_lock_irq(&ct->dead.lock); if (ct->dead.reason) {
ct->dead.reason |= (1 << CT_DEAD_STATE_REARM);
queue_work(system_unbound_wq, &ct->dead.worker);
}
spin_unlock_irq(&ct->dead.lock); #endif
/**
 * xe_guc_ct_disable - Set GuC to disabled state
 * @ct: the &xe_guc_ct
 *
 * Set GuC CT to disabled state and stop g2h handler. No outstanding g2h expected
 * in this transition.
 */
void xe_guc_ct_disable(struct xe_guc_ct *ct)
{
	/* Move the CT channel into the disabled state */
	guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED);
	/* Leave safe mode, if it was previously entered */
	ct_exit_safe_mode(ct);
	/* Stop the G2H handler; no outstanding G2H are expected at this point */
	stop_g2h_handler(ct);
}
/** * xe_guc_ct_stop - Set GuC to stopped state * @ct: the &xe_guc_ct * * Set GuC CT to stopped state, stop g2h handler, and clear any outstanding g2h
*/ void xe_guc_ct_stop(struct xe_guc_ct *ct)
{ if (!xe_guc_ct_initialized(ct)) return;
n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
/* May be called under spinlock, so avoid sleeping */
ct->fast_req[slot].stack = stack_depot_save(entries, n, GFP_NOWAIT); #endif
ct->fast_req[slot].fence = fence;
ct->fast_req[slot].action = action;
} #else staticvoid fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action)
{
} #endif
/* * The CT protocol accepts a 16 bits fence. This field is fully owned by the * driver, the GuC will just copy it to the reply message. Since we need to * be able to distinguish between replies to REQUEST and FAST_REQUEST messages, * we use one bit of the seqno as an indicator for that and a rolling counter * for the remaining 15 bits.
*/ #define CT_SEQNO_MASK GENMASK(14, 0) #define CT_SEQNO_UNTRACKED BIT(15) static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence)
{
u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK;
if (!is_g2h_fence)
seqno |= CT_SEQNO_UNTRACKED;
return seqno;
}
#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */
try_again:
ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
g2h_fence);
/* * We wait to try to restore credits for about 1 second before bailing. * In the case of H2G credits we have no choice but just to wait for the * GuC to consume H2Gs in the channel so we use a wait / sleep loop. In * the case of G2H we process any G2H in the channel, hopefully freeing * credits as we consume the G2H messages.
*/ if (unlikely(ret == -EBUSY &&
!h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) { struct guc_ctb *h2g = &ct->ctbs.h2g;
mutex_lock(&ct->lock);
ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
mutex_unlock(&ct->lock);
return ret;
}
/* Non-blocking H2G send; -EDEADLK from the channel triggers a GT reset kick */
int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
		   u32 g2h_len, u32 num_g2h)
{
	int err = guc_ct_send(ct, action, len, g2h_len, num_g2h, NULL);

	if (err == -EDEADLK)
		kick_reset(ct);

	return err;
}
/* As xe_guc_ct_send(), but for callers already holding the CT lock */
int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
			  u32 g2h_len, u32 num_g2h)
{
	int err = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL);

	if (err == -EDEADLK)
		kick_reset(ct);

	return err;
}
/* Send an H2G from within the G2H handler path; ct->lock must be held */
int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len)
{
	int err;

	lockdep_assert_held(&ct->lock);

	/* No G2H credits are reserved for messages sent from this path */
	err = guc_ct_send_locked(ct, action, len, 0, 0, NULL);
	if (err == -EDEADLK)
		kick_reset(ct);

	return err;
}
/* * Check if a GT reset is in progress or will occur and if GT reset brought the * CT back up. Randomly picking 5 seconds for an upper limit to do a GT a reset.
*/ staticbool retry_failure(struct xe_guc_ct *ct, int ret)
{ if (!(ret == -EDEADLK || ret == -EPIPE || ret == -ENODEV)) returnfalse;
/* * We use a fence to implement blocking sends / receiving response data. * The seqno of the fence is sent in the H2G, returned in the G2H, and * an xarray is used as storage media with the seqno being to key. * Fields in the fence hold success, failure, retry status and the * response data. Safe to allocate on the stack as the xarray is the * only reference and it cannot be present after this function exits.
*/
retry:
g2h_fence_init(&g2h_fence, response_buffer);
retry_same_fence:
ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence); if (unlikely(ret == -ENOMEM)) { /* Retry allocation /w GFP_KERNEL */
ret = xa_err(xa_store(&ct->fence_lookup, g2h_fence.seqno,
&g2h_fence, GFP_KERNEL)); if (ret) return ret;
if (no_fail && retry_failure(ct, ret)) goto retry_same_fence;
if (!g2h_fence_needs_alloc(&g2h_fence))
xa_erase(&ct->fence_lookup, g2h_fence.seqno);
return ret;
}
ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); if (!ret) {
LNL_FLUSH_WORK(&ct->g2h_worker); if (g2h_fence.done) {
xe_gt_warn(gt, "G2H fence %u, action %04x, done\n",
g2h_fence.seqno, action[0]);
ret = 1;
}
}
/* * Ensure we serialize with completion side to prevent UAF with fence going out of scope on * the stack, since we have no clue if it will fire after the timeout before we can erase * from the xa. Also we have some dependent loads and stores below for which we need the * correct ordering, and we lack the needed barriers.
*/
mutex_lock(&ct->lock); if (!ret) {
xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
xa_erase(&ct->fence_lookup, g2h_fence.seqno);
mutex_unlock(&ct->lock); return -ETIME;
}
if (ret > 0)
ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
unlock:
mutex_unlock(&ct->lock);
return ret;
}
/**
 * xe_guc_ct_send_recv - Send and receive HXG to the GuC
 * @ct: the &xe_guc_ct
 * @action: the dword array with `HXG Request`_ message (can't be NULL)
 * @len: length of the `HXG Request`_ message (in dwords, can't be 0)
 * @response_buffer: placeholder for the `HXG Response`_ message (can be NULL)
 *
 * Send a `HXG Request`_ message to the GuC over CT communication channel and
 * blocks until GuC replies with a `HXG Response`_ message.
 *
 * For non-blocking communication with GuC use xe_guc_ct_send().
 *
 * Note: The size of &response_buffer must be at least GUC_CTB_MAX_DWORDS_.
 *
 * Return: response length (in dwords) if &response_buffer was not NULL, or
 *         DATA0 from `HXG Response`_ if &response_buffer was NULL, or
 *         a negative error code on failure.
 */
int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
			u32 *response_buffer)
{
	/* Allow KUnit tests to intercept this entry point with a stub */
	KUNIT_STATIC_STUB_REDIRECT(xe_guc_ct_send_recv, ct, action, len, response_buffer);
	return guc_ct_send_recv(ct, action, len, response_buffer, false);
}
/* Error-injection hook for exercising failure paths in testing */
ALLOW_ERROR_INJECTION(xe_guc_ct_send_recv, ERRNO);
switch (action) { case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE: case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE: case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
g2h_release_space(ct, len);
}
for (n = 0; n < ARRAY_SIZE(ct->fast_req); n++) { if (ct->fast_req[n].fence < fence_min)
fence_min = ct->fast_req[n].fence; if (ct->fast_req[n].fence > fence_max)
fence_max = ct->fast_req[n].fence;
if (ct->fast_req[n].fence != fence) continue;
found = true;
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
buf = kmalloc(SZ_4K, GFP_NOWAIT); if (buf && stack_depot_snprint(ct->fast_req[n].stack, buf, SZ_4K, 0))
xe_gt_err(gt, "Fence 0x%x was used by action %#04x sent at:\n%s",
fence, ct->fast_req[n].action, buf); else
xe_gt_err(gt, "Fence 0x%x was used by action %#04x [failed to retrieve stack]\n",
fence, ct->fast_req[n].action);
kfree(buf); #else
xe_gt_err(gt, "Fence 0x%x was used by action %#04x\n",
fence, ct->fast_req[n].action); #endif break;
}
if (!found)
xe_gt_warn(gt, "Fence 0x%x not found - tracking buffer wrapped? [range = 0x%x -> 0x%x, next = 0x%X]\n",
fence, fence_min, fence_max, ct->fence_seqno);
} #else staticvoid fast_req_report(struct xe_guc_ct *ct, u16 fence)
{
} #endif
/* * Fences for FAST_REQUEST messages are not tracked in ct->fence_lookup. * Those messages should never fail, so if we do get an error back it * means we're likely doing an illegal operation and the GuC is * rejecting it. We have no way to inform the code that submitted the * H2G that the message was rejected, so we need to escalate the * failure to trigger a reset.
*/ if (fence & CT_SEQNO_UNTRACKED) { if (type == GUC_HXG_TYPE_RESPONSE_FAILURE)
xe_gt_err(gt, "FAST_REQ H2G fence 0x%x failed! e=0x%x, h=%u\n",
fence,
FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]),
FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0])); else
xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n",
type, fence);
fast_req_report(ct, fence);
CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE);
return -EPROTO;
}
g2h_fence = xa_erase(&ct->fence_lookup, fence); if (unlikely(!g2h_fence)) { /* Don't tear down channel, as send could've timed out */ /* CT_DEAD(ct, NULL, PARSE_G2H_UNKNOWN); */
xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence);
g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); return 0;
}
switch (action) { case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
ret = xe_guc_sched_done_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
ret = xe_guc_deregister_done_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
ret = xe_guc_exec_queue_reset_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
ret = xe_guc_exec_queue_reset_failure_handler(guc, payload,
adj_len); break; case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE: /* Selftest only at the moment */ break; case XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
ret = xe_guc_error_capture_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE: /* FIXME: Handle this */ break; case XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR:
ret = xe_guc_exec_queue_memory_cat_error_handler(guc, payload,
adj_len); break; case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
ret = xe_guc_pagefault_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
adj_len); break; case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY:
ret = xe_guc_access_counter_notify_handler(guc, payload,
adj_len); break; case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len); break; case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF:
ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len); break; case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY:
ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len); break; case GUC_ACTION_GUC2PF_ADVERSE_EVENT:
ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len); break; case XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED: case XE_GUC_ACTION_NOTIFY_EXCEPTION:
ret = guc_crash_process_msg(ct, action); break; default:
xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
}
if (ct->state == XE_GUC_CT_STATE_DISABLED) return -ENODEV;
if (ct->state == XE_GUC_CT_STATE_STOPPED) return -ECANCELED;
if (g2h->info.broken) return -EPIPE;
xe_gt_assert(gt, xe_guc_ct_enabled(ct));
desc_status = desc_read(xe, g2h, status); if (desc_status) { if (desc_status & GUC_CTB_STATUS_DISABLED) { /* * Potentially valid if a CLIENT_RESET request resulted in * contexts/engines being reset. But should never happen as * no contexts should be active when CLIENT_RESET is sent.
*/
xe_gt_err(gt, "CT read: unexpected G2H after GuC has stopped!\n");
desc_status &= ~GUC_CTB_STATUS_DISABLED;
}
* info.head and desc_head are updated back-to-back at the end of * this function and nowhere else. Hence, they cannot be different * unless two g2h_read calls are running concurrently. Which is not * possible because it is guarded by ct->fast_lock. And yet, some * discrete platforms are regularly hitting this error :(. * * desc_head rolling backwards shouldn't cause any noticeable * problems - just a delay in GuC being allowed to proceed past that * point in the queue. So for now, just disable the error until it * can be root caused. * if (g2h->info.head != desc_head) { desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_MISMATCH); xe_gt_err(gt, "CT read: head was modified %u != %u\n", desc_head, g2h->info.head); goto corrupted; }
*/
if (g2h->info.head > g2h->info.size) {
desc_write(xe, g2h, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
xe_gt_err(gt, "CT read: head out of range: %u vs %u\n",
g2h->info.head, g2h->info.size); goto corrupted;
}
if (fast_path) { if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT) return 0;
switch (action) { case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC: case XE_GUC_ACTION_TLB_INVALIDATION_DONE: break; /* Process these in fast-path */ default: return 0;
}
}
/**
 * xe_guc_ct_fast_path - process critical G2H in the IRQ handler
 * @ct: GuC CT object
 *
 * Anything related to page faults is critical for performance, process these
 * critical G2H in the IRQ. This is safe as these handlers either just wake up
 * waiters or queue another worker.
 */
void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
{
	struct xe_device *xe = ct_to_xe(ct);
	bool ongoing;
	int len;

	/*
	 * Only proceed while the device is active, or while a runtime PM
	 * callback task exists for it. Use the cached @xe throughout rather
	 * than re-deriving it from @ct on every call.
	 */
	ongoing = xe_pm_runtime_get_if_active(xe);
	if (!ongoing && xe_pm_read_callback_task(xe) == NULL)
		return;

	/* Drain all currently available G2H messages under the fast lock */
	spin_lock(&ct->fast_lock);
	do {
		len = g2h_read(ct, ct->fast_msg, true);
		if (len > 0)
			g2h_fast_path(ct, ct->fast_msg, len);
	} while (len > 0);
	spin_unlock(&ct->fast_lock);

	/* Drop the PM reference only if we actually took one above */
	if (ongoing)
		xe_pm_runtime_put(xe);
}
/* Returns less than zero on error, 0 on done, 1 on more available */ staticint dequeue_one_g2h(struct xe_guc_ct *ct)
{ int len; int ret;
lockdep_assert_held(&ct->lock);
spin_lock_irq(&ct->fast_lock);
len = g2h_read(ct, ct->msg, false);
spin_unlock_irq(&ct->fast_lock); if (len <= 0) return len;
ret = parse_g2h_msg(ct, ct->msg, len); if (unlikely(ret < 0)) return ret;
ret = process_g2h_msg(ct, ct->msg, len); if (unlikely(ret < 0)) return ret;
return 1;
}
staticvoid receive_g2h(struct xe_guc_ct *ct)
{ bool ongoing; int ret;
/* * Normal users must always hold mem_access.ref around CT calls. However * during the runtime pm callbacks we rely on CT to talk to the GuC, but * at this stage we can't rely on mem_access.ref and even the * callback_task will be different than current. For such cases we just * need to ensure we always process the responses from any blocking * ct_send requests or where we otherwise expect some response when * initiated from those callbacks (which will need to wait for the below * dequeue_one_g2h()). The dequeue_one_g2h() will gracefully fail if * the device has suspended to the point that the CT communication has * been disabled. * * If we are inside the runtime pm callback, we can be the only task * still issuing CT requests (since that requires having the * mem_access.ref). It seems like it might in theory be possible to * receive unsolicited events from the GuC just as we are * suspending-resuming, but those will currently anyway be lost when * eventually exiting from suspend, hence no need to wake up the device * here. If we ever need something stronger than get_if_ongoing() then * we need to be careful with blocking the pm callbacks from getting CT * responses, if the worker here is blocked on those callbacks * completing, creating a deadlock.
*/
ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct)); if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL) return;
do {
mutex_lock(&ct->lock);
ret = dequeue_one_g2h(ct);
mutex_unlock(&ct->lock);
if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
xe_gt_err(ct_to_gt(ct), "CT dequeue failed: %d", ret);
CT_DEAD(ct, NULL, G2H_RECV);
kick_reset(ct);
}
} while (ret == 1);
lo = xe_map_rd_ring_u32(xe, cmds, idx, size);
hi = xe_map_rd_ring_u32(xe, cmds, idx + 1, size);
offset = make_u64(hi, lo);
offset += shift;
lo = lower_32_bits(offset);
hi = upper_32_bits(offset);
xe_map_wr_ring_u32(xe, cmds, idx, size, lo);
xe_map_wr_ring_u32(xe, cmds, idx + 1, size, hi);
}
/* * Shift any GGTT addresses within a single message left within CTB from * before post-migration recovery. * @ct: pointer to CT struct of the target GuC * @cmds: iomap buffer containing CT messages * @head: start of the target message within the buffer * @len: length of the target message * @size: size of the commands buffer * @shift: the address shift to be added to each GGTT reference * Return: true if the message was fixed or needed no fixups, false on failure
*/ staticbool ct_fixup_ggtt_in_message(struct xe_guc_ct *ct, struct iosys_map *cmds, u32 head,
u32 len, u32 size, s64 shift)
{ struct xe_gt *gt = ct_to_gt(ct); struct xe_device *xe = ct_to_xe(ct);
u32 msg[GUC_HXG_MSG_MIN_LEN];
u32 action, i, n;
/* * Apply fixups to the next outgoing CT message within given CTB * @ct: the &xe_guc_ct struct instance representing the target GuC * @h2g: the &guc_ctb struct instance of the target buffer * @shift: shift to be added to all GGTT addresses within the CTB * @mhead: pointer to an integer storing message start position; the * position is changed to next message before this function return * @avail: size of the area available for parsing, that is length * of all remaining messages stored within the CTB * Return: size of the area available for parsing after one message * has been parsed, that is length remaining from the updated mhead
*/ staticint ct_fixup_ggtt_in_buffer(struct xe_guc_ct *ct, struct guc_ctb *h2g,
s64 shift, u32 *mhead, s32 avail)
{ struct xe_gt *gt = ct_to_gt(ct); struct xe_device *xe = ct_to_xe(ct);
u32 msg[GUC_HXG_MSG_MIN_LEN];
u32 size = h2g->info.size;
u32 head = *mhead;
u32 len;
/** * xe_guc_ct_fixup_messages_with_ggtt - Fixup any pending H2G CTB messages * @ct: pointer to CT struct of the target GuC * @ggtt_shift: shift to be added to all GGTT addresses within the CTB * * Messages in GuC to Host CTB are owned by GuC and any fixups in them * are made by GuC. But content of the Host to GuC CTB is owned by the * KMD, so fixups to GGTT references in any pending messages need to be * applied here. * This function updates GGTT offsets in payloads of pending H2G CTB * messages (messages which were not consumed by GuC before the VF got * paused).
*/ void xe_guc_ct_fixup_messages_with_ggtt(struct xe_guc_ct *ct, s64 ggtt_shift)
{ struct guc_ctb *h2g = &ct->ctbs.h2g; struct xe_guc *guc = ct_to_guc(ct); struct xe_gt *gt = guc_to_gt(guc);
u32 head, tail, size;
s32 avail;
if (ct->bo && snapshot->ctb)
xe_map_memcpy_from(xe, snapshot->ctb, &ct->bo->vmap, 0, snapshot->ctb_size);
return snapshot;
}
/**
 * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
 * @ct: GuC CT object.
 *
 * This can be printed out in a later stage like during dev_coredump
 * analysis. This is safe to be called during atomic context.
 *
 * Returns: a GuC CT snapshot object that must be freed by the caller
 * by using `xe_guc_ct_snapshot_free`.
 */
struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct)
{
	/*
	 * NOTE(review): both flags assumed to request an atomic-safe, full
	 * capture — confirm against guc_ct_snapshot_capture()'s parameters.
	 */
	return guc_ct_snapshot_capture(ct, true, true);
}
/** * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot. * @snapshot: GuC CT snapshot object. * @p: drm_printer where it will be printed out. * * This function prints out a given GuC CT snapshot object.
*/ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_printer *p)
{ if (!snapshot) return;
if (snapshot->ct_enabled) {
drm_puts(p, "H2G CTB (all sizes in DW):\n");
guc_ctb_snapshot_print(&snapshot->h2g, p);
/**
 * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
 * @snapshot: GuC CT snapshot object, may be NULL.
 *
 * Releases the CTB copy and the snapshot object itself, both allocated at
 * capture time. A NULL @snapshot is a no-op.
 */
void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
{
	if (snapshot) {
		kfree(snapshot->ctb);
		kfree(snapshot);
	}
}
/** * xe_guc_ct_print - GuC CT Print. * @ct: GuC CT. * @p: drm_printer where it will be printed out. * @want_ctb: Should the full CTB content be dumped (vs just the headers) * * This function will quickly capture a snapshot of the CT state * and immediately print it out.
*/ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb)
{ struct xe_guc_ct_snapshot *snapshot;
#ifdef CONFIG_FUNCTION_ERROR_INJECTION
/*
 * Helper to let the driver detect whether a fault-injection test is currently
 * active, so it can suppress unnecessary debug output. It normally returns
 * zero, but the fault-injection framework can alter the return value to an
 * error. Since faults are injected through this function, 'noinline' is
 * required to stop the compiler from inlining it (which would defeat the
 * injection hook).
 */
noinline int xe_is_injection_active(void) { return 0; }
ALLOW_ERROR_INJECTION(xe_is_injection_active, ERRNO);
#else
/* Without the injection framework this is a constant-zero stub */
int xe_is_injection_active(void) { return 0; }
#endif
if (ctb)
ctb->info.broken = true; /* * Huge dump is getting generated when injecting error for guc CT/MMIO * functions. So, let us suppress the dump when fault is injected.
*/ if (xe_is_injection_active()) return;
/* Ignore further errors after the first dump until a reset */ if (ct->dead.reported) return;
spin_lock_irqsave(&ct->dead.lock, flags);
/* And only capture one dump at a time */
have_capture = ct->dead.reason & (1 << CT_DEAD_STATE_CAPTURE);
ct->dead.reason |= (1 << reason_code) |
(1 << CT_DEAD_STATE_CAPTURE);
if (!dead->reason) {
xe_gt_err(gt, "CTB is dead for no reason!?\n"); return;
}
/* Can't generate a genuine core dump at this point, so just do the good bits */
drm_puts(&lp, "**** Xe Device Coredump ****\n");
drm_printf(&lp, "Reason: CTB is dead - 0x%X\n", dead->reason);
xe_device_snapshot_print(xe, &lp);
if (ct->dead.reason & (1 << CT_DEAD_STATE_REARM)) { /* A reset has occurred so re-arm the error reporting */
ct->dead.reason = 0;
ct->dead.reported = false;
}
/*
 * (Translated from German extraction residue:)
 * The information on this web page was compiled carefully and to the best of
 * our knowledge. However, neither completeness, nor correctness, nor quality
 * of the provided information is guaranteed.
 * Note: the colored syntax rendering and the measurement are still
 * experimental.
 */