/* * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Authors: * Zhi Wang <zhi.a.wang@intel.com> * * Contributors: * Ping Gao <ping.a.gao@intel.com> * Tina Zhang <tina.zhang@intel.com> * Chanbin Du <changbin.du@intel.com> * Min He <min.he@intel.com> * Bing Niu <bing.niu@intel.com> * Zhenyu Wang <zhenyuw@linux.intel.com> *
*/
/* * When populating shadow ctx from guest, we should not override oa related * registers, so that they will not be overlapped by guest oa configs. Thus * made it possible to capture oa data from host for both host and guests.
*/ staticvoid sr_oa_regs(struct intel_vgpu_workload *workload,
u32 *reg_state, bool save)
{ struct drm_i915_private *dev_priv = workload->vgpu->gvt->gt->i915;
u32 ctx_oactxctrl = dev_priv->perf.ctx_oactxctrl_offset;
u32 ctx_flexeu0 = dev_priv->perf.ctx_flexeu0_offset; int i = 0;
u32 flex_mmio[] = {
i915_mmio_reg_offset(EU_PERF_CNTL0),
i915_mmio_reg_offset(EU_PERF_CNTL1),
i915_mmio_reg_offset(EU_PERF_CNTL2),
i915_mmio_reg_offset(EU_PERF_CNTL3),
i915_mmio_reg_offset(EU_PERF_CNTL4),
i915_mmio_reg_offset(EU_PERF_CNTL5),
i915_mmio_reg_offset(EU_PERF_CNTL6),
};
if (workload->engine->id != RCS0) return;
if (save) {
workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
u32 state_offset = ctx_flexeu0 + i * 2;
/* don't copy Ring Context (the first 0x50 dwords), * only copy the Engine Context part from guest
*/
intel_gvt_read_gpa(vgpu,
workload->ring_context_gpa +
RING_CTX_SIZE,
(void *)shadow_ring_context +
RING_CTX_SIZE,
I915_GTT_PAGE_SIZE - RING_CTX_SIZE);
/* only need to ensure this context is not pinned/unpinned during the * period from last submission to this this submission. * Upon reaching this function, the currently submitted context is not * supposed to get unpinned. If a misbehaving guest driver ever does * this, it would corrupt itself.
*/ if (s->last_ctx[ring_id].valid &&
(s->last_ctx[ring_id].lrca ==
workload->ctx_desc.lrca) &&
(s->last_ctx[ring_id].ring_context_gpa ==
workload->ring_context_gpa))
skip = true;
if (IS_BROADWELL(gvt->gt->i915) && workload->engine->id == RCS0)
context_page_num = 19;
/* find consecutive GPAs from gma until the first inconsecutive GPA. * read from the continuous GPAs into dst virtual address
*/
gpa_size = 0; for (i = 2; i < context_page_num; i++) {
context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
(u32)((workload->ctx_desc.lrca + i) <<
I915_GTT_PAGE_SHIFT)); if (context_gpa == INTEL_GVT_INVALID_ADDR) {
gvt_vgpu_err("Invalid guest context descriptor\n"); return -EFAULT;
}
/* * Update bits 0-11 of the context descriptor which includes flags * like GEN8_CTX_* cached in desc_template
*/
desc &= ~(0x3ull << GEN8_CTX_ADDRESSING_MODE_SHIFT);
desc |= (u64)workload->ctx_desc.addressing_mode <<
GEN8_CTX_ADDRESSING_MODE_SHIFT;
if (GRAPHICS_VER(req->engine->i915) == 9 && is_inhibit_context(req->context))
intel_vgpu_restore_inhibit_context(vgpu, req);
/* * To track whether a request has started on HW, we can emit a * breadcrumb at the beginning of the request and check its * timeline's HWSP to see if the breadcrumb has advanced past the * start of this request. Actually, the request must have the * init_breadcrumb if its timeline set has_init_bread_crumb, or the * scheduler might get a wrong state of it during reset. Since the * requests from gvt always set the has_init_breadcrumb flag, here * need to do the emit_init_breadcrumb for all the requests.
*/ if (req->engine->emit_init_breadcrumb) {
err = req->engine->emit_init_breadcrumb(req); if (err) {
gvt_vgpu_err("fail to emit init breadcrumb\n"); return err;
}
}
/* allocate shadow ring buffer */
cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); if (IS_ERR(cs)) {
gvt_vgpu_err("fail to alloc size =%ld shadow ring buffer\n",
workload->rb_len); return PTR_ERR(cs);
}
if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
set_dma_address(ppgtt->pd, mm->ppgtt_mm.shadow_pdps[0]);
} else { for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) { struct i915_page_directory * const pd =
i915_pd_entry(ppgtt->pd, i); /* skip now as current i915 ppgtt alloc won't allocate top level pdp for non 4-level table, won't impact
shadow ppgtt. */ if (!pd) break;
rq = i915_request_create(s->shadow[workload->engine->id]); if (IS_ERR(rq)) {
gvt_vgpu_err("fail to allocate gem request\n"); return PTR_ERR(rq);
}
workload->req = i915_request_get(rq); return 0;
}
/** * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and * shadow it as well, include ringbuffer,wa_ctx and ctx. * @workload: an abstract entity for each execlist submission. * * This function is called before the workload submitting to i915, to make * sure the content of the workload is valid.
*/ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
{ struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; int ret;
lockdep_assert_held(&vgpu->vgpu_lock);
if (workload->shadow) return 0;
if (!test_and_set_bit(workload->engine->id, s->shadow_ctx_desc_updated))
shadow_context_descriptor_update(s->shadow[workload->engine->id],
workload);
ret = intel_gvt_scan_and_shadow_ringbuffer(workload); if (ret) return ret;
if (workload->engine->id == RCS0 &&
workload->wa_ctx.indirect_ctx.size) {
ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); if (ret) goto err_shadow;
}
list_for_each_entry(bb, &workload->shadow_bb, list) { /* * For privilege batch buffer and not wa_ctx, the bb_start_cmd_va * is only updated into ring_scan_buffer, not real ring address * allocated in later copy_workload_to_ring_buffer. Please be noted * shadow_ring_buffer_va is now pointed to real ring buffer va * in copy_workload_to_ring_buffer.
*/
if (bb->bb_offset)
bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
+ bb->bb_offset;
/* * For non-priv bb, scan&shadow is only for * debugging purpose, so the content of shadow bb * is the same as original bb. Therefore, * here, rather than switch to shadow bb's gma * address, we directly use original batch buffer's * gma address, and send original bb to hardware * directly.
*/ if (!bb->ppgtt) {
i915_gem_ww_ctx_init(&ww, false);
retry:
i915_gem_object_lock(bb->obj, &ww);
bb->vma = i915_gem_object_ggtt_pin_ww(bb->obj, &ww,
NULL, 0, 0, 0); if (IS_ERR(bb->vma)) {
ret = PTR_ERR(bb->vma); if (ret == -EDEADLK) {
ret = i915_gem_ww_ctx_backoff(&ww); if (!ret) goto retry;
} goto err;
}
ret = i915_vma_move_to_active(bb->vma, workload->req,
__EXEC_OBJECT_NO_REQUEST_AWAIT); if (ret) goto err;
/* No one is going to touch shadow bb from now on. */
i915_gem_object_flush_map(bb->obj);
i915_gem_ww_ctx_fini(&ww);
}
} return 0;
err:
i915_gem_ww_ctx_fini(&ww);
release_shadow_batch_buffer(workload); return ret;
}
gvt_dbg_sched("ring id %s prepare to dispatch workload %p\n",
workload->engine->name, workload);
mutex_lock(&vgpu->vgpu_lock);
ret = intel_gvt_workload_req_alloc(workload); if (ret) goto err_req;
ret = intel_gvt_scan_and_shadow_workload(workload); if (ret) goto out;
ret = populate_shadow_context(workload); if (ret) {
release_shadow_wa_ctx(&workload->wa_ctx); goto out;
}
ret = prepare_workload(workload);
out: if (ret) { /* We might still need to add request with * clean ctx to retire it properly..
*/
rq = fetch_and_zero(&workload->req);
i915_request_put(rq);
}
if (!IS_ERR_OR_NULL(workload->req)) {
gvt_dbg_sched("ring id %s submit workload to i915 %p\n",
workload->engine->name, workload->req);
i915_request_add(workload->req);
workload->dispatched = true;
}
err_req: if (ret)
workload->status = ret;
mutex_unlock(&vgpu->vgpu_lock); return ret;
}
/* * no current vgpu / will be scheduled out / no workload * bail out
*/ if (!scheduler->current_vgpu) {
gvt_dbg_sched("ring %s stop - no current vgpu\n", engine->name); goto out;
}
if (scheduler->need_reschedule) {
gvt_dbg_sched("ring %s stop - will reschedule\n", engine->name); goto out;
}
if (!test_bit(INTEL_VGPU_STATUS_ACTIVE,
scheduler->current_vgpu->status) ||
list_empty(workload_q_head(scheduler->current_vgpu, engine))) goto out;
/* * still have current workload, maybe the workload disptacher * fail to submit it for some reason, resubmit it.
*/ if (scheduler->current_workload[engine->id]) {
workload = scheduler->current_workload[engine->id];
gvt_dbg_sched("ring %s still have current workload %p\n",
engine->name, workload); goto out;
}
/* * pick a workload as current workload * once current workload is set, schedule policy routines * will wait the current workload is finished when trying to * schedule out a vgpu.
*/
scheduler->current_workload[engine->id] =
list_first_entry(workload_q_head(scheduler->current_vgpu,
engine), struct intel_vgpu_workload, list);
if (shadow_pdp != m->ppgtt_mm.shadow_pdps[0]) {
gvt_dbg_mm("4-level context ppgtt not match LRI command\n"); returnfalse;
} returntrue;
} else { /* see comment in LRI handler in cmd_parser.c */
gvt_dbg_mm("invalid shadow mm type\n"); returnfalse;
}
}
/* find consecutive GPAs from gma until the first inconsecutive GPA. * write to the consecutive GPAs from src virtual address
*/
gpa_size = 0; for (i = 2; i < context_page_num; i++) {
context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
(u32)((workload->ctx_desc.lrca + i) <<
I915_GTT_PAGE_SHIFT)); if (context_gpa == INTEL_GVT_INVALID_ADDR) {
gvt_vgpu_err("invalid guest context descriptor\n"); return;
}
/* For the workload w/ request, needs to wait for the context * switch to make sure request is completed. * For the workload w/o request, directly complete the workload.
*/ if (rq) {
wait_event(workload->shadow_ctx_status_wq,
!atomic_read(&workload->shadow_ctx_active));
/* If this request caused GPU hang, req->fence.error will * be set to -EIO. Use -EIO to set workload status so * that when this request caused GPU hang, didn't trigger * context switch interrupt to guest.
*/ if (likely(workload->status == -EINPROGRESS)) { if (workload->req->fence.error == -EIO)
workload->status = -EIO; else
workload->status = 0;
}
if (!workload->status &&
!(vgpu->resetting_eng & BIT(ring_id))) {
update_guest_context(workload);
gvt_dbg_sched("ring id %d complete workload %p status %d\n",
ring_id, workload, workload->status);
scheduler->current_workload[ring_id] = NULL;
list_del_init(&workload->list);
if (workload->status || vgpu->resetting_eng & BIT(ring_id)) { /* if workload->status is not successful means HW GPU * has occurred GPU hang or something wrong with i915/GVT, * and GVT won't inject context switch interrupt to guest. * So this error is a vGPU hang actually to the guest. * According to this we should emunlate a vGPU hang. If * there are pending workloads which are already submitted * from guest, we should clean them up like HW GPU does. * * if it is in middle of engine resetting, the pending * workloads won't be submitted to HW GPU and will be * cleaned up during the resetting process later, so doing * the workload clean up here doesn't have any impact.
**/
intel_vgpu_clean_workloads(vgpu, BIT(ring_id));
}
gvt_dbg_sched("ring %s will dispatch workload %p\n",
engine->name, workload);
if (need_force_wake)
intel_uncore_forcewake_get(engine->uncore,
FORCEWAKE_ALL); /* * Update the vReg of the vGPU which submitted this * workload. The vGPU may use these registers for checking * the context state. The value comes from GPU commands * in this workload.
*/
update_vreg_in_ctx(workload);
ret = dispatch_workload(workload);
if (ret) {
vgpu = workload->vgpu;
gvt_vgpu_err("fail to dispatch workload, skip\n"); goto complete;
}
/** * intel_vgpu_clean_submission - free submission-related resource for vGPU * @vgpu: a vGPU * * This function is called when a vGPU is being destroyed. *
*/ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
{ struct intel_vgpu_submission *s = &vgpu->submission; struct intel_engine_cs *engine; enum intel_engine_id id;
/** * intel_vgpu_reset_submission - reset submission-related resource for vGPU * @vgpu: a vGPU * @engine_mask: engines expected to be reset * * This function is called when a vGPU is being destroyed. *
*/ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
intel_engine_mask_t engine_mask)
{ struct intel_vgpu_submission *s = &vgpu->submission;
/*
 * i915_context_ppgtt_root_save - stash the host i915 PPGTT root entries
 * @s: vGPU submission state the root-table DMA addresses are saved into
 * @ppgtt: the i915 PPGTT whose root-table addresses are recorded
 *
 * Records the DMA addresses of the context's PPGTT root tables in @s.
 * For a 4-level PPGTT only the single PML4 address is needed; for the
 * 3-level layout each of the GEN8_3LVL_PDPES page-directory addresses
 * is saved individually.
 *
 * NOTE(review): presumably a matching *_restore helper reinstalls these
 * after shadow PPGTT roots are swapped in — not visible in this chunk.
 */
static void
i915_context_ppgtt_root_save(struct intel_vgpu_submission *s,
			     struct i915_ppgtt *ppgtt)
{
	int i;

	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		s->i915_context_pml4 = px_dma(ppgtt->pd);
	} else {
		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
			struct i915_page_directory * const pd =
				i915_pd_entry(ppgtt->pd, i);

			s->i915_context_pdps[i] = px_dma(pd);
		}
	}
}
/** * intel_vgpu_setup_submission - setup submission-related resource for vGPU * @vgpu: a vGPU * * This function is called when a vGPU is being created. * * Returns: * Zero on success, negative error code if failed. *
*/ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
{ struct drm_i915_private *i915 = vgpu->gvt->gt->i915; struct intel_vgpu_submission *s = &vgpu->submission; struct intel_engine_cs *engine; struct i915_ppgtt *ppgtt; enum intel_engine_id i; int ret;
ppgtt = i915_ppgtt_create(to_gt(i915), I915_BO_ALLOC_PM_EARLY); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt);
/** * intel_vgpu_destroy_workload - destroy a vGPU workload * @workload: workload to destroy * * This function is called when destroy a vGPU workload. *
*/ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
{ struct intel_vgpu_submission *s = &workload->vgpu->submission;
head &= RB_HEAD_OFF_MASK;
tail &= RB_TAIL_OFF_MASK;
list_for_each_entry_reverse(last_workload, q, list) {
if (same_context(&last_workload->ctx_desc, desc)) {
gvt_dbg_el("ring %s cur workload == last\n",
engine->name);
gvt_dbg_el("ctx head %x real head %lx\n", head,
last_workload->rb_tail); /* * cannot use guest context head pointer here, * as it might not be updated at this time
*/
head = last_workload->rb_tail; break;
}
}
gvt_dbg_el("ring %s begin a new workload\n", engine->name);
/* record some ring buffer register values for scan and shadow */
intel_gvt_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(rb_start.val), &start, 4);
intel_gvt_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(rb_ctrl.val), &ctl, 4);
intel_gvt_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);
gvt_dbg_el("workload %p ring %s head %x tail %x start %x ctl %x\n",
workload, engine->name, head, tail, start, ctl);
ret = prepare_mm(workload); if (ret) {
kmem_cache_free(s->workloads, workload); return ERR_PTR(ret);
}
/* Only scan and shadow the first workload in the queue * as there is only one pre-allocated buf-obj for shadow.
*/ if (list_empty(q)) {
intel_wakeref_t wakeref;
with_intel_runtime_pm(engine->gt->uncore->rpm, wakeref)
ret = intel_gvt_scan_and_shadow_workload(workload);
}
if (ret) { if (vgpu_is_vm_unhealthy(ret))
enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
intel_vgpu_destroy_workload(workload); return ERR_PTR(ret);
}
ret = intel_context_pin(s->shadow[engine->id]); if (ret) {
intel_vgpu_destroy_workload(workload); return ERR_PTR(ret);
}
return workload;
}
/** * intel_vgpu_queue_workload - Queue a vGPU workload * @workload: the workload to queue in
*/ void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload)
{
list_add_tail(&workload->list,
workload_q_head(workload->vgpu, workload->engine));
intel_gvt_kick_schedule(workload->vgpu->gvt);
wake_up(&workload->vgpu->gvt->scheduler.waitq[workload->engine->id]);
}
The information on this web page has been compiled carefully and to the
best of our knowledge. However, no guarantee is given as to the
completeness, correctness, or quality of the information provided.
Note:
The colored syntax rendering and the measurement are still experimental.