/* * GTT virtualization * * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Authors: * Zhi Wang <zhi.a.wang@intel.com> * Zhenyu Wang <zhenyuw@linux.intel.com> * Xiao Zheng <xiao.zheng@intel.com> * * Contributors: * Min He <min.he@intel.com> * Bing Niu <bing.niu@intel.com> *
*/
/* * validate a gm address and related range size, * translate it to host gm address
*/ bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
{ if (size == 0) return vgpu_gmadr_is_valid(vgpu, addr);
/*
 * Mappings between GTT_TYPE* enumerations.
 *
 * Given one type, the table answers:
 *   - the type of the next-level page table
 *   - the type of an entry inside this level of page table
 *   - the type of an entry with the PSE bit set
 *
 * When the queried relation does not exist for the given type (e.g. the
 * PSE type of an L4 root entry, or the next-level table of a PTE page
 * table), the corresponding field holds GTT_TYPE_INVALID. That sentinel
 * is what makes the table convenient for page-table walks.
 */
struct gtt_type_table_entry {
	int entry_type;     /* entry type inside this level of page table */
	int pt_type;        /* type of this page table itself */
	int next_pt_type;   /* type of the next-level page table */
	int pse_entry_type; /* entry type when the PSE bit is set */
};
/*
 * Update entry type per the PSE and IPS bits.
 *
 * A PDE/PDP entry with PSE set maps a huge page directly, and a 4K PTE
 * under an IPS-enabled PDE really describes a 64K page, so the recorded
 * entry type must be promoted to the matching PSE type before use.
 *
 * NOTE(review): the original text was mangled ("staticvoid",
 * "conststruct") and the function's closing brace was lost; both are
 * restored here without changing the visible logic.
 */
static void update_entry_type_for_real(const struct intel_gvt_gtt_pte_ops *pte_ops,
				       struct intel_gvt_gtt_entry *entry, bool ips)
{
	switch (entry->type) {
	case GTT_TYPE_PPGTT_PDE_ENTRY:
	case GTT_TYPE_PPGTT_PDP_ENTRY:
		if (pte_ops->test_pse(entry))
			entry->type = get_pse_type(entry->type);
		break;
	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
		/* 64K page support (IPS) is controlled by the upper PDE. */
		if (ips)
			entry->type = get_pse_type(entry->type);
		break;
	default:
		GEM_BUG_ON(!gtt_type_is_entry(entry->type));
	}
}
/*
 * Iterate over the present guest entries of @spt. When the guest PDE has
 * IPS set (64K pages), only every GTT_64K_PTE_STRIDE-th slot is used, so
 * the index advances by the stride.
 * (Restored: the line-continuation backslashes had been fused mid-line,
 * which made the macro malformed.)
 */
#define for_each_present_guest_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
		if (!ppgtt_get_guest_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
/*
 * Iterate over the present shadow entries of @spt, honouring the 64K
 * stride when the shadow PDE has IPS set.
 * (Restored: fused line-continuation backslashes made the macro malformed.)
 */
#define for_each_present_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
		if (!ppgtt_get_shadow_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
/*
 * Iterate over every shadow entry of @spt (present or not), honouring the
 * 64K stride when the shadow PDE has IPS set.
 * (Restored: fused line-continuation backslashes made the macro malformed.)
 */
#define for_each_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
		if (!ppgtt_get_shadow_entry(spt, e, i))
/*
 * Take an extra reference on a shadow page table, tracing the
 * before/after count for debugging.
 * (Restored: "staticinlinevoid" had been fused into one token.)
 */
static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
	atomic_inc(&spt->refcount);
}
/*
 * Drop a reference on a shadow page table, tracing the before/after
 * count. Returns the new reference count so the caller can free the
 * table when it reaches zero.
 * (Restored: "staticinlineint" had been fused into one token.)
 */
static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
	return atomic_dec_return(&spt->refcount);
}
if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);
spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we)); if (spt) {
ppgtt_get_spt(spt);
if (ips != spt->guest_page.pde_ips) {
spt->guest_page.pde_ips = ips;
gvt_dbg_mm("reshadow PDE since ips changed\n");
clear_page(spt->shadow_page.vaddr);
ret = ppgtt_populate_spt(spt); if (ret) {
ppgtt_put_spt(spt); goto err;
}
}
} else { int type = get_next_pt_type(we->type);
if (!gtt_type_is_pt(type)) {
ret = -EINVAL; goto err;
}
spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips); if (IS_ERR(spt)) {
ret = PTR_ERR(spt); goto err;
}
ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn); if (ret) goto err_free_spt;
ret = ppgtt_populate_spt(spt); if (ret) goto err_free_spt;
switch (ge->type) { case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
gvt_vdbg_mm("shadow 4K gtt entry\n");
ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr); if (ret) return -ENXIO; break; case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
gvt_vdbg_mm("shadow 64K gtt entry\n"); /* * The layout of 64K page is special, the page size is * controlled by upper PDE. To be simple, we always split * 64K page to smaller 4K pages in shadow PT.
*/ return split_64KB_gtt_entry(vgpu, spt, index, &se); case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
gvt_vdbg_mm("shadow 2M gtt entry\n"); if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) ||
intel_gvt_dma_map_guest_page(vgpu, gfn,
I915_GTT_PAGE_SIZE_2M, &dma_addr)) return split_2MB_gtt_entry(vgpu, spt, index, &se); break; case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
gvt_vgpu_err("GVT doesn't support 1GB entry\n"); return -EINVAL; default:
GEM_BUG_ON(1); return -EINVAL;
}
/**
 * intel_vgpu_sync_oos_pages - sync all the out-of-synced shadow for vGPU
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to host,
 * to sync all the out-of-synced shadow for vGPU
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_oos_page *oos_page;
	struct list_head *cur, *next;
	int ret;

	/* Nothing to do when the out-of-sync optimization is disabled. */
	if (!enable_out_of_sync)
		return 0;

	/* Walk with the _safe variant: syncing may unlink the node. */
	list_for_each_safe(cur, next, &vgpu->gtt.oos_page_list_head) {
		oos_page = container_of(cur, struct intel_vgpu_oos_page, vm_list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
	}

	return 0;
}
/* * The heart of PPGTT shadow page table.
*/ staticint ppgtt_handle_guest_write_page_table( struct intel_vgpu_ppgtt_spt *spt, struct intel_gvt_gtt_entry *we, unsignedlong index)
{ struct intel_vgpu *vgpu = spt->vgpu; int type = spt->shadow_page.type; conststruct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_entry old_se; int new_present; int i, ret;
new_present = ops->test_present(we);
/* * Adding the new entry first and then removing the old one, that can * guarantee the ppgtt table is validated during the window between * adding and removal.
*/
ppgtt_get_shadow_entry(spt, &old_se, index);
if (new_present) {
ret = ppgtt_handle_guest_entry_add(spt, we, index); if (ret) goto fail;
}
ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index); if (ret) goto fail;
if (!new_present) { /* For 64KB splited entries, we need clear them all. */ if (ops->test_64k_splited(&old_se) &&
!(index % GTT_64K_PTE_STRIDE)) {
gvt_vdbg_mm("remove splited 64K shadow entries\n"); for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
ops->clear_64k_splited(&old_se);
ops->set_pfn(&old_se,
vgpu->gtt.scratch_pt[type].page_mfn);
ppgtt_set_shadow_entry(spt, &old_se, index + i);
}
} elseif (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
ops->clear_pse(&old_se);
ops->set_pfn(&old_se,
vgpu->gtt.scratch_pt[type].page_mfn);
ppgtt_set_shadow_entry(spt, &old_se, index);
} else {
ops->set_pfn(&old_se,
vgpu->gtt.scratch_pt[type].page_mfn);
ppgtt_set_shadow_entry(spt, &old_se, index);
}
}
/** * intel_vgpu_flush_post_shadow - flush the post shadow transactions * @vgpu: a vGPU * * This function is called before submitting a guest workload to host, * to flush all the post shadows for a vGPU. * * Returns: * Zero on success, negative error code if failed.
*/ int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
{ struct list_head *pos, *n; struct intel_vgpu_ppgtt_spt *spt; struct intel_gvt_gtt_entry ge; unsignedlong index; int ret;
list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
post_shadow_list);
index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
ppgtt_get_guest_entry(spt, &we, index);
/* * For page table which has 64K gtt entry, only PTE#0, PTE#16, * PTE#32, ... PTE#496 are used. Unused PTEs update should be * ignored.
*/ if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
(index % GTT_64K_PTE_STRIDE)) {
gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
index); return 0;
}
if (bytes == info->gtt_entry_size) {
ret = ppgtt_handle_guest_write_page_table(spt, &we, index); if (ret) return ret;
} else { if (!test_bit(index, spt->post_shadow_bitmap)) { int type = spt->shadow_page.type;
/** * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU * @vgpu: a vGPU * @root_entry_type: ppgtt root entry type * @pdps: guest pdps. * * This function is used to create a ppgtt mm object for a vGPU. * * Returns: * Zero on success, negative error code in pointer if failed.
*/ struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
{ struct intel_gvt *gvt = vgpu->gvt; struct intel_vgpu_mm *mm; int ret;
mm = vgpu_alloc_mm(vgpu); if (!mm) return ERR_PTR(-ENOMEM);
/** * _intel_vgpu_mm_release - destroy a mm object * @mm_ref: a kref object * * This function is used to destroy a mm object for vGPU *
*/ void _intel_vgpu_mm_release(struct kref *mm_ref)
{ struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
if (GEM_WARN_ON(atomic_read(&mm->pincount)))
gvt_err("vgpu mm pin count bug detected\n");
if (mm->type == INTEL_GVT_MM_PPGTT) {
list_del(&mm->ppgtt_mm.list);
/**
 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
 * @mm: a vGPU mm object
 *
 * This function is called when user doesn't want to use a vGPU mm object
 * any longer. atomic_dec_if_positive() only decrements while the count is
 * positive, so the pin count never goes below zero even on an unbalanced
 * unpin.
 */
void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
{
	atomic_dec_if_positive(&mm->pincount);
}
/** * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object * @mm: target vgpu mm * * This function is called when user wants to use a vGPU mm object. If this * mm object hasn't been shadowed yet, the shadow will be populated at this * time. * * Returns: * Zero on success, negative error code if failed.
*/ int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
{ int ret;
atomic_inc(&mm->pincount);
if (mm->type == INTEL_GVT_MM_PPGTT) {
ret = shadow_ppgtt_mm(mm); if (ret) return ret;
s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); if (!s) return -ENXIO;
if (!guest)
ppgtt_get_shadow_entry(s, e, index); else
ppgtt_get_guest_entry(s, e, index); return 0;
}
/** * intel_vgpu_gma_to_gpa - translate a gma to GPA * @mm: mm object. could be a PPGTT or GGTT mm object * @gma: graphics memory address in this mm object * * This function is used to translate a graphics memory address in specific * graphics memory space to guest physical address. * * Returns: * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
*/ unsignedlong intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsignedlong gma)
{ struct intel_vgpu *vgpu = mm->vgpu; struct intel_gvt *gvt = vgpu->gvt; conststruct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops; conststruct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops; unsignedlong gpa = INTEL_GVT_INVALID_ADDR; unsignedlong gma_index[4]; struct intel_gvt_gtt_entry e; int i, levels = 0; int ret;
/* walk the shadow page table and get gpa from guest entry */ for (i = 0; i < levels; i++) {
ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
(i == levels - 1)); if (ret) goto err;
if (!pte_ops->test_present(&e)) {
gvt_dbg_core("GMA 0x%lx is not present\n", gma); goto err;
}
}
/** * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read * @vgpu: a vGPU * @off: register offset * @p_data: data will be returned to guest * @bytes: data length * * This function is used to emulate the GTT MMIO register read * * Returns: * Zero on success, error code if failed.
*/ int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsignedint off, void *p_data, unsignedint bytes)
{ conststruct intel_gvt_device_info *info = &vgpu->gvt->device_info; int ret;
if (bytes != 4 && bytes != 8) return -EINVAL;
off -= info->gtt_start_offset;
ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes); return ret;
}
/* If ggtt entry size is 8 bytes, and it's split into two 4 bytes * write, save the first 4 bytes in a list and update virtual * PTE. Only update shadow PTE when the second 4 bytes comes.
*/ if (bytes < info->gtt_entry_size) { bool found = false;
list_for_each_entry_safe(pos, n,
&ggtt_mm->ggtt_mm.partial_pte_list, list) { if (g_gtt_index == pos->offset >>
info->gtt_entry_size_shift) { if (off != pos->offset) { /* the second partial part*/ int last_off = pos->offset &
(info->gtt_entry_size - 1);
ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
&dma_addr); if (ret) {
gvt_vgpu_err("fail to populate guest ggtt entry\n"); /* guest driver may read/write the entry when partial * update the entry in this situation p2m will fail * setting the shadow entry to point to a scratch page
*/
ops->set_pfn(&m, gvt->gtt.scratch_mfn);
} else
ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
} else {
ops->set_pfn(&m, gvt->gtt.scratch_mfn);
ops->clear_present(&m);
}
/* * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write * @vgpu: a vGPU * @off: register offset * @p_data: data from guest write * @bytes: data length * * This function is used to emulate the GTT MMIO register write * * Returns: * Zero on success, error code if failed.
*/ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsignedint off, void *p_data, unsignedint bytes)
{ conststruct intel_gvt_device_info *info = &vgpu->gvt->device_info; int ret; struct intel_vgpu_submission *s = &vgpu->submission; struct intel_engine_cs *engine; int i;
if (bytes != 4 && bytes != 8) return -EINVAL;
off -= info->gtt_start_offset;
ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
/* if ggtt of last submitted context is written, * that context is probably got unpinned. * Set last shadowed ctx to invalid.
*/
for_each_engine(engine, vgpu->gvt->gt, i) { if (!s->last_ctx[i].valid) continue;
/* Build the tree by full filled the scratch pt with the entries which * point to the next level scratch pt or scratch page. The * scratch_pt[type] indicate the scratch pt/scratch page used by the * 'type' pt. * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scratch_pt it self * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn.
*/ if (type > GTT_TYPE_PPGTT_PTE_PT) { struct intel_gvt_gtt_entry se;
/* The entry parameters like present/writeable/cache type * set to the same as i915's scratch page tree.
*/
se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW; if (type == GTT_TYPE_PPGTT_PDE_PT)
se.val64 |= PPAT_CACHED;
for (i = 0; i < page_entry_num; i++)
ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
}
/** * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virulization * @vgpu: a vGPU * * This function is used to initialize per-vGPU graphics memory virtualization * components. * * Returns: * Zero on success, error code if failed.
*/ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
{ struct intel_vgpu_gtt *gtt = &vgpu->gtt;
/*
 * NOTE(review): the German text below is website boilerplate that leaked
 * into this file during extraction; it is unrelated to the code. It is
 * preserved inside a comment so the file remains valid C.
 *
 * "Die Informationen auf dieser Webseite wurden nach bestem Wissen
 *  sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit,
 *  noch Richtigkeit, noch Qualität der bereit gestellten Informationen
 *  zugesichert. Bemerkung: Die farbliche Syntaxdarstellung und die
 *  Messung sind noch experimentell."
 *
 * (Translation: "The information on this website was carefully compiled
 * to the best of our knowledge. However, neither completeness, nor
 * correctness, nor quality of the provided information is guaranteed.
 * Note: the colored syntax rendering and the measurement are still
 * experimental.")
 */