first = drm_gpusvm_range_find(notifier, adj_start, adj_end); if (!first) return;
/* * PTs may be getting destroyed so not safe to touch these but PT should * be invalidated at this point in time. Regardless we still need to * ensure any dma mappings are unmapped in the here.
*/ if (xe_vm_is_closed(vm)) goto range_notifier_event_end;
/* * XXX: Less than ideal to always wait on VM's resv slots if an * invalidation is not required. Could walk range list twice to figure * out if an invalidations is need, but also not ideal.
*/
err = dma_resv_wait_timeout(xe_vm_resv(vm),
DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT);
XE_WARN_ON(err <= 0);
r = first;
drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
tile_mask |= xe_svm_range_notifier_event_begin(vm, r, mmu_range,
&adj_start,
&adj_end); if (!tile_mask) goto range_notifier_event_end;
staticint xe_svm_garbage_collector(struct xe_vm *vm)
{ struct xe_svm_range *range; int err;
lockdep_assert_held_write(&vm->lock);
if (xe_vm_is_closed_or_banned(vm)) return -ENOENT;
spin_lock(&vm->svm.garbage_collector.lock); for (;;) {
range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
typeof(*range),
garbage_collector_link); if (!range) break;
/* * This flow is complex: it locates physically contiguous device pages, * derives the starting physical address, and performs a single GPU copy * to for every 8M chunk in a DMA address array. Both device pages and * DMA addresses may be sparsely populated. If either is NULL, a copy is * triggered based on the current search state. The last GPU copy is * waited on to ensure all copies are complete.
*/
for (i = 0; i < npages; ++i) { struct page *spage = pages[i]; struct dma_fence *__fence;
u64 __vram_addr; bool match = false, chunk, last;
#define XE_MIGRATE_CHUNK_SIZE SZ_8M
chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
last = (i + 1) == npages;
/* No CPU page and no device pages queue'd to copy */ if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID) continue;
/* * CPU page and device page valid, capture physical address on * first device page, check if physical contiguous on subsequent * device pages.
*/ if (dma_addr[i] && spage) {
__vram_addr = xe_vram_region_page_to_dpa(vr, spage); if (vram_addr == XE_VRAM_ADDR_INVALID) {
vram_addr = __vram_addr;
pos = i;
}
match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
}
/* * Mismatched physical address, 8M copy chunk, or last page - * trigger a copy.
*/ if (!match || chunk || last) { /* * Extra page for first copy if last page and matching * physical address.
*/ int incr = (match && last) ? 1 : 0;
if (vram_addr != XE_VRAM_ADDR_INVALID) { if (sram) {
vm_dbg(&xe->drm, "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
vram_addr, (u64)dma_addr[pos], i - pos + incr);
__fence = xe_migrate_from_vram(vr->migrate,
i - pos + incr,
vram_addr,
dma_addr + pos);
} else {
vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
(u64)dma_addr[pos], vram_addr, i - pos + incr);
__fence = xe_migrate_to_vram(vr->migrate,
i - pos + incr,
dma_addr + pos,
vram_addr);
} if (IS_ERR(__fence)) {
err = PTR_ERR(__fence); goto err_out;
}
dma_fence_put(fence);
fence = __fence;
}
/* Setup physical address of next device page */ if (dma_addr[i] && spage) {
vram_addr = __vram_addr;
pos = i;
} else {
vram_addr = XE_VRAM_ADDR_INVALID;
}
/* Extra mismatched device page, copy it */ if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) { if (sram) {
vm_dbg(&xe->drm, "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
vram_addr, (u64)dma_addr[pos], 1);
__fence = xe_migrate_from_vram(vr->migrate, 1,
vram_addr,
dma_addr + pos);
} else {
vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
(u64)dma_addr[pos], vram_addr, 1);
__fence = xe_migrate_to_vram(vr->migrate, 1,
dma_addr + pos,
vram_addr);
} if (IS_ERR(__fence)) {
err = PTR_ERR(__fence); goto err_out;
}
dma_fence_put(fence);
fence = __fence;
}
}
}
err_out: /* Wait for all copies to complete */ if (fence) {
dma_fence_wait(fence, false);
dma_fence_put(fence);
}
/** * xe_svm_init() - SVM initialize * @vm: The VM. * * Initialize SVM state which is embedded within the VM. * * Return: 0 on success, negative error code on error.
*/ int xe_svm_init(struct xe_vm *vm)
{ int err;
/** * xe_svm_close() - SVM close * @vm: The VM. * * Close SVM state (i.e., stop and flush all SVM actions).
*/ void xe_svm_close(struct xe_vm *vm)
{
xe_assert(vm->xe, xe_vm_is_closed(vm));
flush_work(&vm->svm.garbage_collector.work);
}
/** * xe_svm_fini() - SVM finalize * @vm: The VM. * * Finalize SVM state which is embedded within the VM.
*/ void xe_svm_fini(struct xe_vm *vm)
{
xe_assert(vm->xe, xe_vm_is_closed(vm));
/**
 * xe_svm_range_migrate_to_smem() - Evict a range's VRAM pages to SMEM
 * @vm: xe_vm pointer
 * @range: Pointer to the SVM range structure
 *
 * If @range currently has pages resident in VRAM, evict them back to
 * system memory via the GPU SVM layer; otherwise this is a no-op.
 */
void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range)
{
	if (!xe_svm_range_in_vram(range))
		return;

	drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
}
/** * xe_svm_range_validate() - Check if the SVM range is valid * @vm: xe_vm pointer * @range: Pointer to the SVM range structure * @tile_mask: Mask representing the tiles to be checked * @devmem_preferred : if true range needs to be in devmem * * The xe_svm_range_validate() function checks if a range is * valid and located in the desired memory region. * * Return: true if the range is valid, false otherwise
*/ bool xe_svm_range_validate(struct xe_vm *vm, struct xe_svm_range *range,
u8 tile_mask, bool devmem_preferred)
{ bool ret;
/**
 * xe_svm_find_vma_start - Find start of CPU VMA
 * @vm: xe_vm pointer
 * @start: start address
 * @end: end address
 * @vma: Pointer to struct xe_vma
 *
 * Search for a CPU VMA inside [start, end), with the window first clamped
 * to the bounds of the mirror GPU VMA @vma.
 *
 * Return: The starting address of the CPU VMA within the clamped range,
 * or ULONG_MAX if no VMA is found
 */
u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *vma)
{
	/* Clamp the search window to the GPU VMA's extent. */
	u64 lo = max(start, xe_vma_start(vma));
	u64 hi = min(end, xe_vma_end(vma));

	return drm_gpusvm_find_vma_start(&vm->svm.gpusvm, lo, hi);
}
/* Ensure the device has a pm ref while there are device pages active. */
xe_pm_runtime_get_noresume(xe);
err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
start, end, timeslice_ms,
xe_svm_devm_owner(xe)); if (err)
xe_svm_devmem_release(&bo->devmem_allocation);
staticbool supports_4K_migration(struct xe_device *xe)
{ if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) returnfalse;
returntrue;
}
/**
 * xe_svm_range_needs_migrate_to_vram() - SVM range needs migrate to VRAM or not
 * @range: SVM range for which migration needs to be decided
 * @vma: vma which has range
 * @preferred_region_is_vram: preferred region for range is vram
 *
 * Return: True for range needing migration and migration is supported else false
 */
bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma,
					bool preferred_region_is_vram)
{
	struct xe_vm *vm = range_to_vm(&range->base);
	u64 range_size = xe_svm_range_size(range);

	/* Migration must be both possible for the range and requested. */
	if (!range->base.flags.migrate_devmem || !preferred_region_is_vram)
		return false;

	xe_assert(vm->xe, IS_DGFX(vm->xe));

	/*
	 * preferred_region_is_vram is known true past the early return, so
	 * the redundant re-checks below were dropped. Also fixed fused
	 * "returnfalse"/"returntrue" tokens, which did not compile.
	 */
	if (xe_svm_range_in_vram(range)) {
		drm_info(&vm->xe->drm, "Range is already in VRAM\n");
		return false;
	}

	/* Sub-64K migrations require 4K VRAM page support. */
	if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
		drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
		return false;
	}

	return true;
}
/** * xe_svm_handle_pagefault() - SVM handle page fault * @vm: The VM. * @vma: The CPU address mirror VMA. * @gt: The gt upon the fault occurred. * @fault_addr: The GPU fault address. * @atomic: The fault atomic access bit. * * Create GPU bindings for a SVM page fault. Optionally migrate to device * memory. * * Return: 0 on success, negative error code on error.
*/ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, struct xe_gt *gt, u64 fault_addr, bool atomic)
{ struct drm_gpusvm_ctx ctx = {
.read_only = xe_vma_read_only(vma),
.devmem_possible = IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
.check_pages_threshold = IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? SZ_64K : 0,
.devmem_only = atomic && IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
.timeslice_ms = atomic && IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ?
vm->xe->atomic_svm_timeslice_ms : 0,
}; struct xe_svm_range *range; struct dma_fence *fence; struct xe_tile *tile = gt_to_tile(gt); int migrate_try_count = ctx.devmem_only ? 3 : 1;
ktime_t end = 0; int err;
/**
 * xe_svm_has_mapping() - SVM has mappings
 * @vm: The VM.
 * @start: Start address.
 * @end: End address.
 *
 * Check whether any SVM mapping exists within [start, end).
 *
 * Return: True if address range has a SVM mapping, False otherwise
 */
bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpusvm *gpusvm = &vm->svm.gpusvm;

	return drm_gpusvm_has_mapping(gpusvm, start, end);
}
/**
 * xe_svm_bo_evict() - SVM evict BO to system memory
 * @bo: BO to evict
 *
 * Evict the device pages backing @bo to system RAM. The GPU SVM layer
 * ensures all device pages are evicted before returning.
 *
 * Return: 0 on success standard error code otherwise
 */
int xe_svm_bo_evict(struct xe_bo *bo)
{
	return drm_pagemap_evict_to_ram(&bo->devmem_allocation);
}
/**
 * xe_svm_range_find_or_insert() - Find or insert GPU SVM range
 * @vm: xe_vm pointer
 * @addr: address for which range needs to be found/inserted
 * @vma: Pointer to struct xe_vma which mirrors CPU
 * @ctx: GPU SVM context
 *
 * This function finds or inserts a newly allocated SVM range based on the
 * address.
 *
 * Return: Pointer to the SVM range on success, ERR_PTR() on failure.
 */
struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr,
						 struct xe_vma *vma,
						 struct drm_gpusvm_ctx *ctx)
{
	struct drm_gpusvm_range *r;

	/* Clamp the lookup address to the mirror VMA's start. */
	r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm,
					    max(addr, xe_vma_start(vma)),
					    xe_vma_start(vma), xe_vma_end(vma),
					    ctx);
	if (IS_ERR(r))
		return ERR_CAST(r); /* idiomatic form of ERR_PTR(PTR_ERR(r)) */

	return to_xe_range(r);
}
/** * xe_svm_range_get_pages() - Get pages for a SVM range * @vm: Pointer to the struct xe_vm * @range: Pointer to the xe SVM range structure * @ctx: GPU SVM context * * This function gets pages for a SVM range and ensures they are mapped for * DMA access. In case of failure with -EOPNOTSUPP, it evicts the range. * * Return: 0 on success, negative error code on failure.
*/ int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, struct drm_gpusvm_ctx *ctx)
{ int err = 0;
/** * xe_devm_add: Remap and provide memmap backing for device memory * @tile: tile that the memory region belongs to * @vr: vram memory region to remap * * This remap device memory to host physical address space and create * struct page to back device memory * * Return: 0 on success standard error code otherwise
*/ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{ struct xe_device *xe = tile_to_xe(tile); struct device *dev = &to_pci_dev(xe->drm.dev)->dev; struct resource *res; void *addr; int ret;
res = devm_request_free_mem_region(dev, &iomem_resource,
vr->usable_size); if (IS_ERR(res)) {
ret = PTR_ERR(res); return ret;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.