// SPDX-License-Identifier: GPL-2.0 OR MIT /* * Copyright 2020-2021 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE.
*/ #include <linux/types.h> #include <linux/hmm.h> #include <linux/dma-direction.h> #include <linux/dma-mapping.h> #include <linux/migrate.h> #include"amdgpu_sync.h" #include"amdgpu_object.h" #include"amdgpu_vm.h" #include"amdgpu_res_cursor.h" #include"kfd_priv.h" #include"kfd_svm.h" #include"kfd_migrate.h" #include"kfd_smi_events.h"
/**
 * svm_migrate_copy_memory_gart - sdma copy data between ram and vram
 *
 * @adev: amdgpu device the sdma ring running
 * @sys: system DMA pointer to be copied
 * @vram: vram destination DMA pointer
 * @npages: number of pages to copy
 * @direction: enum MIGRATION_COPY_DIR
 * @mfence: output, sdma fence to signal after sdma is done
 *
 * ram address uses GART table continuous entries mapping to ram pages,
 * vram address uses direct mapping of vram pages, which must have npages
 * number of continuous pages.
 * GART update and sdma uses same buf copy function ring, sdma is split into
 * multiple GTT_MAX_PAGES transfers, all sdma operations are serialized, wait for
 * the last sdma finish fence which is returned to check copy memory is done.
 *
 * Context: Process context, takes and releases gtt_window_lock
 *
 * Return:
 * 0 - OK, otherwise error code
 */
/**
 * svm_migrate_copy_done - wait for memory copy sdma is done
 *
 * @adev: amdgpu device the sdma memory copy is executing on
 * @mfence: migrate fence
 *
 * Wait for dma fence is signaled, if the copy is split into multiple sdma
 * operations, this is the last sdma operation fence.
 *
 * Context: called after svm_migrate_copy_memory
 *
 * Return:
 * 0		- success
 * otherwise	- error code from dma fence signal
 */
static int
svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
{
	int r = 0;

	if (mfence) {
		r = dma_fence_wait(mfence, false);
		dma_fence_put(mfence);
		pr_debug("sdma copy memory fence done\n");
	}

	/* BUG FIX: the function previously fell off the end without returning
	 * (the trailing "return r; }" was lost); restore it.
	 */
	return r;
}
/*
 * NOTE(review): mid-function fragment — the enclosing function header and
 * the declarations of i, j, src, dst, npages, mpages, cursor, migrate and
 * adev are not visible in this chunk (presumably the RAM-to-VRAM copy loop
 * of svm_migrate_copy_to_vram — TODO confirm against the full file).
 *
 * Batch contiguous VRAM pages: when the current VRAM resource-cursor
 * segment is exhausted (and this is not the last page), issue one SDMA GART
 * copy for the j+1 pages accumulated so far and advance the cursor;
 * otherwise keep growing the batch.
 */
		if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) {
			r = svm_migrate_copy_memory_gart(adev, src + i - j,
							 dst + i - j, j + 1,
							 FROM_RAM_TO_VRAM,
							 mfence);
			if (r)
				goto out_free_vram_pages;
			amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
			j = 0;
		} else {
			j++;
		}
	}

	/* Flush the final partial batch (j pages) with one more SDMA copy. */
	r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
					 FROM_RAM_TO_VRAM, mfence);

out_free_vram_pages:
	if (r) {
		pr_debug("failed %d to copy memory to vram\n", r);
		/* Error unwind: release every VRAM page already allocated for
		 * this migration and clear its dst entry.
		 */
		for (i = 0; i < npages && mpages; i++) {
			if (!dst[i])
				continue;
			svm_migrate_put_vram_page(adev, dst[i]);
			migrate->dst[i] = 0;
			mpages--;
		}
	}
r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
prange->npages * PAGE_SIZE,
KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
node->xcp ? node->xcp->id : 0); if (r) {
dev_dbg(node->adev->dev, "failed to reserve VRAM, r: %ld\n", r); return -ENOSPC;
}
r = svm_range_vram_node_new(node, prange, true); if (r) {
dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r); goto out;
}
ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT;
for (addr = start; addr < end;) { unsignedlong next;
vma = vma_lookup(mm, addr); if (!vma) break;
next = min(vma->vm_end, end);
r = svm_migrate_vma_to_vram(node, prange, vma, addr, next, trigger, ttm_res_offset); if (r < 0) {
pr_debug("failed %ld to migrate\n", r); break;
} else {
mpages += r;
}
ttm_res_offset += next - addr;
addr = next;
}
if (mpages) {
prange->actual_loc = best_loc;
prange->vram_pages += mpages;
} elseif (!prange->actual_loc) { /* if no page migrated and all pages from prange are at * sys ram drop svm_bo got from svm_range_vram_node_new
*/
svm_range_vram_node_free(prange);
}
out:
amdgpu_amdkfd_unreserve_mem_limit(node->adev,
prange->npages * PAGE_SIZE,
KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
node->xcp ? node->xcp->id : 0); return r < 0 ? r : 0;
}
/**
 * svm_migrate_vram_to_ram - migrate svm range from device to system
 * @prange: range structure
 * @mm: process mm, use current->mm if NULL
 * @start_mgr: start page need be migrated to sys ram
 * @last_mgr: last page need be migrated to sys ram
 * @trigger: reason of migration
 * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback
 *
 * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
 *
 * Return:
 * 0 - OK, otherwise error code
 */
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
			    unsigned long start_mgr, unsigned long last_mgr,
			    uint32_t trigger, struct page *fault_page)
{
	struct kfd_node *node;
	struct vm_area_struct *vma;
	unsigned long addr;
	unsigned long start;
	unsigned long end;
	unsigned long mpages = 0;
	long r = 0;

	/* this prange has no any vram page to migrate to sys ram */
	if (!prange->actual_loc) {
		pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
			 prange->start, prange->last);
		return 0;
	}

	node = svm_range_get_node_by_id(prange, prange->actual_loc);
	if (!node) {
		pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc);
		return -ENODEV;
	}
	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
		 prange->svms, prange, start_mgr, last_mgr,
		 prange->actual_loc);

	/* BUG FIX: start/end were used as loop bounds below but never
	 * assigned; compute the byte range covering pages
	 * [start_mgr, last_mgr].
	 */
	start = start_mgr << PAGE_SHIFT;
	end = (last_mgr + 1) << PAGE_SHIFT;

	/* Migrate VMA by VMA; svm_migrate_vma_to_ram returns the number of
	 * pages migrated, or a negative error code.
	 */
	for (addr = start; addr < end;) {
		unsigned long next;

		vma = vma_lookup(mm, addr);
		if (!vma) {
			pr_debug("failed to find vma for prange %p\n", prange);
			r = -EFAULT;
			break;
		}

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_ram(node, prange, vma, addr, next, trigger,
					   fault_page);
		if (r < 0) {
			pr_debug("failed %ld to migrate prange %p\n", r, prange);
			break;
		} else {
			mpages += r;
		}
		addr = next;
	}

	if (r >= 0) {
		prange->vram_pages -= mpages;

		/* prange does not have vram page set its actual_loc to system
		 * and drop its svm_bo ref
		 */
		if (prange->vram_pages == 0 && prange->ttm_res) {
			prange->actual_loc = 0;
			svm_range_vram_node_free(prange);
		}
	}

	return r < 0 ? r : 0;
}
/**
 * svm_migrate_vram_to_vram - migrate svm range from device to device
 * @prange: range structure
 * @best_loc: the device to migrate to
 * @start: start page need be migrated to sys ram
 * @last: last page need be migrated to sys ram
 * @mm: process mm, use current->mm if NULL
 * @trigger: reason of migration
 *
 * Context: Process context, caller hold mmap read lock, svms lock, prange lock
 *
 * migrate all vram pages in prange to sys ram, then migrate
 * [start, last] pages from sys ram to gpu node best_loc.
 *
 * Return:
 * 0 - OK, otherwise error code
 */
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
			 unsigned long start, unsigned long last,
			 struct mm_struct *mm, uint32_t trigger)
{
	int r, retries = 3;

	/*
	 * TODO: for both devices with PCIe large bar or on same xgmi hive, skip
	 * system memory as migration bridge
	 */

	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);

	/* Drain all VRAM pages to system ram first; retry a few times in case
	 * some pages could not be migrated in one pass.
	 */
	do {
		r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
					    trigger, NULL);
		if (r)
			return r;
	} while (prange->actual_loc && --retries);

	if (prange->actual_loc)
		return -EDEADLK;

	return svm_migrate_ram_to_vram(prange, best_loc, start, last, mm, trigger);
}
/*
 * svm_migrate_to_vram - route a migration-to-VRAM request.
 *
 * Go straight from system ram when the range has no vram pages yet, or when
 * its vram pages already live on best_loc; otherwise bounce through system
 * memory via svm_migrate_vram_to_vram().
 */
int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
		    unsigned long start, unsigned long last,
		    struct mm_struct *mm, uint32_t trigger)
{
	if (!prange->actual_loc || prange->actual_loc == best_loc)
		return svm_migrate_ram_to_vram(prange, best_loc, start, last,
					       mm, trigger);
	else
		return svm_migrate_vram_to_vram(prange, best_loc, start, last,
						mm, trigger);
}
/**
 * svm_migrate_to_ram - CPU page fault handler
 * @vmf: CPU vm fault vma, address
 *
 * Context: vm fault handler, caller holds the mmap read lock
 *
 * Return:
 * 0 - OK
 * VM_FAULT_SIGBUS - notice application to have SIGBUS page fault
 */
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
	unsigned long start, last, size;
	unsigned long addr = vmf->address;
	struct svm_range_bo *svm_bo;
	struct svm_range *prange;
	struct kfd_process *p;
	struct mm_struct *mm;
	int r = 0;

	svm_bo = vmf->page->zone_device_data;
	if (!svm_bo) {
		pr_debug("failed get device page at addr 0x%lx\n", addr);
		return VM_FAULT_SIGBUS;
	}
	/* Pin the owning process mm; it may be tearing down concurrently. */
	if (!mmget_not_zero(svm_bo->eviction_fence->mm)) {
		pr_debug("addr 0x%lx of process mm is destroyed\n", addr);
		return VM_FAULT_SIGBUS;
	}

	mm = svm_bo->eviction_fence->mm;
	if (mm != vmf->vma->vm_mm)
		pr_debug("addr 0x%lx is COW mapping in child process\n", addr);

	p = kfd_lookup_process_by_mm(mm);
	if (!p) {
		pr_debug("failed find process at fault address 0x%lx\n", addr);
		r = VM_FAULT_SIGBUS;
		goto out_mmput;
	}
	/* Avoid recursive migration while this task is already faulting. */
	if (READ_ONCE(p->svms.faulting_task) == current) {
		pr_debug("skipping ram migration\n");
		r = 0;
		goto out_unref_process;
	}
	/* NOTE(review): the function body is truncated at this point in this
	 * chunk — the actual migration call and the out_unref_process /
	 * out_mmput cleanup labels are not visible here.
	 */