// SPDX-License-Identifier: MIT /* * Copyright 2014-2018 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE.
*/ #include <linux/dma-buf.h> #include <linux/list.h> #include <linux/pagemap.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> #include <drm/ttm/ttm_tt.h>
/* Userptr restore delay, just long enough to allow consecutive VM * changes to accumulate
*/ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 #define AMDGPU_RESERVE_MEM_LIMIT (3UL << 29)
/* * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB * BO chunk
*/ #define VRAM_AVAILABLITY_ALIGN (1 << 21)
/* Impose limit on how much memory KFD can use */ staticstruct {
uint64_t max_system_mem_limit;
uint64_t max_ttm_mem_limit;
int64_t system_mem_used;
int64_t ttm_mem_used;
spinlock_t mem_limit_lock;
} kfd_mem_limit;
list_for_each_entry(entry, &mem->attachments, list) if (entry->bo_va->base.vm == avm) returntrue;
returnfalse;
}
/** * reuse_dmamap() - Check whether adev can share the original * userptr BO * * If both adev and bo_adev are in direct mapping or * in the same iommu group, they can share the original BO. * * @adev: Device to which can or cannot share the original BO * @bo_adev: Device to which allocated BO belongs to * * Return: returns true if adev can share original userptr BO, * false otherwise.
*/ staticbool reuse_dmamap(struct amdgpu_device *adev, struct amdgpu_device *bo_adev)
{ return (adev->ram_is_direct_mapped && bo_adev->ram_is_direct_mapped) ||
(adev->dev->iommu_group == bo_adev->dev->iommu_group);
}
/* Set memory usage limits. Current, limits are * System (TTM + userptr) memory - 15/16th System RAM * TTM memory - 3/8th System RAM
*/ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{ struct sysinfo si;
uint64_t mem;
if (kfd_mem_limit.max_system_mem_limit) return;
si_meminfo(&si);
mem = si.totalram - si.totalhigh;
mem *= si.mem_unit;
/* Estimate page table size needed to represent a given memory size * * With 4KB pages, we need one 8 byte PTE for each 4KB of memory * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB * of memory (factor 256K, >> 18). ROCm user mode tries to optimize * for 2MB pages for TLB efficiency. However, small allocations and * fragmented system memory still need some 4KB pages. We choose a * compromise that should work in most cases without reserving too * much memory for page tables unnecessarily (factor 16K, >> 14).
*/
/** * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size * of buffer. * * @adev: Device to which allocated BO belongs to * @size: Size of buffer, in bytes, encapsulated by B0. This should be * equivalent to amdgpu_bo_size(BO) * @alloc_flag: Flag used in allocating a BO as noted above * @xcp_id: xcp_id is used to get xcp from xcp manager, one xcp is * managed as one compute node in driver for app * * Return: * returns -ENOMEM in case of error, ZERO otherwise
*/ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag, int8_t xcp_id)
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
size_t system_mem_needed, ttm_mem_needed, vram_needed; int ret = 0;
uint64_t vram_size = 0;
system_mem_needed = 0;
ttm_mem_needed = 0;
vram_needed = 0; if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
system_mem_needed = size;
ttm_mem_needed = size;
} elseif (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { /* * Conservatively round up the allocation requirement to 2 MB * to avoid fragmentation caused by 4K allocations in the tail * 2M BO chunk.
*/
vram_needed = size; /* * For GFX 9.4.3, get the VRAM size from XCP structs
*/ if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id)) return -EINVAL;
if (kfd_mem_limit.system_mem_used + system_mem_needed >
kfd_mem_limit.max_system_mem_limit) {
pr_debug("Set no_system_mem_limit=1 if using shared memory\n"); if (!no_system_mem_limit) {
ret = -ENOMEM; goto release;
}
}
if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
kfd_mem_limit.max_ttm_mem_limit) {
ret = -ENOMEM; goto release;
}
/*if is_app_apu is false and apu_prefer_gtt is true, it is an APU with * carve out < gtt. In that case, VRAM allocation will go to gtt domain, skip * VRAM check since ttm_mem_limit check already cover this allocation
*/
/** * create_dmamap_sg_bo() - Creates a amdgpu_bo object to reflect information * about USERPTR or DOOREBELL or MMIO BO. * * @adev: Device for which dmamap BO is being created * @mem: BO of peer device that is being DMA mapped. Provides parameters * in building the dmamap BO * @bo_out: Output parameter updated with handle of dmamap BO
*/ staticint
create_dmamap_sg_bo(struct amdgpu_device *adev, struct kgd_mem *mem, struct amdgpu_bo **bo_out)
{ struct drm_gem_object *gem_obj; int ret;
uint64_t flags = 0;
ret = amdgpu_bo_reserve(mem->bo, false); if (ret) return ret;
/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
 * reservation object.
 *
 * @bo: [IN] Remove eviction fence(s) from this BO
 * @ef: [IN] This eviction fence is removed if it
 *  is present in the shared list.
 *
 * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
 *
 * Return: 0 on success, -EINVAL if @ef is NULL.
 */
static int
amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
				    struct amdgpu_amdkfd_fence *ef)
{
	struct dma_fence *replacement;

	if (!ef)
		return -EINVAL;

	/* TODO: Instead of block before we should use the fence of the page
	 * table update and TLB flush here directly.
	 */
	replacement = dma_fence_get_stub();
	/* Replace every fence with @ef's context by an already-signaled stub,
	 * effectively dropping the eviction fence from the reservation object.
	 */
	dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
				replacement, DMA_RESV_USAGE_BOOKKEEP);
	dma_fence_put(replacement);
	return 0;
}
/** * amdgpu_amdkfd_remove_all_eviction_fences - Remove all eviction fences * @bo: the BO where to remove the evictions fences from. * * This functions should only be used on release when all references to the BO * are already dropped. We remove the eviction fence from the private copy of * the dma_resv object here since that is what is used during release to * determine of the BO is idle or not.
*/ void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
{ struct dma_resv *resv = &bo->tbo.base._resv; struct dma_fence *fence, *stub; struct dma_resv_iter cursor;
/** * create_sg_table() - Create an sg_table for a contiguous DMA addr range * @addr: The starting address to point to * @size: Size of memory area in bytes being pointed to * * Allocates an instance of sg_table and initializes it to point to memory * area specified by input parameters. The address used to build is assumed * to be DMA mapped, if needed. * * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table * because they are physically contiguous. * * Return: Initialized instance of SG Table or NULL
*/ staticstruct sg_table *create_sg_table(uint64_t addr, uint32_t size)
{ struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
if (WARN_ON(ttm->num_pages != src_ttm->num_pages)) return -EINVAL;
ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL); if (unlikely(!ttm->sg)) return -ENOMEM;
/* Same sequence as in amdgpu_ttm_tt_pin_userptr */
ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
ttm->num_pages, 0,
(u64)ttm->num_pages << PAGE_SHIFT,
GFP_KERNEL); if (unlikely(ret)) goto free_sg;
ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0); if (unlikely(ret)) goto release_sg;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) goto unmap_sg;
/** * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO * @mem: SG BO of the DOORBELL or MMIO resource on the owning device * @attachment: Virtual address attachment of the BO on accessing device * * An access request from the device that owns DOORBELL does not require DMA mapping. * This is because the request doesn't go through PCIe root complex i.e. it instead * loops back. The need to DMA map arises only when accessing peer device's DOORBELL * * In contrast, all access requests for MMIO need to be DMA mapped without regard to * device ownership. This is because access requests for MMIO go through PCIe root * complex. * * This is accomplished in two steps: * - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used * in updating requesting device's page table * - Signal TTM to mark memory pointed to by requesting device's BO as GPU * accessible. This allows an update of requesting device's page table * with entries associated with DOOREBELL or MMIO memory * * This method is invoked in the following contexts: * - Mapping of DOORBELL or MMIO BO of same or peer device * - Validating an evicted DOOREBELL or MMIO BO on device seeking access * * Return: ZERO if successful, NON-ZERO otherwise
*/ staticint
kfd_mem_dmamap_sg_bo(struct kgd_mem *mem, struct kfd_mem_attachment *attachment)
{ struct ttm_operation_ctx ctx = {.interruptible = true}; struct amdgpu_bo *bo = attachment->bo_va->base.bo; struct amdgpu_device *adev = attachment->adev; struct ttm_tt *ttm = bo->tbo.ttm; enum dma_data_direction dir;
dma_addr_t dma_addr; bool mmio; int ret;
/* Expect SG Table of dmapmap BO to be NULL */
mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP); if (unlikely(ttm->sg)) {
pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio); return -EINVAL;
}
dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
dma_addr = mem->bo->tbo.sg->sgl->dma_address;
pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
dma_addr = dma_map_resource(adev->dev, dma_addr,
mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
ret = dma_mapping_error(adev->dev, dma_addr); if (unlikely(ret)) return ret;
pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length); if (unlikely(!ttm->sg)) {
ret = -ENOMEM; goto unmap_sg;
}
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (unlikely(ret)) goto free_sg;
/* kfd_mem_dmaunmap_dmabuf - Intentionally empty DMA-unmap hook for DMABuf
 * attachments.
 *
 * @attachment: Attachment of the BO on the accessing device (unused)
 */
static void
kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
{
	/* This is a no-op. We don't want to trigger eviction fences when
	 * unmapping DMABufs. Therefore the invalidation (moving to system
	 * domain) is done in kfd_mem_dmamap_dmabuf.
	 */
}
/** * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO * @mem: SG BO of the DOORBELL or MMIO resource on the owning device * @attachment: Virtual address attachment of the BO on accessing device * * The method performs following steps: * - Signal TTM to mark memory pointed to by BO as GPU inaccessible * - Free SG Table that is used to encapsulate DMA mapped memory of * peer device's DOORBELL or MMIO memory * * This method is invoked in the following contexts: * UNMapping of DOORBELL or MMIO BO on a device having access to its memory * Eviction of DOOREBELL or MMIO BO on device having access to its memory * * Return: void
*/ staticvoid
kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem, struct kfd_mem_attachment *attachment)
{ struct ttm_operation_ctx ctx = {.interruptible = true}; struct amdgpu_bo *bo = attachment->bo_va->base.bo; struct amdgpu_device *adev = attachment->adev; struct ttm_tt *ttm = bo->tbo.ttm; enum dma_data_direction dir;
if (unlikely(!ttm->sg)) {
pr_debug("SG Table of BO is NULL"); return;
}
/* kfd_mem_attach - Add a BO to a VM
 *
 * Everything that needs to be done only once when a BO is first added
 * to a VM. It can later be mapped and unmapped many times without
 * repeating these steps.
 *
 * 0. Create BO for DMA mapping, if needed
 * 1. Allocate and initialize BO VA entry data structure
 * 2. Add BO to the VM
 * 3. Determine ASIC-specific PTE flags
 * 4. Alloc page tables and directories if needed
 * 4a. Validate new page tables and directories
 */
static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
			  struct amdgpu_vm *vm, bool is_aql)
{
	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
	unsigned long bo_size = mem->bo->tbo.base.size;
	uint64_t va = mem->va;
	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
	struct amdgpu_bo *bo[2] = {NULL, NULL};
	struct amdgpu_bo_va *bo_va;
	bool same_hive = false;
	int i, ret;

	if (!va) {
		pr_err("Invalid VA when adding BO to VM\n");
		return -EINVAL;
	}

	/* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices
	 *
	 * The access path of MMIO and DOORBELL BOs is always over PCIe.
	 * In contrast the access path of VRAM BOs depends upon the type of
	 * link that connects the peer device. Access over PCIe is allowed
	 * if peer device has large BAR. In contrast, access over xGMI is
	 * allowed for both small and large BAR configurations of peer device
	 */
	if ((adev != bo_adev && !adev->apu_prefer_gtt) &&
	    ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
	     (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
	     (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
		if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM)
			same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
		if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev))
			return -EINVAL;
	}

	/* AQL queues get a second, mirrored attachment (i == 1) */
	for (i = 0; i <= is_aql; i++) {
		attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
		if (unlikely(!attachment[i])) {
			ret = -ENOMEM;
			goto unwind;
		}

		pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
			 va + bo_size, vm);

		if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
		    (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) ||
		    (mem->domain == AMDGPU_GEM_DOMAIN_GTT && reuse_dmamap(adev, bo_adev)) ||
		    same_hive) {
			/* Mappings on the local GPU, or VRAM mappings in the
			 * local hive, or userptr, or GTT mapping can reuse dma map
			 * address space share the original BO
			 */
			attachment[i]->type = KFD_MEM_ATT_SHARED;
			bo[i] = mem->bo;
			drm_gem_object_get(&bo[i]->tbo.base);
		} else if (i > 0) {
			/* Multiple mappings on the same GPU share the BO */
			attachment[i]->type = KFD_MEM_ATT_SHARED;
			bo[i] = bo[0];
			drm_gem_object_get(&bo[i]->tbo.base);
		} else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
			/* Create an SG BO to DMA-map userptrs on other GPUs */
			attachment[i]->type = KFD_MEM_ATT_USERPTR;
			ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
			if (ret)
				goto unwind;
		/* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */
		} else if (mem->bo->tbo.type == ttm_bo_type_sg) {
			WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL ||
				    mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP),
				  "Handling invalid SG BO in ATTACH request");
			attachment[i]->type = KFD_MEM_ATT_SG;
			ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
			if (ret)
				goto unwind;
		/* Enable access to GTT and VRAM BOs of peer devices */
		} else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT ||
			   mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
			attachment[i]->type = KFD_MEM_ATT_DMABUF;
			ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
			if (ret)
				goto unwind;
			pr_debug("Employ DMABUF mechanism to enable peer GPU access\n");
		} else {
			WARN_ONCE(true, "Handling invalid ATTACH request");
			ret = -EINVAL;
			goto unwind;
		}

		/* Add BO to VM internal data structures */
		ret = amdgpu_bo_reserve(bo[i], false);
		if (ret) {
			pr_debug("Unable to reserve BO during memory attach");
			goto unwind;
		}
		bo_va = amdgpu_vm_bo_find(vm, bo[i]);
		if (!bo_va)
			bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
		else
			++bo_va->ref_count;
		attachment[i]->bo_va = bo_va;
		amdgpu_bo_unreserve(bo[i]);
		if (unlikely(!attachment[i]->bo_va)) {
			ret = -ENOMEM;
			pr_err("Failed to add BO object to VM. ret == %d\n",
			       ret);
			goto unwind;
		}
		attachment[i]->va = va;
		attachment[i]->pte_flags = get_pte_flags(adev, mem);
		attachment[i]->adev = adev;
		list_add(&attachment[i]->list, &mem->attachments);

		va += bo_size;
	}

	return 0;

unwind:
	/* Undo everything that was set up so far, from the current i down */
	for (; i >= 0; i--) {
		if (!attachment[i])
			continue;
		if (attachment[i]->bo_va) {
			(void)amdgpu_bo_reserve(bo[i], true);
			if (--attachment[i]->bo_va->ref_count == 0)
				amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
			amdgpu_bo_unreserve(bo[i]);
			list_del(&attachment[i]->list);
		}
		if (bo[i])
			drm_gem_object_put(&bo[i]->tbo.base);
		kfree(attachment[i]);
	}
	return ret;
}
/* Initializes user pages. It registers the MMU notifier and validates * the userptr BO in the GTT domain. * * The BO must already be on the userptr_valid_list. Otherwise an * eviction and restore may happen that leaves the new BO unmapped * with the user mode queues running. * * Takes the process_info->lock to protect against concurrent restore * workers. * * Returns 0 for success, negative errno for errors.
*/ staticint init_user_pages(struct kgd_mem *mem, uint64_t user_addr, bool criu_resume)
{ struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; struct ttm_operation_ctx ctx = { true, false }; struct hmm_range *range; int ret = 0;
mutex_lock(&process_info->lock);
ret = amdgpu_ttm_tt_set_userptr(&bo->tbo, user_addr, 0); if (ret) {
pr_err("%s: Failed to set userptr: %d\n", __func__, ret); goto out;
}
ret = amdgpu_hmm_register(bo, user_addr); if (ret) {
pr_err("%s: Failed to register MMU notifier: %d\n",
__func__, ret); goto out;
}
if (criu_resume) { /* * During a CRIU restore operation, the userptr buffer objects * will be validated in the restore_userptr_work worker at a * later stage when it is scheduled by another ioctl called by * CRIU master process for the target pid for restore.
*/
mutex_lock(&process_info->notifier_lock);
mem->invalid++;
mutex_unlock(&process_info->notifier_lock);
mutex_unlock(&process_info->lock); return 0;
}
ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range); if (ret) { if (ret == -EAGAIN)
pr_debug("Failed to get user pages, try again\n"); else
pr_err("%s: Failed to get user pages: %d\n", __func__, ret); goto unregister_out;
}
ret = amdgpu_bo_reserve(bo, true); if (ret) {
pr_err("%s: Failed to reserve BO\n", __func__); goto release_out;
}
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret)
pr_err("%s: failed to validate BO\n", __func__);
amdgpu_bo_unreserve(bo);
/* Reserving a BO and its page table BOs must happen atomically to
 * avoid deadlocks. Some operations update multiple VMs at once. Track
 * all the reservation info in a context structure. Optionally a sync
 * object can track VM updates.
 */
struct bo_vm_reservation_context {
	/* DRM execution context for the reservation */
	struct drm_exec exec;
	/* Number of VMs reserved */
	unsigned int n_vms;
	/* Pointer to sync object */
	struct amdgpu_sync *sync;
};
/* Filter modes selecting which of a BO's VMs to operate on; used as the
 * map_type argument of reserve_bo_and_cond_vms().
 */
enum bo_vm_match {
BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */
BO_VM_MAPPED, /* Match VMs where a BO is mapped */
BO_VM_ALL, /* Match all VMs a BO was added to */
};
/** * reserve_bo_and_vm - reserve a BO and a VM unconditionally. * @mem: KFD BO structure. * @vm: the VM to reserve. * @ctx: the struct that will be used in unreserve_bo_and_vms().
*/ staticint reserve_bo_and_vm(struct kgd_mem *mem, struct amdgpu_vm *vm, struct bo_vm_reservation_context *ctx)
/* NOTE(review): this function body looks truncated/garbled: "staticint" is a
 * fused token, and the opening of a drm_exec locking loop (drm_exec_init() /
 * drm_exec_until_all_locked(), plus the ctx->n_vms / ctx->sync setup) appears
 * to be missing before the drm_exec_prepare_obj() call -- the stray '}' before
 * "return 0" closes that loop. Restore from the upstream source; do not ship
 * as-is.
 */
{ struct amdgpu_bo *bo = mem->bo; int ret;
/* Lock the BO's GEM object within the (missing) drm_exec retry loop */
ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
drm_exec_retry_on_contention(&ctx->exec); if (unlikely(ret)) goto error;
} return 0;
error:
pr_err("Failed to reserve buffers in ttm.\n");
/* Drop any locks taken before the failure */
drm_exec_fini(&ctx->exec); return ret;
}
/** * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally * @mem: KFD BO structure. * @vm: the VM to reserve. If NULL, then all VMs associated with the BO * is used. Otherwise, a single VM associated with the BO. * @map_type: the mapping status that will be used to filter the VMs. * @ctx: the struct that will be used in unreserve_bo_and_vms(). * * Returns 0 for success, negative for failure.
*/ staticint reserve_bo_and_cond_vms(struct kgd_mem *mem, struct amdgpu_vm *vm, enum bo_vm_match map_type, struct bo_vm_reservation_context *ctx)
/* NOTE(review): this function body looks truncated/garbled: "staticint" is a
 * fused token, and the opening of the attachment iteration
 * (list_for_each_entry over mem->attachments, with the vm/map_type filter)
 * and of the drm_exec locking loop (drm_exec_init() /
 * drm_exec_until_all_locked(), plus ctx initialization) appear to be missing
 * -- as written, 'entry' is used uninitialized and the stray '}' tokens close
 * loops that were never opened. Restore from the upstream source; do not ship
 * as-is.
 */
{ struct kfd_mem_attachment *entry; struct amdgpu_bo *bo = mem->bo; int ret;
/* Lock the page directory of each matching VM (inside the missing loop) */
ret = amdgpu_vm_lock_pd(entry->bo_va->base.vm,
&ctx->exec, 2);
drm_exec_retry_on_contention(&ctx->exec); if (unlikely(ret)) goto error;
/* Count how many VMs were actually reserved for the caller */
++ctx->n_vms;
}
/* Lock the BO's GEM object itself */
ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
drm_exec_retry_on_contention(&ctx->exec); if (unlikely(ret)) goto error;
} return 0;
error:
pr_err("Failed to reserve buffers in ttm.\n");
/* Drop any locks taken before the failure */
drm_exec_fini(&ctx->exec); return ret;
}
/** * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context * @ctx: Reservation context to unreserve * @wait: Optionally wait for a sync object representing pending VM updates * @intr: Whether the wait is interruptible * * Also frees any resources allocated in * reserve_bo_and_(cond_)vm(s). Returns the status from * amdgpu_sync_wait.
*/ staticint unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, bool wait, bool intr)
{ int ret = 0;
if (wait)
ret = amdgpu_sync_wait(ctx->sync, intr);
/* Set virtual address for the allocation */
ret = amdgpu_vm_bo_map(entry->adev, entry->bo_va, entry->va, 0,
amdgpu_bo_size(entry->bo_va->base.bo),
entry->pte_flags); if (ret) {
pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
entry->va, ret); return ret;
}
if (no_update_pte) return 0;
ret = update_gpuvm_pte(mem, entry, sync); if (ret) {
pr_err("update_gpuvm_pte() failed\n"); goto update_gpuvm_pte_failed;
}
/* Validate page directory and attach eviction fence */
ret = amdgpu_bo_reserve(vm->root.bo, true); if (ret) goto reserve_pd_fail;
ret = vm_validate_pt_pd_bos(vm, NULL); if (ret) {
pr_err("validate_pt_pd_bos() failed\n"); goto validate_pd_fail;
}
ret = amdgpu_bo_sync_wait(vm->root.bo,
AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail;
ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail;
dma_resv_add_fence(vm->root.bo->tbo.base.resv,
&vm->process_info->eviction_fence->base,
DMA_RESV_USAGE_BOOKKEEP);
amdgpu_bo_unreserve(vm->root.bo);
/* Update process info */
mutex_lock(&vm->process_info->lock);
list_add_tail(&vm->vm_list_node,
&(vm->process_info->vm_list_head));
vm->process_info->n_vms++; if (ef)
*ef = dma_fence_get(&vm->process_info->eviction_fence->base);
mutex_unlock(&vm->process_info->lock);
/** * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using following criteria * @bo: Handle of buffer object being pinned * @domain: Domain into which BO should be pinned * * - USERPTR BOs are UNPINNABLE and will return error * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their * PIN count incremented. It is valid to PIN a BO multiple times * * Return: ZERO if successful in pinning, Non-Zero in case of error.
*/ staticint amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
{ int ret = 0;
ret = amdgpu_bo_reserve(bo, false); if (unlikely(ret)) return ret;
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) { /* * If bo is not contiguous on VRAM, move to system memory first to ensure * we can get contiguous VRAM space after evicting other BOs.
*/ if (!(bo->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) { struct ttm_operation_ctx ctx = { true, false };
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (unlikely(ret)) {
pr_debug("validate bo 0x%p to GTT failed %d\n", &bo->tbo, ret); goto out;
}
}
}
ret = amdgpu_bo_pin(bo, domain); if (ret)
pr_err("Error in Pinning BO to domain: %d\n", domain);
/**
 * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins BO using following criteria
 * @bo: Handle of buffer object being unpinned
 *
 * - Is a illegal request for USERPTR BOs and is ignored
 * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
 *   PIN count decremented. Calls to UNPIN must balance calls to PIN
 */
static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
{
	int ret;

	/* If the BO cannot be reserved, silently give up the unpin */
	ret = amdgpu_bo_reserve(bo, false);
	if (unlikely(ret))
		return;

	amdgpu_bo_unpin(bo);
	amdgpu_bo_unreserve(bo);
}
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, struct amdgpu_vm *avm, void **process_info, struct dma_fence **ef)
{ int ret;
/* Already a compute VM? */ if (avm->process_info) return -EINVAL;
/* Convert VM into a compute VM */
ret = amdgpu_vm_make_compute(adev, avm); if (ret) return ret;
/* Initialize KFD part of the VM and process info */
ret = init_kfd_vm(avm, process_info, ef); if (ret) return ret;
/* Update process info */
mutex_lock(&process_info->lock);
process_info->n_vms--;
list_del(&vm->vm_list_node);
mutex_unlock(&process_info->lock);
vm->process_info = NULL;
/* Release per-process resources when last compute VM is destroyed */ if (!process_info->n_vms) {
WARN_ON(!list_empty(&process_info->kfd_bo_list));
WARN_ON(!list_empty(&process_info->userptr_valid_list));
WARN_ON(!list_empty(&process_info->userptr_inval_list));
/* Workaround for AQL queue wraparound bug. Map the same * memory twice. That means we only actually allocate half * the memory.
*/ if ((*mem)->aql_queue)
size >>= 1;
aligned_size = PAGE_ALIGN(size);
(*mem)->alloc_flags = flags;
amdgpu_sync_create(&(*mem)->sync);
ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags,
xcp_id); if (ret) {
pr_debug("Insufficient memory\n"); goto err_reserve_limit;
}
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n",
va, (*mem)->aql_queue ? size << 1 : size,
domain_string(alloc_domain), xcp_id);
ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
bo_type, NULL, &gobj, xcp_id + 1); if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
domain_string(alloc_domain), ret); goto err_bo_create;
}
ret = drm_vma_node_allow(&gobj->vma_node, drm_priv); if (ret) {
pr_debug("Failed to allow vma node access. ret %d\n", ret); goto err_node_allow;
}
ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle); if (ret) goto err_gem_handle_create;
bo = gem_to_amdgpu_bo(gobj); if (bo_type == ttm_bo_type_sg) {
bo->tbo.sg = sg;
bo->tbo.ttm->sg = sg;
}
bo->kfd_bo = *mem;
(*mem)->bo = bo; if (user_addr)
bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
/* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ if (mem->alloc_flags &
(KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
}
mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
is_imported = mem->is_imported;
mutex_unlock(&mem->lock); /* lock is not needed after this, since mem is unused and will * be freed anyway
*/
if (mapped_to_gpu_memory > 0) {
pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
mem->va, bo_size); return -EBUSY;
}
/* Make sure restore workers don't access the BO any more */
mutex_lock(&process_info->lock);
list_del(&mem->validate_list);
mutex_unlock(&process_info->lock);
/* Cleanup user pages and MMU notifiers */ if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
amdgpu_hmm_unregister(mem->bo);
mutex_lock(&process_info->notifier_lock);
amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range);
mutex_unlock(&process_info->notifier_lock);
}
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) return ret;
/* Remove from VM internal data structures */
list_for_each_entry_safe(entry, tmp, &mem->attachments, list) {
kfd_mem_dmaunmap_attachment(mem, entry);
kfd_mem_detach(entry);
}
ret = unreserve_bo_and_vms(&ctx, false, false);
/* Free the sync object */
amdgpu_sync_free(&mem->sync);
/* If the SG is not NULL, it's one we created for a doorbell or mmio * remap BO. We need to free it.
*/ if (mem->bo->tbo.sg) {
sg_free_table(mem->bo->tbo.sg);
kfree(mem->bo->tbo.sg);
}
/* Update the size of the BO being freed if it was allocated from * VRAM and is not imported. For APP APU VRAM allocations are done * in GTT domain
*/ if (size) { if (!is_imported &&
(mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
(adev->apu_prefer_gtt &&
mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
*size = bo_size; else
*size = 0;
}
/* Free the BO*/
drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle); if (mem->dmabuf) {
dma_buf_put(mem->dmabuf);
mem->dmabuf = NULL;
}
mutex_destroy(&mem->lock);
/* If this releases the last reference, it will end up calling * amdgpu_amdkfd_release_notify and kfree the mem struct. That's why * this needs to be the last call here.
*/
drm_gem_object_put(&mem->bo->tbo.base);
/* * For kgd_mem allocated in amdgpu_amdkfd_gpuvm_import_dmabuf(), * explicitly free it here.
*/ if (!use_release_notifier)
kfree(mem);
bo = mem->bo; if (!bo) {
pr_err("Invalid BO when mapping memory to GPU\n"); return -EINVAL;
}
/* Make sure restore is not running concurrently. Since we * don't map invalid userptr BOs, we rely on the next restore * worker to do the mapping
*/
mutex_lock(&mem->process_info->lock);
/* Lock notifier lock. If we find an invalid userptr BO, we can be * sure that the MMU notifier is no longer running * concurrently and the queues are actually stopped
*/ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
mutex_lock(&mem->process_info->notifier_lock);
is_invalid_userptr = !!mem->invalid;
mutex_unlock(&mem->process_info->notifier_lock);
}
pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
mem->va,
mem->va + bo_size * (1 + mem->aql_queue),
avm, domain_string(domain));
if (!kfd_mem_is_attached(avm, mem)) {
ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue); if (ret) goto out;
}
ret = reserve_bo_and_vm(mem, avm, &ctx); if (unlikely(ret)) goto out;
/* Userptr can be marked as "not invalid", but not actually be * validated yet (still in the system domain). In that case * the queues are still stopped and we can leave mapping for * the next restore worker
*/ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
is_invalid_userptr = true;
ret = vm_validate_pt_pd_bos(avm, NULL); if (unlikely(ret)) goto out_unreserve;
ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx); if (unlikely(ret)) goto out; /* If no VMs were reserved, it means the BO wasn't actually mapped */ if (ctx.n_vms == 0) {
ret = -EINVAL; goto unreserve_out;
}
ret = vm_validate_pt_pd_bos(avm, NULL); if (unlikely(ret)) goto unreserve_out;
pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
mem->va,
mem->va + bo_size * (1 + mem->aql_queue),
avm);
ret = amdgpu_sync_wait(&sync, intr);
amdgpu_sync_free(&sync); return ret;
}
/** * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count * @bo: Buffer object to be mapped * @bo_gart: Return bo reference * * Before return, bo reference count is incremented. To release the reference and unpin/ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
*/ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart)
{ int ret;
ret = amdgpu_bo_reserve(bo, true); if (ret) {
pr_err("Failed to reserve bo. ret %d\n", ret); goto err_reserve_bo_failed;
}
ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); if (ret) {
pr_err("Failed to pin bo. ret %d\n", ret); goto err_pin_bo_failed;
}
ret = amdgpu_ttm_alloc_gart(&bo->tbo); if (ret) {
pr_err("Failed to bind bo to GART. ret %d\n", ret); goto err_map_bo_gart_failed;
}
/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access * * @mem: Buffer object to be mapped for CPU access * @kptr[out]: pointer in kernel CPU address space * @size[out]: size of the buffer * * Pins the BO and maps it for kernel CPU access. The eviction fence is removed * from the BO, since pinned BOs cannot be evicted. The bo must remain on the * validate_list, so the GPU mapping can be restored after a page table was * evicted. * * Return: 0 on success, error code on failure
*/ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, void **kptr, uint64_t *size)
{ int ret; struct amdgpu_bo *bo = mem->bo;
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
pr_err("userptr can't be mapped to kernel\n"); return -EINVAL;
}
mutex_lock(&mem->process_info->lock);
ret = amdgpu_bo_reserve(bo, true); if (ret) {
pr_err("Failed to reserve bo. ret %d\n", ret); goto bo_reserve_failed;
}
ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); if (ret) {
pr_err("Failed to pin bo. ret %d\n", ret); goto pin_failed;
}
ret = amdgpu_bo_kmap(bo, kptr); if (ret) {
pr_err("Failed to map bo to kernel. ret %d\n", ret); goto kmap_failed;
}
/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Unmap a GTT BO for kernel CPU access * * @mem: Buffer object to be unmapped for CPU access * * Removes the kernel CPU mapping and unpins the BO. It does not restore the * eviction fence, so this function should only be used for cleanup before the * BO is destroyed.
*/ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
{ struct amdgpu_bo *bo = mem->bo;
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.0.21Bemerkung:
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.