// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
if (is_32bit_user_mode) {
	dev_warn(kfd_device,
		 "Process %d (32-bit) failed to open /dev/kfd\n"
		 "32-bit processes are not supported by amdkfd\n",
		 current->pid);
	return -EPERM;
}
process = kfd_create_process(current);
if (IS_ERR(process))
	return PTR_ERR(process);
if (kfd_process_init_cwsr_apu(process, filep)) {
	kfd_unref_process(process);
	return -EFAULT;
}
/* filep now owns the reference returned by kfd_create_process */
filep->private_data = process;
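
/* Validate the user-supplied create-queue arguments before they are copied
 * into the queue_properties structure. Returns 0 on success or a negative
 * errno.
 */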
static int set_queue_properties_from_user(struct queue_properties *q_properties,
					  struct kfd_ioctl_create_queue_args *args)
{
/*
 * Repurpose queue percentage to accommodate new features:
 * bit 0-7: queue percentage
 * bit 8-15: pm4_target_xcc
 */
if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
	pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
	return -EINVAL;
}
if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); return -EINVAL;
}
if ((args->ring_base_address) &&
    (!access_ok((const void __user *) args->ring_base_address,
		sizeof(uint64_t)))) {
	pr_err("Can't access ring base address\n");
	return -EFAULT;
}
if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
pr_err("Ring size must be a power of 2 or 0\n"); return -EINVAL;
}
if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
	args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
	pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
}
if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
	int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) +
			      kfd_get_num_xgmi_sdma_engines(dev) - 1;
	if (q_properties.sdma_engine_id > max_sdma_eng_id) {
		err = -EINVAL;
		pr_err("sdma_engine_id %i exceeds maximum id of %i\n",
		       q_properties.sdma_engine_id, max_sdma_eng_id);
		goto err_sdma_engine_id;
	}
}
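
/* Process doorbell pages are allocated lazily: the first queue created for
 * this process on this device triggers the allocation.
 */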
if (!pdd->qpd.proc_doorbells) {
	err = kfd_alloc_process_doorbells(dev->kfd, pdd);
	if (err) {
		pr_debug("failed to allocate process doorbells\n");
		goto err_bind_process;
	}
}
err = kfd_queue_acquire_buffers(pdd, &q_properties);
if (err) {
	pr_debug("failed to acquire user queue buffers\n");
	goto err_acquire_queue_buf;
}
pr_debug("Creating queue for process pid %d on gpu 0x%x\n",
p->lead_thread->pid,
dev->id);
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
if (KFD_IS_SOC15(dev))
	/* On SOC15 ASICs, include the doorbell offset within the
	 * process doorbell frame, which is 2 pages.
	 */
	args->doorbell_offset |= doorbell_offset_in_process;
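
/* User space passes this offset to mmap() on /dev/kfd; kfd_mmap() decodes the
 * type and GPU ID bits to locate the doorbell page that backs the mapping.
 */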
mutex_unlock(&p->mutex);
pr_debug("Queue id %d was created successfully\n", args->queue_id);
/*
 * Repurpose queue percentage to accommodate new features:
 * bit 0-7: queue percentage
 * bit 8-15: pm4_target_xcc
 */
if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
	pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
	return -EINVAL;
}
if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); return -EINVAL;
}
if ((args->ring_base_address) &&
    (!access_ok((const void __user *) args->ring_base_address,
		sizeof(uint64_t)))) {
	pr_err("Can't access ring base address\n");
	return -EFAULT;
}
if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
pr_err("Ring size must be a power of 2 or 0\n"); return -EINVAL;
}
if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
	args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
	pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
}
properties.queue_address = args->ring_base_address;
properties.queue_size = args->ring_size;
properties.queue_percent = args->queue_percentage & 0xFF;
/* bit 8-15 are repurposed to be PM4 target XCC */
properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
properties.priority = args->queue_priority;
pr_debug("Updating queue id %d for process pid %d\n",
args->queue_id, p->lead_thread->pid);
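
/* The CU mask is passed as an array of 32-bit words, one bit per compute
 * unit, so the bit count must be a non-zero multiple of 32.
 */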
if ((args->num_cu_mask % 32) != 0) {
pr_debug("num_cu_mask 0x%x must be a multiple of 32",
args->num_cu_mask); return -EINVAL;
}
minfo.cu_mask.count = args->num_cu_mask;
if (minfo.cu_mask.count == 0) {
	pr_debug("CU mask cannot be 0");
	return -EINVAL;
}
/* To prevent an unreasonably large CU mask size, set an arbitrary limit of
 * max_num_cus bits. We can then just drop any CU mask bits past max_num_cus
 * bits and just use the first max_num_cus bits.
 */
if (minfo.cu_mask.count > max_num_cus) {
	pr_debug("CU mask cannot be greater than 1024 bits");
	minfo.cu_mask.count = max_num_cus;
	cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
}
minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL);
if (!minfo.cu_mask.ptr)
	return -ENOMEM;

retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size);
if (retval) {
	pr_debug("Could not copy CU mask from userspace");
	retval = -EFAULT;
	goto out;
}
dev_dbg(kfd_device, "get apertures for process pid %d",
p->lead_thread->pid);
if (args->num_of_nodes == 0) {
	/* Return number of nodes, so that user space can allocate
	 * sufficient memory
	 */
	mutex_lock(&p->mutex);
	args->num_of_nodes = p->n_pdds;
	goto out_unlock;
}
/* Fill in process-aperture information for all available
 * nodes, but not more than args->num_of_nodes as that is
 * the amount of memory allocated by user
 */
pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),
	     GFP_KERNEL);
if (!pa)
	return -ENOMEM;
mutex_lock(&p->mutex);
if (!p->n_pdds) {
	args->num_of_nodes = 0;
	kfree(pa);
	goto out_unlock;
}
/* Run over all pdd of the process */
for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) {
	struct kfd_process_device *pdd = p->pdds[i];
/* For dGPUs the event page is allocated in user mode. The
 * handle is passed to KFD with the first call to this IOCTL
 * through the event_page_offset field.
 */
if (args->event_page_offset) {
	mutex_lock(&p->mutex);
	err = kfd_kmap_event_page(p, args->event_page_offset);
	mutex_unlock(&p->mutex);
	if (err)
		return err;
}
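
/* A device is treated as large-BAR when all of its VRAM is CPU-visible (no
 * private aperture), or when the debug_largebar module parameter forces it.
 */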
bool kfd_dev_is_large_bar(struct kfd_node *dev)
{
if (dev->kfd->adev->debug_largebar) {
	pr_debug("Simulate large-bar allocation on non large-bar machine\n");
	return true;
}
if (dev->local_mem_info.local_mem_size_private == 0 &&
    dev->local_mem_info.local_mem_size_public > 0)
	return true;

if (dev->local_mem_info.local_mem_size_public == 0 &&
    dev->kfd->adev->gmc.is_app_apu) {
	pr_debug("APP APU, Consider like a large bar system\n");
	return true;
}

return false;
}
#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
/* Flush pending deferred work to avoid racing with deferred actions
 * from previous memory map changes (e.g. munmap).
 */
svm_range_list_lock_and_flush_work(&p->svms, current->mm);
mutex_lock(&p->svms.lock);
mmap_write_unlock(current->mm);
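
/* svm_range_list_lock_and_flush_work() returns with mmap_lock held for write;
 * svms.lock is taken before dropping it so the interval-tree lookup below
 * cannot race with concurrent SVM range updates.
 */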
/* Skip a special case that allocates VRAM without VA;
 * the VA will be an invalid 0.
 */
if (!(!args->va_addr && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) &&
    interval_tree_iter_first(&p->svms.objects,
			     args->va_addr >> PAGE_SHIFT,
			     (args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
pr_err("Address: 0x%llx already allocated by SVM\n",
args->va_addr);
mutex_unlock(&p->svms.lock); return -EADDRINUSE;
}
/* When registering a user buffer, check whether it has already been
 * registered by SVM by its CPU virtual address.
 */
if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) &&
    interval_tree_iter_first(&p->svms.objects,
			     args->mmap_offset >> PAGE_SHIFT,
			     (args->mmap_offset + args->size - 1) >> PAGE_SHIFT)) {
pr_err("User Buffer Address: 0x%llx already allocated by SVM\n",
args->mmap_offset);
mutex_unlock(&p->svms.lock); return -EADDRINUSE;
}
if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
!kfd_dev_is_large_bar(dev)) {
pr_err("Alloc host visible vram on small bar is not allowed\n");
err = -EINVAL; goto err_large_bar;
}
mutex_lock(&p->mutex); /* * Safeguard to prevent user space from freeing signal BO. * It will be freed at process termination.
*/ if (p->signal_handle && (p->signal_handle == args->handle)) {
pr_err("Free signal BO is not allowed\n");
ret = -EPERM; goto err_unlock;
}
pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
if (!pdd) {
	pr_err("Process device data doesn't exist\n");
	ret = -EINVAL;
	goto err_pdd;
}
mem = kfd_process_device_translate_handle(
	pdd, GET_IDR_HANDLE(args->handle));
if (!mem) {
	ret = -EINVAL;
	goto err_unlock;
}
ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev,
(struct kgd_mem *)mem, pdd->drm_priv, &size);
/* If freeing the buffer failed, leave the handle in place for
 * clean-up during process tear-down.
 */
if (!ret)
	kfd_process_device_remove_obj_handle(
		pdd, GET_IDR_HANDLE(args->handle));
mem = kfd_process_device_translate_handle(pdd,
					   GET_IDR_HANDLE(args->handle));
if (!mem) {
	err = -ENOMEM;
	goto get_mem_obj_from_handle_failed;
}
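
/* Start at args->n_success so a restarted ioctl does not redo devices that
 * were already handled successfully; n_success is updated after each device.
 */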
for (i = args->n_success; i < args->n_devices; i++) {
	peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
	if (!peer_pdd) {
		pr_debug("Getting device by id failed for 0x%x\n",
			 devices_arr[i]);
		err = -EINVAL;
		goto get_mem_obj_from_handle_failed;
}
err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev,
				      (struct kgd_mem *) mem, true);
if (err) {
	pr_debug("Sync memory failed, wait interrupted by user signal\n");
	goto sync_memory_failed;
}
mutex_unlock(&p->mutex);
/* Flush TLBs after waiting for the page table updates to complete */
for (i = 0; i < args->n_devices; i++) {
	peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
	if (WARN_ON_ONCE(!peer_pdd))
		continue;
	kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
}
kfree(devices_arr);
mem = kfd_process_device_translate_handle(pdd,
					   GET_IDR_HANDLE(args->handle));
if (!mem) {
	err = -ENOMEM;
	goto get_mem_obj_from_handle_failed;
}
for (i = args->n_success; i < args->n_devices; i++) {
	peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
	if (!peer_pdd) {
		err = -EINVAL;
		goto get_mem_obj_from_handle_failed;
}
	err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
		peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
	if (err) {
		pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices);
		goto unmap_memory_from_gpu_failed;
}
args->n_success = i+1;
}
flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd);
if (flush_tlb) {
	err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
					      (struct kgd_mem *) mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}
}
/* Flush TLBs after waiting for the page table updates to complete */
for (i = 0; i < args->n_devices; i++) {
	peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
	if (WARN_ON_ONCE(!peer_pdd))
		continue;
	if (flush_tlb)
		kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);

	/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */
	err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
	if (err)
		goto sync_memory_failed;
}
/* Find a KFD GPU device that supports the get_dmabuf_info query */
for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
	if (dev && !kfd_devcgroup_check_permission(dev))
		break;
if (!dev)
	return -EINVAL;
if (args->metadata_ptr) {
	metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
	if (!metadata_buffer)
		return -ENOMEM;
}
/* Get dmabuf info from KGD */
r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd,
&dmabuf_adev, &args->size,
metadata_buffer, args->metadata_size,
&args->metadata_size, &flags, &xcp_id); if (r) gotoexit;
/* Copy metadata buffer to user mode */
if (metadata_buffer) {
	r = copy_to_user((void __user *)args->metadata_ptr,
			 metadata_buffer, args->metadata_size);
	if (r != 0)
		r = -EFAULT;
}
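
/* Translate the buffer handle back to its kgd_mem object and export it as a
 * dma-buf file descriptor for the caller.
 */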
dev = kfd_device_by_id(GET_GPU_ID(args->handle));
if (!dev)
	return -EINVAL;
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(dev, p);
if (!pdd) {
	ret = -EINVAL;
	goto err_unlock;
}
mem = kfd_process_device_translate_handle(pdd,
					   GET_IDR_HANDLE(args->handle));
if (!mem) {
	ret = -EINVAL;
	goto err_unlock;
}
ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);
mutex_unlock(&p->mutex); if (ret) goto err_out;
ret = dma_buf_fd(dmabuf, args->flags); if (ret < 0) {
dma_buf_put(dmabuf); goto err_out;
} /* dma_buf_fd assigns the reference count to the fd, no need to * put the reference here.
*/
args->dmabuf_fd = ret;
static int kfd_ioctl_set_xnack_mode(struct file *filep, struct kfd_process *p,
				    void *data)
{
struct kfd_ioctl_set_xnack_mode_args *args = data;
int r = 0;
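
/* A negative xnack_enabled only queries the current mode. Changing the mode
 * is refused while the process has user queues, because XNACK determines how
 * GPU page faults are handled for every queue of the process.
 */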
mutex_lock(&p->mutex); if (args->xnack_enabled >= 0) { if (!list_empty(&p->pqm.queues)) {
pr_debug("Process has user queues running\n");
r = -EBUSY; goto out_unlock;
}
	if (p->xnack_enabled == args->xnack_enabled)
		goto out_unlock;

	if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) {
		r = -EPERM;
		goto out_unlock;
}
process_priv.version = KFD_CRIU_PRIV_VERSION;
/* For CR, we don't consider negative xnack mode which is used for
 * querying without changing it, here 0 simply means disabled and 1
 * means enabled so retry for finding a valid PTE.
 */
process_priv.xnack_mode = p->xnack_enabled ? 1 : 0;
ret = copy_to_user(user_priv_data + *priv_offset,
&process_priv, sizeof(process_priv));
if (ret) {
pr_err("Failed to copy process information to user\n");
ret = -EFAULT;
}
/*
 * priv_data does not contain useful information for now and is reserved for
 * future use, so we do not set its contents.
 */
}
ret = copy_to_user(user_addr, device_buckets,
		   num_devices * sizeof(*device_buckets));
if (ret) {
	pr_err("Failed to copy device information to user\n");
	ret = -EFAULT;
	goto exit;
}
ret = copy_to_user(user_priv_data + *priv_offset,
		   device_priv,
		   num_devices * sizeof(*device_priv));
if (ret) {
	pr_err("Failed to copy device information to user\n");
	ret = -EFAULT;
}
*priv_offset += num_devices * sizeof(*device_priv);
/* Skip checkpointing BOs that are used for Trap handler
 * code and state. Currently, these BOs have a VA that
 * is less than the GPUVM base.
 */
if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base)
	continue;
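
/* Record every GPU this BO is currently mapped on so that the restore phase
 * can re-create the same mappings.
 */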
for (i = 0; i < p->n_pdds; i++) {
	if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem))
		bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;
}
ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets));
if (ret) {
	pr_err("Failed to copy BO information to user\n");
	ret = -EFAULT;
	goto exit;
}
ret = copy_to_user(user_priv_data + *priv_offset, bo_privs,
		   num_bos * sizeof(*bo_privs));
if (ret) {
	pr_err("Failed to copy BO priv information to user\n");
	ret = -EFAULT;
	goto exit;
}
if (!args->devices || !args->bos || !args->priv_data)
	return -EINVAL;
mutex_lock(&p->mutex);
if (!p->n_pdds) {
pr_err("No pdd for given process\n");
ret = -ENODEV; goto exit_unlock;
}
/* Confirm all process queues are evicted */
if (!p->queues_paused) {
	pr_err("Cannot dump process when queues are not in evicted state\n");
	/* CRIU plugin did not call op PROCESS_INFO before checkpointing */
	ret = -EINVAL;
	goto exit_unlock;
}
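
/* criu_get_process_object_info() counts the devices, BOs and queue/event/SVM
 * objects owned by the process and computes the total private-data size the
 * checkpoint will need.
 */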
ret = criu_get_process_object_info(p, &num_devices, &num_bos,
				   &num_objects, &priv_size);
if (ret)
	goto exit_unlock;
/* each function will store private data inside priv_data and adjust priv_offset */
ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data,
			      &priv_offset);
if (ret)
	goto exit_unlock;

ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices,
			      (uint8_t __user *)args->priv_data, &priv_offset);
if (ret)
	goto exit_unlock;
/* Leave room for BOs in the private data. They need to be restored
 * before events, but we checkpoint them last to simplify the error
 * handling.
 */
bo_priv_offset = priv_offset;
priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);
if (num_objects) {
	ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
					 &priv_offset);
	if (ret)
		goto exit_unlock;

	ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
					 &priv_offset);
	if (ret)
		goto exit_unlock;

	ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data,
				      &priv_offset);
	if (ret)
		goto exit_unlock;
}
/* This must be the last thing in this function that can fail.
 * Otherwise we leak dmabuf file descriptors.
 */
ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
(uint8_t __user *)args->priv_data, &bo_priv_offset);
exit_unlock:
mutex_unlock(&p->mutex); if (ret)
pr_err("Failed to dump CRIU ret:%d\n", ret); else
pr_debug("CRIU dump ret:%d\n", ret);
return ret;
}
static int criu_restore_process(struct kfd_process *p, struct kfd_ioctl_criu_args *args,
uint64_t *priv_offset,
uint64_t max_priv_data_size)
{
int ret = 0;
struct kfd_criu_process_priv_data process_priv;

if (*priv_offset + sizeof(process_priv) > max_priv_data_size)
	return -EINVAL;
ret = copy_from_user(&process_priv,
(void __user *)(args->priv_data + *priv_offset), sizeof(process_priv)); if (ret) {
pr_err("Failed to copy process private information from user\n");
ret = -EFAULT; gotoexit;
}
*priv_offset += sizeof(process_priv);
if (process_priv.version != KFD_CRIU_PRIV_VERSION) {
pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",
process_priv.version, KFD_CRIU_PRIV_VERSION); return -EINVAL;
}
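
/* kfd_process_xnack_mode(p, true) checks whether every GPU in the process
 * supports retry faults; a checkpoint taken with XNACK enabled cannot be
 * restored on hardware that cannot honour it.
 */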
pr_debug("Setting XNACK mode\n"); if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {
pr_err("xnack mode cannot be set\n");
ret = -EPERM; gotoexit;
} else {
pr_debug("set xnack mode: %d\n", process_priv.xnack_mode);
p->xnack_enabled = process_priv.xnack_mode;
}
if (args->num_devices != p->n_pdds)
	return -EINVAL;

if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)
	return -EINVAL;

device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets),
			       GFP_KERNEL);
if (!device_buckets)
	return -ENOMEM;
ret = copy_from_user(device_buckets, (void __user *)args->devices,
		     args->num_devices * sizeof(*device_buckets));
if (ret) {
	pr_err("Failed to copy devices buckets from user\n");
	ret = -EFAULT;
	goto exit;
}
for (i = 0; i < args->num_devices; i++) {
	struct kfd_node *dev;
	struct kfd_process_device *pdd;
	struct file *drm_file;

	/* device private data is not currently used */
	if (!device_buckets[i].user_gpu_id) {
		pr_err("Invalid user gpu_id\n");
		ret = -EINVAL;
		goto exit;
}
	dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);
	if (!dev) {
		pr_err("Failed to find device with gpu_id = %x\n",
		       device_buckets[i].actual_gpu_id);
		ret = -EINVAL;
		goto exit;
}
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Failed to get pdd for gpu_id = %x\n",
		       device_buckets[i].actual_gpu_id);
		ret = -EINVAL;
		goto exit;
}
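
	/* The gpu_id recorded at checkpoint time (user_gpu_id) may differ from
	 * the gpu_id assigned on the restore target, so remember it for
	 * translating handles stored in the private data.
	 */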
pdd->user_gpu_id = device_buckets[i].user_gpu_id;
	drm_file = fget(device_buckets[i].drm_fd);
	if (!drm_file) {
		pr_err("Invalid render node file descriptor sent from plugin (%d)\n",
		       device_buckets[i].drm_fd);
		ret = -EINVAL;
		goto exit;
}
	if (pdd->drm_file) {
		ret = -EINVAL;
		goto exit;
}
	/* create the vm using render nodes for kfd pdd */
	if (kfd_process_device_init_vm(pdd, drm_file)) {
		pr_err("could not init vm for given pdd\n");
		/* On success, the PDD keeps the drm_file reference */
		fput(drm_file);
		ret = -EINVAL;
		goto exit;
	}
	/*
	 * pdd now already has the vm bound to render node so below api won't create a new
	 * exclusive kfd mapping but use existing one with renderDXXX but is still needed
	 * for iommu v2 binding and runtime pm.
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		ret = PTR_ERR(pdd);
		goto exit;
}
	if (!pdd->qpd.proc_doorbells) {
		ret = kfd_alloc_process_doorbells(dev->kfd, pdd);
		if (ret)
			goto exit;
	}
}
/*
 * We are not copying device private data from user as we are not using the data for now,
 * but we still adjust for its private data.
 */
*priv_offset += args->num_devices * sizeof(*device_privs);
pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id);
if (!pdd) {
	pr_err("Failed to get pdd\n");
	return -ENODEV;
}
ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem);
if (ret)
	return ret;

/* now map these BOs to GPU/s */
for (j = 0; j < p->n_pdds; j++) {
	struct kfd_node *peer;
	struct kfd_process_device *peer_pdd;

	if (!bo_priv->mapped_gpuids[j])
		break;

	peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]);
	if (!peer_pdd)
		return -EINVAL;

	peer = peer_pdd->dev;

	peer_pdd = kfd_bind_process_to_device(peer, p);
	if (IS_ERR(peer_pdd))
		return PTR_ERR(peer_pdd);
	ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem,
						    peer_pdd->drm_priv);
	if (ret) {
		pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);
		return ret;
}
}
pr_debug("map memory was successful for the BO\n"); /* create the dmabuf object and export the bo */ if (bo_bucket->alloc_flags
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
&bo_bucket->dmabuf_fd, file); if (ret) return ret;
} else {
bo_bucket->dmabuf_fd = KFD_INVALID_FD;
}
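
/* The dma-buf fd is handed back to the CRIU plugin so it can fill the
 * restored VRAM/GTT BO with the checkpointed contents.
 */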
if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
	return -EINVAL;
/* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
if (!bo_buckets)
	return -ENOMEM;

files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);
if (!files) {
	ret = -ENOMEM;
	goto exit;
}
ret = copy_from_user(bo_buckets, (void __user *)args->bos,
		     args->num_bos * sizeof(*bo_buckets));
if (ret) {
	pr_err("Failed to copy BOs information from user\n");
	ret = -EFAULT;
	goto exit;
}
bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
if (!bo_privs) {
	ret = -ENOMEM;
	goto exit;
}
ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
		     args->num_bos * sizeof(*bo_privs));
if (ret) {
	pr_err("Failed to copy BOs information from user\n");
	ret = -EFAULT;
	goto exit;
}
*priv_offset += args->num_bos * sizeof(*bo_privs);
/* Create and map new BOs */
for (; i < args->num_bos; i++) {
	ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
	if (ret) {
		pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
		goto exit;
	}
} /* done */
/* Copy only the buckets back so user can read bo_buckets[N].restored_offset */
ret = copy_to_user((void __user *)args->bos,
		   bo_buckets,
		   (args->num_bos * sizeof(*bo_buckets)));
if (ret)
	ret = -EFAULT;