// SPDX-License-Identifier: GPL-2.0 OR MIT /* * Copyright 2014-2022 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. *
*/
/*
 * Fragment (review): mangled extract — the enclosing function signatures
 * are not visible in this chunk.  First half: allocate a free queue-slot
 * id from the per-process bitmap.  Second half: tail of a device-dequeue
 * path that flushes MES shader-debugger state.
 */
found = find_first_zero_bit(pqm->queue_slot_bitmap,
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
pr_debug("The new slot id %lu\n", found);
/* No free slot: the process already owns the maximum number of queues. */
if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
pr_info("Cannot open more queues for process with pid %d\n",
pqm->process->lead_thread->pid); return -ENOMEM;
}
/* Dequeue is idempotent — skip devices that were already handled. */
if (pdd->already_dequeued) return; /* The MES context flush needs to filter out the case which the * KFD process is created without setting up the MES context and * queue for creating a compute queue.
*/
/*
 * Flush the MES shader debugger only when MES is enabled, a process
 * context was actually allocated (proc_ctx_gpu_addr != 0), and the
 * reset-domain read lock can be taken without blocking (avoids
 * deadlocking against an in-progress GPU reset).
 */
dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr &&
down_read_trylock(&dev->adev->reset_domain->sem)) {
amdgpu_mes_flush_shader_debugger(dev->adev,
pdd->proc_ctx_gpu_addr);
up_read(&dev->adev->reset_domain->sem);
}
pdd->already_dequeued = true;
}
/*
 * Fragment (review): body of a GWS (global wave sync) assign/release path;
 * the enclosing function signature is not visible in this chunk.
 * Looks up the queue by qid, validates the per-process GWS invariants,
 * then adds/removes the GWS buffer via the KGD helpers.
 */
pqn = get_queue_by_qid(pqm, qid); if (!pqn) {
pr_err("Queue id does not match any known queue\n"); return -EINVAL;
}
if (pqn->q)
dev = pqn->q->device; if (WARN_ON(!dev)) return -ENODEV;
pdd = kfd_get_process_device_data(dev, pqm->process); if (!pdd) {
pr_err("Process device data doesn't exist\n"); return -EINVAL;
}
/* Only allow one queue per process can have GWS assigned */ if (gws && pdd->qpd.num_gws) return -EBUSY;
/* Releasing GWS when none is assigned is an invalid request. */
if (!gws && pdd->qpd.num_gws == 0) return -EINVAL;
/*
 * GFX 9.4.3/9.4.4/9.5.0 and MES devices skip the real add/remove and
 * take the else branch below.
 */
if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) &&
!dev->kfd->shared_resources.enable_mes) { if (gws)
ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
gws, &mem); else
ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
pqn->q->gws); if (unlikely(ret)) return ret;
pqn->q->gws = mem;
} else { /* * Intentionally set GWS to a non-NULL value * for devices that do not use GWS for global wave * synchronization but require the formality * of setting GWS for cooperative groups.
*/
pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
}
/*
 * Fragment (review): queue-object initialisation inside a create path;
 * enclosing function signature not visible.  For MES devices this also
 * allocates the per-gang context BO and maps the wptr BO into GART.
 */
retval = init_queue(q, q_properties); if (retval != 0) return retval;
(*q)->device = dev;
(*q)->process = pqm->process;
if (dev->kfd->shared_resources.enable_mes) {
/* Per-queue gang context lives in GTT; MES reads it directly. */
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
AMDGPU_MES_GANG_CTX_SIZE,
&(*q)->gang_ctx_bo,
&(*q)->gang_ctx_gpu_addr,
&(*q)->gang_ctx_cpu_ptr, false); if (retval) {
pr_err("failed to allocate gang context bo\n"); goto cleanup;
}
memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
*/ if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) {
pr_err("Queue memory allocated to wrong device\n");
retval = -EINVAL; goto free_gang_ctx_bo;
}
retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo,
&(*q)->wptr_bo_gart); if (retval) {
pr_err("Failed to map wptr bo to GART\n"); goto free_gang_ctx_bo;
}
}
/*
 * Fragment (review): prologue of a queue-create path; enclosing function
 * signature not visible.  Adjusts the per-process queue limit, registers
 * the process with the DQM on first queue, and lazily allocates the MES
 * process context BO.
 */
/* * On GFX 9.4.3/9.5.0, increase the number of queues that * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0.
*/ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))
max_queues = 255;
q = NULL;
kq = NULL;
/* NOTE(review): returns bare -1 here, not a -E* code — inconsistent with
 * the -EINVAL used for the same failure elsewhere in this file. */
pdd = kfd_get_process_device_data(dev, pqm->process); if (!pdd) {
pr_err("Process device data doesn't exist\n"); return -1;
}
/* * for debug process, verify that it is within the static queues limit * currently limit is set to half of the total avail HQD slots * If we are just about to create DIQ, the is_debug flag is not set yet * Hence we also check the type as well
*/ if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
max_queues = dev->kfd->device_info.max_no_of_hqd/2;
if (pdd->qpd.queue_count >= max_queues) return -ENOSPC;
/* Register process if this is the first queue */ if (list_empty(&pdd->qpd.queues_list) &&
list_empty(&pdd->qpd.priv_queue_list))
dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
/* Allocate proc_ctx_bo only if MES is enabled and this is the first queue */ if (!pdd->proc_ctx_cpu_ptr && dev->kfd->shared_resources.enable_mes) {
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
AMDGPU_MES_PROC_CTX_SIZE,
&pdd->proc_ctx_bo,
&pdd->proc_ctx_gpu_addr,
&pdd->proc_ctx_cpu_ptr, false); if (retval) {
dev_err(dev->adev->dev, "failed to allocate process context bo\n"); return retval;
}
memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
}
/*
 * Fragment (review): queue-type dispatch inside a create path; enclosing
 * function signature not visible.  SDMA and COMPUTE types create a user
 * queue via the DQM; DIQ creates a kernel queue instead.
 */
switch (type) { case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA_XGMI: case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: /* SDMA queues are always allocated statically no matter * which scheduler mode is used. We also do not need to * check whether a SDMA queue can be allocated here, because * allocate_sdma_queue() in create_queue() has the * corresponding check logic.
*/
retval = init_user_queue(pqm, dev, &q, properties, *qid); if (retval != 0) goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
restore_mqd, restore_ctl_stack);
print_queue(q); break;
case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((dev->dqm->sched_policy ==
KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
(dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
retval = -EPERM; goto err_create_queue;
}
retval = init_user_queue(pqm, dev, &q, properties, *qid); if (retval != 0) goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
restore_mqd, restore_ctl_stack);
print_queue(q); break; case KFD_QUEUE_TYPE_DIQ:
/* DIQ (debug interface queue) uses a kernel queue, not a user queue. */
kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ); if (!kq) {
retval = -ENOMEM; goto err_create_queue;
}
kq->queue->properties.queue_id = *qid;
pqn->kq = kq;
pqn->q = NULL;
retval = kfd_process_drain_interrupts(pdd); if (retval) break;
/*
 * Unified failure reporting for the switch above: SDMA allocation
 * exhaustion (-ENOMEM) is expected and only warned; everything else
 * is an error.  NOTE(review): the default: case and the DIQ
 * create_queue call are not visible in this chunk — likely lost in
 * the extraction.
 */
if (retval != 0) { if ((type == KFD_QUEUE_TYPE_SDMA ||
type == KFD_QUEUE_TYPE_SDMA_XGMI ||
type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) &&
retval == -ENOMEM)
pr_warn("process pid %d DQM create queue type %d failed. ret %d\n",
pqm->process->lead_thread->pid, type, retval); else
pr_err("process pid %d DQM create queue type %d failed. ret %d\n",
pqm->process->lead_thread->pid, type, retval); goto err_create_queue;
}
/*
 * Fragment (review): epilogue and error-unwind of a queue-create path.
 * NOTE(review): the `if (q && p_doorbell_offset_in_process)` block below is
 * visibly incomplete — first_db_index is computed but never used and the
 * block is never closed before the next `if (q)`; statements (the actual
 * doorbell-offset store and a closing brace) were lost in extraction.
 */
if (q && p_doorbell_offset_in_process) { /* Return the doorbell offset within the doorbell page * to the caller so it can be passed up to user mode * (in bytes). * relative doorbell index = Absolute doorbell index - * absolute index of first doorbell in the page.
*/
uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
pdd->qpd.proc_doorbells,
0,
pdd->dev->kfd->device_info.doorbell_size);
if (q) {
pr_debug("PQM done creating queue\n");
kfd_procfs_add_queue(q);
print_queue_properties(&q->properties);
}
return retval;
/* Error unwind: release queue object, kernel queue, node, and slot bit. */
err_create_queue:
uninit_queue(q); if (kq)
kernel_queue_uninit(kq);
kfree(pqn);
err_allocate_pqn: /* check if queues list is empty unregister process from device */
clear_bit(*qid, pqm->queue_slot_bitmap); if (list_empty(&pdd->qpd.queues_list) &&
list_empty(&pdd->qpd.priv_queue_list))
dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd); return retval;
}
/*
 * NOTE(review): the lines below belong to a different function's unwind
 * path (they use `dqm` directly rather than `dev->dqm`) — another sign of
 * fragment mixing in this chunk.
 */
if (list_empty(&pdd->qpd.queues_list) &&
list_empty(&pdd->qpd.priv_queue_list))
dqm->ops.unregister_process(dqm, &pdd->qpd);
err_destroy_queue: return retval;
}
int pqm_update_queue_properties(struct process_queue_manager *pqm, unsignedint qid, struct queue_properties *p)
{ int retval; struct process_queue_node *pqn;
pqn = get_queue_by_qid(pqm, qid); if (!pqn || !pqn->q) {
pr_debug("No queue %d exists for update operation\n", qid); return -EFAULT;
}
/* * Update with NULL ring address is used to disable the queue
*/ if (p->queue_address && p->queue_size) { struct kfd_process_device *pdd; struct amdgpu_vm *vm; struct queue *q = pqn->q; int err;
pdd = kfd_get_process_device_data(q->device, q->process); if (!pdd) return -ENODEV;
vm = drm_priv_to_vm(pdd->drm_priv);
err = amdgpu_bo_reserve(vm->root.bo, false); if (err) return err;
if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo,
p->queue_size)) {
pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n",
p->queue_address, p->queue_size); return -EFAULT;
}
int pqm_update_mqd(struct process_queue_manager *pqm, unsignedint qid, struct mqd_update_info *minfo)
{ int retval; struct process_queue_node *pqn;
pqn = get_queue_by_qid(pqm, qid); if (!pqn) {
pr_debug("No queue %d exists for update operation\n", qid); return -EFAULT;
}
/* CUs are masked for debugger requirements so deny user mask */ if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr) return -EBUSY;
/* ASICs that have WGPs must enforce pairwise enabled mask checks. */ if (minfo && minfo->cu_mask.ptr &&
KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) { int i;
for (i = 0; i < minfo->cu_mask.count; i += 2) {
uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
if (cu_pair && cu_pair != 0x3) {
pr_debug("CUs must be adjacent pairwise enabled.\n"); return -EINVAL;
}
}
}
/*
 * Fragment (review): tail of a checkpoint-size query helper — the function
 * start is not visible in this chunk.  Fetches the MQD and control-stack
 * sizes needed to checkpoint queue @q and propagates any failure.
 */
ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
q->properties.queue_id,
mqd_size,
ctl_stack_size); if (ret)
pr_err("Failed to get queue dump info (%d)\n", ret);
return ret;
}
/*
 * kfd_process_get_queue_info - count the queues of process @p and report
 * the total size of their private checkpoint data.
 *
 * NOTE(review): this definition is truncated and contaminated with lines
 * from a different (criu checkpoint) function — user_priv_data, q_index
 * and priv_data_offset are not declared in this scope.  Do not trust the
 * body below without comparing against the upstream file.
 */
int kfd_process_get_queue_info(struct kfd_process *p,
uint32_t *num_queues,
uint64_t *priv_data_sizes)
{
uint32_t extra_data_sizes = 0; struct queue *q; int i; int ret;
*num_queues = 0;
/* Run over all PDDs of the process */ for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i];
/* * criu_checkpoint_queues_device will copy data to user and update q_index and * queues_priv_data_offset
*/
ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
priv_data_offset);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.