// SPDX-License-Identifier: MIT /* * Copyright 2014 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE.
*/
/* Total memory size in system memory and all GPU VRAM. Used to
 * estimate worst case amount of memory to reserve for page tables
 */
uint64_t amdgpu_amdkfd_total_mem_size;

/* Set once kfd_init() has succeeded; guards teardown in the fini path. */
static bool kfd_initialized;
int amdgpu_amdkfd_init(void)
{ struct sysinfo si; int ret;
/**
 * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
 * setup amdkfd
 *
 * @adev: amdgpu_device pointer
 * @aperture_base: output returning doorbell aperture base physical address
 * @aperture_size: output returning doorbell aperture size in bytes
 * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
 *
 * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
 * takes doorbells required for its own rings and reports the setup to amdkfd.
 * amdgpu reserved doorbells are at the start of the doorbell aperture.
 */
static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
					 phys_addr_t *aperture_base,
					 size_t *aperture_size,
					 size_t *start_offset)
{
	/*
	 * The first num_kernel_doorbells are used by amdgpu.
	 * amdkfd takes whatever's left in the aperture.
	 */
	if (adev->enable_mes) {
		/*
		 * With MES enabled, we only need to initialize
		 * the base address. The size and offset are
		 * not initialized as AMDGPU manages the whole
		 * doorbell space.
		 */
		*aperture_base = adev->doorbell.base;
		*aperture_size = 0;
		*start_offset = 0;
	} else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells *
						sizeof(u32)) {
		*aperture_base = adev->doorbell.base;
		*aperture_size = adev->doorbell.size;
		*start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32);
	} else {
		/* No room left in the aperture: report nothing to amdkfd. */
		*aperture_base = 0;
		*aperture_size = 0;
		*start_offset = 0;
	}
}
/* this is going to have a few of the MSBs set that we need to * clear
*/
bitmap_complement(gpu_resources.cp_queue_bitmap,
adev->gfx.mec_bitmap[0].queue_bitmap,
AMDGPU_MAX_QUEUES);
/* According to linux/bitmap.h we shouldn't use bitmap_clear if * nbits is not compile time constant
*/
last_valid_bit = 1 /* only first MEC can have compute queues */
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe; for (i = last_valid_bit; i < AMDGPU_MAX_QUEUES; ++i)
clear_bit(i, gpu_resources.cp_queue_bitmap);
/* Since SOC15, BIF starts to statically use the * lower 12 bits of doorbell addresses for routing * based on settings in registers like * SDMA0_DOORBELL_RANGE etc.. * In order to route a doorbell to CP engine, the lower * 12 bits of its address has to be outside the range * set for SDMA, VCN, and IH blocks.
*/ if (adev->asic_type >= CHIP_VEGA10) {
gpu_resources.non_cp_doorbells_start =
adev->doorbell_index.first_non_cp;
gpu_resources.non_cp_doorbells_end =
adev->doorbell_index.last_non_cp;
}
if (cp_mqd_gfx9)
bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;
r = amdgpu_bo_create(adev, &bp, &bo); if (r) {
dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); return r;
}
/* map the buffer */
r = amdgpu_bo_reserve(bo, true); if (r) {
dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); goto allocate_mem_reserve_bo_failed;
}
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); if (r) {
dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); goto allocate_mem_pin_bo_failed;
}
r = amdgpu_ttm_alloc_gart(&bo->tbo); if (r) {
dev_err(adev->dev, "%p bind failed\n", bo); goto allocate_mem_kmap_bo_failed;
}
r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp); if (r) {
dev_err(adev->dev, "(%d) failed to map bo to kernel for amdkfd\n", r); goto allocate_mem_kmap_bo_failed;
}
/*
 * Return the maximum shader engine clock in MHz. The underlying
 * amdgpu_dpm_get_sclk() value is in quanta of 10 kHz, hence the /100.
 * Falls back to a fixed 100 MHz when DPM is not enabled.
 */
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
{
	if (!adev->pm.dpm_enabled)
		return 100;

	return amdgpu_dpm_get_sclk(adev, false) / 100;
}
int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, struct amdgpu_device **dmabuf_adev,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
uint32_t *flags, int8_t *xcp_id)
{ struct dma_buf *dma_buf; struct drm_gem_object *obj; struct amdgpu_bo *bo;
uint64_t metadata_flags; int r = -EINVAL;
dma_buf = dma_buf_get(dma_buf_fd); if (IS_ERR(dma_buf)) return PTR_ERR(dma_buf);
obj = dma_buf->priv; if (obj->dev->driver != adev_to_drm(adev)->driver) /* Can't handle buffers from different drivers */ goto out_put;
adev = drm_to_adev(obj->dev);
bo = gem_to_amdgpu_bo(obj); if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT))) /* Only VRAM and GTT BOs are supported */ goto out_put;
r = 0; if (dmabuf_adev)
*dmabuf_adev = adev; if (bo_size)
*bo_size = amdgpu_bo_size(bo); if (metadata_buffer)
r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
metadata_size, &metadata_flags); if (flags) {
*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
KFD_IOC_ALLOC_MEM_FLAGS_VRAM
: KFD_IOC_ALLOC_MEM_FLAGS_GTT;
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
} if (xcp_id)
*xcp_id = bo->xcp_id;
switch (engine) { case KGD_ENGINE_MEC1:
ring = &adev->gfx.compute_ring[0]; break; case KGD_ENGINE_SDMA1:
ring = &adev->sdma.instance[0].ring; break; case KGD_ENGINE_SDMA2:
ring = &adev->sdma.instance[1].ring; break; default:
pr_err("Invalid engine in IB submission: %d\n", engine);
ret = -EINVAL; goto err;
}
ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job, 0); if (ret) goto err;
ib->gpu_addr = gpu_addr;
ib->ptr = ib_cmd;
ib->length_dw = ib_len; /* This works for NO_HWS. TODO: need to handle without knowing VMID */
job->vmid = vmid;
job->num_ibs = 1;
ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
if (ret) {
DRM_ERROR("amdgpu: failed to schedule IB.\n"); goto err_ib_sched;
}
/* Drop the initial kref_init count (see drm_sched_main as example) */
dma_fence_put(f);
ret = dma_fence_wait(f, false);
if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) { if (adev->gmc.is_app_apu && adev->gmc.num_mem_partitions == 1) { /* In NPS1 mode, we should restrict the vram reporting * tied to the ttm_pages_limit which is 1/2 of the system * memory. For other partition modes, the HBM is uniformly * divided already per numa node reported. If user wants to * go beyond the default ttm limit and maximize the ROCm * allocations, they can go up to max ttm and sysmem limits.
*/
/* Submit unmap queue packet */
amdgpu_ring_commit(kiq_ring); /* * Ring test will do a basic scratch register change check. Just run * this to ensure that unmap queues that is submitted before got * processed successfully before returning.
*/
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
free_ring:
kfree(ring);
free_ring_funcs:
kfree(ring_funcs);
return r;
}
/* Stop scheduling on KFD */ int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id)
{ if (!adev->kfd.init_complete) return 0;
/* check if there are KFD queues active */ bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id)
{ if (!adev->kfd.init_complete) returnfalse;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.