/* * Copyright 2016 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. *
*/
/* add these here since we already include dce12 headers and these are for DCN */ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x049d #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2
/*
 * NOTE(review): fragment — the IRQ-state switch below and the retry-fault
 * handling after it appear to come from two different functions (an
 * interrupt-state setter and the VM page-fault interrupt handler) whose
 * headers are not visible in this chunk; formatting was collapsed by
 * extraction. Kept byte-identical; only comments added/fixed. Verify
 * against the upstream file before editing logic here.
 */
switch (state) { case AMDGPU_IRQ_STATE_DISABLE:
for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
/* This works because this interrupt is only * enabled at init/resume and disabled in * fini/suspend, so the overall state doesn't * change over the course of suspend/resume.
*/ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0))) continue;
if (j >= AMDGPU_MMHUB0(0))
WREG32_SOC15_IP(MMHUB, reg, tmp); else
WREG32_XCC(reg, tmp, j);
}
} break; case AMDGPU_IRQ_STATE_ENABLE:
for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) {
reg = hub->vm_context0_cntl + i;
/* This works because this interrupt is only * enabled at init/resume and disabled in * fini/suspend, so the overall state doesn't * change over the course of suspend/resume.
*/ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0))) continue;
/* NOTE(review): from here on this reads as the body of the page-fault
 * interrupt handler (entry/retry_fault/addr are not declared above) —
 * presumably a chunking artifact; confirm against the full file.
 */
if (retry_fault) { if (adev->irq.retry_cam_enabled) { /* Delegate it to a different ring if the hardware hasn't * already done it.
*/ if (entry->ih == &adev->irq.ih) {
amdgpu_irq_delegate(adev, entry, 8); return 1;
}
cam_index = entry->src_data[2] & 0x3ff;
ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
addr, entry->timestamp, write_fault);
WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) return 1;
} else { /* Process it only if it's the first fault for this address */ if (entry->ih != &adev->irq.ih_soft &&
amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
entry->timestamp)) return 1;
/* Delegate it to a different ring if the hardware hasn't * already done it.
*/ if (entry->ih == &adev->irq.ih) {
amdgpu_irq_delegate(adev, entry, 8); return 1;
}
/* Try to handle the recoverable page faults by filling page * tables
*/ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
addr, entry->timestamp, write_fault)) return 1;
}
}
if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault)) return 1;
dev_err(adev->dev, " in page starting at address 0x%016llx from IH client 0x%x (%s)\n",
addr, entry->client_id,
soc15_ih_clientid_name[entry->client_id]);
/* * Issue a dummy read to wait for the status register to * be updated to avoid reading an incorrect value due to * the new fast GRBM interface.
*/ if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
(amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
RREG32(hub->vm_l2_pro_fault_status);
/* * GART * VMID 0 is the physical GPU addresses as used by the kernel. * VMIDs 1-15 are used for userspace clients and are handled * by the amdgpu vm/hsa code.
*/
/** * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type * * @adev: amdgpu_device pointer * @vmid: vm instance to flush * @vmhub: which hub to flush * @flush_type: the flush type * * Flush the TLB for the requested page table using certain type.
*/ staticvoid gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
{ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
u32 j, inv_req, tmp, sem, req, ack, inst; constunsignedint eng = 17; struct amdgpu_vmhub *hub;
/* NOTE(review): body appears truncated by extraction — `inst`, `hub` and
 * `sem` are read below without visible initialization, and keyword spacing
 * is mangled ("staticvoid", "constunsignedint"). Kept byte-identical;
 * verify against the upstream file before editing logic here.
 */
/* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal
*/ if (adev->gfx.kiq[inst].ring.sched.ready &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
/* This path is needed before KIQ/MES/GFXOFF are set up */
spin_lock(&adev->gmc.invalidate_lock);
/* * It may lose gpuvm invalidate acknowledge state across power-gating * off cycle, add semaphore acquire before invalidation and semaphore * release after invalidation to avoid entering power gated state * to WA the Issue
*/
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ if (use_semaphore) { for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acquire */ if (vmhub >= AMDGPU_MMHUB0(0))
tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, GET_INST(GC, inst)); else
tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, GET_INST(GC, inst)); if (tmp & 0x1) break;
udelay(1);
}
if (j >= adev->usec_timeout)
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
/* * Issue a dummy read to wait for the ACK register to * be cleared to avoid a false ACK due to the new fast * GRBM interface.
*/ if ((vmhub == AMDGPU_GFXHUB(0)) &&
(amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
RREG32_NO_KIQ(req);
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ if (use_semaphore) { /* * add semaphore release after invalidation, * write with 0 means semaphore release
*/ if (vmhub >= AMDGPU_MMHUB0(0))
WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, GET_INST(GC, inst)); else
WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, GET_INST(GC, inst));
}
spin_unlock(&adev->gmc.invalidate_lock);
if (j < adev->usec_timeout) return;
DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}
/** * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid * * @adev: amdgpu_device pointer * @pasid: pasid to be flush * @flush_type: the flush type * @all_hub: flush all hubs * @inst: is used to select which instance of KIQ to use for the invalidation * * Flush the TLB for the requested pasid.
*/ staticvoid gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint16_t pasid, uint32_t flush_type, bool all_hub, uint32_t inst)
{
uint16_t queried; int i, vmid;
/* NOTE(review): from here down the body references names this function does
 * not declare (ring, hub, eng, use_semaphore, *flags, vm, addr, mca, mc,
 * base) — this looks like several functions (ring-emit TLB flush, PTE-flag
 * override, MCA init, vram/mc setup) spliced together by extraction. Kept
 * byte-identical; only comment typos fixed. Confirm against upstream.
 */
/* * It may lose gpuvm invalidate acknowledge state across power-gating * off cycle, add semaphore acquire before invalidation and semaphore * release after invalidation to avoid entering power gated state * to WA the Issue
*/
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ if (use_semaphore) /* a read return value of 1 means semaphore acquire */
amdgpu_ring_emit_reg_wait(ring,
hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0x1, 0x1);
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ if (use_semaphore) /* * add semaphore release after invalidation, * write with 0 means semaphore release
*/
amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0);
/* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system * memory can use more efficient MTYPEs.
*/ if (!(adev->flags & AMD_IS_APU) ||
amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3)) return;
/* Only direct-mapped memory allows us to determine the NUMA node from * the DMA address.
*/ if (!adev->ram_is_direct_mapped) {
dev_dbg_ratelimited(adev->dev, "RAM is not direct mapped\n"); return;
}
/* MTYPE_NC is the same default and can be overridden. * MTYPE_UC will be present if the memory is extended-coherent * and can also be overridden.
*/ if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC) &&
(*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC)) {
dev_dbg_ratelimited(adev->dev, "MTYPE is not NC or UC\n"); return;
}
/* FIXME: Only supported on native mode for now. For carve-out, the * NUMA affinity of the GPU/VM needs to come from the PCI info because * memory partitions are not associated with different NUMA nodes.
*/ if (adev->gmc.is_app_apu && vm->mem_id >= 0) {
local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node;
} else {
dev_dbg_ratelimited(adev->dev, "Only native mode APU is supported.\n"); return;
}
/* Only handle real RAM. Mappings of PCIe resources don't have struct * page or NUMA nodes.
*/ if (!page_is_ram(addr >> PAGE_SHIFT)) {
dev_dbg_ratelimited(adev->dev, "Page is not RAM.\n"); return;
}
nid = pfn_to_nid(addr >> PAGE_SHIFT);
dev_dbg_ratelimited(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
vm->mem_id, local_node, nid); if (nid == local_node) {
uint64_t old_flags = *flags; if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) ==
AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC)) { unsignedint mtype_local = MTYPE_RW;
/* is UMC the right IP to check for MCA? Maybe DF? */ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { case IP_VERSION(6, 7, 0): if (!adev->gmc.xgmi.connected_to_cpu) {
mca->mp0.ras = &mca_v3_0_mp0_ras;
mca->mp1.ras = &mca_v3_0_mp1_ras;
mca->mpio.ras = &mca_v3_0_mpio_ras;
} break; default: break;
}
}
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) { enum amdgpu_pkg_type pkg_type =
adev->smuio.funcs->get_pkg_type(adev); /* On GFXIP 9.4.3. APU, there is no physical VRAM domain present * and the APU, can be in used two possible modes: * - carveout mode * - native APU mode * "is_app_apu" can be used to identify the APU in the native * mode.
*/
adev->gmc.is_app_apu = (pkg_type == AMDGPU_PKG_TYPE_APU &&
!pci_resource_len(adev->pdev, 0));
}
/* add the xgmi offset of the physical node */
base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; if (amdgpu_gmc_is_pdb0_enabled(adev)) {
amdgpu_gmc_sysvm_location(adev, mc);
} else {
amdgpu_gmc_vram_location(adev, mc, base);
amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
amdgpu_gmc_agp_location(adev, mc);
} /* base offset of vram pages */
adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
/* XXX: add the xgmi offset of the physical node? */
adev->vm_manager.vram_base_offset +=
adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
}
/** * gmc_v9_0_mc_init - initialize the memory controller driver params * * @adev: amdgpu_device pointer * * Look up the amount of vram, vram width, and decide how to place * vram and gart within the GPU's physical address space. * Returns 0 for success.
*/ staticint gmc_v9_0_mc_init(struct amdgpu_device *adev)
{ int r;
/* NOTE(review): fragment — `#ifdef CONFIG_X86_64` below has no visible
 * matching `#endif`, and the GART-table allocation code after it looks
 * like the body of a separate gart_init function spliced in by
 * extraction. Kept byte-identical; verify against upstream.
 */
/* size in MB on si */ if (!adev->gmc.is_app_apu) {
adev->gmc.mc_vram_size =
adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
} else {
DRM_DEBUG("Set mc_vram_size = 0 for APP APU\n");
adev->gmc.mc_vram_size = 0;
}
adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
if (!(adev->flags & AMD_IS_APU) &&
!adev->gmc.xgmi.connected_to_cpu) {
r = amdgpu_device_resize_fb_bar(adev); if (r) return r;
}
adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64 /* * AMD Accelerated Processing Platform (APP) supporting GPU-HOST xgmi * interface can use VRAM through here as it appears system reserved * memory in host address space. * * For APUs, VRAM is just the stolen system memory and can be accessed * directly. * * Otherwise, use the legacy Host Data Path (HDP) through PCIe BAR.
*/
/* Initialize common gart structure */
r = amdgpu_gart_init(adev); if (r) return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
if (!adev->gmc.real_vram_size) {
dev_info(adev->dev, "Put GART in system memory for APU\n");
r = amdgpu_gart_table_ram_alloc(adev); if (r)
dev_err(adev->dev, "Failed to allocate GART in system memory\n");
} else {
r = amdgpu_gart_table_vram_alloc(adev); if (r) return r;
if (amdgpu_gmc_is_pdb0_enabled(adev))
r = amdgpu_gmc_pdb0_alloc(adev);
}
return r;
}
/**
 * gmc_v9_0_save_registers - saves regs
 *
 * @adev: amdgpu_device pointer
 *
 * This saves potential register values that should be
 * restored upon resume.
 */
static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
{
	/* Only DCE 1.0.x needs DCHUBBUB_SDPIF_MMIO_CNTRL_0 cached across
	 * suspend; the mm register offset is defined at the top of this file.
	 * (Fix: original had "staticvoid" — missing space, a compile error.)
	 */
	if ((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) ||
	    (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1)))
		adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
}
/* NOTE(review): orphan fragment — no enclosing function header is visible;
 * this reads as pieces of the sw_init path (vram info, IRQ registration,
 * memory manager / GART setup) plus the gart-enable path, spliced together
 * by extraction. "elseif", "unsignedint" and "unsignedlonglong" are
 * whitespace-stripping artifacts. Kept byte-identical; confirm upstream.
 */
if (amdgpu_is_multi_aid(adev)) {
gmc_v9_4_3_init_vram_info(adev);
} elseif (!adev->bios) { if (adev->flags & AMD_IS_APU) {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4;
adev->gmc.vram_width = 64 * 64;
} else {
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
adev->gmc.vram_width = 128 * 64;
}
} else {
r = amdgpu_atomfirmware_get_vram_info(adev,
&vram_width, &vram_type, &vram_vendor); if (amdgpu_sriov_vf(adev)) /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, * and DF related registers is not readable, seems hardcode is the * only way to set the correct vram_width
*/
adev->gmc.vram_width = 2048; elseif (amdgpu_emu_mode != 1)
adev->gmc.vram_width = vram_width;
if (!adev->gmc.vram_width) { int chansize, numchan;
/* This interrupt is VMC page fault.*/
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
&adev->gmc.vm_fault); if (r) return r;
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
&adev->gmc.vm_fault); if (r) return r;
}
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
&adev->gmc.vm_fault);
if (r) return r;
if (!amdgpu_sriov_vf(adev) &&
!adev->gmc.xgmi.connected_to_cpu &&
!adev->gmc.is_app_apu) { /* interrupt sent to DF. */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
&adev->gmc.ecc_irq); if (r) return r;
}
/* Set the internal MC address mask * This is the max address of the GPU's * internal address space.
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
if (amdgpu_is_multi_aid(adev)) {
r = amdgpu_gmc_init_mem_ranges(adev); if (r) return r;
}
/* Memory manager */
r = amdgpu_bo_init(adev); if (r) return r;
r = gmc_v9_0_gart_init(adev); if (r) return r;
gmc_v9_0_init_nps_details(adev); /* * number of VMs * VMID 0 is reserved for System * amdgpu graphics/compute will use VMIDs 1..n-1 * amdkfd will use VMIDs n..15 * * The first KFD VMID is 8 for GPUs with graphics, 3 for * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs * for video processing.
*/
adev->vm_manager.first_kfd_vmid =
(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
amdgpu_is_multi_aid(adev)) ?
3 :
8;
amdgpu_vm_manager_init(adev);
gmc_v9_0_save_registers(adev);
r = amdgpu_gmc_ras_sw_init(adev); if (r) return r;
if (amdgpu_is_multi_aid(adev))
amdgpu_gmc_sysfs_init(adev);
if (amdgpu_gmc_is_pdb0_enabled(adev))
amdgpu_gmc_init_pdb0(adev);
if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); return -EINVAL;
}
amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
if (!adev->in_s0ix) {
r = adev->gfxhub.funcs->gart_enable(adev); if (r) return r;
}
r = adev->mmhub.funcs->gart_enable(adev); if (r) return r;
DRM_INFO("PCIE GART of %uM enabled.\n",
(unsignedint)(adev->gmc.gart_size >> 20)); if (adev->gmc.pdb0_bo)
DRM_INFO("PDB0 located at 0x%016llX\n",
(unsignedlonglong)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo));
DRM_INFO("PTB located at 0x%016llX\n",
(unsignedlonglong)amdgpu_bo_gpu_offset(adev->gart.bo));
return 0;
}
staticint gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
{ struct amdgpu_device *adev = ip_block->adev; bool value; int i, r;
/* NOTE(review): this body looks like a splice of hw_init, hw_fini (the
 * amdgpu_irq_put calls below are tear-down, not setup) and resume — note
 * the apparent recursive call `gmc_v9_0_hw_init(ip_block)` near the end,
 * which would never terminate if real. Almost certainly an extraction
 * artifact; kept byte-identical, confirm against upstream before use.
 */
adev->gmc.flush_pasid_uses_kiq = true;
/* Vega20+XGMI caches PTEs in TC and TLB. Add a heavy-weight TLB flush * (type 2), which flushes both. Due to a race condition with * concurrent memory accesses using the same TLB cache line, we still * need a second TLB flush after this.
*/
adev->gmc.flush_tlb_needs_extra_type_2 =
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) &&
adev->gmc.xgmi.num_physical_nodes;
/* The sequence of these two function calls matters.*/
gmc_v9_0_init_golden_registers(adev);
if (amdgpu_sriov_vf(adev)) { /* full access mode, so don't touch any GMC register */
DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); return 0;
}
/* * Pair the operations did in gmc_v9_0_hw_init and thus maintain * a correct cached state for GMC. Otherwise, the "gate" again * operation on S3 resuming will fail due to wrong cached state.
*/ if (adev->mmhub.funcs->update_power_gating)
adev->mmhub.funcs->update_power_gating(adev, false);
/* * For minimal init, late_init is not called, hence VM fault/RAS irqs * are not enabled.
*/ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
if (adev->gmc.ecc_irq.funcs &&
amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
}
/* If a reset is done for NPS mode switch, read the memory range * information again.
*/ if (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS) {
amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
adev->gmc.reset_flags &= ~AMDGPU_GMC_INIT_RESET_NPS;
}
r = gmc_v9_0_hw_init(ip_block); if (r) return r;
amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
/* gmc_v9_0_is_idle - IP-block idle query; MC is always ready in GMC v9,
 * so report idle unconditionally. @ip_block is unused.
 * (Fix: original had "staticbool" and "returntrue" — missing spaces,
 * compile errors.)
 */
static bool gmc_v9_0_is_idle(struct amdgpu_ip_block *ip_block)
{
	/* MC is always ready in GMC v9. */
	return true;
}
/* gmc_v9_0_wait_for_idle - IP-block wait-for-idle hook; nothing to wait
 * for on GMC v9, so always succeed (return 0). @ip_block is unused.
 * (Fix: original had "staticint" — missing space, a compile error.)
 */
static int gmc_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	/* There is no need to wait for MC idle in GMC v9. */
	return 0;
}
/*
 * NOTE(review): trailing non-code residue from the web page this file was
 * scraped from (German disclaimer, translated): "The information on this
 * web page was compiled carefully to the best of our knowledge. However,
 * neither completeness, nor correctness, nor quality of the provided
 * information is guaranteed. Note: the colored syntax highlighting and the
 * measurement are still experimental." This text is not part of the driver
 * source; wrapped in a comment so the file remains parseable.
 */