/* * Copyright 2018 Advanced Micro Devices, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. *
*/
r = amdgpu_bo_create(adev, &bp, &adev->gmc.pdb0_bo); if (r) return r;
r = amdgpu_bo_reserve(adev->gmc.pdb0_bo, false); if (unlikely(r != 0)) goto bo_reserve_failure;
r = amdgpu_bo_pin(adev->gmc.pdb0_bo, AMDGPU_GEM_DOMAIN_VRAM); if (r) goto bo_pin_failure;
r = amdgpu_bo_kmap(adev->gmc.pdb0_bo, &adev->gmc.ptr_pdb0); if (r) goto bo_kmap_failure;
/** * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO * * @bo: the BO to get the PDE for * @level: the level in the PD hirarchy * @addr: resulting addr * @flags: resulting flags * * Get the address and flags to be used for a PDE (Page Directory Entry).
*/ void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
uint64_t *addr, uint64_t *flags)
{ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
/**
 * amdgpu_gmc_set_pte_pde - update the page tables using CPU
 *
 * @adev: amdgpu_device pointer
 * @cpu_pt_addr: cpu address of the page table
 * @gpu_page_idx: entry in the page table to update
 * @addr: dst addr to write into pte/pde
 * @flags: access flags
 *
 * Update the page tables using CPU.
 */
int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
			   uint32_t gpu_page_idx, uint64_t addr,
			   uint64_t flags)
{
	void __iomem *ptr = (void *)cpu_pt_addr;
	uint64_t pte;

	/*
	 * The following is for PTE only. GART does not have PDEs.
	 * Keep only the page-frame bits of the address, then OR in the
	 * access flags to form the entry.
	 */
	pte = (addr & 0x0000FFFFFFFFF000ULL) | flags;

	/* Each entry is 8 bytes wide; write it through the mapped pointer. */
	writeq(pte, ptr + (gpu_page_idx * 8));

	return 0;
}
/** * amdgpu_gmc_agp_addr - return the address in the AGP address space * * @bo: TTM BO which needs the address, must be in GTT domain * * Tries to figure out how to access the BO through the AGP aperture. Returns * AMDGPU_BO_INVALID_OFFSET if that is not possible.
*/
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
{ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
if (!bo->ttm) return AMDGPU_BO_INVALID_OFFSET;
if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached) return AMDGPU_BO_INVALID_OFFSET;
if (bo->ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size) return AMDGPU_BO_INVALID_OFFSET;
/** * amdgpu_gmc_vram_location - try to find VRAM location * * @adev: amdgpu device structure holding all necessary information * @mc: memory controller structure holding memory information * @base: base address at which to put VRAM * * Function will try to place VRAM at base address provided * as parameter.
*/ void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
u64 base)
{
uint64_t vis_limit = (uint64_t)amdgpu_vis_vram_limit << 20;
uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
/**
 * amdgpu_gmc_sysvm_location - place vram and gart in sysvm aperture
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * This function is only used if use GART for FB translation. In such
 * case, we use sysvm aperture (vmid0 page tables) for both vram
 * and gart (aka system memory) access.
 *
 * GPUVM (and our organization of vmid0 page tables) require sysvm
 * aperture to be placed at a location aligned with 8 times of native
 * page size. For example, if vm_context0_cntl.page_table_block_size
 * is 12, then native page size is 8G (2M*2^12), sysvm should start
 * with a 64G aligned address. For simplicity, we just put sysvm at
 * address 0. So vram start at address 0 and gart is right after vram.
 */
void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
	/* Fix: four_gb was used below but never declared in this function. */
	const u64 four_gb = 0x100000000ULL;
	u64 hive_vram_start = 0;
	u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;

	/* This node's VRAM slice within the hive-wide aperture. */
	mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
	mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
	/* node_segment_size may not 4GB aligned on SRIOV, align up is needed. */
	mc->gart_start = ALIGN(hive_vram_end + 1, four_gb);
	mc->gart_end = mc->gart_start + mc->gart_size - 1;
	if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
		/* set mc->vram_start to 0 to switch the returned GPU address of
		 * amdgpu_bo_create_reserved() from FB aperture to GART aperture.
		 */
		mc->vram_start = 0;
		mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
		mc->visible_vram_size = min(mc->visible_vram_size, mc->real_vram_size);
	} else {
		mc->fb_start = hive_vram_start;
		mc->fb_end = hive_vram_end;
	}
	dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
		 mc->mc_vram_size >> 20, mc->vram_start,
		 mc->vram_end, mc->real_vram_size >> 20);
	dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
		 mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}
/** * amdgpu_gmc_gart_location - try to find GART location * * @adev: amdgpu device structure holding all necessary information * @mc: memory controller structure holding memory information * @gart_placement: GART placement policy with respect to VRAM * * Function will try to place GART before or after VRAM. * If GART size is bigger than space left then we ajust GART size. * Thus function will never fails.
*/ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, enum amdgpu_gart_placement gart_placement)
{
u64 size_af, size_bf; /*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
/* VCE doesn't like it when BOs cross a 4GB segment, so align * the GART base on a 4GB boundary as well.
*/
size_bf = mc->fb_start;
size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);
/** * amdgpu_gmc_agp_location - try to find AGP location * @adev: amdgpu device structure holding all necessary information * @mc: memory controller structure holding memory information * * Function will place try to find a place for the AGP BAR in the MC address * space. * * AGP BAR will be assigned the largest available hole in the address space. * Should be called after VRAM and GART locations are setup.
*/ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{ const uint64_t sixteen_gb = 1ULL << 34; const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
u64 size_af, size_bf;
/**
 * amdgpu_gmc_set_agp_default - Set the default AGP aperture value.
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * To disable the AGP aperture, you need to set the start to a larger
 * value than the end. This function sets the default value which
 * can then be overridden using amdgpu_gmc_agp_location() if you want
 * to enable the AGP aperture on a specific chip.
 */
void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
				struct amdgpu_gmc *mc)
{
	/* start > end means the aperture is disabled by default */
	mc->agp_size = 0;
	mc->agp_end = 0;
	mc->agp_start = 0xffffffffffff;
}
/**
 * amdgpu_gmc_fault_key - get hash key from vm fault address and pasid
 *
 * @addr: 48 bit physical address, page aligned (36 significant bits)
 * @pasid: 16 bit process address space identifier
 */
/* Fix: "staticinline" was a fused token and did not compile. */
static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
{
	/* pack the address above the 16-bit pasid in the low nibbles */
	return addr << 4 | pasid;
}
/**
 * amdgpu_gmc_filter_faults - filter VM faults
 *
 * @adev: amdgpu device structure
 * @ih: interrupt ring that the fault received from
 * @addr: address of the VM fault
 * @pasid: PASID of the process causing the fault
 * @timestamp: timestamp of the fault
 *
 * Returns:
 * True if the fault was filtered and should not be processed further.
 * False if the fault is a new one and needs to be handled.
 */
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
			      struct amdgpu_ih_ring *ih, uint64_t addr,
			      uint16_t pasid, uint64_t timestamp)
{
	struct amdgpu_gmc *gmc = &adev->gmc;
	uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
	struct amdgpu_gmc_fault *fault;
	uint32_t hash;

	/* Stale retry fault if timestamp goes backward */
	if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp))
		return true;

	/* If we don't have space left in the ring buffer return immediately */
	stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
		AMDGPU_GMC_FAULT_TIMEOUT;
	if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
		return true;

	/* Try to find the fault in the hash */
	hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
	fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
	while (fault->timestamp >= stamp) {
		uint64_t tmp;

		if (atomic64_read(&fault->key) == key) {
			/*
			 * if we get a fault which is already present in
			 * the fault_ring and the timestamp of
			 * the fault is after the expired timestamp,
			 * then this is a new fault that needs to be added
			 * into the fault ring.
			 */
			if (fault->timestamp_expiry != 0 &&
			    amdgpu_ih_ts_after(fault->timestamp_expiry,
					       timestamp))
				break;
			else
				return true;
		}

		/* Fix: tmp was read uninitialized and the walk never
		 * advanced to the next chained entry — remember the current
		 * timestamp and follow the hash chain.
		 */
		tmp = fault->timestamp;
		fault = &gmc->fault_ring[fault->next];

		/* Check if the entry was reused */
		if (fault->timestamp >= tmp)
			break;
	}

	/* Add the fault to the ring */
	fault = &gmc->fault_ring[gmc->last_fault];
	atomic64_set(&fault->key, key);
	fault->timestamp = timestamp;

	/* And update the hash */
	fault->next = gmc->fault_hash[hash].idx;
	gmc->fault_hash[hash].idx = gmc->last_fault++;
	return false;
}
/** * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter * * @adev: amdgpu device structure * @addr: address of the VM fault * @pasid: PASID of the process causing the fault * * Remove the address from fault filter, then future vm fault on this address * will pass to retry fault handler to recover.
*/ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid)
{ struct amdgpu_gmc *gmc = &adev->gmc;
uint64_t key = amdgpu_gmc_fault_key(addr, pasid); struct amdgpu_ih_ring *ih; struct amdgpu_gmc_fault *fault;
uint32_t last_wptr;
uint64_t last_ts;
uint32_t hash;
uint64_t tmp;
if (adev->irq.retry_cam_enabled) return;
ih = &adev->irq.ih1; /* Get the WPTR of the last entry in IH ring */
last_wptr = amdgpu_ih_get_wptr(adev, ih); /* Order wptr with ring data. */
rmb(); /* Get the timetamp of the last entry in IH ring */
last_ts = amdgpu_ih_decode_iv_ts(adev, ih, last_wptr, -1);
hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
fault = &gmc->fault_ring[gmc->fault_hash[hash].idx]; do { if (atomic64_read(&fault->key) == key) { /* * Update the timestamp when this fault * expired.
*/
fault->timestamp_expiry = last_ts; break;
}
dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
ring->name, ring->vm_inv_eng, ring->vm_hub); /* SDMA has a special packet which allows it to use the same * invalidation engine for all the rings in one instance. * Therefore, we do not allocate a separate VM invalidation engine * for SDMA page rings. Instead, they share the VM invalidation * engine with the SDMA gfx ring. This change ensures efficient * resource management and avoids the issue of insufficient VM * invalidation engines.
*/
shared_ring = amdgpu_sdma_get_shared_ring(adev, ring); if (shared_ring) {
shared_ring->vm_inv_eng = ring->vm_inv_eng;
dev_info(adev->dev, "ring %s shares VM invalidation engine %u with ring %s on hub %u\n",
ring->name, ring->vm_inv_eng, shared_ring->name, ring->vm_hub); continue;
}
}
if (!hub->sdma_invalidation_workaround || vmid ||
!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready ||
!ring->sched.ready) { /* * A GPU reset should flush all TLBs anyway, so no need to do * this while one is ongoing.
*/ if (!down_read_trylock(&adev->reset_domain->sem)) return;
if (adev->gmc.flush_tlb_needs_extra_type_2)
adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
vmhub, 2);
if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
vmhub, 0);
/* The SDMA on Navi 1x has a bug which can theoretically result in memory * corruption if an invalidation happens at the same time as an VA * translation. Avoid this by doing the invalidation from the SDMA * itself at least for GART.
*/
mutex_lock(&adev->mman.gtt_window_lock);
r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
&job); if (r) goto error_alloc;
/* * A GPU reset should flush all TLBs anyway, so no need to do * this while one is ongoing.
*/ if (!down_read_trylock(&adev->reset_domain->sem)) return 0;
if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) { if (adev->gmc.flush_tlb_needs_extra_type_2)
adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
2, all_hub,
inst);
/* * Some ASICs need to reserve a region of video memory to avoid access * from driver
*/
adev->mman.stolen_reserved_offset = 0;
adev->mman.stolen_reserved_size = 0;
/* * TODO: * Currently there is a bug where some memory client outside * of the driver writes to first 8M of VRAM on S3 resume, * this overrides GART which by default gets placed in first 8M and * causes VM_FAULTS once GTT is accessed. * Keep the stolen memory reservation until the while this is not solved.
*/ switch (adev->asic_type) { case CHIP_VEGA10:
adev->mman.keep_stolen_vga_memory = true; /* * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area.
*/ #ifdef CONFIG_X86 if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) {
adev->mman.stolen_reserved_offset = 0x500000;
adev->mman.stolen_reserved_size = 0x200000;
} #endif break; case CHIP_RAVEN: case CHIP_RENOIR:
adev->mman.keep_stolen_vga_memory = true; break; default:
adev->mman.keep_stolen_vga_memory = false; break;
}
/**
 * amdgpu_gmc_init_pdb0 - initialize PDB0
 *
 * @adev: amdgpu_device pointer
 *
 * This function is only used when GART page table is used
 * for FB address translatioin. In such a case, we construct
 * a 2-level system VM page table: PDB0->PTB, to cover both
 * VRAM of the hive and system memory.
 *
 * PDB0 is static, initialized once on driver initialization.
 * The first n entries of PDB0 are used as PTE by setting
 * P bit to 1, pointing to VRAM. The n+1'th entry points
 * to a big PTB covering system memory.
 */
void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
{
	int i;
	uint64_t flags = adev->gart.gart_pte_flags; //TODO it is UC. explore NC/RW?
	/* Each PDE0 (used as PTE) covers (2^vmid0_page_table_block_size)*2M
	 */
	u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
	u64 pde0_page_size = (1ULL << adev->gmc.vmid0_page_table_block_size) << 21;
	/* Fix: vram_addr/vram_end were declared but never initialized,
	 * so the loop below read indeterminate values. Start from the
	 * node's base physical address and cover the whole hive VRAM.
	 */
	u64 vram_addr = adev->vm_manager.vram_base_offset - adev->gmc.vram_start;
	u64 vram_end = vram_addr + vram_size;
	u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	/* The first n PDE0 entries are used as PTE,
	 * pointing to vram
	 */
	for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size)
		amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, vram_addr, flags);

	/* The n+1'th PDE0 entry points to a huge
	 * PTB who has more than 512 entries each
	 * pointing to a 4K system page
	 */
	flags = AMDGPU_PTE_VALID;
	flags |= AMDGPU_PTE_SNOOPED | AMDGPU_PDE_BFS_FLAG(adev, 0);
	/* Requires gart_ptb_gpu_pa to be 4K aligned */
	amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
	drm_dev_exit(idx);
}
/** * amdgpu_gmc_vram_mc2pa - calculate vram buffer's physical address from MC * address * * @adev: amdgpu_device pointer * @mc_addr: MC address of buffer
*/
uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr)
{ return mc_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset;
}
/** * Check the start, the mid, and the end of the memory if the content of * each byte is the pattern "0x86". If yes, we suppose the vram bo is * workable. * * Note: If check the each byte of whole 1M bo, it will cost too many * seconds, so here, we just pick up three parts for emulation.
*/
ret = memcmp(vram_ptr, cptr, 10); if (ret) {
ret = -EIO; goto release_buffer;
}
ret = memcmp(vram_ptr + (size / 2), cptr, 10); if (ret) {
ret = -EIO; goto release_buffer;
}
ret = memcmp(vram_ptr + size - 10, cptr, 10); if (ret) {
ret = -EIO; goto release_buffer;
}
if (mode == UNKNOWN_MEMORY_PARTITION_MODE) return -EINVAL;
if (mode == adev->gmc.gmc_funcs->query_mem_partition_mode(adev)) {
dev_info(
adev->dev, "requested NPS mode is same as current NPS mode, skipping\n"); return count;
}
/* If device is part of hive, all devices in the hive should request the * same mode. Hence store the requested mode in hive.
*/
hive = amdgpu_get_xgmi_hive(adev); if (hive) {
atomic_set(&hive->requested_nps_mode, mode);
amdgpu_put_xgmi_hive(hive);
} else {
adev->gmc.requested_nps_mode = mode;
}
dev_info(
adev->dev, "NPS mode change requested, please remove and reload the driver\n");
/* TODO: For now, expect ranges and partition count to be the same. * Adjust if there are holes expected in any NPS domain.
*/ if (*exp_ranges && (range_cnt != *exp_ranges)) {
dev_warn(
adev->dev, "NPS config mismatch - expected ranges: %d discovery - nps mode: %d, nps ranges: %d",
*exp_ranges, nps_type, range_cnt);
ret = -EINVAL; goto err;
}
for (i = 0; i < range_cnt; ++i) { if (ranges[i].base_address >= ranges[i].limit_address) {
dev_warn(
adev->dev, "Invalid NPS range - nps mode: %d, range[%d]: base: %llx limit: %llx",
nps_type, i, ranges[i].base_address,
ranges[i].limit_address);
ret = -EINVAL; goto err;
}
/* Check for overlaps, not expecting any now */ for (j = i - 1; j >= 0; j--) { if (max(ranges[j].base_address,
ranges[i].base_address) <=
min(ranges[j].limit_address,
ranges[i].limit_address)) {
dev_warn(
adev->dev, "overlapping ranges detected [ %llx - %llx ] | [%llx - %llx]",
ranges[j].base_address,
ranges[j].limit_address,
ranges[i].base_address,
ranges[i].limit_address);
ret = -EINVAL; goto err;
}
}
if (!*exp_ranges)
*exp_ranges = range_cnt;
err:
kfree(ranges);
return ret;
}
int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev, int nps_mode)
{ /* Not supported on VF devices and APUs */ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU)) return -EOPNOTSUPP;
if (!adev->psp.funcs) {
dev_err(adev->dev, "PSP interface not available for nps mode change request"); return -EINVAL;
}
if (amdgpu_sriov_vf(adev) || !adev->gmc.supported_nps_modes ||
!adev->gmc.gmc_funcs->request_mem_partition_mode) return;
cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
hive = amdgpu_get_xgmi_hive(adev); if (hive) {
req_nps_mode = atomic_read(&hive->requested_nps_mode); if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode,
cur_nps_mode)) {
amdgpu_put_xgmi_hive(hive); return;
}
r = amdgpu_xgmi_request_nps_change(adev, hive, req_nps_mode);
amdgpu_put_xgmi_hive(hive); goto out;
}
req_nps_mode = adev->gmc.requested_nps_mode; if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode, cur_nps_mode)) return;
/* even if this fails, we should let driver unload w/o blocking */
r = adev->gmc.gmc_funcs->request_mem_partition_mode(adev, req_nps_mode);
out: if (r)
dev_err(adev->dev, "NPS mode change request failed\n"); else
dev_info(
adev->dev, "NPS mode change request done, reload driver to complete the change\n");
}
/*
 * amdgpu_gmc_need_reset_on_init - check whether the IP requires a reset
 * during driver initialization. Delegates to the per-ASIC gmc_funcs
 * callback when one is provided; otherwise no reset is needed.
 *
 * Fix: "returnfalse" was a fused token and did not compile.
 */
bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev)
{
	if (adev->gmc.gmc_funcs->need_reset_on_init)
		return adev->gmc.gmc_funcs->need_reset_on_init(adev);

	return false;
}
/*
 * Derive the NPS partition mode of a VF from the number of memory
 * partitions it was configured with. Unrecognized counts fall back
 * to NPS1.
 */
enum amdgpu_memory_partition
amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev)
{
	switch (adev->gmc.num_mem_partitions) {
	case 0:
		return UNKNOWN_MEMORY_PARTITION_MODE;
	case 2:
		return AMDGPU_NPS2_PARTITION_MODE;
	case 4:
		return AMDGPU_NPS4_PARTITION_MODE;
	case 8:
		return AMDGPU_NPS8_PARTITION_MODE;
	case 1:
	default:
		/* one partition, or anything unexpected, maps to NPS1 */
		return AMDGPU_NPS1_PARTITION_MODE;
	}
}
void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev, struct amdgpu_mem_partition_info *mem_ranges)
{ enum amdgpu_memory_partition mode;
u32 start_addr = 0, size; int i, r, l;
mode = amdgpu_gmc_query_memory_partition(adev);
switch (mode) { case UNKNOWN_MEMORY_PARTITION_MODE:
adev->gmc.num_mem_partitions = 0; break; case AMDGPU_NPS1_PARTITION_MODE:
adev->gmc.num_mem_partitions = 1; break; case AMDGPU_NPS2_PARTITION_MODE:
adev->gmc.num_mem_partitions = 2; break; case AMDGPU_NPS4_PARTITION_MODE: if (adev->flags & AMD_IS_APU)
adev->gmc.num_mem_partitions = 3; else
adev->gmc.num_mem_partitions = 4; break; case AMDGPU_NPS8_PARTITION_MODE:
adev->gmc.num_mem_partitions = 8; break; default:
adev->gmc.num_mem_partitions = 1; break;
}
/* Use NPS range info, if populated */
r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges,
&adev->gmc.num_mem_partitions); if (!r) {
l = 0; for (i = 1; i < adev->gmc.num_mem_partitions; ++i) { if (mem_ranges[i].range.lpfn >
mem_ranges[i - 1].range.lpfn)
l = i;
}
} else { if (!adev->gmc.num_mem_partitions) {
dev_warn(adev->dev, "Not able to detect NPS mode, fall back to NPS1\n");
adev->gmc.num_mem_partitions = 1;
} /* Fallback to sw based calculation */
size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT;
size /= adev->gmc.num_mem_partitions;
for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
mem_ranges[i].range.fpfn = start_addr;
mem_ranges[i].size =
((u64)size << AMDGPU_GPU_PAGE_SHIFT);
mem_ranges[i].range.lpfn = start_addr + size - 1;
start_addr += size;
}
l = adev->gmc.num_mem_partitions - 1;
}
/* Adjust the last one */
mem_ranges[l].range.lpfn =
(adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1;
mem_ranges[l].size =
adev->gmc.real_vram_size -
((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT);
}
/*
 * amdgpu_gmc_init_mem_ranges - allocate and fill the memory partition table
 *
 * Allocates the partition array, fills it from ACPI on APP APUs or from
 * the software computation otherwise, and sanity-checks the result
 * against the hardware config (skipped on SR-IOV VFs).
 *
 * Returns 0 on success, -ENOMEM if the table cannot be allocated.
 */
int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev)
{
	bool valid;

	adev->gmc.mem_partitions = kcalloc(AMDGPU_MAX_MEM_RANGES,
					   sizeof(struct amdgpu_mem_partition_info),
					   GFP_KERNEL);
	if (!adev->gmc.mem_partitions)
		return -ENOMEM;

	if (adev->gmc.is_app_apu)
		amdgpu_gmc_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions);
	else
		amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);

	/* VFs trust the host; bare metal validates against hardware */
	valid = amdgpu_sriov_vf(adev) ||
		amdgpu_gmc_validate_partition_info(adev);
	if (!valid) {
		/* TODO: handle invalid case */
		dev_warn(adev->dev,
			 "Mem ranges not matching with hardware config\n");
	}

	return 0;
}
Messung V0.5
¤ Dauer der Verarbeitung: 0.24 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.