/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#define POPULATE_UCODE_INFO(vf2pf_info, ucode, ver) \
	do { \
		vf2pf_info->ucode_info[ucode].id = ucode; \
		vf2pf_info->ucode_info[ucode].version = ver; \
	} while (0)
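/*
 * A minimal usage sketch for the macro above, illustrative only and not part
 * of the driver. It assumes the amd_sriov_msg_vf2pf_info layout and the
 * AMD_SRIOV_UCODE_ID_VCE id from the upstream SRIOV message headers, plus
 * the firmware version cached in adev->vce.fw_version.
 */
static void example_populate_vce_ucode_info(struct amdgpu_device *adev,
					    struct amd_sriov_msg_vf2pf_info *vf2pf_info)
{
	/* record the VCE firmware id and version in the shared VF2PF table */
	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_VCE,
			    adev->vce.fw_version);
}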
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
	/*
	 * By now all MMIO pages except the mailbox are blocked if blocking
	 * is enabled in the hypervisor. Use SCRATCH_REG0 for the test.
	 */
	return RREG32_NO_KIQ(0xc040) == 0xffffffff;
}
	/* Reduce the kcq number to 2 to reduce latency */
	if (amdgpu_num_kcq == -1)
		amdgpu_num_kcq = 2;
}
/**
 * amdgpu_virt_request_full_gpu() - request full gpu access
 * @adev: amdgpu device.
 * @init: is driver init time.
 * When starting to init/fini the driver, full gpu access must be
 * requested first.
 * Return: Zero if the request succeeds, otherwise an error code.
 */
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init)
{
	struct amdgpu_virt *virt = &adev->virt;
	int r;

	if (virt->ops && virt->ops->req_full_gpu) {
		r = virt->ops->req_full_gpu(adev, init);
		if (r) {
			adev->no_hw_access = true;
			return r;
}
adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
}
return 0;
}
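/*
 * A minimal sketch of the intended pairing, assuming an init path similar to
 * the upstream driver; the helper below is illustrative only. Every
 * successful amdgpu_virt_request_full_gpu() must be balanced by
 * amdgpu_virt_release_full_gpu() once exclusive access is no longer needed.
 */
static int example_init_under_full_access(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_virt_request_full_gpu(adev, true);
	if (r)
		return r;

	/* ... hardware init that needs exclusive gpu access goes here ... */

	return amdgpu_virt_release_full_gpu(adev, true);
}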
/**
 * amdgpu_virt_release_full_gpu() - release full gpu access
 * @adev: amdgpu device.
 * @init: is driver init time.
 * When finishing driver init/fini, full gpu access needs to be released.
 * Return: Zero if the release succeeds, otherwise an error code.
 */
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init)
{
	struct amdgpu_virt *virt = &adev->virt;
	int r;

	if (virt->ops && virt->ops->rel_full_gpu) {
		r = virt->ops->rel_full_gpu(adev, init);
		if (r)
			return r;

		adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
	}
	return 0;
}
/**
 * amdgpu_virt_reset_gpu() - reset gpu
 * @adev: amdgpu device.
 * Send a reset command to the GPU hypervisor to reset the GPU that the VM
 * is using.
 * Return: Zero if the reset succeeds, otherwise an error code.
 */
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;
	int r;

	if (virt->ops && virt->ops->reset_gpu) {
		r = virt->ops->reset_gpu(adev);
		if (r)
			return r;

		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
	}

	return 0;
}
void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;

	if (virt->ops && virt->ops->req_init_data)
		virt->ops->req_init_data(adev);

	if (adev->virt.req_init_data_ver > 0)
		DRM_INFO("host supports REQ_INIT_DATA handshake\n");
	else
		DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
}
/**
 * amdgpu_virt_ready_to_reset() - send ready to reset to host
 * @adev: amdgpu device.
 * Send a ready-to-reset message to the GPU hypervisor to signal that we
 * have stopped GPU activity and are ready for a host FLR.
 */
void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;

	if (virt->ops && virt->ops->ready_to_reset)
		virt->ops->ready_to_reset(adev);
}
/**
 * amdgpu_virt_wait_reset() - wait for gpu reset completion
 * @adev: amdgpu device.
 * Wait for the GPU reset to complete.
 * Return: Zero if the reset succeeds, otherwise an error code.
 */
int amdgpu_virt_wait_reset(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;

	if (!virt->ops || !virt->ops->wait_reset)
		return -EINVAL;
return virt->ops->wait_reset(adev);
}
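/*
 * A minimal sketch of the VF-side FLR handshake, assuming a recovery path
 * similar to the upstream reset code; the helper below is illustrative only.
 * Quiesce first, tell the host we are ready, then block until the
 * hypervisor reports completion.
 */
static int example_handle_host_flr(struct amdgpu_device *adev)
{
	/* stop the vf2pf data exchange and switch MP1 to the FLR state */
	amdgpu_virt_pre_reset(adev);

	/* signal the host that GPU activity has stopped */
	amdgpu_virt_ready_to_reset(adev);

	/* wait for the host-initiated FLR to complete */
	return amdgpu_virt_wait_reset(adev);
}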
/**
 * amdgpu_virt_alloc_mm_table() - alloc memory for mm table
 * @adev: amdgpu device.
 * The MM table is used by UVD and VCE for their initialization.
 * Return: Zero if the allocation succeeds, otherwise an error code.
 */
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_sriov_vf(adev) || adev->virt.mm_table.gpu_addr)
		return 0;
r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
&adev->virt.mm_table.bo,
&adev->virt.mm_table.gpu_addr,
				    (void *)&adev->virt.mm_table.cpu_addr);
	if (r) {
		DRM_ERROR("failed to alloc mm table, error = %d.\n", r);
		return r;
	}

	memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
	return 0;
}
bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;

	if (!virt->ops || !virt->ops->rcvd_ras_intr)
		return false;

	return virt->ops->rcvd_ras_intr(adev);
}
unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size,
				    unsigned int key, unsigned int checksum)
{
	unsigned int ret = key;
	unsigned long i = 0;
	unsigned char *pos;

	pos = (unsigned char *)obj;
	/* calculate the checksum over the whole object */
	for (i = 0; i < obj_size; ++i)
		ret += *(pos + i);

	/* minus the checksum itself */
	pos = (unsigned char *)&checksum;
	for (i = 0; i < sizeof(checksum); ++i)
		ret -= *(pos + i);

	return ret;
}
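/*
 * A minimal validation sketch, illustrative only. The producer stores
 * amd_sriov_msg_checksum() computed over the whole message; the consumer
 * recomputes it with the same key. Because the helper subtracts the bytes
 * of the passed-in checksum value, the stored field can stay in place
 * while the message is verified.
 */
static bool example_msg_checksum_ok(void *msg, unsigned long msg_size,
				    unsigned int key, unsigned int stored)
{
	/* recompute over the full message and compare with the stored value */
	return amd_sriov_msg_checksum(msg, msg_size, key, stored) == stored;
}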
static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;
	struct amdgpu_virt_ras_err_handler_data **data = &virt->virt_eh_data;
	/*
	 * The GPU will be marked bad on the host if the bad page count is
	 * more than 10, so allocating 512 entries is enough.
	 */
	unsigned int align_space = 512;
	void *bps = NULL;
	struct amdgpu_bo **bps_bo = NULL;

	*data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL);
	if (!*data)
		goto data_failure;

	bps = kmalloc_array(align_space, sizeof(*(*data)->bps), GFP_KERNEL);
	if (!bps)
		goto bps_failure;

	bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL);
	if (!bps_bo)
		goto bps_bo_failure;
for (i = data->last_reserved; i < data->count; i++) {
bp = data->bps[i].retired_page;
		/*
		 * There are two cases where a reserve error should be ignored:
		 * 1) a ras bad page has been allocated (used by someone);
		 * 2) a ras bad page has been reserved (duplicate error injection
		 *    for one page);
		 */
		if (ttm_resource_manager_used(man)) {
amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
bp << AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_GPU_PAGE_SIZE);
data->bps_bo[i] = NULL;
		} else {
			if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_GPU_PAGE_SIZE,
&bo, NULL))
DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
data->bps_bo[i] = bo;
}
data->last_reserved = i + 1;
bo = NULL;
}
}
	/* clamp a too-large or too-small interval value */
	if (adev->virt.vf2pf_update_interval_ms < 200 ||
	    adev->virt.vf2pf_update_interval_ms > 10000)
adev->virt.vf2pf_update_interval_ms = 2000;
if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
	} else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
		/* go through this logic in ip_init and reset to init the workqueue */
amdgpu_virt_exchange_data(adev);
INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
	} else if (adev->bios != NULL) {
		/* go through this logic in the early init stage to get necessary flags, e.g. rlcg_acc related */
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
		/* bad page handling for version 2 */
		if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_FIJI:
		reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
		break;
	case CHIP_VEGA10:
	case CHIP_VEGA20:
	case CHIP_NAVI10:
	case CHIP_NAVI12:
	case CHIP_SIENNA_CICHLID:
	case CHIP_ARCTURUS:
	case CHIP_ALDEBARAN:
	case CHIP_IP_DISCOVERY:
		reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER);
		break;
	default: /* other chips don't support SRIOV */
		reg = 0;
		break;
	}
if (reg & 1)
adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
if (reg & 0x80000000)
adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
	if (!reg) {
		/* passthrough mode is exclusive with sriov mode */
		if (is_virtual_machine() && !xen_initial_domain())
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
	/* we have the ability to check now */
	if (amdgpu_sriov_vf(adev)) {
is_sriov = true;
		switch (adev->asic_type) {
		case CHIP_TONGA:
		case CHIP_FIJI:
			vi_set_virt_ops(adev);
			break;
		case CHIP_VEGA10:
			soc15_set_virt_ops(adev);
#ifdef CONFIG_X86
			/* don't send GPU_INIT_DATA with MS_HYPERV */
			if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
#endif
				/* send a dummy GPU_INIT_DATA request to host on vega10 */
				amdgpu_virt_request_init_data(adev);
			break;
		case CHIP_VEGA20:
		case CHIP_ARCTURUS:
		case CHIP_ALDEBARAN:
			soc15_set_virt_ops(adev);
			break;
		case CHIP_NAVI10:
		case CHIP_NAVI12:
		case CHIP_SIENNA_CICHLID:
		case CHIP_IP_DISCOVERY:
			nv_set_virt_ops(adev);
			/* try to send a GPU_INIT_DATA request to the host */
			amdgpu_virt_request_init_data(adev);
			break;
		default: /* other chips don't support SRIOV */
			is_sriov = false;
			DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
			break;
}
}
	if (amdgpu_sriov_vf(adev)) {
		if (amdgpu_sriov_is_pp_one_vf(adev))
			mode = SRIOV_VF_MODE_ONE_VF;
		else
mode = SRIOV_VF_MODE_MULTI_VF;
} else {
mode = SRIOV_VF_MODE_BARE_METAL;
}
return mode;
}
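/*
 * A minimal usage sketch, assuming the function above is the tail of
 * amdgpu_virt_get_sriov_vf_mode() as in the upstream driver; the helper
 * below is illustrative only. The returned mode distinguishes bare metal
 * from the one-VF and multi-VF SRIOV cases.
 */
static bool example_is_one_vf(struct amdgpu_device *adev)
{
	/* SRIOV_VF_MODE_ONE_VF means this VF owns the whole GPU */
	return amdgpu_virt_get_sriov_vf_mode(adev) == SRIOV_VF_MODE_ONE_VF;
}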
void amdgpu_virt_pre_reset(struct amdgpu_device *adev)
{
	/* stop the data exchange thread */
amdgpu_virt_fini_data_exchange(adev);
amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR);
}
void amdgpu_virt_post_reset(struct amdgpu_device *adev)
{
	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) {
		/*
		 * force set to GFXOFF state after reset, to avoid some
		 * invalid operations before GC enable
		 */
adev->gfx.is_poweron = false;
}
	if (adev->virt.ras_en_caps.bits.block_umc)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__UMC);
	if (adev->virt.ras_en_caps.bits.block_sdma)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SDMA);
	if (adev->virt.ras_en_caps.bits.block_gfx)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__GFX);
	if (adev->virt.ras_en_caps.bits.block_mmhub)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MMHUB);
	if (adev->virt.ras_en_caps.bits.block_athub)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__ATHUB);
	if (adev->virt.ras_en_caps.bits.block_pcie_bif)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__PCIE_BIF);
	if (adev->virt.ras_en_caps.bits.block_hdp)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__HDP);
	if (adev->virt.ras_en_caps.bits.block_xgmi_wafl)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__XGMI_WAFL);
	if (adev->virt.ras_en_caps.bits.block_df)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__DF);
	if (adev->virt.ras_en_caps.bits.block_smn)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SMN);
	if (adev->virt.ras_en_caps.bits.block_sem)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SEM);
	if (adev->virt.ras_en_caps.bits.block_mp0)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP0);
	if (adev->virt.ras_en_caps.bits.block_mp1)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP1);
	if (adev->virt.ras_en_caps.bits.block_fuse)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__FUSE);
	if (adev->virt.ras_en_caps.bits.block_mca)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MCA);
	if (adev->virt.ras_en_caps.bits.block_vcn)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__VCN);
	if (adev->virt.ras_en_caps.bits.block_jpeg)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__JPEG);
	if (adev->virt.ras_en_caps.bits.block_ih)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__IH);
	if (adev->virt.ras_en_caps.bits.block_mpio)
		adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MPIO);
if (adev->virt.ras_en_caps.bits.poison_propogation_mode)
con->poison_supported = true; /* Poison is handled by host */

	return true;
}
static inline enum amd_sriov_ras_telemetry_gpu_block
amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block block)
{
	switch (block) {
	case AMDGPU_RAS_BLOCK__UMC:
		return RAS_TELEMETRY_GPU_BLOCK_UMC;
	case AMDGPU_RAS_BLOCK__SDMA:
		return RAS_TELEMETRY_GPU_BLOCK_SDMA;
	case AMDGPU_RAS_BLOCK__GFX:
		return RAS_TELEMETRY_GPU_BLOCK_GFX;
	case AMDGPU_RAS_BLOCK__MMHUB:
		return RAS_TELEMETRY_GPU_BLOCK_MMHUB;
	case AMDGPU_RAS_BLOCK__ATHUB:
		return RAS_TELEMETRY_GPU_BLOCK_ATHUB;
	case AMDGPU_RAS_BLOCK__PCIE_BIF:
		return RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF;
	case AMDGPU_RAS_BLOCK__HDP:
		return RAS_TELEMETRY_GPU_BLOCK_HDP;
	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
		return RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL;
	case AMDGPU_RAS_BLOCK__DF:
		return RAS_TELEMETRY_GPU_BLOCK_DF;
	case AMDGPU_RAS_BLOCK__SMN:
		return RAS_TELEMETRY_GPU_BLOCK_SMN;
	case AMDGPU_RAS_BLOCK__SEM:
		return RAS_TELEMETRY_GPU_BLOCK_SEM;
	case AMDGPU_RAS_BLOCK__MP0:
		return RAS_TELEMETRY_GPU_BLOCK_MP0;
	case AMDGPU_RAS_BLOCK__MP1:
		return RAS_TELEMETRY_GPU_BLOCK_MP1;
	case AMDGPU_RAS_BLOCK__FUSE:
		return RAS_TELEMETRY_GPU_BLOCK_FUSE;
	case AMDGPU_RAS_BLOCK__MCA:
		return RAS_TELEMETRY_GPU_BLOCK_MCA;
	case AMDGPU_RAS_BLOCK__VCN:
		return RAS_TELEMETRY_GPU_BLOCK_VCN;
	case AMDGPU_RAS_BLOCK__JPEG:
		return RAS_TELEMETRY_GPU_BLOCK_JPEG;
	case AMDGPU_RAS_BLOCK__IH:
		return RAS_TELEMETRY_GPU_BLOCK_IH;
	case AMDGPU_RAS_BLOCK__MPIO:
		return RAS_TELEMETRY_GPU_BLOCK_MPIO;
	default:
		DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
			      block);
		return RAS_TELEMETRY_GPU_BLOCK_COUNT;
	}
}
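/*
 * A minimal usage sketch, illustrative only: map a generic RAS block to its
 * SRIOV telemetry id and treat the COUNT sentinel as "unsupported".
 */
static bool example_block_has_sriov_telemetry(struct amdgpu_device *adev,
					      enum amdgpu_ras_block block)
{
	/* RAS_TELEMETRY_GPU_BLOCK_COUNT is returned for unsupported blocks */
	return amdgpu_ras_block_to_sriov(adev, block) <
	       RAS_TELEMETRY_GPU_BLOCK_COUNT;
}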
if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
	    !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
		return -EOPNOTSUPP;

	/* Host access may be lost during reset; just return the last cached data. */
	if (down_read_trylock(&adev->reset_domain->sem)) {
amdgpu_virt_req_ras_err_count_internal(adev, false);
up_read(&adev->reset_domain->sem);
}
static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;
	int ret = 0;
	uint32_t more = 0;

	if (!virt->ops || !virt->ops->req_ras_cper_dump)
		return -EOPNOTSUPP;

	/*
	 * The host may hold more CPER records than fit in one telemetry
	 * buffer; 'more' requests another round trip until it is drained.
	 */
	do {
		if (!virt->ops->req_ras_cper_dump(adev, virt->ras.cper_rptr))
			ret = amdgpu_virt_write_cpers_to_ring(
				adev, virt->fw_reserve.ras_telemetry, &more);
		else
			ret = 0;
} while (more && !ret);
return ret;
}
int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update)
{
	struct amdgpu_virt *virt = &adev->virt;
	int ret = 0;

	if (!amdgpu_sriov_ras_cper_en(adev))
		return -EOPNOTSUPP;
if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) &&
down_read_trylock(&adev->reset_domain->sem)) {
mutex_lock(&virt->ras.ras_telemetry_mutex);
ret = amdgpu_virt_req_ras_cper_dump_internal(adev);
mutex_unlock(&virt->ras.ras_telemetry_mutex);
up_read(&adev->reset_domain->sem);
}
return ret;
}
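/*
 * A minimal usage sketch, illustrative only: periodic callers pass
 * force_update = false and rely on the ratelimit above, while reset
 * handling can pass true to refresh the CPER ring unconditionally.
 */
static void example_refresh_cper_after_reset(struct amdgpu_device *adev)
{
	/* force an immediate dump; -EOPNOTSUPP means CPER telemetry is off */
	if (amdgpu_virt_req_ras_cper_dump(adev, true) == -EOPNOTSUPP)
		DRM_DEBUG("SRIOV CPER telemetry not enabled\n");
}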
int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev)
{
	unsigned long ue_count, ce_count;
if (amdgpu_sriov_ras_telemetry_en(adev)) {
amdgpu_virt_req_ras_err_count_internal(adev, true);
amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL);
	}

	return 0;
}
bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
					enum amdgpu_ras_block block)
{
	enum amd_sriov_ras_telemetry_gpu_block sriov_block;

	sriov_block = amdgpu_ras_block_to_sriov(adev, block);

	if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
	    !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
		return false;

	return true;
}
/*
 * amdgpu_virt_request_bad_pages() - request bad pages
 * @adev: amdgpu device.
 * Send a command to the GPU hypervisor to write new bad pages into the
 * shared PF2VF region.
 */
void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;
if (virt->ops && virt->ops->req_bad_pages)
virt->ops->req_bad_pages(adev);
}