/* * Copyright 2019 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. *
*/
/*
 * amdgpu_umc_page_retirement_mca() - page retirement triggered by a
 * UMC error address reported through the MCA notifier.
 * @adev:     amdgpu device handle
 * @err_addr: error address reported by the MCA notifier
 * @ch_inst:  UMC channel instance the error was reported on
 * @umc_inst: UMC instance the error was reported on
 *
 * NOTE(review): this function is corrupted in this copy of the file. The
 * lines following the allocation-failure check come from a different
 * function: they dereference the stack object 'err_data' through '->',
 * use the undeclared identifier 'ras_error_status', and the goto targets
 * a label ('out_fini_err_data') that does not exist in this fragment.
 * This cannot compile as-is -- restore the body from the upstream
 * drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c before building.
 */
int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst)
{ struct ras_err_data err_data; int ret;
ret = amdgpu_ras_error_data_init(&err_data); if (ret) return ret;
/* One eeprom bad-page record per error the UMC query may report. */
err_data.err_addr =
kcalloc(adev->umc.max_ras_err_cnt_per_query, sizeof(struct eeprom_table_record), GFP_KERNEL); if (!err_data.err_addr) {
dev_warn(adev->dev, "Failed to alloc memory for umc error record in MCA notifier!\n");
ret = AMDGPU_RAS_FAIL; goto out_fini_err_data;
}
/* NOTE(review): spliced-in fragment starts here -- see header note. */
/* still call query_ras_error_address to clear error status * even NOMEM error is encountered
*/ if(!err_data->err_addr)
dev_warn(adev->dev, "Failed to alloc memory for " "umc error address record!\n"); else
err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
/* umc query_ras_error_address is also responsible for clearing * error status
*/
adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status);
}
/* NOTE(review): orphaned fragment -- this 'else' arm belongs to an error
 * query/clear routine (amdgpu_umc_do_page_retirement()-style code) whose
 * beginning is missing from this file. 'elseif' is not a C keyword (it
 * must be 'else if'), and 'error_query_mode', 'ret', 'err_data' and
 * 'ras_error_status' are all undeclared in this scope, so this block
 * cannot compile where it sits. Kept as a marker of what was lost.
 */
} elseif (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY ||
(!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) { if (adev->umc.ras &&
adev->umc.ras->ecc_info_query_ras_error_count)
adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status);
/* still call query_ras_error_address to clear error status * even NOMEM error is encountered
*/ if(!err_data->err_addr)
dev_warn(adev->dev, "Failed to alloc memory for " "umc error address record!\n"); else
err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
/* umc query_ras_error_address is also responsible for clearing * error status
*/
adev->umc.ras->ecc_info_query_ras_error_address(adev, ras_error_status);
}
}
/*
 * amdgpu_umc_pasid_poison_handler() - handle a UMC poison-consumption
 * event on behalf of a given pasid.
 * @adev:     amdgpu device handle
 * @block:    RAS block that consumed the poison
 * @pasid:    pasid of the process that consumed the poison
 * @pasid_fn: notifier callback for that pasid
 * @data:     opaque argument forwarded to @pasid_fn
 * @reset:    non-zero to request a GPU reset
 *
 * NOTE(review): truncated in this copy of the file -- the nested scopes
 * opened in the !amdgpu_sriov_vf() branch below are never closed before
 * the file jumps into an unrelated loop fragment; the >= IP 12.0.0 path,
 * the SRIOV path and the function's closing brace are missing.
 */
int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint16_t pasid,
pasid_notify pasid_fn, void *data, uint32_t reset)
{ int ret = AMDGPU_RAS_SUCCESS;
/* Per the inline comment below: on connected-to-CPU / APP APU configs
 * the MCA notifier does page retirement; only a reset may be needed.
 */
if (adev->gmc.xgmi.connected_to_cpu ||
adev->gmc.is_app_apu) { if (reset) { /* MCA poison handler is only responsible for GPU reset, * let MCA notifier do page retirement.
*/
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
amdgpu_ras_reset_gpu(adev);
} return ret;
}
/* NOTE(review): body below is cut off mid-branch -- see header note. */
if (!amdgpu_sriov_vf(adev)) { if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) { struct ras_err_data err_data; struct ras_common_if head = {
.block = AMDGPU_RAS_BLOCK__UMC,
}; struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
ret = amdgpu_ras_error_data_init(&err_data); if (ret) return ret;
ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
/* NOTE(review): orphaned fragment of a per-channel UMC iteration helper
 * (it reads like the body of an amdgpu_umc_loop_all_aid()-style
 * function). 'adev', 'umc_node_inst', 'node_inst', 'umc_inst', 'ch_inst',
 * 'func', 'data' and 'ret' are not declared in this scope, so this block
 * cannot compile where it sits.
 */
/* * This loop is done based on the following - * umc.active mask = mask of active umc instances across all nodes * umc.umc_inst_num = maximum number of umc instancess per node * umc.node_inst_num = maximum number of node instances * Channel instances are not assumed to be harvested.
*/
dev_dbg(adev->dev, "active umcs :%lx umc_inst per node: %d",
adev->umc.active_mask, adev->umc.umc_inst_num);
/* Flattened (node, umc) bit index; decomposed below into node and umc. */
for_each_set_bit(umc_node_inst, &(adev->umc.active_mask),
adev->umc.node_inst_num * adev->umc.umc_inst_num) {
node_inst = umc_node_inst / adev->umc.umc_inst_num;
umc_inst = umc_node_inst % adev->umc.umc_inst_num;
LOOP_UMC_CH_INST(ch_inst) {
dev_dbg(adev->dev, "node_inst :%d umc_inst: %d ch_inst: %d",
node_inst, umc_inst, ch_inst);
/* Invoke the caller-supplied callback; abort on the first error. */
ret = func(adev, node_inst, umc_inst, ch_inst, data); if (ret) {
dev_err(adev->dev, "Node %d umc %d ch %d func returns %d\n",
node_inst, umc_inst, ch_inst, ret); return ret;
}
}
}
return 0;
}
/*
 * amdgpu_umc_loop_channels() - run @func on every UMC channel instance.
 * @adev: amdgpu device handle
 * @func: per-channel callback
 * @data: opaque argument forwarded to @func
 *
 * NOTE(review): truncated in this copy of the file -- after the aid_mask
 * early dispatch, the per-node/instance/channel loop and the closing
 * brace are missing (a matching loop body appears misplaced earlier in
 * this file).
 */
int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
umc_func func, void *data)
{
uint32_t node_inst = 0;
uint32_t umc_inst = 0;
uint32_t ch_inst = 0; int ret = 0;
/* Multi-AID parts delegate to the all-AID iteration helper instead. */
if (adev->aid_mask) return amdgpu_umc_loop_all_aid(adev, func, data);
/*
 * amdgpu_umc_lookup_bad_pages_in_a_row() - collect the retired pfns that
 * share a memory row with @pa_addr.
 * @adev:    amdgpu device handle
 * @pa_addr: physical address whose row is being looked up
 * @pfns:    output array for the retired page frame numbers
 * @len:     capacity of @pfns
 *
 * Returns the number of pfns written, or 0 on allocation failure.
 *
 * NOTE(review): corrupted in this copy of the file. The 'goto out'
 * statements target a label that no longer exists (so err_data.err_addr
 * is also never kfree()d on any path shown here), and the
 * convert_ras_err_addr block near the end -- referencing the undeclared
 * 'addr_in', 'addr_out' and 'dump_addr' -- was spliced in from a
 * different function.
 */
int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
uint64_t pa_addr, uint64_t *pfns, int len)
{ int i, ret; struct ras_err_data err_data;
err_data.err_addr = kcalloc(adev->umc.retire_unit, sizeof(struct eeprom_table_record), GFP_KERNEL); if (!err_data.err_addr) {
dev_warn(adev->dev, "Failed to alloc memory in bad page lookup!\n"); return 0;
}
/* NOTE(review): the 'out' label is missing -- these gotos cannot compile. */
ret = amdgpu_umc_pages_in_a_row(adev, &err_data, pa_addr); if (ret) goto out;
for (i = 0; i < adev->umc.retire_unit; i++) { if (i >= len) goto out;
pfns[i] = err_data.err_addr[i].retired_page;
}
ret = i;
adev->umc.err_addr_cnt = err_data.err_addr_cnt;
/* NOTE(review): spliced-in fragment from an address-conversion helper. */
if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
ret = adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in,
addr_out, dump_addr); if (ret) return ret;
} else { return 0;
}
return 0;
}
/*
 * amdgpu_umc_pa2mca() - translate a physical address into an MCA error
 * address via the PSP RAS TA.
 * @adev: amdgpu device handle
 * @pa:   physical address to translate
 * @mca:  output; the MCA error address on success
 * @nps:  memory (NPS) partition mode the physical address belongs to
 *
 * Returns 0 on success, or the error code from psp_ras_query_address().
 */
int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
		uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps)
{
	/* Zero-initialize so any fields this function does not set are not
	 * uninitialized stack bytes when the request is handed to the PSP
	 * firmware.
	 */
	struct ta_ras_query_address_input addr_in = {0};
	struct ta_ras_query_address_output addr_out = {0};
	int ret;

	/* nps: the pa belongs to; encoded into the top bits of the address */
	addr_in.pa.pa = pa | ((uint64_t)nps << 58);
	addr_in.addr_type = TA_RAS_PA_TO_MCA;

	ret = psp_ras_query_address(&adev->psp, &addr_in, &addr_out);
	if (ret) {
		dev_warn(adev->dev, "Failed to query RAS MCA address for 0x%llx",
			pa);
		return ret;
	}

	*mca = addr_out.ma.err_addr;

	return 0;
}
Messung V0.5
¤ Dauer der Verarbeitung: 0.13 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.