// SPDX-License-Identifier: GPL-2.0-only
/*
 * APEI Generic Hardware Error Source support
 *
 * Generic Hardware Error Source provides a way to report platform
 * hardware errors (such as that from chipset). It works in so called
 * "Firmware First" mode, that is, hardware errors are reported to
 * firmware firstly, then reported to Linux by firmware. This way,
 * some non-standard hardware error registers or non-standard hardware
 * link can be checked by firmware to produce more hardware error
 * information for Linux.
 *
 * For more information about Generic Hardware Error Source, please
 * refer to ACPI Specification version 4.0, section 17.3.2.6
 *
 * Copyright 2010,2011 Intel Corp.
 * Author: Huang Ying <ying.huang@intel.com>
 */
/* This is just an estimation for memory pool allocation */
#define GHES_ESTATUS_CACHE_AVG_SIZE	512

/* Number of slots in the estatus throttle cache. */
#define GHES_ESTATUS_CACHES_SIZE	4

/* A cached estatus entry is considered fresh for this long (10 s in ns). */
#define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL

/* Prevent too many caches are allocated because of RCU */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)
/*
 * NMI-like notifications vary by architecture, before the compiler can prune
 * unused static functions it needs a value for these enums.
 */
#ifndef CONFIG_ARM_SDE_INTERFACE
#define FIX_APEI_GHES_SDEI_NORMAL	__end_of_fixed_addresses
#define FIX_APEI_GHES_SDEI_CRITICAL	__end_of_fixed_addresses
#endif
/* * A platform may describe one error source for the handling of synchronous * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI * or External Interrupt). On x86, the HEST notifications are always * asynchronous, so only SEA on ARM is delivered as a synchronous * notification.
*/ staticinlinebool is_hest_sync_notify(struct ghes *ghes)
{
u8 notify_type = ghes->generic->notify.type;
return notify_type == ACPI_HEST_NOTIFY_SEA;
}
/*
 * This driver isn't really modular, however for the time being,
 * continuing to use module_param is the easiest way to remain
 * compatible with existing boot arg use cases.
 */
/* Set via "ghes.disable=1" on the kernel command line; checked at init. */
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);
/* * "ghes.edac_force_enable" forcibly enables ghes_edac and skips the platform * check.
*/ staticbool ghes_edac_force_enable;
module_param_named(edac_force_enable, ghes_edac_force_enable, bool, 0);
/*
 * All error sources notified with HED (Hardware Error Device) share a
 * single notifier callback, so they need to be linked and checked one
 * by one. This holds true for NMI too.
 *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * list changing, not for traversing.
 */
static LIST_HEAD(ghes_hed);
static DEFINE_MUTEX(ghes_list_mutex);
/*
 * A list of GHES devices which are given to the corresponding EDAC driver
 * ghes_edac for further use.
 */
static LIST_HEAD(ghes_devs);
static DEFINE_MUTEX(ghes_devs_mutex);
/*
 * Because the memory area used to transfer hardware error information
 * from BIOS to Linux can be determined only in NMI, IRQ or timer
 * handler, but general ioremap can not be used in atomic context, so
 * the fixmap is used instead.
 *
 * This spinlock is used to prevent the fixmap entry from being used
 * simultaneously.
 */
static DEFINE_SPINLOCK(ghes_notify_lock_irq);
/* Check the top-level record header has an appropriate size. */ staticint __ghes_check_estatus(struct ghes *ghes, struct acpi_hest_generic_status *estatus)
{
u32 len = cper_estatus_len(estatus);
if (len < sizeof(*estatus)) {
pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n"); return -EIO;
}
if (len > ghes->generic->error_block_length) {
pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n"); return -EIO;
}
/* * GHESv2 type HEST entries introduce support for error acknowledgment, * so only acknowledge the error if this support is present.
*/ if (is_hest_type_generic_v2(ghes))
ghes_ack_error(ghes->generic_v2);
}
/**
 * struct ghes_task_work - for synchronous RAS event
 *
 * @twork: callback_head for task work
 * @pfn: page frame number of corrupted page
 * @flags: work control flags
 *
 * Structure to pass task work to be handled before
 * returning to user-space via task_work_add().
 */
struct ghes_task_work {
	struct callback_head twork;
	u64 pfn;
	int flags;
};
staticbool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev, bool sync)
{ int flags = -1; int sec_sev = ghes_severity(gdata->error_severity); struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) returnfalse;
/* iff following two events can be handled properly by now */ if (sec_sev == GHES_SEV_CORRECTED &&
(gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
flags = MF_SOFT_OFFLINE; if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
flags = sync ? MF_ACTION_REQUIRED : 0;
if (flags != -1) return ghes_do_memory_failure(mem_err->physical_addr, flags);
p = (char *)(err + 1); for (i = 0; i < err->err_info_num; i++) { struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p; bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR); bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR); constchar *error_type = "unknown error";
/* * The field (err_info->error_info & BIT(26)) is fixed to set to * 1 in some old firmware of HiSilicon Kunpeng920. We assume that * firmware won't mix corrected errors in an uncorrected section, * and don't filter out 'corrected' error here.
*/ if (is_cache && has_pa) {
queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags);
p += err_info->length; continue;
}
if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
error_type = cper_proc_error_type_strs[err_info->type];
/* * PCIe AER errors need to be sent to the AER driver for reporting and * recovery. The GHES severities map to the following AER severities and * require the following handling: * * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE * These need to be reported by the AER driver but no recovery is * necessary. * GHES_SEV_RECOVERABLE -> AER_NONFATAL * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL * These both need to be reported and recovered from by the AER driver. * GHES_SEV_PANIC does not make it to this handling since the kernel must * panic.
*/ staticvoid ghes_handle_aer(struct acpi_hest_generic_data *gdata)
{ #ifdef CONFIG_ACPI_APEI_PCIEAER struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { unsignedint devfn; int aer_severity;
u8 *aer_info;
/* * If firmware reset the component to contain * the error, we must reinitialize it before * use, so treat it as a fatal AER error.
*/ if (gdata->flags & CPER_SEC_RESET)
aer_severity = AER_FATAL;
switch (prot_err->agent_type) { case RCD: case DEVICE: case LD: case FMLD: case RP: case DSP: case USP:
memcpy(&wd.prot_err, prot_err, sizeof(wd.prot_err));
/* Room for 8 entries for each of the 4 event log queues */
#define CXL_CPER_FIFO_DEPTH 32

/* FIFO handing CXL CPER work items off to the CXL driver's worker. */
DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH);
/* * If no memory failure work is queued for abnormal synchronous * errors, do a force kill.
*/ if (sync && !queued) {
dev_err(ghes->dev,
HW_ERR GHES_PFX "%s:%d: synchronous unrecoverable error (SIGBUS)\n",
current->comm, task_pid_nr(current));
force_sig(SIGBUS);
}
}
/*
 * GHES error status reporting throttle, to report more kinds of
 * errors, instead of just most frequently occurred errors.
 *
 * Returns 1 if @estatus byte-matches a fresh cache entry (caller should
 * suppress the report), 0 otherwise. A stale match still bumps the hit
 * count but is not considered cached. Called under RCU to synchronize
 * with cache slot replacement.
 */
static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
{
	u32 len;
	int i, cached = 0;
	unsigned long long now;
	struct ghes_estatus_cache *cache;
	struct acpi_hest_generic_status *cache_estatus;

	len = cper_estatus_len(estatus);
	rcu_read_lock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		if (cache == NULL)
			continue;
		/* Cheap length check before the full memcmp. */
		if (len != cache->estatus_len)
			continue;
		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
		if (memcmp(estatus, cache_estatus, len))
			continue;
		atomic_inc(&cache->count);
		now = sched_clock();
		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
			cached = 1;
		break;
	}
	rcu_read_unlock();

	return cached;
}
new_cache = ghes_estatus_cache_alloc(generic, estatus); if (!new_cache) return;
rcu_read_lock();
now = sched_clock(); for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
cache = rcu_dereference(ghes_estatus_caches[i]); if (cache == NULL) {
slot = i; break;
}
duration = now - cache->time_in; if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
slot = i; break;
}
count = atomic_read(&cache->count);
period = duration;
do_div(period, (count + 1)); if (period > max_period) {
max_period = period;
slot = i;
}
}
rcu_read_unlock();
if (slot != -1) { /* * Use release semantics to ensure that ghes_estatus_cached() * running on another CPU will see the updated cache fields if * it can see the new value of the pointer.
*/
victim = xchg_release(&ghes_estatus_caches[slot],
RCU_INITIALIZER(new_cache));
/* * At this point, victim may point to a cached item different * from the one based on which we selected the slot. Instead of * going to the loop again to pick another slot, let's just * drop the other item anyway: this may cause a false cache * miss later on, but that won't cause any problems.
*/ if (victim)
call_rcu(&unrcu_pointer(victim)->rcu,
ghes_estatus_cache_rcu_free);
}
}
/* * Handlers for CPER records may not be NMI safe. For example, * memory_failure_queue() takes spinlocks and calls schedule_work_on(). * In any NMI-like handler, memory from ghes_estatus_pool is used to save * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes * ghes_proc_in_irq() to run in IRQ context where each estatus in * ghes_estatus_llist is processed. * * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache * to suppress frequent messages.
*/ staticstruct llist_head ghes_estatus_llist; staticstruct irq_work ghes_proc_irq_work;
llnode = llist_del_all(&ghes_estatus_llist); /* * Because the time order of estatus in list is reversed, * revert it back to proper order.
*/
llnode = llist_reverse_order(llnode); while (llnode) {
next = llnode->next;
estatus_node = llist_entry(llnode, struct ghes_estatus_node,
llnode);
estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
len = cper_estatus_len(estatus);
node_len = GHES_ESTATUS_NODE_LEN(len);
ghes_do_proc(estatus_node->ghes, estatus);
if (!ghes_estatus_cached(estatus)) {
generic = estatus_node->generic; if (ghes_print_estatus(NULL, generic, estatus))
ghes_estatus_cache_add(generic, estatus);
}
gen_pool_free(ghes_estatus_pool, (unsignedlong)estatus_node,
node_len);
llnode = llist_del_all(&ghes_estatus_llist); /* * Because the time order of estatus in list is reversed, * revert it back to proper order.
*/
llnode = llist_reverse_order(llnode); while (llnode) {
estatus_node = llist_entry(llnode, struct ghes_estatus_node,
llnode);
estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
generic = estatus_node->generic;
ghes_print_estatus(NULL, generic, estatus);
llnode = llnode->next;
}
}
/* * Return 0 only if one of the SEA error sources successfully reported an error * record sent from the firmware.
*/ int ghes_notify_sea(void)
{ static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea); int rv;
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
/*
 * NMI may be triggered on any CPU, so ghes_in_nmi is used for
 * having only one concurrent reader.
 */
static atomic_t ghes_in_nmi = ATOMIC_INIT(0);

/* List of NMI-notified error sources. */
static LIST_HEAD(ghes_nmi);
staticint ghes_notify_nmi(unsignedint cmd, struct pt_regs *regs)
{ static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi); int ret = NMI_DONE;
if (!atomic_add_unless(&ghes_in_nmi, 1, 1)) return ret;
raw_spin_lock(&ghes_notify_lock_nmi); if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI))
ret = NMI_HANDLED;
raw_spin_unlock(&ghes_notify_lock_nmi);
generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; if (!generic->enabled) return -ENODEV;
switch (generic->notify.type) { case ACPI_HEST_NOTIFY_POLLED: case ACPI_HEST_NOTIFY_EXTERNAL: case ACPI_HEST_NOTIFY_SCI: case ACPI_HEST_NOTIFY_GSIV: case ACPI_HEST_NOTIFY_GPIO: break;
case ACPI_HEST_NOTIFY_SEA: if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
generic->header.source_id);
rc = -ENOTSUPP; goto err;
} break; case ACPI_HEST_NOTIFY_NMI: if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
generic->header.source_id); goto err;
} break; case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) {
pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n",
generic->header.source_id); goto err;
} break; case ACPI_HEST_NOTIFY_LOCAL:
pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
generic->header.source_id); goto err; default:
pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
generic->notify.type, generic->header.source_id); goto err;
}
switch (generic->notify.type) { case ACPI_HEST_NOTIFY_POLLED:
timer_setup(&ghes->timer, ghes_poll_func, 0);
ghes_add_timer(ghes); break; case ACPI_HEST_NOTIFY_EXTERNAL: /* External interrupt vector is GSI */
rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq); if (rc) {
pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
generic->header.source_id); goto err;
}
rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED, "GHES IRQ", ghes); if (rc) {
pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
generic->header.source_id); goto err;
} break;
case ACPI_HEST_NOTIFY_SCI: case ACPI_HEST_NOTIFY_GSIV: case ACPI_HEST_NOTIFY_GPIO:
mutex_lock(&ghes_list_mutex); if (list_empty(&ghes_hed))
register_acpi_hed_notifier(&ghes_notifier_hed);
list_add_rcu(&ghes->list, &ghes_hed);
mutex_unlock(&ghes_list_mutex); break;
case ACPI_HEST_NOTIFY_SEA:
ghes_sea_add(ghes); break; case ACPI_HEST_NOTIFY_NMI:
ghes_nmi_add(ghes); break; case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
rc = apei_sdei_register_ghes(ghes); if (rc) goto err; break; default:
BUG();
}
ghes->flags |= GHES_EXITING; switch (generic->notify.type) { case ACPI_HEST_NOTIFY_POLLED:
timer_shutdown_sync(&ghes->timer); break; case ACPI_HEST_NOTIFY_EXTERNAL:
free_irq(ghes->irq, ghes); break;
case ACPI_HEST_NOTIFY_SCI: case ACPI_HEST_NOTIFY_GSIV: case ACPI_HEST_NOTIFY_GPIO:
mutex_lock(&ghes_list_mutex);
list_del_rcu(&ghes->list); if (list_empty(&ghes_hed))
unregister_acpi_hed_notifier(&ghes_notifier_hed);
mutex_unlock(&ghes_list_mutex);
synchronize_rcu(); break;
case ACPI_HEST_NOTIFY_SEA:
ghes_sea_remove(ghes); break; case ACPI_HEST_NOTIFY_NMI:
ghes_nmi_remove(ghes); break; case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
rc = apei_sdei_unregister_ghes(ghes); if (rc) { /* * Returning early results in a resource leak, but we're * only here if stopping the hardware failed.
*/
dev_err(&ghes_dev->dev, "Failed to unregister ghes (%pe)\n",
ERR_PTR(rc)); return;
} break; default:
BUG(); break;
}
switch (hest_disable) { case HEST_NOT_FOUND: return; case HEST_DISABLED:
pr_info(GHES_PFX "HEST is not enabled!\n"); return; default: break;
}
if (ghes_disable) {
pr_info(GHES_PFX "GHES is not enabled!\n"); return;
}
ghes_nmi_init_cxt();
rc = platform_driver_register(&ghes_platform_driver); if (rc) return;
rc = apei_osc_setup(); if (rc == 0 && osc_sb_apei_support_acked)
pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); elseif (rc == 0 && !osc_sb_apei_support_acked)
pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); elseif (rc && osc_sb_apei_support_acked)
pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); else
pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
}
/* * Known x86 systems that prefer GHES error reporting:
*/ staticstruct acpi_platform_list plat_list[] = {
{"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions},
{ } /* End */
};
struct list_head *ghes_get_devices(void)
{ int idx = -1;
if (IS_ENABLED(CONFIG_X86)) {
idx = acpi_match_platform_list(plat_list); if (idx < 0) { if (!ghes_edac_force_enable) return NULL;
pr_warn_once("Force-loading ghes_edac on an unsupported platform. You're on your own!\n");
}
} elseif (list_empty(&ghes_devs)) { return NULL;
}
/*
 * NOTE(review): the following disclaimer text is extraneous website
 * boilerplate that was scraped in along with the source (translated from
 * German); it is not part of this driver:
 *
 * The information on this website has been compiled carefully and to the
 * best of our knowledge. However, no guarantee is given as to the
 * completeness, correctness, or quality of the information provided.
 * Note: the colored syntax highlighting and the measurement are still
 * experimental.
 */