// SPDX-License-Identifier: GPL-2.0-only
/*
 * Machine check injection support.
 * Copyright 2008 Intel Corporation.
 *
 * Authors:
 * Andi Kleen
 * Ying Huang
 *
 * The AMD part (from mce_amd_inj.c): a simple MCE injection facility
 * for testing different aspects of the RAS code. This driver should be
 * built as a module so that it can be loaded on production kernels for
 * testing purposes.
 *
 * Copyright (c) 2010-17: Borislav Petkov <bp@alien8.de>
 * Advanced Micro Devices Inc.
 */
/*
 * Update the fake MCE registers of the CPU named by m->extcpu.
 *
 * @m is copied into that CPU's per-CPU injectm slot.  ->finished serves as
 * the "record is complete" flag: it is cleared before anything else is
 * written and set again only once the whole record is in place, with a full
 * barrier between each step, so that no one reads a partially written
 * injectm.
 *
 * Side effect: m->finished is cleared in the caller's record as well, so the
 * memcpy() below cannot set the flag in the slot ahead of time.
 */
static void inject_mce(struct mce *m)
{
	struct mce *i = &per_cpu(injectm, m->extcpu);

	/* Make sure no one reads partially written injectm */
	i->finished = 0;
	mb();
	m->finished = 0;
	/* First set the fields after finished */
	i->extcpu = m->extcpu;
	mb();
	/* Now write record in order, finished last (except above) */
	memcpy(i, m, sizeof(struct mce));
	/* Finally activate it */
	mb();
	i->finished = 1;
}
/*
 * Inject mce on current CPU.
 *
 * Acts on this CPU's injectm record:
 *  - MCJ_EXCEPTION set in inject_flags: raise_exception() is called for the
 *    MCJ_CTX_IRQ and MCJ_CTX_PROCESS contexts; any other context is reported
 *    as invalid.
 *  - otherwise, a non-zero ->status: raise_poll() is called.
 *  - otherwise: the record is just marked as not finished.
 *
 * Returns 0, or -EINVAL when an exception was requested with an invalid
 * context.
 */
static int raise_local(void)
{
	struct mce *m = this_cpu_ptr(&injectm);
	int context = MCJ_CTX(m->inject_flags);
	int ret = 0;
	int cpu = m->extcpu;

	if (m->inject_flags & MCJ_EXCEPTION) {
		pr_info("Triggering MCE exception on CPU %d\n", cpu);
		switch (context) {
		case MCJ_CTX_IRQ:
			/*
			 * Could do more to fake interrupts like
			 * calling irq_enter, but the necessary
			 * machinery isn't exported currently.
			 */
			fallthrough;
		case MCJ_CTX_PROCESS:
			raise_exception(m, NULL);
			break;
		default:
			pr_info("Invalid MCE context\n");
			ret = -EINVAL;
		}
		pr_info("MCE exception done on CPU %d\n", cpu);
	} else if (m->status) {
		pr_info("Starting machine check poll CPU %d\n", cpu);
		raise_poll(m);
		pr_info("Machine check poll done on CPU %d\n", cpu);
	} else {
		m->finished = 0;
	}

	return ret;
}
/*
 * Store @m via inject_mce() and, unless its context is MCJ_CTX_RANDOM,
 * raise it.
 *
 * Without a broadcast flag the error is raised on the current CPU only, with
 * preemption disabled around raise_local().
 *
 * With MCJ_IRQ_BROADCAST or MCJ_NMI_BROADCAST set, mce_inject_cpumask is
 * first reduced to the other online CPUs whose own injectm record is
 * finished and has MCJ_CTX_RANDOM context.  Those CPUs are kicked by IPI or
 * NMI, this CPU waits at most 2*HZ jiffies for the mask to become empty (the
 * bits are cleared outside this function), and only then raises the error
 * locally.  The return value of raise_local() is not propagated.
 */
static void __maybe_unused raise_mce(struct mce *m)
{
	int context = MCJ_CTX(m->inject_flags);

	inject_mce(m);

	if (context == MCJ_CTX_RANDOM)
		return;

	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
		unsigned long start;
		int cpu;

		cpus_read_lock();
		cpumask_copy(mce_inject_cpumask, cpu_online_mask);
		/* get_cpu() is paired with the put_cpu() after raise_local(). */
		cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
		for_each_online_cpu(cpu) {
			struct mce *mcpu = &per_cpu(injectm, cpu);

			if (!mcpu->finished ||
			    MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
				cpumask_clear_cpu(cpu, mce_inject_cpumask);
		}
		if (!cpumask_empty(mce_inject_cpumask)) {
			if (m->inject_flags & MCJ_IRQ_BROADCAST) {
				/*
				 * don't wait because mce_irq_ipi is necessary
				 * to be sync with following raise_local
				 */
				preempt_disable();
				smp_call_function_many(mce_inject_cpumask,
						       mce_irq_ipi, NULL, 0);
				preempt_enable();
			} else if (m->inject_flags & MCJ_NMI_BROADCAST) {
				__apic_send_IPI_mask(mce_inject_cpumask, NMI_VECTOR);
			}
		}
		start = jiffies;
		while (!cpumask_empty(mce_inject_cpumask)) {
			if (!time_before(jiffies, start + 2 * HZ)) {
				pr_err("Timeout waiting for mce inject %lx\n",
				       *cpumask_bits(mce_inject_cpumask));
				break;
			}
			cpu_relax();
		}
		raise_local();
		put_cpu();
		cpus_read_unlock();
	} else {
		preempt_disable();
		raise_local();
		preempt_enable();
	}
}
/*
 * Caller needs to make sure this cpu doesn't disappear
 * from under us, i.e.: get_cpu/put_cpu.
 *
 * NOTE(review): the definition as it appears here looks damaged and should be
 * checked against the upstream file before it is relied on:
 *  - "staticint" / "unsignedint" read like "static int" / "unsigned int" with
 *    the separating whitespace lost;
 *  - the locals l, h and err and the parameter "enable" are never used by the
 *    statements below;
 *  - b, mcg_status, i_mce and inj_type are not declared in this body
 *    (presumably file scope, or lost together with the missing part);
 *  - no closing brace is reached before the next definition starts.
 * The statements themselves are left untouched.
 */
staticint toggle_hw_mce_inject(unsignedint cpu, bool enable)
{
	u32 l, h; int err;

	/*
	 * Prep MCE global settings for the injection: start from MCIP | EIPV
	 * and add RIPV only when the status to inject does not have PCC set.
	 */
	mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
	if (!(i_mce.status & MCI_STATUS_PCC))
		mcg_status |= MCG_STATUS_RIPV;

	/*
	 * Ensure necessary status bits for deferred errors:
	 * - MCx_STATUS[Deferred]: make sure it is a deferred error
	 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
	 */
	if (inj_type == DFR_INT_INJ) {
		i_mce.status |= MCI_STATUS_DEFERRED;
		i_mce.status &= ~MCI_STATUS_UC;
	}

	/*
	 * For multi node CPUs, logging and reporting of bank 4 errors happens
	 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
	 * Fam10h and later BKDGs.
	 *
	 * When all three conditions hold, cpu is replaced by the value that
	 * get_nbc_for_node() returns for the node of the original cpu.
	 */
	if (boot_cpu_has(X86_FEATURE_AMD_DCM) &&
	    b == 4 &&
	    boot_cpu_data.x86 < 0x17) {
		toggle_nb_mca_mst_cpu(topology_amd_node_id(cpu));
		cpu = get_nbc_for_node(topology_amd_node_id(cpu));
	}
/*
 * This denotes into which bank we're injecting and triggers
 * the injection, at the same time.
 *
 * @data: pointer to the struct mce being set up (cast from void *).
 * @val:  bank number to inject into.
 *
 * In the part visible here: returns -EINVAL when @val is not below the bank
 * count read for m->extcpu or when the IPID read fails, and -ENODEV when the
 * IPID reads as zero.
 *
 * NOTE(review): the definition is cut off here - the "inject" label targeted
 * by the goto below and the closing brace are not present, and "staticint"
 * reads like "static int" with the whitespace lost. Confirm against the
 * upstream file.
 */
staticint inj_bank_set(void *data, u64 val)
{ struct mce *m = (struct mce *)data;
	u8 n_banks;
	u64 cap;

	/*
	 * Get bank count on target CPU so we can handle non-uniform values.
	 *
	 * NOTE(review): unlike the IPID read further down, the return value of
	 * this read is not checked, so cap is used as-is even if the read
	 * failed - confirm this is intended.
	 */
	rdmsrq_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
	n_banks = cap & MCG_BANKCNT_MASK;

	if (val >= n_banks) {
		pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu); return -EINVAL;
	}

	m->bank = val;

	/*
	 * sw-only injection allows to write arbitrary values into the MCA
	 * registers because it tests only the decoding paths.
	 */
	if (inj_type == SW_INJ) goto inject;

	/*
	 * Read IPID value to determine if a bank is populated on the target
	 * CPU.
	 */
	if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
		u64 ipid;

		if (rdmsrq_on_cpu(m->extcpu, MSR_AMD64_SMCA_MCx_IPID(val), &ipid)) {
			pr_err("Error reading IPID on CPU%d\n", m->extcpu); return -EINVAL;
		}

		/* A zero IPID is treated as "bank not populated". */
		if (!ipid) {
			pr_err("Cannot inject into unpopulated bank %llu\n", val); return -ENODEV;
		}
	}
/*
 * Help text describing the injection control files (status, misc, synd, addr,
 * cpu, bank, flags, ipid) and the accepted injection types.
 *
 * Kept as one literal per output line; adjacent literals are concatenated by
 * the compiler, so every literal must be closed on the line it starts on.
 * NOTE(review): presumably shown to users through a README-style file -
 * confirm against the code that references readme_msg.
 */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t format only. Safe to use.\n"
"\t - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t before injecting.\n"
"\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
"\t error APIC interrupt handler to handle the error if the feature \n"
"\t is present in hardware. \n"
"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t APIC interrupt handler to handle the error. \n"
"\n"
"ipid:\t IPID (AMD-specific)\n"
"\n";
/*
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereitgestellten Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */