/* * Grade an mce by severity. In general the most severe ones are processed * first. Since there are quite a lot of combinations test the bits in a * table-driven way. The rules are simply processed in order, first * match wins. * * Note this is only used for machine check exceptions, the corrected * errors use much simpler rules. The exceptions still check for the corrected * errors, but only to leave them alone for the CMCI handler (except for * panic situations)
*/
MCESEV(
NO, "Invalid",
BITCLR(MCI_STATUS_VAL)
),
MCESEV(
NO, "Not enabled",
EXCP, BITCLR(MCI_STATUS_EN)
),
MCESEV(
PANIC, "Processor context corrupt",
BITSET(MCI_STATUS_PCC)
), /* When MCIP is not set something is very confused */
MCESEV(
PANIC, "MCIP not set in MCA handler",
EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
), /* Neither return nor error IP -- no chance to recover -> PANIC */
MCESEV(
PANIC, "Neither restart nor error IP",
EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
),
MCESEV(
PANIC, "In kernel and no restart IP",
EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
),
MCESEV(
PANIC, "In kernel and no restart IP",
EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
),
MCESEV(
KEEP, "Corrected error",
NOSER, BITCLR(MCI_STATUS_UC)
), /* * known AO MCACODs reported via MCE or CMC: * * SRAO could be signaled either via a machine check exception or * CMCI with the corresponding bit S 1 or 0. So we don't need to * check bit S for SRAO.
*/
MCESEV(
AO, "Action optional: memory scrubbing error",
SER, MASK(MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
),
MCESEV(
AO, "Action optional: last level cache writeback error",
SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
), /* * Quirk for Skylake/Cascade Lake. Patrol scrubber may be configured * to report uncorrected errors using CMCI with a special signature. * UC=0, MSCOD=0x0010, MCACOD=binary(000X 0000 1100 XXXX) reported * in one of the memory controller banks. * Set severity to "AO" for same action as normal patrol scrub error.
*/
MCESEV(
AO, "Uncorrected Patrol Scrub Error",
SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0),
VFM_STEPPING(INTEL_SKYLAKE_X, 4), BANK_RANGE(13, 18)
),
/* ignore OVER for UCNA */
MCESEV(
UCNA, "Uncorrected no action required",
SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
),
MCESEV(
PANIC, "Illegal combination (UCNA with AR=1)",
SER,
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
),
MCESEV(
KEEP, "Non signaled machine check",
SER, BITCLR(MCI_STATUS_S)
),
MCESEV(
PANIC, "Action required with lost events",
SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
),
/* known AR MCACODs: */ #ifdef CONFIG_MEMORY_FAILURE
MCESEV(
KEEP, "Action required but unaffected thread is continuable",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
),
MCESEV(
AR, "Action required: data load in error recoverable area of kernel",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
KERNEL_RECOV
),
MCESEV(
AR, "Action required: data load error in a user process",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
USER
),
MCESEV(
AR, "Action required: instruction fetch error in a user process",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
USER
),
MCESEV(
AR, "Data load error in SEAM non-root mode",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
MCGMASK(MCG_STATUS_SEAM_NR, MCG_STATUS_SEAM_NR),
KERNEL
),
MCESEV(
AR, "Instruction fetch error in SEAM non-root mode",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
MCGMASK(MCG_STATUS_SEAM_NR, MCG_STATUS_SEAM_NR),
KERNEL
),
MCESEV(
PANIC, "Data load in unrecoverable area of kernel",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
KERNEL
),
MCESEV(
PANIC, "Instruction fetch error in kernel",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
KERNEL
), #endif
MCESEV(
PANIC, "Action required: unknown MCACOD",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
),
if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE)) returnfalse;
ret = insn_decode_kernel(&insn, insn_buf); if (ret < 0) returnfalse;
switch (insn.opcode.value) { /* MOV mem,reg */ case 0x8A: case 0x8B: /* MOVZ mem,reg */ case 0xB60F: case 0xB70F:
addr = (unsignedlong)insn_get_addr_ref(&insn, regs); break; /* REP MOVS */ case 0xA4: case 0xA5:
addr = regs->si; break; default: returnfalse;
}
if (fault_in_kernel_space(addr)) returnfalse;
current->mce_vaddr = (void __user *)addr;
returntrue;
}
/* * If mcgstatus indicated that ip/cs on the stack were * no good, then "m->cs" will be zero and we will have * to assume the worst case (IN_KERNEL) as we actually * have no idea what we were executing when the machine * check hit. * If we do have a good "m->cs" (or a faked one in the * case we were executing in VM86 mode) we can use it to * distinguish an exception taken in user from from one * taken in the kernel.
*/ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
{ int fixup_type; bool copy_user;
if ((m->cs & 3) == 3) return IN_USER;
if (!mc_recoverable(m->mcgstatus)) return IN_KERNEL;
switch (fixup_type) { case EX_TYPE_FAULT_MCE_SAFE: case EX_TYPE_DEFAULT_MCE_SAFE:
m->kflags |= MCE_IN_KERNEL_RECOV; return IN_KERNEL_RECOV;
default: return IN_KERNEL;
}
}
/* See AMD PPR(s) section Machine Check Error Handling. */ static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
{ char *panic_msg = NULL; int ret;
/* * Default return value: Action required, the error must be handled * immediately.
*/
ret = MCE_AR_SEVERITY;
/* Processor Context Corrupt, no need to fumble too much, die! */ if (m->status & MCI_STATUS_PCC) {
panic_msg = "Processor Context Corrupt";
ret = MCE_PANIC_SEVERITY; goto out;
}
if (m->status & MCI_STATUS_DEFERRED) {
ret = MCE_DEFERRED_SEVERITY; goto out;
}
/* * If the UC bit is not set, the system either corrected or deferred * the error. No action will be required after logging the error.
*/ if (!(m->status & MCI_STATUS_UC)) {
ret = MCE_KEEP_SEVERITY; goto out;
}
/* * On MCA overflow, without the MCA overflow recovery feature the * system will not be able to recover, panic.
*/ if ((m->status & MCI_STATUS_OVER) && !mce_flags.overflow_recov) {
panic_msg = "Overflowed uncorrected error without MCA Overflow Recovery";
ret = MCE_PANIC_SEVERITY; goto out;
}
if (!mce_flags.succor) {
panic_msg = "Uncorrected error without MCA Recovery";
ret = MCE_PANIC_SEVERITY; goto out;
}
if (error_context(m, regs) == IN_KERNEL) {
panic_msg = "Uncorrected unrecoverable error in kernel context";
ret = MCE_PANIC_SEVERITY;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.