/*
 * An emergency handler can be set in any context including NMI
 */
struct nmi_desc {
	raw_spinlock_t lock;		/* serializes add/remove on 'head' */
	nmi_handler_t emerg_handler;	/* preempts all handlers on 'head' when set */
	struct list_head head;		/* RCU-protected list of registered nmiaction */
};
/* * Call the emergency handler, if set * * In the case of crash_nmi_callback() emergency handler, it will * return in the case of the crashing CPU to enable it to complete * other necessary crashing actions ASAP. Other handlers in the * linked list won't need to be run.
*/
ehandler = desc->emerg_handler; if (ehandler) return ehandler(type, regs);
rcu_read_lock();
/* * NMIs are edge-triggered, which means if you have enough * of them concurrently, you can lose some because only one * can be latched at any given time. Walk the whole list * to handle those situations.
*/
list_for_each_entry_rcu(a, &desc->head, list) { int thishandled;
u64 delta;
if (WARN_ON_ONCE(!action->handler || !list_empty(&action->list))) return -EINVAL;
raw_spin_lock_irqsave(&desc->lock, flags);
/* * Indicate if there are multiple registrations on the * internal NMI handler call chains (SERR and IO_CHECK).
*/
WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));
/* * some handlers need to be executed first otherwise a fake * event confuses some handlers (kdump uses this flag)
*/ if (action->flags & NMI_FLAG_FIRST)
list_add_rcu(&action->list, &desc->head); else
list_add_tail_rcu(&action->list, &desc->head);
list_for_each_entry_rcu(n, &desc->head, list) { /* * the name passed in to describe the nmi handler * is used as the lookup key
*/ if (!strcmp(n->name, name)) {
WARN(in_nmi(), "Trying to free NMI (%s) from NMI context!\n", n->name);
list_del_rcu(&n->list);
found = n; break;
}
}
raw_spin_unlock_irqrestore(&desc->lock, flags); if (found) {
synchronize_rcu();
INIT_LIST_HEAD(&found->list);
}
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
/** * set_emergency_nmi_handler - Set emergency handler * @type: NMI type * @handler: the emergency handler to be stored * * Set an emergency NMI handler which, if set, will preempt all the other * handlers in the linked list. If a NULL handler is passed in, it will clear * it. It is expected that concurrent calls to this function will not happen * or the system is screwed beyond repair.
*/ void set_emergency_nmi_handler(unsignedint type, nmi_handler_t handler)
{ struct nmi_desc *desc = nmi_to_desc(type);
if (WARN_ON_ONCE(desc->emerg_handler == handler)) return;
desc->emerg_handler = handler;
/* * Ensure the emergency handler is visible to other CPUs before * function return
*/
smp_wmb();
}
staticvoid
pci_serr_error(unsignedchar reason, struct pt_regs *regs)
{ /* check to see if anyone registered against these types of errors */ if (nmi_handle(NMI_SERR, regs)) return;
pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
reason, smp_processor_id());
if (panic_on_unrecovered_nmi)
nmi_panic(regs, "NMI: Not continuing");
pr_emerg("Dazed and confused, but trying to continue\n");
/* check to see if anyone registered against these types of errors */ if (nmi_handle(NMI_IO_CHECK, regs)) return;
pr_emerg( "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
reason, smp_processor_id());
show_regs(regs);
if (panic_on_io_nmi) {
nmi_panic(regs, "NMI IOCK error: Not continuing");
/* * If we end up here, it means we have received an NMI while * processing panic(). Simply return without delaying and * re-enabling NMIs.
*/ return;
}
/* Re-enable the IOCK line, wait for a few seconds */
reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
outb(reason, NMI_REASON_PORT);
i = 20000; while (--i) {
touch_nmi_watchdog();
udelay(100);
}
staticvoid
unknown_nmi_error(unsignedchar reason, struct pt_regs *regs)
{ int handled;
/* * As a last resort, let the "unknown" handlers make a * best-effort attempt to figure out if they can claim * responsibility for this Unknown NMI.
*/
handled = nmi_handle(NMI_UNKNOWN, regs); if (handled) {
__this_cpu_add(nmi_stats.unknown, handled); return;
}
__this_cpu_add(nmi_stats.unknown, 1);
pr_emerg_ratelimited("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
reason, smp_processor_id());
if (unknown_nmi_panic || panic_on_unrecovered_nmi)
nmi_panic(regs, "NMI: Not continuing");
pr_emerg_ratelimited("Dazed and confused, but trying to continue\n");
}
NOKPROBE_SYMBOL(unknown_nmi_error);
/* * Back-to-back NMIs are detected by comparing the RIP of the * current NMI with that of the previous NMI. If it is the same, * it is assumed that the CPU did not have a chance to jump back * into a non-NMI context and execute code in between the two * NMIs. * * They are interesting because even if there are more than two, * only a maximum of two can be detected (anything over two is * dropped due to NMI being edge-triggered). If this is the * second half of the back-to-back NMI, assume we dropped things * and process more handlers. Otherwise, reset the 'swallow' NMI * behavior.
*/ if (regs->ip == __this_cpu_read(last_nmi_rip))
b2b = true; else
__this_cpu_write(swallow_nmi, false);
__this_cpu_write(last_nmi_rip, regs->ip);
instrumentation_begin();
if (microcode_nmi_handler_enabled() && microcode_nmi_handler()) goto out;
/* * CPU-specific NMI must be processed before non-CPU-specific * NMI, otherwise we may lose it, because the CPU-specific * NMI can not be detected/processed on other CPUs.
*/
handled = nmi_handle(NMI_LOCAL, regs);
__this_cpu_add(nmi_stats.normal, handled); if (handled) { /* * There are cases when a NMI handler handles multiple * events in the current NMI. One of these events may * be queued for in the next NMI. Because the event is * already handled, the next NMI will result in an unknown * NMI. Instead lets flag this for a potential NMI to * swallow.
*/ if (handled > 1)
__this_cpu_write(swallow_nmi, true); goto out;
}
/* * Non-CPU-specific NMI: NMI sources can be processed on any CPU. * * Another CPU may be processing panic routines while holding * nmi_reason_lock. Check if the CPU issued the IPI for crash dumping, * and if so, call its callback directly. If there is no CPU preparing * crash dump, we simply loop here.
*/ while (!raw_spin_trylock(&nmi_reason_lock)) {
run_crash_ipi_callback(regs);
cpu_relax();
}
reason = x86_platform.get_nmi_reason();
if (reason & NMI_REASON_MASK) { if (reason & NMI_REASON_SERR)
pci_serr_error(reason, regs); elseif (reason & NMI_REASON_IOCHK)
io_check_error(reason, regs);
/* * Reassert NMI in case it became active * meanwhile as it's edge-triggered:
*/ if (IS_ENABLED(CONFIG_X86_32))
reassert_nmi();
/* * Only one NMI can be latched at a time. To handle * this we may process multiple nmi handlers at once to * cover the case where an NMI is dropped. The downside * to this approach is we may process an NMI prematurely, * while its real NMI is sitting latched. This will cause * an unknown NMI on the next run of the NMI processing. * * We tried to flag that condition above, by setting the * swallow_nmi flag when we process more than one event. * This condition is also only present on the second half * of a back-to-back NMI, so we flag that condition too. * * If both are true, we assume we already processed this * NMI previously and we swallow it. Otherwise we reset * the logic. * * There are scenarios where we may accidentally swallow * a 'real' unknown NMI. For example, while processing * a perf NMI another perf NMI comes in along with a * 'real' unknown NMI. These two NMIs get combined into * one (as described above). When the next NMI gets * processed, it will be flagged by perf as handled, but * no one will know that there was a 'real' unknown NMI sent * also. As a result it gets swallowed. Or if the first * perf NMI returns two events handled then the second * NMI will get eaten by the logic below, again losing a * 'real' unknown NMI. But this is the best we can do * for now.
*/ if (b2b && __this_cpu_read(swallow_nmi))
__this_cpu_add(nmi_stats.swallow, 1); else
unknown_nmi_error(reason, regs);
out:
instrumentation_end();
}
/* * NMIs can page fault or hit breakpoints which will cause it to lose * its NMI context with the CPU when the breakpoint or page fault does an IRET. * * As a result, NMIs can nest if NMIs get unmasked due an IRET during * NMI processing. On x86_64, the asm glue protects us from nested NMIs * if the outer NMI came from kernel mode, but we can still nest if the * outer NMI came from user mode. * * To handle these nested NMIs, we have three states: * * 1) not running * 2) executing * 3) latched * * When no NMI is in progress, it is in the "not running" state. * When an NMI comes in, it goes into the "executing" state. * Normally, if another NMI is triggered, it does not interrupt * the running NMI and the HW will simply latch it so that when * the first NMI finishes, it will restart the second NMI. * (Note, the latch is binary, thus multiple NMIs triggering, * when one is running, are ignored. Only one NMI is restarted.) * * If an NMI executes an iret, another NMI can preempt it. We do not * want to allow this new NMI to run, but we want to execute it when the * first one finishes. We set the state to "latched", and the exit of * the first NMI will perform a dec_return, if the result is zero * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the * dec_return would have set the state to NMI_EXECUTING (what we want it * to be when we are running). In this case, we simply jump back to * rerun the NMI handler again, and restart the 'latched' NMI. * * No trap (breakpoint or page fault) should be hit before nmi_restart, * thus there is no race between the first check of state for NOT_RUNNING * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs * at this point. * * In case the NMI takes a page fault, we need to save off the CR2 * because the NMI could have preempted another page fault and corrupt * the CR2 that is about to be read. 
As nested NMIs must be restarted * and they can not take breakpoints or page faults, the update of the * CR2 must be done before converting the nmi state back to NOT_RUNNING. * Otherwise, there would be a race of another nested NMI coming in * after setting state to NOT_RUNNING but before updating the nmi_cr2.
*/ enum nmi_states {
NMI_NOT_RUNNING = 0,
NMI_EXECUTING,
NMI_LATCHED,
}; static DEFINE_PER_CPU(enum nmi_states, nmi_state); static DEFINE_PER_CPU(unsignedlong, nmi_cr2); static DEFINE_PER_CPU(unsignedlong, nmi_dr7);
/* * Re-enable NMIs right here when running as an SEV-ES guest. This might * cause nested NMIs, but those can be handled safely.
*/
sev_es_nmi_complete(); if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
raw_atomic_long_inc(&nsp->idt_calls);
if (arch_cpu_is_offline(smp_processor_id())) { if (microcode_nmi_handler_enabled())
microcode_offline_nmi_handler(); return;
}
/* * Needs to happen before DR7 is accessed, because the hypervisor can * intercept DR7 reads/writes, turning those into #VC exceptions.
*/
sev_es_ist_enter(regs);
/*
 * Diagnostic strings for NMI backtrace stall reporting, indexed by a 3-bit
 * condition code built in nmi_backtrace_stall_check().
 */
static char *nmi_check_stall_msg[] = {
/*									*/
/* +--------- nmi_seq & 0x1: CPU is currently in NMI handler.		*/
/* | +------ cpu_is_offline(cpu)					*/
/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls):	*/
/* | | |	NMI handler has been invoked.				*/
/* | | |								*/
/* V V V								*/
/* 0 0 0 */ "NMIs are not reaching exc_nmi() handler",
/* 0 0 1 */ "exc_nmi() handler is ignoring NMIs",
/* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi() handler",
/* 0 1 1 */ "CPU is offline and exc_nmi() handler is legitimately ignoring NMIs",
/* 1 0 0 */ "CPU is in exc_nmi() handler and no further NMIs are reaching handler",
/* 1 0 1 */ "CPU is in exc_nmi() handler which is legitimately ignoring NMIs",
/* 1 1 0 */ "CPU is offline in exc_nmi() handler and no more NMIs are reaching exc_nmi() handler",
/* 1 1 1 */ "CPU is offline in exc_nmi() handler which is legitimately ignoring NMIs",
};
void nmi_backtrace_stall_snap(conststruct cpumask *btp)
{ int cpu; struct nmi_stats *nsp;
for_each_cpu(cpu, btp) {
nsp = per_cpu_ptr(&nmi_stats, cpu);
modp = "";
msghp = "";
nmi_seq = READ_ONCE(nsp->idt_nmi_seq); if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) {
msgp = "CPU entered NMI handler function, but has not exited";
} elseif (nsp->idt_nmi_seq_snap == nmi_seq ||
nsp->idt_nmi_seq_snap + 1 == nmi_seq) {
idx = ((nmi_seq & 0x1) << 2) |
(cpu_is_offline(cpu) << 1) |
(nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls));
msgp = nmi_check_stall_msg[idx]; if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
modp = ", but OK because ignore_nmis was set"; if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
msghp = " (CPU exited one NMI handler function)"; elseif (nmi_seq & 0x1)
msghp = " (CPU currently in NMI handler function)"; else
msghp = " (CPU was never in an NMI handler function)";
} else {
msgp = "CPU is handling NMIs";
}
pr_alert("%s: CPU %d: %s%s%s\n", __func__, cpu, msgp, modp, msghp);
pr_alert("%s: last activity: %lu jiffies ago.\n",
__func__, j - READ_ONCE(nsp->recv_jiffies));
}
}
#endif
#ifdef CONFIG_X86_FRED /* * With FRED, CR2/DR6 is pushed to #PF/#DB stack frame during FRED * event delivery, i.e., there is no problem of transient states. * And NMI unblocking only happens when the stack frame indicates * that so should happen. * * Thus, the NMI entry stub for FRED is really straightforward and * as simple as most exception handlers. As such, #DB is allowed * during NMI handling.
*/
DEFINE_FREDENTRY_NMI(exc_nmi)
{
irqentry_state_t irq_state;
if (arch_cpu_is_offline(smp_processor_id())) { if (microcode_nmi_handler_enabled())
microcode_offline_nmi_handler(); return;
}
/* * Save CR2 for eventual restore to cover the case where the NMI * hits the VMENTER/VMEXIT region where guest CR2 is life. This * prevents guest state corruption in case that the NMI handler * takes a page fault.
*/
this_cpu_write(nmi_cr2, read_cr2());
irq_state = irqentry_nmi_enter(regs);
inc_irq_stat(__nmi_count);
default_do_nmi(regs);
irqentry_nmi_exit(regs, irq_state);
if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
write_cr2(this_cpu_read(nmi_cr2));
} #endif
/*
 * NOTE(review): the trailing text below is non-code extraction residue — a
 * German website disclaimer, not part of this source file. Preserved here as
 * a comment (with English translation) so the file remains valid C:
 * "The information on this web page was carefully compiled to the best of
 * our knowledge. However, neither completeness, nor correctness, nor quality
 * of the provided information is guaranteed. Remark: the colored syntax
 * display and the measurement are still experimental."
 */