/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structure.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = local_paca->mce_info->mce_nest_count++;
	struct machine_check_event *mce;

	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 *
	 * Do the bounds check BEFORE computing the element address so
	 * that we never form an out-of-bounds pointer into mce_event[].
	 */
	if (index >= MAX_MC_EVT)
		return;

	mce = &local_paca->mce_info->mce_event[index];

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);
	if (mce->error_type == MCE_ERROR_TYPE_UE)
		mce->u.ue_error.ignore_event = mce_err->ignore_event;

	/*
	 * Raise irq work, So that we don't miss to log the error for
	 * unrecoverable errors.
	 */
	if (mce->disposition == MCE_DISPOSITION_NOT_RECOVERED)
		mce_irq_work_queue();

	/*
	 * NOTE(review): nip, addr and phys_addr are unused here; the
	 * upstream function continues by recording them into the event.
	 * That tail appears truncated in this copy — restore it from the
	 * reference source before merging.
	 */
}
/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once event has been consumed.
 *		1 <= release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform specific machine check
 * handle routine and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until ret_from_expect() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = local_paca->mce_info->mce_nest_count - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = &local_paca->mce_info->mce_event[index];
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		local_paca->mce_info->mce_nest_count--;

	/*
	 * NOTE(review): the "return ret;" and closing brace were missing
	 * in this copy (truncation); restored here.
	 */
	return ret;
}
/*
 * Queue up the MCE event which then can be handled later.
 */
static void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = local_paca->mce_info->mce_ue_count++;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		/* Undo the increment so the counter stays bounded. */
		local_paca->mce_info->mce_ue_count--;
		return;
	}
	memcpy(&local_paca->mce_info->mce_ue_event_queue[index],
	       evt, sizeof(*evt));
}
/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	/* Nothing to queue if there is no pending event to consume. */
	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = local_paca->mce_info->mce_queue_count++;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		local_paca->mce_info->mce_queue_count--;
		return;
	}
	memcpy(&local_paca->mce_info->mce_event_queue[index],
	       &evt, sizeof(evt));
	/*
	 * NOTE(review): closing brace was missing in this copy
	 * (truncation); restored here.
	 */
}
/*
 * Process pending UE (uncorrectable error) events previously queued by
 * machine_check_ue_event(). Runs as a work_struct handler, i.e. in
 * process context, after the machine check itself has returned.
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (local_paca->mce_info->mce_ue_count > 0) {
		index = local_paca->mce_info->mce_ue_count - 1;
		evt = &local_paca->mce_info->mce_ue_event_queue[index];
		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but
		 * oh! well
		 *
		 * Don't report this machine check because the caller has
		 * asked us to ignore the event, it has a fixup handler which
		 * will do the appropriate error handling and reporting.
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.ignore_event) {
				local_paca->mce_info->mce_ue_count--;
				continue;
			}

			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else
				pr_warn("Failed to identify bad address from "
					"where the uncorrectable error (UE) "
					"was generated\n");
		}
#endif
		local_paca->mce_info->mce_ue_count--;
	}
}

/*
 * Process pending MCE events from the mce event queue. This function
 * will be called during syscall exit.
*/ staticvoid machine_check_process_queued_event(void)
{ int index; struct machine_check_event *evt;
/* * For now just print it to console. * TODO: log this error event to FSP or nvram.
*/ while (local_paca->mce_info->mce_queue_count > 0) {
index = local_paca->mce_info->mce_queue_count - 1;
evt = &local_paca->mce_info->mce_event_queue[index];
/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contains srr0 and ssr1.
 */
DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
{
	long handled;

	hv_nmi_check_nonrecoverable(regs);

	/*
	 * See if platform is capable of handling machine check;
	 * fall back to "not handled" (0) when no early hook exists.
	 */
	handled = ppc_md.machine_check_early ?
		  ppc_md.machine_check_early(regs) : 0;

	return handled;
}
/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;
/* First look in the device tree */
preempt_disable();
cpun = of_get_cpu_node(smp_processor_id(), NULL); if (cpun) {
of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
prop, str) { if (strcmp(str, "bit17-vector-ci-load") == 0)
hmer_debug_trig_function = DTRIG_VECTOR_CI; elseif (strcmp(str, "bit17-tm-suspend-escape") == 0)
hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
}
of_node_put(cpun);
}
preempt_enable();
/* If we found the property, don't look at PVR */ if (prop) goto out;
pvr = mfspr(SPRN_PVR); /* Check for POWER9 Nimbus (scale-out) */ if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) { /* DD2.2 and later */ if ((pvr & 0xfff) >= 0x202)
hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; /* DD2.0 and DD2.1 - used for vector CI load emulation */ elseif ((pvr & 0xfff) >= 0x200)
hmer_debug_trig_function = DTRIG_VECTOR_CI;
}
out: switch (hmer_debug_trig_function) { case DTRIG_VECTOR_CI:
pr_debug("HMI debug trigger used for vector CI load\n"); break; case DTRIG_SUSPEND_ESCAPE:
pr_debug("HMI debug trigger used for TM suspend escape\n"); break; default: break;
} return 0;
}
__initcall(init_debug_trig_function);
/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * Now to avoid problems with soft-disable we
		 * only do the emulation if we are coming from
		 * host user space
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}
/*
 * Return values:
 *  >= 0 - value returned by hmi_handle_debugtrig() (cause was a known
 *	   debug trigger, possibly fully handled)
 *     1 - platform/TB-resync path was taken
 *
 * NOTE(review): the original header comment and the tail of this
 * handler were truncated in this copy; the wait_for_tb_resync()/return
 * tail below is reconstructed from the reference source — verify
 * before merging.
 */
DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode)
{
	int ret;

	local_paca->hmi_irqs++;

	/* Known debug-trigger causes are handled entirely here. */
	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}
/*
 * (Extraction residue from the hosting web page — not part of the
 * kernel source; translated from German:)
 * The information on this web page was compiled carefully and to the
 * best of our knowledge. However, no guarantee is given as to the
 * completeness, correctness, or quality of the information provided.
 * Note: the colored syntax highlighting and the measurement are still
 * experimental.
 */