/* When an irrecoverable trap occurs at tl > 0, the trap entry
 * code logs the trap state registers at every level in the trap
 * stack.  It is found at (pt_regs + sizeof(pt_regs)) and the
 * layout is as follows:
 */
struct tl1_traplog {
	struct {
		unsigned long tstate;	/* saved %tstate at this trap level */
		unsigned long tpc;	/* trap program counter */
		unsigned long tnpc;	/* trap next program counter */
		unsigned long tt;	/* trap type */
	} trapstack[4];			/* one entry per hardware trap level */
	unsigned long tl;		/* trap level at time of the trap */
};
/* NOTE(review): this span looks garbled by extraction -- the signature
 * says dump_tl1_traplog(), but the body reads and decodes the user
 * instruction at regs->tpc, which matches a no-fault-ASI exception
 * filter (is_no_fault_exception-style code) rather than a traplog
 * dumper.  'insn', 'regs' and 'asi' are not declared in this span;
 * confirm against the original file before relying on this.
 */
staticvoid dump_tl1_traplog(struct tl1_traplog *p)
{ int i, limit;
/* Cannot read the faulting instruction from userspace: not handled. */
if (get_user(insn, (u32 __user *)regs->tpc) == -EFAULT) returnfalse;
/* * Must do a little instruction decoding here in order to * decide on a course of action. The bits of interest are: * insn[31:30] = op, where 3 indicates the load/store group * insn[24:19] = op3, which identifies individual opcodes * insn[13] indicates an immediate offset * op3[4]=1 identifies alternate space instructions * op3[5:4]=3 identifies floating point instructions * op3[2]=1 identifies stores * See "Opcode Maps" in the appendix of any Sparc V9 * architecture spec for full details.
*/ if ((insn & 0xc0800000) == 0xc0800000) { /* op=3, op3[4]=1 */ if (insn & 0x2000) /* immediate offset */
asi = (regs->tstate >> 24); /* saved %asi */ else
asi = (insn >> 5); /* immediate asi */ if ((asi & 0xf6) == ASI_PNF) { if (insn & 0x200000) /* op3[2], stores */ returnfalse; if (insn & 0x1000000) /* op3[5:4]=3 (fp) */
handle_ldf_stq(insn, regs); else
handle_ld_nf(insn, regs); returntrue;
}
} returnfalse;
}
/* NOTE(review): fragment -- begins mid-function; the enclosing
 * handler's signature is not visible in this chunk.  It narrows
 * tpc/tnpc for 32-bit tasks, skips no-fault exceptions, then maps
 * the HV fault 'type' to a signal; presumably the tail of a sun4v
 * data-access-exception handler -- confirm against the full file.
 */
if (test_thread_flag(TIF_32BIT)) {
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
} if (is_no_fault_exception(regs)) return;
/* MCD (Memory Corruption Detection) disabled trap (TT=0x19) in HV * is vectored through data access exception trap with fault type * set to HV_FAULT_TYPE_MCD_DIS. Check for MCD disabled trap. * Accessing an address with invalid ASI for the address, for * example setting an ADI tag on an address with ASI_MCD_PRIMARY * when TTE.mcd is not set for the VA, is also vectored into * kernel by HV as data access exception with fault type set to * HV_FAULT_TYPE_INV_ASI.
*/ switch (type) { case HV_FAULT_TYPE_INV_ASI:
force_sig_fault(SIGILL, ILL_ILLADR, (void __user *)addr); break; case HV_FAULT_TYPE_MCD_DIS:
force_sig_fault(SIGSEGV, SEGV_ACCADI, (void __user *)addr); break; default:
/* Any other fault type is reported as a plain mapping error. */
force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)addr); break;
}
}
/* NOTE(review): fragment -- tail of a Spitfire correctable-ECC
 * log routine; the function header is not visible in this chunk.
 */
/* We always log it, even if someone is listening for this * trap.
*/
notify_die(DIE_TRAP, "Correctable ECC Error", regs,
0, TRAP_TYPE_CEE, SIGTRAP);
/* The Correctable ECC Error trap does not disable I/D caches. So * we only have to restore the ESTATE Error Enable register.
*/
spitfire_enable_estate_errors();
}
/* NOTE(review): fragment -- tail of a Spitfire uncorrectable-error
 * log routine; the function header is not visible in this chunk,
 * and the span ends without a visible closing brace.
 */
/* We always log it, even if someone is listening for this * trap.
*/
notify_die(DIE_TRAP, "Uncorrectable Error", regs,
0, tt, SIGTRAP);
/* A UE in privileged state is fatal; dump the TL>0 trap log first
 * if we trapped at tl1. */
if (regs->tstate & TSTATE_PRIV) { if (tl1)
dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
die_if_kernel("UE", regs);
}
/* XXX need more intelligent processing here, such as is implemented * XXX for cheetah errors, in fact if the E-cache still holds the * XXX line with bad parity this will loop
*/
if (tt == TRAP_TYPE_CEE) { /* Handle the case where we took a CEE trap, but ACK'd * only the UE state in the UDB error registers.
*/ if (afsr & SFAFSR_UE) { if (udbh & UDBE_CE) {
/* ACK the leftover CE state in the high UDB error register
 * (offset 0x0 via ASI_UDB_ERROR_W). */
__asm__ __volatile__( "stxa %0, [%1] %2\n\t" "membar #Sync"
: /* no outputs */
: "r" (udbh & UDBE_CE), "r" (0x0), "i" (ASI_UDB_ERROR_W));
} if (udbl & UDBE_CE) {
/* Same for the low UDB error register (offset 0x18). */
__asm__ __volatile__( "stxa %0, [%1] %2\n\t" "membar #Sync"
: /* no outputs */
: "r" (udbl & UDBE_CE), "r" (0x18), "i" (ASI_UDB_ERROR_W));
}
}
}
/* NOTE(review): truncated -- this function continues past the end
 * of the visible span (no closing brace here).
 */
void __init cheetah_ecache_flush_init(void)
{ unsignedlong largest_size, smallest_linesize, order, ver; int i, sz;
/* Scan all cpu device tree nodes, note two values: * 1) largest E-cache size * 2) smallest E-cache line size
*/
largest_size = 0UL;
smallest_linesize = ~0UL;
for (i = 0; i < NR_CPUS; i++) { unsignedlong val;
val = cpu_data(i).ecache_size; if (!val) continue;
if (val > largest_size)
largest_size = val;
val = cpu_data(i).ecache_line_size; if (val < smallest_linesize)
smallest_linesize = val;
}
/* If no CPU reported usable E-cache parameters we cannot size the
 * flush area, so halt in the PROM. */
if (largest_size == 0UL || smallest_linesize == ~0UL) {
prom_printf("cheetah_ecache_flush_init: Cannot probe cpu E-cache " "parameters.\n");
prom_halt();
}
/* Mark all AFSRs as invalid so that the trap handler will * log new information there.
*/ for (i = 0; i < 2 * NR_CPUS; i++)
cheetah_error_log[i].afsr = CHAFSR_INVALID;
/* NOTE(review): truncated -- only the header and local declarations
 * of __cheetah_flush_icache() are visible; the body is missing from
 * this chunk.
 */
/* Unfortunately, the diagnostic access to the I-cache tags we need to * use to clear the thing interferes with I-cache coherency transactions. * * So we must only flush the I-cache when it is disabled.
*/ staticvoid __cheetah_flush_icache(void)
{ unsignedint icache_size, icache_line_size; unsignedlong addr;
/* NOTE(review): truncated -- only the header and local declarations
 * of cheetah_plus_zap_dcache_parity() are visible here.
 */
/* In order to make the even parity correct we must do two things. * First, we clear DC_data_parity and set DC_utag to an appropriate value. * Next, we clear out all 32-bytes of data for that line. Data of * all-zero + tag parity value of zero == correct parity.
*/ staticvoid cheetah_plus_zap_dcache_parity(void)
{ unsignedint dcache_size, dcache_line_size; unsignedlong addr;
/* NOTE(review): lookup tables mapping Cheetah AFSR ECC/MTAG syndrome
 * values to codes consumed by the memory controller's get_unumber
 * routine.  Values < 128 name a data bit; the MT*/MTC*/C*/M* macros
 * below are sentinel codes (>= 128).  Do not edit individual entries
 * without the chip documentation.
 */
/* Conversion tables used to frob Cheetah AFSR syndrome values into * something palatable to the memory controller driver get_unumber * routine.
*/ #define MT0 137 #define MT1 138 #define MT2 139 #define NONE 254 #define MTC0 140 #define MTC1 141 #define MTC2 142 #define MTC3 143 #define C0 128 #define C1 129 #define C2 130 #define C3 131 #define C4 132 #define C5 133 #define C6 134 #define C7 135 #define C8 136 #define M2 144 #define M3 145 #define M4 146 #define M 147 staticunsignedchar cheetah_ecc_syntab[] = { /*00*/NONE, C0, C1, M2, C2, M2, M3, 47, C3, M2, M2, 53, M2, 41, 29, M, /*01*/C4, M, M, 50, M2, 38, 25, M2, M2, 33, 24, M2, 11, M, M2, 16, /*02*/C5, M, M, 46, M2, 37, 19, M2, M, 31, 32, M, 7, M2, M2, 10, /*03*/M2, 40, 13, M2, 59, M, M2, 66, M, M2, M2, 0, M2, 67, 71, M, /*04*/C6, M, M, 43, M, 36, 18, M, M2, 49, 15, M, 63, M2, M2, 6, /*05*/M2, 44, 28, M2, M, M2, M2, 52, 68, M2, M2, 62, M2, M3, M3, M4, /*06*/M2, 26, 106, M2, 64, M, M2, 2, 120, M, M2, M3, M, M3, M3, M4, /*07*/116, M2, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3, /*08*/C7, M2, M, 42, M, 35, 17, M2, M, 45, 14, M2, 21, M2, M2, 5, /*09*/M, 27, M, M, 99, M, M, 3, 114, M2, M2, 20, M2, M3, M3, M, /*0a*/M2, 23, 113, M2, 112, M2, M, 51, 95, M, M2, M3, M2, M3, M3, M2, /*0b*/103, M, M2, M3, M2, M3, M3, M4, M2, 48, M, M, 73, M2, M, M3, /*0c*/M2, 22, 110, M2, 109, M2, M, 9, 108, M2, M, M3, M2, M3, M3, M, /*0d*/102, M2, M, M, M2, M3, M3, M, M2, M3, M3, M2, M, M4, M, M3, /*0e*/98, M, M2, M3, M2, M, M3, M4, M2, M3, M3, M4, M3, M, M, M, /*0f*/M2, M3, M3, M, M3, M, M, M, 56, M4, M, M3, M4, M, M, M, /*10*/C8, M, M2, 39, M, 34, 105, M2, M, 30, 104, M, 101, M, M, 4, /*11*/M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, M2, M, M3, M, /*12*/M2, 97, 82, M2, 78, M2, M2, 1, 96, M, M, M, M, M, M3, M2, /*13*/94, M, M2, M3, M2, M, M3, M, M2, M, 79, M, 69, M, M4, M, /*14*/M2, 93, 92, M, 91, M, M2, 8, 90, M2, M2, M, M, M, M, M4, /*15*/89, M, M, M3, M2, M3, M3, M, M, M, M3, M2, M3, M2, M, M3, /*16*/86, M, M2, M3, M2, M, M3, M, M2, M, M3, M, M3, M, M, M3, /*17*/M, M, M3, M2, M3, M2, M4, M, 60, M, M2, M3, M4, M, M, M2, /*18*/M2, 88, 85, M2, 84, M, M2, 55, 81, M2, 
M2, M3, M2, M3, M3, M4, /*19*/77, M, M, M, M2, M3, M, M, M2, M3, M3, M4, M3, M2, M, M, /*1a*/74, M, M2, M3, M, M, M3, M, M, M, M3, M, M3, M, M4, M3, /*1b*/M2, 70, 107, M4, 65, M2, M2, M, 127, M, M, M, M2, M3, M3, M, /*1c*/80, M2, M2, 72, M, 119, 118, M, M2, 126, 76, M, 125, M, M4, M3, /*1d*/M2, 115, 124, M, 75, M, M, M3, 61, M, M4, M, M4, M, M, M, /*1e*/M, 123, 122, M4, 121, M4, M, M3, 117, M2, M2, M3, M4, M3, M, M, /*1f*/111, M, M, M, M4, M3, M3, M, M, M, M3, M, M3, M2, M, M
}; staticunsignedchar cheetah_mtag_syntab[] = {
NONE, MTC0,
MTC1, NONE,
MTC2, NONE,
NONE, MT0,
MTC3, NONE,
NONE, MT1,
NONE, MT2,
NONE, NONE
};
/* Return the highest priority error conditon mentioned. */ staticinlineunsignedlong cheetah_get_hipri(unsignedlong afsr)
{ unsignedlong tmp = 0; int i;
for (i = 0; cheetah_error_table[i].mask; i++) { if ((tmp = (afsr & cheetah_error_table[i].mask)) != 0UL) return tmp;
} return tmp;
}
staticconstchar *cheetah_get_string(unsignedlong bit)
{ int i;
for (i = 0; cheetah_error_table[i].mask; i++) { if ((bit & cheetah_error_table[i].mask) != 0UL) return cheetah_error_table[i].name;
} return"???";
}
/* NOTE(review): fragment -- begins mid-function; this appears to be
 * the tail of the Cheetah fast-ECC trap handler (logs the error,
 * decides recoverability, panics if not recoverable, then flushes
 * the E-cache).  Locals p, afsr, afar, regs, local_snapshot and
 * recoverable are declared in the missing header portion.
 */
p = cheetah_get_error_log(afsr); if (!p) {
prom_printf("ERROR: Early Fast-ECC error afsr[%016lx] afar[%016lx]\n",
afsr, afar);
prom_printf("ERROR: CPU(%d) TPC[%016lx] TNPC[%016lx] TSTATE[%016lx]\n",
smp_processor_id(), regs->tpc, regs->tnpc, regs->tstate);
prom_halt();
}
/* Grab snapshot of logged error. */
memcpy(&local_snapshot, p, sizeof(local_snapshot));
/* If the current trap snapshot does not match what the * trap handler passed along into our args, big trouble. * In such a case, mark the local copy as invalid. * * Else, it matches and we mark the afsr in the non-local * copy as invalid so we may log new error traps there.
*/ if (p->afsr != afsr || p->afar != afar)
local_snapshot.afsr = CHAFSR_INVALID; else
p->afsr = CHAFSR_INVALID;
/* Decide if we can continue after handling this trap and * logging the error.
*/
recoverable = 1; if (afsr & (CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP))
recoverable = 0;
/* Re-check AFSR/AFAR. What we are looking for here is whether a new * error was logged while we had error reporting traps disabled.
*/ if (cheetah_recheck_errors(&local_snapshot)) { unsignedlong new_afsr = local_snapshot.afsr;
/* If we got a new asynchronous error, die... */ if (new_afsr & (CHAFSR_EMU | CHAFSR_EDU |
CHAFSR_WDU | CHAFSR_CPU |
CHAFSR_IVU | CHAFSR_UE |
CHAFSR_BERR | CHAFSR_TO))
recoverable = 0;
}
if (!recoverable)
panic("Irrecoverable Fast-ECC error trap.\n");
/* Flush E-cache to kick the error trap handlers out. */
cheetah_flush_ecache();
}
/* NOTE(review): truncated -- cheetah_fix_ce() is cut before its
 * return; the tail (and closing brace) is missing from this chunk.
 * Return codes visible here: 0 = intermittent (no new error after
 * displacement), 1 = error cleared on retry, 2 = error persists.
 */
/* Try to fix a correctable error by pushing the line out from * the E-cache. Recheck error reporting registers to see if the * problem is intermittent.
*/ staticint cheetah_fix_ce(unsignedlong physaddr)
{ unsignedlong orig_estate; unsignedlong alias1, alias2; int ret;
/* We calculate alias addresses that will force the * cache line in question out of the E-cache. Then * we bring it back in with an atomic instruction so * that we get it in some modified/exclusive state, * then we displace it again to try and get proper ECC * pushed back into the system.
*/
physaddr &= ~(8UL - 1UL);
alias1 = (ecache_flush_physbase +
(physaddr & ((ecache_flush_size >> 1) - 1)));
alias2 = alias1 + (ecache_flush_size >> 1);
/* Displace, re-fetch atomically, displace again (see comment above). */
__asm__ __volatile__("ldxa [%0] %3, %%g0\n\t" "ldxa [%1] %3, %%g0\n\t" "casxa [%2] %3, %%g0, %%g0\n\t" "ldxa [%0] %3, %%g0\n\t" "ldxa [%1] %3, %%g0\n\t" "membar #Sync"
: /* no outputs */
: "r" (alias1), "r" (alias2), "r" (physaddr), "i" (ASI_PHYS_USE_EC));
/* Did that trigger another error? */ if (cheetah_recheck_errors(NULL)) { /* Try one more time. */
__asm__ __volatile__("ldxa [%0] %1, %%g0\n\t" "membar #Sync"
: : "r" (physaddr), "i" (ASI_PHYS_USE_EC)); if (cheetah_recheck_errors(NULL))
ret = 2; else
ret = 1;
} else { /* No new error, intermittent problem. */
ret = 0;
}
/* NOTE(review): fragment -- begins mid-function and ends without a
 * closing brace; appears to be part of the Cheetah correctable-ECC
 * (CEE) trap handler.  Locals p, afsr, afar, regs, local_snapshot,
 * is_memory and recoverable come from the missing header portion.
 */
p = cheetah_get_error_log(afsr); if (!p) {
prom_printf("ERROR: Early CEE error afsr[%016lx] afar[%016lx]\n",
afsr, afar);
prom_printf("ERROR: CPU(%d) TPC[%016lx] TNPC[%016lx] TSTATE[%016lx]\n",
smp_processor_id(), regs->tpc, regs->tnpc, regs->tstate);
prom_halt();
}
/* Grab snapshot of logged error. */
memcpy(&local_snapshot, p, sizeof(local_snapshot));
/* If the current trap snapshot does not match what the * trap handler passed along into our args, big trouble. * In such a case, mark the local copy as invalid. * * Else, it matches and we mark the afsr in the non-local * copy as invalid so we may log new error traps there.
*/ if (p->afsr != afsr || p->afar != afar)
local_snapshot.afsr = CHAFSR_INVALID; else
p->afsr = CHAFSR_INVALID;
is_memory = cheetah_check_main_memory(afar);
/* For a CE in main memory, attempt to scrub the line in place. */
if (is_memory && (afsr & CHAFSR_CE) != 0UL) { /* XXX Might want to log the results of this operation * XXX somewhere... -DaveM
*/
cheetah_fix_ce(afar);
}
/* Decide if we can continue after handling this trap and * logging the error.
*/
recoverable = 1; if (afsr & (CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP))
recoverable = 0;
/* NOTE(review): fragment -- begins mid-function; appears to be the
 * tail of the Cheetah deferred-error trap handler.  The #ifdef
 * CONFIG_PCI block below is opened but its matching #endif is not
 * visible in this span -- confirm brace/#endif pairing against the
 * original file.
 */
#ifdef CONFIG_PCI /* Check for the special PCI poke sequence. */ if (pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) {
cheetah_flush_icache();
cheetah_flush_dcache();
p = cheetah_get_error_log(afsr); if (!p) {
prom_printf("ERROR: Early deferred error afsr[%016lx] afar[%016lx]\n",
afsr, afar);
prom_printf("ERROR: CPU(%d) TPC[%016lx] TNPC[%016lx] TSTATE[%016lx]\n",
smp_processor_id(), regs->tpc, regs->tnpc, regs->tstate);
prom_halt();
}
/* Grab snapshot of logged error. */
memcpy(&local_snapshot, p, sizeof(local_snapshot));
/* If the current trap snapshot does not match what the * trap handler passed along into our args, big trouble. * In such a case, mark the local copy as invalid. * * Else, it matches and we mark the afsr in the non-local * copy as invalid so we may log new error traps there.
*/ if (p->afsr != afsr || p->afar != afar)
local_snapshot.afsr = CHAFSR_INVALID; else
p->afsr = CHAFSR_INVALID;
/* Decide if we can continue after handling this trap and * logging the error.
*/
recoverable = 1; if (afsr & (CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP))
recoverable = 0;
/* Re-check AFSR/AFAR. What we are looking for here is whether a new * error was logged while we had error reporting traps disabled.
*/ if (cheetah_recheck_errors(&local_snapshot)) { unsignedlong new_afsr = local_snapshot.afsr;
/* If we got a new asynchronous error, die... */ if (new_afsr & (CHAFSR_EMU | CHAFSR_EDU |
CHAFSR_WDU | CHAFSR_CPU |
CHAFSR_IVU | CHAFSR_UE |
CHAFSR_BERR | CHAFSR_TO))
recoverable = 0;
}
/* "Recoverable" here means we try to yank the page from ever * being newly used again. This depends upon a few things: * 1) Must be main memory, and AFAR must be valid. * 2) If we trapped from user, OK. * 3) Else, if we trapped from kernel we must find exception * table entry (ie. we have to have been accessing user * space). * * If AFAR is not in main memory, or we trapped from kernel * and cannot find an exception table entry, it is unacceptable * to try and continue.
*/ if (recoverable && is_memory) { if ((regs->tstate & TSTATE_PRIV) == 0UL) { /* OK, usermode access. */
recoverable = 1;
} else { conststruct exception_table_entry *entry;
entry = search_exception_tables(regs->tpc); if (entry) { /* OK, kernel access to userspace. */
recoverable = 1;
} else { /* BAD, privileged state is corrupted. */
recoverable = 0;
}
/* Pin the bad page so it is never handed out again. */
if (recoverable) { if (pfn_valid(afar >> PAGE_SHIFT))
get_page(pfn_to_page(afar >> PAGE_SHIFT)); else
recoverable = 0;
/* Only perform fixup if we still have a * recoverable condition.
*/ if (recoverable) {
regs->tpc = entry->fixup;
regs->tnpc = regs->tpc + 4;
}
}
}
} else {
recoverable = 0;
}
if (!recoverable)
panic("Irrecoverable deferred error trap.\n");
}
/* NOTE(review): garbled splice -- cheetah_plus_parity_error() begins
 * here, but after the dcache flush the span jumps to an unrelated
 * 'attrs' check (sun4v error-report printing code); the rest of the
 * parity handler is missing.  Confirm against the original file.
 */
/* Handle a D/I cache parity error trap. TYPE is encoded as: * * Bit0: 0=dcache,1=icache * Bit1: 0=recoverable,1=unrecoverable * * The hardware has disabled both the I-cache and D-cache in * the %dcr register.
*/ void cheetah_plus_parity_error(int type, struct pt_regs *regs)
{ if (type & 0x1)
__cheetah_flush_icache(); else
cheetah_plus_zap_dcache_parity();
cheetah_flush_dcache();
/* NOTE(review): the lines below do not belong to this function. */
if (attrs & SUN4V_ERR_ATTRS_RES_QUEUE_FULL)
pr_cont("res-queue-full ");
}
/* NOTE(review): garbled splice -- the sun4v_report_real_raddr()
 * header is immediately followed by code that references 'attrs'
 * and 'ent', which are not declared here and which (given the
 * recursive-looking call below) presumably belong to a caller such
 * as the sun4v error-report printer.  Span ends without a closing
 * brace.  Confirm against the original file.
 */
/* When the report contains a real-address of "-1" it means that the * hardware did not provide the address. So we compute the effective * address of the load or store instruction at regs->tpc and report * that. Usually when this happens it's a PIO and in such a case we * are using physical addresses with bypass ASIs anyways, so what we * report here is exactly what we want.
*/ staticvoid sun4v_report_real_raddr(constchar *pfx, struct pt_regs *regs)
{ unsignedint insn;
u64 addr;
/* Various fields in the error report are only valid if * certain attribute bits are set.
*/ if (attrs & (SUN4V_ERR_ATTRS_MEMORY |
SUN4V_ERR_ATTRS_PIO |
SUN4V_ERR_ATTRS_ASI)) {
printk("%s: raddr [0x%016llx]\n", pfx, ent->err_raddr);
if (ent->err_raddr == ~(u64)0)
sun4v_report_real_raddr(pfx, regs);
}
/* Handle memory corruption detected error which is vectored in * through resumable error trap.
*/ staticvoid do_mcd_err(struct pt_regs *regs, struct sun4v_error_entry ent)
{ if (notify_die(DIE_TRAP, "MCD error", regs, 0, 0x34,
SIGSEGV) == NOTIFY_STOP) return;
if (regs->tstate & TSTATE_PRIV) { /* MCD exception could happen because the task was * running a system call with MCD enabled and passed a * non-versioned pointer or pointer with bad version * tag to the system call. In such cases, hypervisor * places the address of offending instruction in the * resumable error report. This is a deferred error, * so the read/write that caused the trap was potentially * retired long time back and we may have no choice * but to send SIGSEGV to the process.
*/ conststruct exception_table_entry *entry;
entry = search_exception_tables(regs->tpc); if (entry) { /* Looks like a bad syscall parameter */ #ifdef DEBUG_EXCEPTIONS
pr_emerg("Exception: PC<%016lx> faddr\n",
regs->tpc);
pr_emerg("EX_TABLE: insn<%016lx> fixup<%016lx>\n",
ent.err_raddr, entry->fixup); #endif
regs->tpc = entry->fixup;
regs->tnpc = regs->tpc + 4; return;
}
}
/* Send SIGSEGV to the userspace process with the right signal * code
*/
force_sig_fault(SIGSEGV, SEGV_ADIDERR, (void __user *)ent.err_raddr);
}
/* NOTE(review): fragment -- sun4v_resum_error() begins here but the
 * section that looks up the per-cpu error entry and fills local_copy
 * / ent / paddr / cpu is missing, and the span ends without a
 * closing brace ('goto out' target also not visible).  Confirm
 * against the original file.
 */
/* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate. * Log the event and clear the first word of the entry.
*/ void sun4v_resum_error(struct pt_regs *regs, unsignedlong offset)
{ enum ctx_state prev_state = exception_enter(); struct sun4v_error_entry *ent, local_copy; struct trap_per_cpu *tb; unsignedlong paddr; int cpu;
/* We have a local copy now, so release the entry. */
ent->err_handle = 0;
wmb();
put_cpu();
if (local_copy.err_type == SUN4V_ERR_TYPE_SHUTDOWN_RQST) { /* We should really take the seconds field of * the error report and use it for the shutdown * invocation, but for now do the same thing we * do for a DS shutdown request.
*/
pr_info("Shutdown request, %u seconds...\n",
local_copy.err_secs);
orderly_poweroff(true); goto out;
}
/* If this is a memory corruption detected error vectored in * by HV through resumable error trap, call the handler
*/ if (local_copy.err_attrs & SUN4V_ERR_ATTRS_MCD) {
do_mcd_err(regs, local_copy); return;
}
/* If we try to printk() we'll probably make matters worse, by trying
 * to retake locks this cpu already holds or causing more errors. So
 * just bump a counter, and we'll report these counter bumps above.
 *
 * Called when the resumable error queue overflows; @regs is the
 * trap-time register state (unused here).
 */
void sun4v_resum_overflow(struct pt_regs *regs)
{
	atomic_inc(&sun4v_resum_oflow_cnt);
}
/* NOTE(review): truncated -- only the header and the 'insn' local of
 * sun4v_get_vaddr() are visible; the body is cut off by unrelated
 * trailing text in this chunk.
 */
/* Given a set of registers, get the virtual address that was being accessed * by the faulting instructions at tpc.
*/ staticunsignedlong sun4v_get_vaddr(struct pt_regs *regs)
{ unsignedint insn;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.