/*
 * In SMCA enabled processors, we can have multiple banks for a given IP type.
 * So to define a unique name for each bank, we use a temp c-string to append
 * the MCA_IPID[InstanceId] to type's name in get_name().
 *
 * InstanceId is 32 bits which is 8 characters. Make sure MAX_MCATYPE_NAME_LEN
 * is greater than 8 plus 1 (for underscore) plus length of longest type name.
 */
#define MAX_MCATYPE_NAME_LEN	30
/* Scratch buffer used by get_name() to build "<type>_<InstanceId>" strings. */
static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
/*
 * Describes one MCA thresholding block (one MCA_MISC register) as exposed
 * through sysfs and tracked per bank.
 */
struct threshold_block {
	/* This block's number within its bank. */
	unsigned int		block;

	/* MCA bank number that contains this block. */
	unsigned int		bank;

	/* CPU which controls this block's MCA bank. */
	unsigned int		cpu;

	/* MCA_MISC MSR address for this block. */
	u32			address;

	/* Enable/Disable APIC interrupt. */
	bool			interrupt_enable;

	/* Bank can generate an interrupt. */
	bool			interrupt_capable;

	/* Value upon which threshold interrupt is generated. */
	u16			threshold_limit;

	/* sysfs object */
	struct kobject		kobj;

	/* List of threshold blocks within this block's MCA bank. */
	struct list_head	miscj;
};
/*
 * A list of the banks enabled on each logical CPU. Controls which respective
 * descriptors to initialize later in mce_threshold_create_device().
 */
static DEFINE_PER_CPU(u64, bank_map);

/* Map of banks that have more than MCA_MISC0 available. */
static DEFINE_PER_CPU(u64, smca_misc_banks_map);
/*
 * Record in smca_misc_banks_map whether this bank on this CPU provides
 * additional MISC registers (MISC1-4) beyond MCA_MISC0.
 */
staticvoid smca_set_misc_banks_map(unsignedint bank, unsignedint cpu)
{
u32 low, high;
/* * For SMCA enabled processors, BLKPTR field of the first MISC register * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
*/ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) return;
/* Bank does not support the extended MCAX register space: nothing to map. */
if (!(low & MCI_CONFIG_MCAX)) return;
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high)) return;
/* Non-zero BLKPTR means the extra MISC1-4 register set exists for this bank. */
if (low & MASK_BLKPTR_LO)
per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank);
/*
 * NOTE(review): the function's closing brace is missing here, and the
 * lines below appear to be a spliced-in fragment of a DIFFERENT function
 * (they configure MCA_CONFIG through an 'smca_config' MSR address that is
 * not declared in this function) whose header and remainder are not
 * visible in this chunk. Restore the original source before building.
 */
/* Set appropriate bits in MCA_CONFIG */ if (!rdmsr_safe(smca_config, &low, &high)) { /* * OS is required to set the MCAX bit to acknowledge that it is * now using the new MSR ranges and new registers under each * bank. It also means that the OS will configure deferred * errors in the new MCx_CONFIG register. If the bit is not set, * uncorrectable errors will cause a system panic. * * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
*/
high |= BIT(0);
/* * SMCA sets the Deferred Error Interrupt type per bank. * * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us * if the DeferredIntType bit field is available. * * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the * high portion of the MSR). OS should set this to 0x1 to enable * APIC based interrupt. First, check that no interrupt has been * set.
*/ if ((low & BIT(5)) && !((high >> 5) & 0x3))
high |= BIT(5);
staticbool lvt_interrupt_supported(unsignedint bank, u32 msr_high_bits)
{ /* * bank 4 supports APIC LVT interrupts implicitly since forever.
*/ if (bank == 4) returntrue;
/* * IntP: interrupt present; if this bit is set, the thresholding * bank can generate APIC LVT interrupts
*/ return msr_high_bits & BIT(28);
}
/*
 * Check that the LVT offset encoded in a block's MCA_MISC high bits matches
 * the APIC LVT offset the kernel set up; complain about firmware otherwise.
 */
staticbool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{ int msr = (hi & MASK_LVTOFF_HI) >> 20;
/* Negative 'apic' means no LVT offset could be reserved: firmware bug. */
if (apic < 0) {
pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
b->bank, b->block, b->address, hi, lo); returnfalse;
}
if (apic != msr) { /* * On SMCA CPUs, LVT offset is programmed at a different MSR, and * the BIOS provides the value. The original field where LVT offset * was set is reserved. Return early here:
*/ if (mce_flags.smca) returnfalse;
/*
 * NOTE(review): the lines below reference 'c' and 'm', which are not
 * parameters or locals of this function -- they look like a spliced-in
 * fragment of a different function (legacy GART TLB error filtering).
 * The remainder of lvt_off_valid is missing from this chunk. Restore
 * from the original source before building.
 */
/* NB GART TLB error reporting is disabled by default. */ if (c->x86 < 0x17) { if (m->bank == 4 && XEC(m->status, 0x1f) == 0x5) returntrue;
}
returnfalse;
}
/* * Turn off thresholding banks for the following conditions: * - MC4_MISC thresholding is not supported on Family 0x15. * - Prevent possible spurious interrupts from the IF bank on Family 0x17 * Models 0x10-0x2F due to Erratum #1114.
*/ staticvoid disable_err_thresholding(struct cpuinfo_x86 *c, unsignedint bank)
{ int i, num_msrs;
u64 hwcr; bool need_toggle;
u32 msrs[NR_BLOCKS];
/*
 * NOTE(review): 'hwcr', 'num_msrs' and 'msrs[]' are read below but are
 * never initialized in the visible code. The family/model checks that
 * populate msrs[]/num_msrs and the rdmsrq(MSR_K7_HWCR, hwcr) read appear
 * to have been lost from this chunk -- restore them before building;
 * as-is this is undefined behavior (uninitialized reads).
 */
/* McStatusWrEn has to be set */
need_toggle = !(hwcr & BIT(18)); if (need_toggle)
wrmsrq(MSR_K7_HWCR, hwcr | BIT(18));
/* Clear CntP bit safely */ for (i = 0; i < num_msrs; i++)
msr_clear_bit(msrs[i], 62);
/* restore old settings */ if (need_toggle)
wrmsrq(MSR_K7_HWCR, hwcr);
}
/* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c)
{ unsignedint bank, block, cpu = smp_processor_id();
u32 low = 0, high = 0, address = 0; int offset = -1;
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { if (mce_flags.smca)
smca_configure(bank, cpu);
/* Apply per-bank errata workarounds before probing thresholding blocks. */
disable_err_thresholding(c, bank);
/* Walk this bank's thresholding blocks; a zero address ends the chain. */
for (block = 0; block < NR_BLOCKS; ++block) {
address = get_block_address(address, low, high, bank, block, cpu); if (!address) break;
if (rdmsr_safe(address, &low, &high)) break;
/* Skip invalid blocks. */
if (!(high & MASK_VALID_HI)) continue;
/* Skip blocks without a counter or locked down by the BIOS. */
if (!(high & MASK_CNTP_HI) ||
(high & MASK_LOCKED_HI)) continue;
/*
 * NOTE(review): the function is truncated here -- the remainder of the
 * loop bodies (per-block threshold setup) and the function's closing
 * braces are missing from this chunk. Restore before building.
 */
/* * AMD systems do not have an explicit indicator that the value in MCA_ADDR is * a system physical address. Therefore, individual cases need to be detected. * Future cases and checks will be added as needed. * * 1) General case * a) Assume address is not usable. * 2) Poison errors * a) Indicated by MCA_STATUS[43]: poison. Defined for all banks except legacy * northbridge (bank 4). * b) Refers to poison consumption in the core. Does not include "no action", * "action optional", or "deferred" error severities. * c) Will include a usable address so that immediate action can be taken. * 3) Northbridge DRAM ECC errors * a) Reported in legacy bank 4 with extended error code (XEC) 8. * b) MCA_STATUS[43] is *not* defined as poison in legacy bank 4. Therefore, * this bit should not be checked. * * NOTE: SMCA UMC memory errors fall into case #1.
*/ bool amd_mce_usable_address(struct mce *m)
{ /* Check special northbridge case 3) first. */ if (!mce_flags.smca) { if (legacy_mce_is_memory_error(m)) returntrue; elseif (m->bank == 4) returnfalse;
}
/* Check poison bit for all other bank types. */ if (m->status & MCI_STATUS_POISON) returntrue;
/* Assume address is not usable for all others. */ returnfalse;
}
/* * Returns true if the logged error is deferred. False, otherwise.
*/ staticinlinebool
_log_error_bank(unsignedint bank, u32 msr_stat, u32 msr_addr, u64 misc)
{
u64 status, addr = 0;
rdmsrq(msr_stat, status); if (!(status & MCI_STATUS_VAL)) returnfalse;
if (status & MCI_STATUS_ADDRV)
rdmsrq(msr_addr, addr);
__log_error(bank, status, addr, misc);
wrmsrq(msr_stat, 0);
return status & MCI_STATUS_DEFERRED;
}
staticbool _log_error_deferred(unsignedint bank, u32 misc)
{ if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
mca_msr_reg(bank, MCA_ADDR), misc)) returnfalse;
/* * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers. * Return true here to avoid accessing these registers.
*/ if (!mce_flags.smca) returntrue;
/* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0); returntrue;
}
/*
 * We have three scenarios for checking for Deferred errors:
 *
 * 1) Non-SMCA systems check MCA_STATUS and log error if found.
 * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
 *    clear MCA_DESTAT.
 * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
 *    log it.
 */
static void log_error_deferred(unsigned int bank)
{
	if (_log_error_deferred(bank, 0))
		return;

	/*
	 * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
	 * for a valid error.
	 */
	_log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
			MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
}
/*
 * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
 * goes off when error_count reaches threshold_limit.
 */
static void amd_threshold_interrupt(void)
{
	struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
	struct threshold_bank **bp = this_cpu_read(threshold_banks);
	unsigned int bank, cpu = smp_processor_id();

	/*
	 * Validate that the threshold bank has been initialized already. The
	 * handler is installed at boot time, but on a hotplug event the
	 * interrupt might fire before the data has been initialized.
	 */
	if (!bp)
		return;

	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
		/* Skip banks that have no thresholding blocks set up. */
		if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank)))
			continue;

		first_block = bp[bank]->blocks;
		if (!first_block)
			continue;

		/*
		 * The first block is also the head of the list. Check it first
		 * before iterating over the rest.
		 */
		log_and_reset_block(first_block);
		list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
			log_and_reset_block(block);
	}
}
/*
 * NOTE(review): orphaned fragment. 'b', 'err', 'name', 'dev', 'cpu' and
 * 'bank' are all undeclared here, and the goto targets (out, out_free,
 * out_kobj) have no labels in the visible code. This looks like the middle
 * of threshold_create_bank() -- its header, error labels and remainder are
 * missing from this chunk. Restore the original source before building.
 */
b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); if (!b) {
err = -ENOMEM; goto out;
}
/* Associate the bank with the per-CPU MCE device */
b->kobj = kobject_create_and_add(name, &dev->kobj); if (!b->kobj) {
err = -EINVAL; goto out_free;
}
err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC)); if (err) goto out_kobj;
int mce_threshold_remove_device(unsignedint cpu)
{ struct threshold_bank **bp = this_cpu_read(threshold_banks);
if (!bp) return 0;
/* * Clear the pointer before cleaning up, so that the interrupt won't * touch anything of this.
*/
this_cpu_write(threshold_banks, NULL);
__threshold_remove_device(bp); return 0;
}
/** * mce_threshold_create_device - Create the per-CPU MCE threshold device * @cpu: The plugged in CPU * * Create directories and files for all valid threshold banks. * * This is invoked from the CPU hotplug callback which was installed in * mcheck_init_device(). The invocation happens in context of the hotplug * thread running on @cpu. The callback is invoked on all CPUs which are * online when the callback is installed or during a real hotplug event.
*/ int mce_threshold_create_device(unsignedint cpu)
{ unsignedint numbanks, bank; struct threshold_bank **bp; int err;
if (!mce_flags.amd_threshold) return 0;
bp = this_cpu_read(threshold_banks); if (bp) return 0;
numbanks = this_cpu_read(mce_num_banks);
bp = kcalloc(numbanks, sizeof(*bp), GFP_KERNEL); if (!bp) return -ENOMEM;
for (bank = 0; bank < numbanks; ++bank) { if (!(this_cpu_read(bank_map) & BIT_ULL(bank))) continue;
err = threshold_create_bank(bp, cpu, bank); if (err) {
__threshold_remove_device(bp); return err;
}
}
this_cpu_write(threshold_banks, bp);
if (thresholding_irq_en)
mce_threshold_vector = amd_threshold_interrupt; return 0;
}
Messung V0.5
¤ Dauer der Verarbeitung: 0.4 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.