/* First or last read error. */
first = (mode == FRE_SCRUB || mode == FRE_DEMAND); /* Patrol scrub or on-demand read error. */
scrub = (mode == FRE_SCRUB || mode == LRE_SCRUB);
v = read_imc_reg(imc, chan, offset, width);
if (enable) { /* Save default configurations. */
*rrl_ctl = v;
v &= ~rrl->uc_mask;
if (first)
v |= rrl->noover_mask; else
v &= ~rrl->noover_mask;
if (scrub)
v |= rrl->en_patspr_mask; else
v &= ~rrl->en_patspr_mask;
v |= rrl->en_mask;
} else { /* Restore default configurations. */ if (*rrl_ctl & rrl->uc_mask)
v |= rrl->uc_mask;
if (first) { if (!(*rrl_ctl & rrl->noover_mask))
v &= ~rrl->noover_mask;
} else { if (*rrl_ctl & rrl->noover_mask)
v |= rrl->noover_mask;
}
if (scrub) { if (!(*rrl_ctl & rrl->en_patspr_mask))
v &= ~rrl->en_patspr_mask;
} else { if (*rrl_ctl & rrl->en_patspr_mask)
v |= rrl->en_patspr_mask;
}
if (!(*rrl_ctl & rrl->en_mask))
v &= ~rrl->en_mask;
}
write_imc_reg(imc, chan, offset, width, v);
}
/* Apply (or restore) the retry_rd_err_log configuration on every RRL register set of a channel. */
static void enable_rrls(struct skx_imc *imc, int chan, struct reg_rrl *rrl,
			bool enable, u32 *rrl_ctl)
{
	int set;

	/* One saved control word per register set: rrl_ctl[set]. */
	for (set = 0; set < rrl->set_num; set++)
		enable_rrl(imc, chan, rrl, set, enable, rrl_ctl + set);
}
if (!imc->mbase || !imc->hbm_mc || !rrl_hbm[0] || !rrl_hbm[1]) return;
for (i = 0; i < chan_num; i++) {
enable_rrls(imc, i, rrl_hbm[0], enable, chan[i].rrl_ctl[0]);
enable_rrls(imc, i, rrl_hbm[1], enable, chan[i].rrl_ctl[1]);
}
}
/*
 * Walk every socket on the EDAC device list and toggle retry_rd_err_log on
 * all of its memory controllers: DDR controllers first, then HBM controllers,
 * which sit directly after the DDR ones in d->imc[].
 */
static void enable_retry_rd_err_log(bool enable)
{
	int imc, ddr_num, all_num;
	struct skx_dev *d;

	edac_dbg(2, "\n");

	list_for_each_entry(d, i10nm_edac_list, list) {
		ddr_num = res_cfg->ddr_imc_num;
		all_num = ddr_num + res_cfg->hbm_imc_num;

		for (imc = 0; imc < ddr_num; imc++)
			enable_rrls_ddr(&d->imc[imc], enable);

		for (; imc < all_num; imc++)
			enable_rrls_hbm(&d->imc[imc], enable);
	}
}
n = scnprintf(msg, len, " retry_rd_err_log["); for (i = 0; i < rrl->set_num; i++) {
scrub = (rrl->modes[i] == FRE_SCRUB || rrl->modes[i] == LRE_SCRUB); if (scrub_err != scrub) continue;
for (j = 0; j < rrl->reg_num && len - n > 0; j++) {
offset = rrl->offsets[i][j];
width = rrl->widths[j];
log = read_imc_reg(imc, ch, offset, width);
if (width == 4)
n += scnprintf(msg + n, len - n, "%.8llx ", log); else
n += scnprintf(msg + n, len - n, "%.16llx ", log);
/* Clear RRL status if RRL in Linux control mode. */ if (retry_rd_err_log == 2 && !j && (log & status_mask))
write_imc_reg(imc, ch, offset, width, log & ~status_mask);
}
}
/* Move back one space. */
n--;
n += scnprintf(msg + n, len - n, "]");
if (len - n > 0) {
n += scnprintf(msg + n, len - n, " correrrcnt["); for (i = 0; i < rrl->cecnt_num && len - n > 0; i++) {
offset = rrl->cecnt_offsets[i];
width = rrl->cecnt_widths[i];
corr = read_imc_reg(imc, ch, offset, width);
/* CPUs {ICX,SPR} encode two counters per 4-byte CORRERRCNT register. */ if (res_cfg->type <= SPR) {
n += scnprintf(msg + n, len - n, "%.4llx %.4llx ",
corr & 0xffff, corr >> 16);
} else { /* CPUs {GNR} encode one counter per CORRERRCNT register. */ if (width == 4)
n += scnprintf(msg + n, len - n, "%.8llx ", corr); else
n += scnprintf(msg + n, len - n, "%.16llx ", corr);
}
}
/* Move back one space. */
n--;
n += scnprintf(msg + n, len - n, "]");
}
}
pdev = pci_get_domain_bus_and_slot(dom, bus, PCI_DEVFN(dev, fun)); if (!pdev) {
edac_dbg(2, "No device %02x:%02x.%x\n",
bus, dev, fun); return NULL;
}
if (unlikely(pci_enable_device(pdev) < 0)) {
edac_dbg(2, "Failed to enable device %02x:%02x.%x\n",
bus, dev, fun);
pci_dev_put(pdev); return NULL;
}
return pdev;
}
/**
 * i10nm_get_imc_num() - Get the number of present DDR memory controllers.
 *
 * @cfg : The pointer to the structure of EDAC resource configurations.
 *
 * For Granite Rapids CPUs, the number of present DDR memory controllers read
 * at runtime overwrites the value statically configured in @cfg->ddr_imc_num.
 * For other CPUs, the number of present DDR memory controllers is statically
 * configured in @cfg->ddr_imc_num.
 *
 * RETURNS : 0 on success, < 0 on failure.
 */ staticint i10nm_get_imc_num(struct res_config *cfg)
{ int n, imc_num, chan_num = 0; struct skx_dev *d;
	u32 reg;
	/*
	 * NOTE(review): 'n', 'd' and 'reg' are declared but never used below,
	 * and 'chan_num' is never updated from 0, so the GNR case always hits
	 * the "Invalid DDR MC number" error path. The register-scan loop that
	 * counts the present GNR channels appears to be missing from this
	 * excerpt — confirm against the complete file before relying on it.
	 */
	switch (cfg->type) { case GNR:
	/* One channel per DDR memory controller for Granite Rapids CPUs. */
	imc_num = chan_num;
	if (!imc_num) {
		i10nm_printk(KERN_ERR, "Invalid DDR MC number\n"); return -ENODEV;
	}
	/* Guard against overrunning the statically sized d->imc[] array. */
	if (imc_num > I10NM_NUM_DDR_IMC) {
		i10nm_printk(KERN_ERR, "Need to make I10NM_NUM_DDR_IMC >= %d\n", imc_num); return -EINVAL;
	}
	if (cfg->ddr_imc_num != imc_num) {
		/* Store the number of present DDR memory controllers. */
		cfg->ddr_imc_num = imc_num;
		edac_dbg(2, "Set DDR MC number: %d", imc_num);
	}
	return 0; default:
	/*
	 * For other CPUs, the number of present DDR memory controllers
	 * is statically pre-configured in cfg->ddr_imc_num.
	 */ return 0;
	}
}
for (i = 0; i < I10NM_MAX_SAD; i++) {
I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg); if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) {
edac_dbg(2, "2-level memory configuration.\n"); returntrue;
}
}
}
returnfalse;
}
/* * Check whether the error comes from DDRT by ICX/Tremont/SPR model specific error code. * Refer to SDM vol3B 17.11.3/17.13.2 Intel IMC MC error codes for IA32_MCi_STATUS.
*/ staticbool i10nm_mscod_is_ddrt(u32 mscod)
{ switch (res_cfg->type) { case I10NM: switch (mscod) { case 0x0106: case 0x0107: case 0x0800: case 0x0804: case 0x0806 ... 0x0808: case 0x080a ... 0x080e: case 0x0810: case 0x0811: case 0x0816: case 0x081e: case 0x081f: returntrue;
}
break; case SPR: switch (mscod) { case 0x0800: case 0x0804: case 0x0806 ... 0x0808: case 0x080a ... 0x080e: case 0x0810: case 0x0811: case 0x0816: case 0x081e: case 0x081f: returntrue;
}
switch (res_cfg->type) { case I10NM: /* Check whether the bank is one of {13,14,17,18,21,22,25,26} */ if (!(ICX_IMCx_CHy & (1 << bank))) returnfalse; break; case SPR: if (bank < 13 || bank > 20) returnfalse; break; default: returnfalse;
}
/* DDRT errors can't be decoded from MCA bank registers */ if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT) returnfalse;
if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status))) returnfalse;
if (!res->dev) {
skx_printk(KERN_ERR, "No device for src_id %d imc %d\n",
m->socketid, res->imc); returnfalse;
}
returntrue;
}
/** * get_gnr_mdev() - Get the PCI device of the @logical_idx-th DDR memory controller. * * @d : The pointer to the structure of CPU socket EDAC device. * @logical_idx : The logical index of the present memory controller (0 ~ max present MC# - 1). * @physical_idx : To store the corresponding physical index of @logical_idx. * * RETURNS : The PCI device of the @logical_idx-th DDR memory controller, NULL on failure.
*/ staticstruct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *physical_idx)
{ #define GNR_MAX_IMC_PCI_CNT 28
struct pci_dev *mdev; int i, logical = 0;
/* * Detect present memory controllers from { PCI device: 8-5, function 7-1 }
*/ for (i = 0; i < GNR_MAX_IMC_PCI_CNT; i++) {
mdev = pci_get_dev_wrapper(d->seg,
d->bus[res_cfg->ddr_mdev_bdf.bus],
res_cfg->ddr_mdev_bdf.dev + i / 7,
res_cfg->ddr_mdev_bdf.fun + i % 7);
if (mdev) { if (logical == logical_idx) {
*physical_idx = i; return mdev;
}
pci_dev_put(mdev);
logical++;
}
}
return NULL;
}
/* Return the IMC MMIO offset; Granite Rapids D parts use a different one than other GNR parts. */
static u32 get_gnr_imc_mmio_offset(void)
{
	return boot_cpu_data.x86_vfm == INTEL_GRANITERAPIDS_D ?
	       I10NM_GNR_D_IMC_MMIO_OFFSET : I10NM_GNR_IMC_MMIO_OFFSET;
}
/** * get_ddr_munit() - Get the resource of the i-th DDR memory controller. * * @d : The pointer to the structure of CPU socket EDAC device. * @i : The index of the CPU socket relative DDR memory controller. * @offset : To store the MMIO offset of the i-th DDR memory controller. * @size : To store the MMIO size of the i-th DDR memory controller. * * RETURNS : The PCI device of the i-th DDR memory controller, NULL on failure.
*/ staticstruct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsignedlong *size)
{ struct pci_dev *mdev; int physical_idx;
u32 reg;
switch (res_cfg->type) { case GNR: if (I10NM_GET_IMC_BAR(d, 0, reg)) {
i10nm_printk(KERN_ERR, "Failed to get mc0 bar\n"); return NULL;
}
mdev = get_gnr_mdev(d, i, &physical_idx); if (!mdev) return NULL;
/** * i10nm_imc_absent() - Check whether the memory controller @imc is absent * * @imc : The pointer to the structure of memory controller EDAC device. * * RETURNS : true if the memory controller EDAC device is absent, false otherwise.
*/ staticbool i10nm_imc_absent(struct skx_imc *imc)
{
u32 mcmtr; int i;
switch (res_cfg->type) { case SPR: for (i = 0; i < res_cfg->ddr_chan_num; i++) {
mcmtr = I10NM_GET_MCMTR(imc, i);
edac_dbg(1, "ch%d mcmtr reg %x\n", i, mcmtr); if (mcmtr != ~0) returnfalse;
}
/* * Some workstations' absent memory controllers still * appear as PCIe devices, misleading the EDAC driver. * By observing that the MMIO registers of these absent * memory controllers consistently hold the value of ~0. * * We identify a memory controller as absent by checking * if its MMIO register "mcmtr" == ~0 in all its channels.
*/ returntrue; default: returnfalse;
}
}
list_for_each_entry(d, i10nm_edac_list, list) { if (!d->pcu_cr3) return -ENODEV;
if (!i10nm_check_hbm_imc(d)) {
i10nm_printk(KERN_DEBUG, "No hbm memory\n"); return -ENODEV;
}
if (I10NM_GET_SCK_BAR(d, reg)) {
i10nm_printk(KERN_ERR, "Failed to get socket bar\n"); return -ENODEV;
}
base = I10NM_GET_SCK_MMIO_BASE(reg);
if (I10NM_GET_HBM_IMC_BAR(d, reg)) {
i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n"); return -ENODEV;
}
base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg);
lmc = res_cfg->ddr_imc_num;
for (i = 0; i < res_cfg->hbm_imc_num; i++) {
mdev = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->hbm_mdev_bdf.bus],
res_cfg->hbm_mdev_bdf.dev + i / 4,
res_cfg->hbm_mdev_bdf.fun + i % 4);
if (i == 0 && !mdev) {
i10nm_printk(KERN_ERR, "No hbm mc found\n"); return -ENODEV;
} if (!mdev) continue;
d->imc[lmc].mdev = mdev;
off = i * I10NM_HBM_IMC_MMIO_SIZE;
edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n",
lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE);
/*
 * retry_rd_err_log: controls how the driver interacts with the per-IMC
 * retry read error log (RRL) registers; accepted values are documented in
 * the MODULE_PARM_DESC string below. 0444 = readable by all, not writable
 * at runtime.
 */
module_param(retry_rd_err_log, int, 0444);
MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)");

MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.