/*
 * Set by command line parameter. If BIOS has enabled the ECC, this override is
 * cleared to prevent re-enabling the hardware by this driver.
 *
 * Registered below as an int module parameter with permissions 0644.
 */
static int ecc_enable_override;
module_param(ecc_enable_override, int, 0644);
/* * * Depending on the family, F2 DCT reads need special handling: * * K8: has a single DCT only and no address offsets >= 0x100 * * F10h: each DCT has its own set of regs * DCT0 -> F2x040.. * DCT1 -> F2x140.. * * F16h: has only 1 DCT * * F15h: we select which DCT we access using F1x10C[DctCfgSel]
*/ staticinlineint amd64_read_dct_pci_cfg(struct amd64_pvt *pvt, u8 dct, int offset, u32 *val)
{ switch (pvt->fam) { case 0xf: if (dct || offset >= 0x100) return -EINVAL; break;
case 0x10: if (dct) { /* * Note: If ganging is enabled, barring the regs * F2x[1,0]98 and F2x[1,0]9C; reads reads to F2x1xx * return 0. (cf. Section 2.8.1 F10h BKDG)
*/ if (dct_ganging_enabled(pvt)) return 0;
offset += 0x100;
} break;
case 0x15: /* * F15h: F2x1xx addresses do not map explicitly to DCT1. * We should select which DCT we access using F1x10C[DctCfgSel]
*/
dct = (dct && pvt->model == 0x30) ? 3 : dct;
f15h_select_dct(pvt, dct); break;
/* * Memory scrubber control interface. For K8, memory scrubbing is handled by * hardware and can involve L2 cache, dcache as well as the main memory. With * F10, this is extended to L3 cache scrubbing on CPU models sporting that * functionality. * * This causes the "units" for the scrubbing speed to vary from 64 byte blocks * (dram) over to cache lines. This is nasty, so we will use bandwidth in * bytes/sec for the setting. * * Currently, we only do dram scrubbing. If the scrubbing is done in software on * other archs, we might not have access to the caches directly.
*/
/* * Scan the scrub rate mapping table for a close or matching bandwidth value to * issue. If requested is too big, then use last maximum value found.
*/ staticint __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
{
u32 scrubval; int i;
/* * map the configured rate (new_bw) to a value specific to the AMD64 * memory controller and apply to register. Search for the first * bandwidth entry that is greater or equal than the setting requested * and program that. If at last entry, turn off DRAM scrubbing. * * If no suitable bandwidth is found, turn off DRAM scrubbing entirely * by falling back to the last element in scrubrates[].
*/ for (i = 0; i < ARRAY_SIZE(scrubrates) - 1; i++) { /* * skip scrub rates which aren't recommended * (see F10 BKDG, F3x58)
*/ if (scrubrates[i].scrubval < min_rate) continue;
for (i = 0; i < ARRAY_SIZE(scrubrates); i++) { if (scrubrates[i].scrubval == scrubval) {
retval = scrubrates[i].bandwidth; break;
}
} return retval;
}
/* * returns true if the SysAddr given by sys_addr matches the * DRAM base/limit associated with node_id
*/ staticbool base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, u8 nid)
{
u64 addr;
/* The K8 treats this as a 40-bit value. However, bits 63-40 will be * all ones if the most significant implemented address bit is 1. * Here we discard bits 63-40. See section 3.4.2 of AMD publication * 24592: AMD x86-64 Architecture Programmer's Manual Volume 1 * Application Programming.
*/
addr = sys_addr & 0x000000ffffffffffull;
/* * Attempt to map a SysAddr to a node. On success, return a pointer to the * mem_ctl_info structure for the node that the SysAddr maps to. * * On failure, return NULL.
*/ staticstruct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
u64 sys_addr)
{ struct amd64_pvt *pvt;
u8 node_id;
u32 intlv_en, bits;
/* * Here we use the DRAM Base (section 3.4.4.1) and DRAM Limit (section * 3.4.4.2) registers to map the SysAddr to a node ID.
*/
pvt = mci->pvt_info;
/* * The value of this field should be the same for all DRAM Base * registers. Therefore we arbitrarily choose to read it from the * register for node 0.
*/
intlv_en = dram_intlv_en(pvt, 0);
if (intlv_en == 0) { for (node_id = 0; node_id < DRAM_RANGES; node_id++) { if (base_limit_match(pvt, sys_addr, node_id)) goto found;
} goto err_no_match;
}
for (node_id = 0; ; ) { if ((dram_intlv_sel(pvt, node_id) & intlv_en) == bits) break; /* intlv_sel field matches */
if (++node_id >= DRAM_RANGES) goto err_no_match;
}
/* sanity test for sys_addr */ if (unlikely(!base_limit_match(pvt, sys_addr, node_id))) {
amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address" "range for node %d with node interleaving enabled.\n",
__func__, sys_addr, node_id); return NULL;
}
found: return edac_mc_find((int)node_id);
err_no_match:
edac_dbg(2, "sys_addr 0x%lx doesn't match any node\n",
(unsignedlong)sys_addr);
return NULL;
}
/* * compute the CS base address of the @csrow on the DRAM controller @dct. * For details see F2x[5C:40] in the processor's BKDG
*/ staticvoid get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
u64 *base, u64 *mask)
{
u64 csbase, csmask, base_bits, mask_bits;
u8 addr_shift;
/*
 * Iterate @i over every chip select mask index of DCT @dct, i.e. over
 * [0, pvt->csels[dct].m_cnt).
 */
#define for_each_chip_select_mask(i, dct, pvt) \
	for (i = 0; i < (pvt)->csels[(dct)].m_cnt; i++)
/*
 * Iterate @i over [0, pvt->max_mcs). The macro relies on a variable named
 * 'pvt' being in scope at the point of use.
 */
#define for_each_umc(i) \
	for (i = 0; i < pvt->max_mcs; i++)
/* * @input_addr is an InputAddr associated with the node given by mci. Return the * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
*/ staticint input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
{ struct amd64_pvt *pvt; int csrow;
u64 base, mask;
pvt = mci->pvt_info;
for_each_chip_select(csrow, 0, pvt) { if (!csrow_enabled(csrow, 0, pvt)) continue;
return csrow;
}
}
edac_dbg(2, "no matching csrow for InputAddr 0x%lx (MC node %d)\n",
(unsignedlong)input_addr, pvt->mc_node_id);
return -1;
}
/* * Obtain info from the DRAM Hole Address Register (section 3.4.8, pub #26094) * for the node represented by mci. Info is passed back in *hole_base, * *hole_offset, and *hole_size. Function returns 0 if info is valid or 1 if * info is invalid. Info may be invalid for either of the following reasons: * * - The revision of the node is not E or greater. In this case, the DRAM Hole * Address Register does not exist. * * - The DramHoleValid bit is cleared in the DRAM Hole Address Register, * indicating that its contents are not valid. * * The values passed back in *hole_base, *hole_offset, and *hole_size are * complete 32-bit values despite the fact that the bitfields in the DHAR * only represent bits 31-24 of the base and offset values.
*/ staticint get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
u64 *hole_offset, u64 *hole_size)
{ struct amd64_pvt *pvt = mci->pvt_info;
/* only revE and later have the DRAM Hole Address Register */ if (pvt->fam == 0xf && pvt->ext_model < K8_REV_E) {
edac_dbg(1, " revision %d for node %d does not support DHAR\n",
pvt->ext_model, pvt->mc_node_id); return 1;
}
/* valid for Fam10h and above */ if (pvt->fam >= 0x10 && !dhar_mem_hoist_valid(pvt)) {
edac_dbg(1, " Dram Memory Hoisting is DISABLED on this system\n"); return 1;
}
if (!dhar_valid(pvt)) {
edac_dbg(1, " Dram Memory Hoisting is DISABLED on this node %d\n",
pvt->mc_node_id); return 1;
}
/* This node has Memory Hoisting */
/* +------------------+--------------------+--------------------+----- * | memory | DRAM hole | relocated | * | [0, (x - 1)] | [x, 0xffffffff] | addresses from | * | | | DRAM hole | * | | | [0x100000000, | * | | | (0x100000000+ | * | | | (0xffffffff-x))] | * +------------------+--------------------+--------------------+----- * * Above is a diagram of physical memory showing the DRAM hole and the * relocated addresses from the DRAM hole. As shown, the DRAM hole * starts at address x (the base address) and extends through address * 0xffffffff. The DRAM Hole Address Register (DHAR) relocates the * addresses in the hole so that they start at 0x100000000.
*/
/* * store 16 bit error injection vector which enables injecting errors to the * corresponding bit within the error injection word above. When used during a * DRAM ECC read, it holds the contents of the DRAM ECC bits.
*/ static ssize_t inject_ecc_vector_store(struct device *dev, struct device_attribute *mattr, constchar *data, size_t count)
{ struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; unsignedlong value; int ret;
ret = kstrtoul(data, 16, &value); if (ret < 0) return ret;
/* * Do a DRAM ECC read. Assemble staged values in the pvt area, format into * fields needed by the injection registers and read the NB Array Data Port.
*/ static ssize_t inject_read_store(struct device *dev, struct device_attribute *mattr, constchar *data, size_t count)
{ struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; unsignedlong value;
u32 section, word_bits; int ret;
ret = kstrtoul(data, 10, &value); if (ret < 0) return ret;
/* Form value to choose 16-byte section of cacheline */
section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
/* * Do a DRAM ECC write. Assemble staged values in the pvt area and format into * fields needed by the injection registers.
*/ static ssize_t inject_write_store(struct device *dev, struct device_attribute *mattr, constchar *data, size_t count)
{ struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info;
u32 section, word_bits, tmp; unsignedlong value; int ret;
ret = kstrtoul(data, 10, &value); if (ret < 0) return ret;
/* Form value to choose 16-byte section of cacheline */
section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
pr_notice_once("Don't forget to decrease MCE polling interval in\n" "/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n" "so that you can get the error report faster.\n");
on_each_cpu(disable_caches, NULL, 1);
/* Issue 'word' and 'bit' along with the READ request */
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
retry: /* wait until injection happens */
amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp); if (tmp & F10_NB_ARR_ECC_WR_REQ) {
cpu_relax(); goto retry;
}
/*
 * Return the DramAddr that the SysAddr given by @sys_addr maps to.  It is
 * assumed that sys_addr maps to the node given by mci.
 *
 * The first part of section 3.4.4 (p. 70) shows how the DRAM Base (section
 * 3.4.4.1) and DRAM Limit (section 3.4.4.2) registers are used to translate a
 * SysAddr to a DramAddr. If the DRAM Hole Address Register (DHAR) is enabled,
 * then it is also involved in translating a SysAddr to a DramAddr. Sections
 * 3.4.8 and 3.5.8.2 describe the DHAR and how it is used for memory hoisting.
 * These parts of the documentation are unclear. I interpret them as follows:
 *
 * When node n receives a SysAddr, it processes the SysAddr as follows:
 *
 * 1. It extracts the DRAMBase and DRAMLimit values from the DRAM Base and DRAM
 *    Limit registers for node n. If the SysAddr is not within the range
 *    specified by the base and limit values, then node n ignores the Sysaddr
 *    (since it does not map to node n). Otherwise continue to step 2 below.
 *
 * 2. If the DramHoleValid bit of the DHAR for node n is clear, the DHAR is
 *    disabled so skip to step 3 below. Otherwise see if the SysAddr is within
 *    the range of relocated addresses (starting at 0x100000000) from the DRAM
 *    hole. If not, skip to step 3 below. Else get the value of the
 *    DramHoleOffset field from the DHAR. To obtain the DramAddr, subtract the
 *    offset defined by this value from the SysAddr.
 *
 * 3. Obtain the base address for node n from the DRAMBase field of the DRAM
 *    Base register for node n. To obtain the DramAddr, subtract the base
 *    address from the SysAddr, as shown near the start of section 3.4.4 (p.70).
 */
static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
	int ret;

	dram_base = get_dram_base(pvt, pvt->mc_node_id);

	/* get_dram_hole_info() returning 0 means the hole info is valid. */
	ret = get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size);
	if (!ret) {
		/* Is sys_addr inside the window relocated above 4GB? */
		if ((sys_addr >= (1ULL << 32)) &&
		    (sys_addr < ((1ULL << 32) + hole_size))) {
			/* use DHAR to translate SysAddr to DramAddr */
			dram_addr = sys_addr - hole_offset;

			edac_dbg(2, "using DHAR to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
				 (unsigned long)sys_addr,
				 (unsigned long)dram_addr);

			return dram_addr;
		}
	}

	/*
	 * Translate the SysAddr to a DramAddr as shown near the start of
	 * section 3.4.4 (p. 70).  Although sys_addr is a 64-bit value, the k8
	 * only deals with 40-bit values.  Therefore we discard bits 63-40 of
	 * sys_addr below.  If bit 39 of sys_addr is 1 then the bits we
	 * discard are all 1s.  Otherwise the bits we discard are all 0s.  See
	 * section 3.4.2 of AMD publication 24592: AMD x86-64 Architecture
	 * Programmer's Manual Volume 1 Application Programming.
	 */
	dram_addr = (sys_addr & GENMASK_ULL(39, 0)) - dram_base;

	edac_dbg(2, "using DRAM Base register to translate SysAddr 0x%lx to DramAddr 0x%lx\n",
		 (unsigned long)sys_addr, (unsigned long)dram_addr);
	return dram_addr;
}
/*
 * @intlv_en is the value of the IntlvEn field from a DRAM Base register
 * (section 3.4.4.1). Return the number of bits from a SysAddr that are used
 * for node interleaving.
 *
 * The lookup table yields 0, 1, 2 or 3 for intlv_en values 0, 1, 3 and 7 and
 * 0 for every other value up to 7; anything above 7 trips BUG_ON().
 */
static int num_node_interleave_bits(unsigned intlv_en)
{
	static const int intlv_shift_table[] = { 0, 1, 0, 2, 0, 0, 0, 3 };
	int n;

	/* The table has eight entries; never index past it. */
	BUG_ON(intlv_en > 7);
	n = intlv_shift_table[intlv_en];
	return n;
}
/* Translate the DramAddr given by @dram_addr to an InputAddr. */ static u64 dram_addr_to_input_addr(struct mem_ctl_info *mci, u64 dram_addr)
{ struct amd64_pvt *pvt; int intlv_shift;
u64 input_addr;
pvt = mci->pvt_info;
/* * See the start of section 3.4.4 (p. 70, BKDG #26094, K8, revA-E) * concerning translating a DramAddr to an InputAddr.
*/
intlv_shift = num_node_interleave_bits(dram_intlv_en(pvt, 0));
input_addr = ((dram_addr >> intlv_shift) & GENMASK_ULL(35, 12)) +
(dram_addr & 0xfff);
/*
 * Translate the SysAddr represented by @sys_addr to an InputAddr.  It is
 * assumed that @sys_addr maps to the node given by mci.
 *
 * The translation is done in two steps: SysAddr -> DramAddr, then
 * DramAddr -> InputAddr.
 */
static u64 sys_addr_to_input_addr(struct mem_ctl_info *mci, u64 sys_addr)
{
	u64 input_addr;

	/*
	 * Compute the result before it is logged and returned; previously
	 * input_addr was used without ever being assigned.
	 */
	input_addr =
	    dram_addr_to_input_addr(mci, sys_addr_to_dram_addr(mci, sys_addr));

	edac_dbg(2, "SysAddr 0x%lx translates to InputAddr 0x%lx\n",
		 (unsigned long)sys_addr, (unsigned long)input_addr);

	return input_addr;
}
/*
 * Map the Error address to a PAGE and PAGE OFFSET.
 *
 * @error_address: address to split.
 * @err:           receives the page number (address >> PAGE_SHIFT) in ->page
 *                 and the low 32 address bits masked with ~PAGE_MASK in
 *                 ->offset.
 */
static inline void error_address_to_page_and_offset(u64 error_address,
						    struct err_info *err)
{
	err->page = (u32) (error_address >> PAGE_SHIFT);
	err->offset = ((u32) error_address) & ~PAGE_MASK;
}
/*
 * @sys_addr is an error address (a SysAddr) extracted from the MCA NB Address
 * Low (section 3.6.4.5) and MCA NB Address High (section 3.6.4.6) registers
 * of a node that detected an ECC memory error.  mci represents the node that
 * the error address maps to (possibly different from the node that detected
 * the error).  Return the number of the csrow that sys_addr maps to, or -1 on
 * error.
 */
static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
{
	int csrow;

	/*
	 * SysAddr -> InputAddr -> csrow. Previously csrow was compared and
	 * returned without ever being assigned.
	 */
	csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));

	if (csrow == -1)
		amd64_mc_err(mci, "Failed to translate InputAddr to csrow for "
				  "address 0x%lx\n", (unsigned long)sys_addr);
	return csrow;
}
/* * See AMD PPR DF::LclNodeTypeMap * * This register gives information for nodes of the same type within a system. * * Reading this register from a GPU node will tell how many GPU nodes are in the * system and what the lowest AMD Node ID value is for the GPU nodes. Use this * info to fixup the Linux logical "Node ID" value set in the AMD NB code and EDAC.
*/ staticstruct local_node_map {
u16 node_count;
u16 base_node_id;
} gpu_node_map;
/* * Mapping of nodes from hardware-provided AMD Node ID to a * Linux logical one is applicable for MI200 models. Therefore, * return early for other heterogeneous systems.
*/ if (pvt->F3->device != PCI_DEVICE_ID_AMD_MI200_DF_F3) return 0;
/* * Node ID 0 is reserved for CPUs. Therefore, a non-zero Node ID * means the values have been already cached.
*/ if (gpu_node_map.base_node_id) return 0;
pdev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F1, NULL); if (!pdev) {
ret = -ENODEV; goto out;
}
ret = pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp); if (ret) {
ret = pcibios_err_to_errno(ret); goto out;
}
/* * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs * are ECC capable.
*/ staticunsignedlong dct_determine_edac_cap(struct amd64_pvt *pvt)
{ unsignedlong edac_cap = EDAC_FLAG_NONE;
u8 bit;
/* Dump memory sizes for DIMM and its CSROWs */ for (dimm = 0; dimm < 4; dimm++) {
size0 = 0; if (dcsb[dimm * 2] & DCSB_CS_ENABLE) /* * For F15m60h, we need multiplier for LRDIMM cs_size * calculation. We pass dimm value to the dbam_to_cs * mapper so we can find the multiplier from the * corresponding DCSM.
*/
size0 = pvt->ops->dbam_to_cs(pvt, ctrl,
DBAM_DIMM(dimm, dbam),
dimm);
if (pvt->dram_type == MEM_LRDDR3) {
u32 dcsm = pvt->csels[chan].csmasks[0]; /* * It's assumed all LRDIMMs in a DCT are going to be of * same 'type' until proven otherwise. So, use a cs * value of '0' here to get dcsm value.
*/
edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3));
}
edac_dbg(1, "All DIMMs support ECC: %s\n", str_yes_no(dclr & BIT(19)));
/* * 3 Rank interleaving support. * There should be only three bases enabled and their two masks should * be equal.
*/
for_each_chip_select(base, ctrl, pvt)
count += csrow_enabled(base, ctrl, pvt);
if (count == 3 &&
pvt->csels[ctrl].csmasks[0] == pvt->csels[ctrl].csmasks[1]) {
edac_dbg(1, "3R interleaving in use.\n");
cs_mode |= CS_3R_INTERLEAVE;
}
/* * The number of zero bits in the mask is equal to the number of bits * in a full mask minus the number of bits in the current mask. * * The MSB is the number of bits in the full mask because BIT[0] is * always 0. * * In the special 3 Rank interleaving case, a single bit is flipped * without swapping with the most significant bit. This can be handled * by keeping the MSB where it is and ignoring the single zero bit.
*/
msb = fls(mask) - 1;
weight = hweight_long(mask);
num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
/* Take the number of zero bits off from the top of the mask. */
deinterleaved_mask = GENMASK(msb - num_zero_bits, 1);
edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask);
return (deinterleaved_mask >> 2) + 1;
}
staticint __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec, unsignedint cs_mode, int csrow_nr, int dimm)
{ int size;
staticint umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, unsignedint cs_mode, int csrow_nr)
{
u32 addr_mask = 0, addr_mask_sec = 0; int cs_mask_nr = csrow_nr; int dimm, size = 0;
/* No Chip Selects are enabled. */ if (!cs_mode) return size;
/* Requested size of an even CS but none are enabled. */ if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1)) return size;
/* Requested size of an odd CS but none are enabled. */ if (!(cs_mode & CS_ODD) && (csrow_nr & 1)) return size;
/* * Family 17h introduced systems with one mask per DIMM, * and two Chip Selects per DIMM. * * CS0 and CS1 -> MASK0 / DIMM0 * CS2 and CS3 -> MASK1 / DIMM1 * * Family 19h Model 10h introduced systems with one mask per Chip Select, * and two Chip Selects per DIMM. * * CS0 -> MASK0 -> DIMM0 * CS1 -> MASK1 -> DIMM0 * CS2 -> MASK2 -> DIMM1 * CS3 -> MASK3 -> DIMM1 * * Keep the mask number equal to the Chip Select number for newer systems, * and shift the mask number for older systems.
*/
dimm = csrow_nr >> 1;
if (!pvt->flags.zn_regs_v2)
cs_mask_nr >>= 1;
if (cs_mode & (CS_EVEN_PRIMARY | CS_ODD_PRIMARY))
addr_mask = pvt->csels[umc].csmasks[cs_mask_nr];
if (cs_mode & (CS_EVEN_SECONDARY | CS_ODD_SECONDARY))
addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr];
/* * Model 0x60h needs special handling: * * We use a Chip Select value of '0' to obtain dcsm. * Theoretically, it is possible to populate LRDIMMs of different * 'Rank' value on a DCT. But this is not the common case. So, * it's reasonable to assume all DIMMs are going to be of same * 'type' until proven otherwise.
*/
amd64_read_dct_pci_cfg(pvt, 0, DRAM_CONTROL, &dram_ctrl);
dcsm = pvt->csels[0].csmasks[0];
/* * Find out which node the error address belongs to. This may be * different from the node that detected the error.
*/
err->src_mci = find_mc_by_sys_addr(mci, sys_addr); if (!err->src_mci) {
amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
(unsignedlong)sys_addr);
err->err_code = ERR_NODE; return;
}
/* Now map the sys_addr to a CSROW */
err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr); if (err->csrow < 0) {
err->err_code = ERR_CSROW; return;
}
/* CHIPKILL enabled */ if (pvt->nbcfg & NBCFG_CHIPKILL) {
err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome); if (err->channel < 0) { /* * Syndrome didn't map, so we don't know which of the * 2 DIMMs is in error. So we need to ID 'both' of them * as suspect.
*/
amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - " "possible error reporting race\n",
err->syndrome);
err->err_code = ERR_CHANNEL; return;
}
} else { /* * non-chipkill ecc mode * * The k8 documentation is unclear about how to determine the * channel number when using non-chipkill memory. This method * was obtained from email communication with someone at AMD. * (Wish the email was placed in this comment - norsk)
*/
err->channel = ((sys_addr & BIT(3)) != 0);
}
}
staticint ddr2_cs_size(unsigned i, bool dct_width)
{ unsigned shift = 0;
if (i <= 2)
shift = i; elseif (!(i & 0x1))
shift = i >> 1; else
shift = (i + 1) >> 1;
if (!dct_ganging_enabled(pvt))
edac_dbg(0, " Address range split per DCT: %s\n",
str_yes_no(dct_high_range_enabled(pvt)));
edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n",
str_enabled_disabled(dct_data_intlv_enabled(pvt)),
str_yes_no(dct_memory_cleared(pvt)));
if (hi_rng) { /* * if * base address of high range is below 4Gb * (bits [47:27] at [31:11]) * DRAM address space on this DCT is hoisted above 4Gb && * sys_addr > 4Gb * * remove hole offset from sys_addr * else * remove high range offset from sys_addr
*/ if ((!(dct_sel_base_addr >> 16) ||
dct_sel_base_addr < dhar_base(pvt)) &&
dhar_valid(pvt) &&
(sys_addr >= BIT_64(32)))
chan_off = hole_off; else
chan_off = dct_sel_base_off;
} else { /* * if * we have a valid hole && * sys_addr > 4Gb * * remove hole * else * remove dram base to normalize to DCT address
*/ if (dhar_valid(pvt) && (sys_addr >= BIT_64(32)))
chan_off = hole_off; else
chan_off = dram_base;
}
/* * checks if the csrow passed in is marked as SPARED, if so returns the new * spare row
*/ staticint f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
{ int tmp_cs;
if (online_spare_swap_done(pvt, dct) &&
csrow == online_spare_bad_dramcs(pvt, dct)) {
/* * Iterate over the DRAM DCT "base" and "mask" registers looking for a * SystemAddr match on the specified 'ChannelSelect' and 'NodeID' * * Return: * -EINVAL: NOT FOUND * 0..csrow = Chip-Select Row
*/ staticint f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct)
{ struct mem_ctl_info *mci; struct amd64_pvt *pvt;
u64 cs_base, cs_mask; int cs_found = -EINVAL; int csrow;
mci = edac_mc_find(nid); if (!mci) return cs_found;
/* * See F2x10C. Non-interleaved graphics framebuffer memory under the 16G is * swapped with a region located at the bottom of memory so that the GPU can use * the interleaved region and thus two channels.
*/ static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr)
{
u32 swap_reg, swap_base, swap_limit, rgn_size, tmp_addr;
if (pvt->fam == 0x10) { /* only revC3 and revE have that feature */ if (pvt->model < 4 || (pvt->model < 0xa && pvt->stepping < 3)) return sys_addr;
}
/* For a given @dram_range, check if @sys_addr falls within it. */ staticint f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
u64 sys_addr, int *chan_sel)
{ int cs_found = -EINVAL;
u64 chan_addr;
u32 dct_sel_base;
u8 channel; bool high_range = false;
/* * check whether addresses >= DctSelBaseAddr[47:27] are to be used to * select between DCT0 and DCT1.
*/ if (dct_high_range_enabled(pvt) &&
!dct_ganging_enabled(pvt) &&
((sys_addr >> 27) >= (dct_sel_base >> 11)))
high_range = true;
/* * Find Chip select: * if channel = 3, then alias it to 1. This is because, in F15 M30h, * there is support for 4 DCT's, but only 2 are currently functional. * They are DCT0 and DCT3. But we have read all registers of DCT3 into * pvt->csels[1]. So we need to use '1' here to get correct info. * Refer F15 M30h BKDG Section 2.10 and 2.10.3 for clarifications.
*/
alias_channel = (channel == 3) ? 1 : channel;
/* * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW). * * The @sys_addr is usually an error address received from the hardware * (MCX_ADDR).
*/ staticvoid f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, struct err_info *err)
{ struct amd64_pvt *pvt = mci->pvt_info;
/* * We need the syndromes for channel detection only when we're * ganged. Otherwise @chan should already contain the channel at * this point.
*/ if (dct_ganging_enabled(pvt))
err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
}
/* * These are tables of eigenvectors (one per line) which can be used for the * construction of the syndrome tables. The modified syndrome search algorithm * uses those to find the symbol in error and thus the DIMM. * * Algorithm courtesy of Ross LaFetra from AMD.
*/ staticconst u16 x4_vectors[] = {
0x2f57, 0x1afe, 0x66cc, 0xdd88,
0x11eb, 0x3396, 0x7f4c, 0xeac8,
0x0001, 0x0002, 0x0004, 0x0008,
0x1013, 0x3032, 0x4044, 0x8088,
0x106b, 0x30d6, 0x70fc, 0xe0a8,
0x4857, 0xc4fe, 0x13cc, 0x3288,
0x1ac5, 0x2f4a, 0x5394, 0xa1e8,
0x1f39, 0x251e, 0xbd6c, 0x6bd8,
0x15c1, 0x2a42, 0x89ac, 0x4758,
0x2b03, 0x1602, 0x4f0c, 0xca08,
0x1f07, 0x3a0e, 0x6b04, 0xbd08,
0x8ba7, 0x465e, 0x244c, 0x1cc8,
0x2b87, 0x164e, 0x642c, 0xdc18,
0x40b9, 0x80de, 0x1094, 0x20e8,
0x27db, 0x1eb6, 0x9dac, 0x7b58,
0x11c1, 0x2242, 0x84ac, 0x4c58,
0x1be5, 0x2d7a, 0x5e34, 0xa718,
0x4b39, 0x8d1e, 0x14b4, 0x28d8,
0x4c97, 0xc87e, 0x11fc, 0x33a8,
0x8e97, 0x497e, 0x2ffc, 0x1aa8,
0x16b3, 0x3d62, 0x4f34, 0x8518,
0x1e2f, 0x391a, 0x5cac, 0xf858,
0x1d9f, 0x3b7a, 0x572c, 0xfe18,
0x15f5, 0x2a5a, 0x5264, 0xa3b8,
0x1dbb, 0x3b66, 0x715c, 0xe3f8,
0x4397, 0xc27e, 0x17fc, 0x3ea8,
0x1617, 0x3d3e, 0x6464, 0xb8b8,
0x23ff, 0x12aa, 0xab6c, 0x56d8,
0x2dfb, 0x1ba6, 0x913c, 0x7328,
0x185d, 0x2ca6, 0x7914, 0x9e28,
0x171b, 0x3e36, 0x7d7c, 0xebe8,
0x4199, 0x82ee, 0x19f4, 0x2e58,
0x4807, 0xc40e, 0x130c, 0x3208,
0x1905, 0x2e0a, 0x5804, 0xac08,
0x213f, 0x132a, 0xadfc, 0x5ba8,
0x19a9, 0x2efe, 0xb5cc, 0x6f88,
};
/*
 * To find the UMC channel represented by this bank we need to match on its
 * instance_id. The instance_id of a bank is held in the lower 32 bits of its
 * IPID.
 *
 * Currently, we can derive the channel number by looking at the 6th nibble in
 * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
 * number.
 *
 * For DRAM ECC errors, the Chip Select number is given in bits [2:0] of
 * the MCA_SYND[ErrorInformation] field.
 *
 * @m:   machine check record supplying ->ipid and ->synd.
 * @err: receives the decoded ->channel and ->csrow.
 */
static void umc_get_err_info(struct mce *m, struct err_info *err)
{
	/* Lower 32 bits of IPID, shifted down by 20 bits. */
	err->channel = (m->ipid & GENMASK(31, 0)) >> 20;
	/* Low three bits of the syndrome. */
	err->csrow = m->synd & 0x7;
}
/* * Use pvt->F3 which contains the F3 CPU PCI device to get the related * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
*/ staticint
reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2)
{ /* Reserve the ADDRESS MAP Device */
pvt->F1 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3); if (!pvt->F1) {
edac_dbg(1, "F1 not found: device 0x%x\n", pci_id1); return -ENODEV;
}
/* Reserve the DCT Device */
pvt->F2 = pci_get_related_function(pvt->F3->vendor, pci_id2, pvt->F3); if (!pvt->F2) {
pci_dev_put(pvt->F1);
pvt->F1 = NULL;
edac_dbg(1, "F2 not found: device 0x%x\n", pci_id2); return -ENODEV;
}
amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); /* F16h has only DCT0, so no need to read dbam1. */ if (pvt->fam != 0x16)
amd64_read_dct_pci_cfg(pvt, 1, DBAM0, &pvt->dbam1);
/* F10h, revD and later can do x8 ECC too. */ if ((pvt->fam > 0x10 || pvt->model > 7) && tmp & BIT(25))
pvt->ecc_sym_sz = 8;
}
}
/* * Retrieve the hardware registers of the memory controller.
*/ staticvoid umc_read_mc_regs(struct amd64_pvt *pvt)
{
u8 nid = pvt->mc_node_id; struct amd64_umc *umc;
u32 i, tmp, umc_base;
/* Read registers from each UMC */
for_each_umc(i) {
umc_base = get_umc_base(i);
umc = &pvt->umc[i];
if (!amd_smn_read(nid, umc_base + get_umc_reg(pvt, UMCCH_DIMM_CFG), &tmp))
umc->dimm_cfg = tmp;
if (!amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &tmp))
umc->umc_cfg = tmp;
if (!amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &tmp))
umc->sdp_ctrl = tmp;
if (!amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &tmp))
umc->ecc_ctrl = tmp;
/* * Retrieve the hardware registers of the memory controller (this includes the * 'Address Map' and 'Misc' device regs)
*/ staticvoid dct_read_mc_regs(struct amd64_pvt *pvt)
{ unsignedint range;
u64 msr_val;
/* * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since * those are Read-As-Zero.
*/
rdmsrq(MSR_K8_TOP_MEM1, pvt->top_mem);
edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem);
/* Check first whether TOP_MEM2 is enabled: */
rdmsrq(MSR_AMD64_SYSCFG, msr_val); if (msr_val & BIT(21)) {
rdmsrq(MSR_K8_TOP_MEM2, pvt->top_mem2);
edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
} else {
edac_dbg(0, " TOP_MEM2 disabled\n");
}
amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap);
read_dram_ctl_register(pvt);
for (range = 0; range < DRAM_RANGES; range++) {
u8 rw;
/* read settings for this DRAM range */
read_dram_base_limit_regs(pvt, range);
/* * NOTE: CPU Revision Dependent code * * Input: * @csrow_nr ChipSelect Row Number (0..NUM_CHIPSELECTS-1) * k8 private pointer to --> * DRAM Bank Address mapping register * node_id * DCL register where dual_channel_active is * * The DBAM register consists of 4 sets of 4 bits each definitions: * * Bits: CSROWs * 0-3 CSROWs 0 and 1 * 4-7 CSROWs 2 and 3 * 8-11 CSROWs 4 and 5 * 12-15 CSROWs 6 and 7 * * Values range from: 0 to 15 * The meaning of the values depends on CPU revision and dual-channel state, * see relevant BKDG more info. * * The memory controller provides for total of only 8 CSROWs in its current * architecture. Each "pair" of CSROWs normally represents just one DIMM in * single channel or two (2) DIMMs in dual channel mode. * * The following code logic collapses the various tables for CSROW based on CPU * revision. * * Returns: * The number of PAGE_SIZE pages on the specified CSROW number it * encompasses *
*/ static u32 dct_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
{
u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
u32 cs_mode, nr_pages;
/* * We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
*/
for_each_chip_select(i, 0, pvt) { bool row_dct0 = !!csrow_enabled(i, 0, pvt); bool row_dct1 = false;
if (pvt->fam != 0xf)
row_dct1 = !!csrow_enabled(i, 1, pvt);
if (on) { if (reg->l & MSR_MCGCTL_NBE)
s->flags.nb_mce_enable = 1;
reg->l |= MSR_MCGCTL_NBE;
} else { /* * Turn off NB MCE reporting only when it was off before
*/ if (!s->flags.nb_mce_enable)
reg->l &= ~MSR_MCGCTL_NBE;
}
}
wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
nb_mce_en = nb_mce_bank_enabled_on_node(nid); if (!nb_mce_en)
edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n",
MSR_IA32_MCG_CTL, nid);
edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, str_enabled_disabled(ecc_en));
/* * The CPUs have one channel per UMC, so UMC number is equivalent to a * channel number. The GPUs have 8 channels per UMC, so the UMC number no * longer works as a channel number. * * The channel number within a GPU UMC is given in MCA_IPID[15:12]. * However, the IDs are split such that two UMC values go to one UMC, and * the channel numbers are split in two groups of four. * * Refer to comment on gpu_get_umc_base(). * * For example, * UMC0 CH[3:0] = 0x0005[3:0]000 * UMC0 CH[7:4] = 0x0015[3:0]000 * UMC1 CH[3:0] = 0x0025[3:0]000 * UMC1 CH[7:4] = 0x0035[3:0]000
*/ staticvoid gpu_get_err_info(struct mce *m, struct err_info *err)
{
u8 ch = (m->ipid & GENMASK(31, 0)) >> 20;
u8 phy = ((m->ipid >> 12) & 0xf);
/*
 * ECC is enabled by default on GPU nodes.
 *
 * @pvt is not inspected; the function unconditionally reports true.
 */
static bool gpu_ecc_enabled(struct amd64_pvt *pvt)
{
	return true;
}
staticinline u32 gpu_get_umc_base(struct amd64_pvt *pvt, u8 umc, u8 channel)
{ /* * On CPUs, there is one channel per UMC, so UMC numbering equals * channel numbering. On GPUs, there are eight channels per UMC, * so the channel numbering is different from UMC numbering. * * On CPU nodes channels are selected in 6th nibble * UMC chY[3:0]= [(chY*2 + 1) : (chY*2)]50000; * * On GPU nodes channels are selected in 3rd nibble * HBM chX[3:0]= [Y ]5X[3:0]000; * HBM chX[7:4]= [Y+1]5X[3:0]000 * * On MI300 APU nodes, same as GPU nodes but channels are selected * in the base address of 0x90000
*/
umc *= 2;
/* * Decide on which ops group to use here and do any family/model * overrides below.
*/ if (pvt->fam >= 0x17)
pvt->ops = &umc_ops; else
pvt->ops = &dct_ops;
/* * For heterogeneous and APU models EDAC CHIP_SELECT and CHANNEL layers * should be swapped to fit into the layers.
*/ staticunsignedint get_layer_size(struct amd64_pvt *pvt, u8 layer)
{ bool is_gpu = (pvt->ops == &gpu_ops);
ret = -ENOMEM;
s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL); if (!s) goto err_out;
ecc_stngs[nid] = s;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); if (!pvt) goto err_settings;
pvt->mc_node_id = nid;
pvt->F3 = F3;
ret = per_family_init(pvt); if (ret < 0) goto err_enable;
ret = pvt->ops->hw_info_get(pvt); if (ret < 0) goto err_enable;
ret = 0; if (!instance_has_memory(pvt)) {
amd64_info("Node %d: No DIMMs detected.\n", nid); goto err_enable;
}
if (!pvt->ops->ecc_enabled(pvt)) {
ret = -ENODEV;
if (!ecc_enable_override) goto err_enable;
if (boot_cpu_data.x86 >= 0x17) {
amd64_warn("Forcing ECC on is not recommended on newer systems. Please enable ECC in BIOS."); goto err_enable;
} else
amd64_warn("Forcing ECC on!\n");
if (!enable_ecc_error_reporting(s, nid, F3)) goto err_enable;
}
ret = init_one_instance(pvt); if (ret < 0) {
amd64_err("Error probing instance: %d\n", nid);
if (boot_cpu_data.x86 < 0x17)
restore_ecc_error_reporting(s, nid, F3);
/* Module metadata: license, author list and one-line description. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, Dave Peterson, Thayne Harbaugh; AMD");
MODULE_DESCRIPTION("MC support for AMD64 memory controllers");
¤ Diese beiden folgenden Angebotsgruppen bietet das Unternehmen0.55Angebot
(Wie Sie bei der Firma Beratungs- und Dienstleistungen beauftragen können 2026-04-29)
¤