// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology(RDT)
 * - Monitoring code
 *
 * Copyright (C) 2017 Intel Corporation
 *
 * Author:
 *    Vikas Shivappa <vikas.shivappa@intel.com>
 *
 * This replaces the cqm.c based on perf but we reuse a lot of
 * code and data structures originally from Peter Zijlstra and Matt Fleming.
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual June 2016, volume 3, section 17.17.
 */
/* * The correction factor table is documented in Documentation/filesystems/resctrl.rst. * If rmid > rmid threshold, MBM total and local values should be multiplied * by the correction factor. * * The original table is modified for better code: * * 1. The threshold 0 is changed to rmid count - 1 so don't do correction * for the case. * 2. MBM total and local correction table indexed by core counter which is * equal to (x86_cache_max_rmid + 1) / 8 - 1 and is from 0 up to 27. * 3. The correction factor is normalized to 2^20 (1048576) so it's faster * to calculate corrected value by shifting: * corrected_value = (original_value * correction_factor) >> 20
*/ staticconststruct mbm_correction_factor_table {
u32 rmidthreshold;
u64 cf;
} mbm_cf_table[] __initconst = {
{7, CF(1.000000)},
{15, CF(1.000000)},
{15, CF(0.969650)},
{31, CF(1.000000)},
{31, CF(1.066667)},
{31, CF(0.969650)},
{47, CF(1.142857)},
{63, CF(1.000000)},
{63, CF(1.185115)},
{63, CF(1.066553)},
{79, CF(1.454545)},
{95, CF(1.000000)},
{95, CF(1.230769)},
{95, CF(1.142857)},
{95, CF(1.066667)},
{127, CF(1.000000)},
{127, CF(1.254863)},
{127, CF(1.185255)},
{151, CF(1.000000)},
{127, CF(1.066667)},
{167, CF(1.000000)},
{159, CF(1.454334)},
{183, CF(1.000000)},
{127, CF(0.969744)},
{191, CF(1.280246)},
{191, CF(1.230921)},
{215, CF(1.000000)},
{191, CF(1.143118)},
};
/* * When Sub-NUMA Cluster (SNC) mode is not enabled (as indicated by * "snc_nodes_per_l3_cache == 1") no translation of the RMID value is * needed. The physical RMID is the same as the logical RMID. * * On a platform with SNC mode enabled, Linux enables RMID sharing mode * via MSR 0xCA0 (see the "RMID Sharing Mode" section in the "Intel * Resource Director Technology Architecture Specification" for a full * description of RMID sharing mode). * * In RMID sharing mode there are fewer "logical RMID" values available * to accumulate data ("physical RMIDs" are divided evenly between SNC * nodes that share an L3 cache). Linux creates an rdt_mon_domain for * each SNC node. * * The value loaded into IA32_PQR_ASSOC is the "logical RMID". * * Data is collected independently on each SNC node and can be retrieved * using the "physical RMID" value computed by this function and loaded * into IA32_QM_EVTSEL. @cpu can be any CPU in the SNC node. * * The scope of the IA32_QM_EVTSEL and IA32_QM_CTR MSRs is at the L3 * cache. So a "physical RMID" may be read from any CPU that shares * the L3 cache with the desired SNC node, not just from a CPU in * the specific SNC node.
*/ staticint logical_rmid_to_physical_rmid(int cpu, int lrmid)
{ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
/* * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured * with a valid event code for supported resource type and the bits * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID, * IA32_QM_CTR.data (bits 61:0) reports the monitored data. * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) * are error bits.
*/
wrmsr(MSR_IA32_QM_EVTSEL, eventid, prmid);
rdmsrq(MSR_IA32_QM_CTR, msr_val);
if (msr_val & RMID_VAL_ERROR) return -EIO; if (msr_val & RMID_VAL_UNAVAIL) return -EINVAL;
*val = msr_val; return 0;
}
staticstruct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom,
u32 rmid, enum resctrl_event_id eventid)
{ switch (eventid) { case QOS_L3_OCCUP_EVENT_ID: return NULL; case QOS_L3_MBM_TOTAL_EVENT_ID: return &hw_dom->arch_mbm_total[rmid]; case QOS_L3_MBM_LOCAL_EVENT_ID: return &hw_dom->arch_mbm_local[rmid]; default: /* Never expect to get here */
WARN_ON_ONCE(1); return NULL;
}
}
am = get_arch_mbm_state(hw_dom, rmid, eventid); if (am) {
memset(am, 0, sizeof(*am));
prmid = logical_rmid_to_physical_rmid(cpu, rmid); /* Record any initial, non-zero count value. */
__rmid_read_phys(prmid, eventid, &am->prev_msr);
}
}
/* * Assumes that hardware counters are also reset and thus that there is * no need to record initial non-zero counts.
*/ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d)
{ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
if (resctrl_arch_is_mbm_total_enabled())
memset(hw_dom->arch_mbm_total, 0, sizeof(*hw_dom->arch_mbm_total) * r->num_rmid);
if (resctrl_arch_is_mbm_local_enabled())
memset(hw_dom->arch_mbm_local, 0, sizeof(*hw_dom->arch_mbm_local) * r->num_rmid);
}
/*
 * Read the counter for @eventid and logical RMID @rmid in domain @d.
 *
 * @r:       resource being monitored, passed on to get_corrected_val()
 * @d:       monitoring domain; one CPU from its cpu_mask is used to translate
 *           the logical RMID into a physical RMID
 * @unused:  not referenced by this implementation
 * @rmid:    logical RMID to read
 * @eventid: monitoring event to read
 * @val:     receives the corrected counter value; written only on success
 * @ignored: not referenced by this implementation
 *
 * Return: 0 on success, otherwise the error returned by __rmid_read_phys().
 * On -EINVAL the saved prev_msr for this RMID/event, if such state exists,
 * is reset to 0.
 */
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
			   u32 unused, u32 rmid, enum resctrl_event_id eventid,
			   u64 *val, void *ignored)
{
	struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
	int cpu = cpumask_any(&d->hdr.cpu_mask);
	struct arch_mbm_state *am;
	u64 msr_val;
	u32 prmid;
	int ret;

	resctrl_arch_rmid_read_context_check();

	prmid = logical_rmid_to_physical_rmid(cpu, rmid);
	ret = __rmid_read_phys(prmid, eventid, &msr_val);

	if (!ret) {
		*val = get_corrected_val(r, d, rmid, eventid, msr_val);
	} else if (ret == -EINVAL) {
		/*
		 * NOTE(review): presumably drops the stale baseline so a later
		 * successful read does not compute a delta against it - confirm.
		 */
		am = get_arch_mbm_state(hw_dom, rmid, eventid);
		if (am)
			am->prev_msr = 0;
	}

	return ret;
}
/*
 * MSR_RMID_SNC_CONFIG comes out of power-on reset with the value 0x1,
 * meaning that RMIDs are configured in legacy mode. In that mode the RMIDs
 * are partitioned between SNC nodes, so a user would have to know which
 * RMID is allocated to a task; that does not fit Linux resctrl semantics.
 *
 * Clearing bit 0 switches the RMID counters to RMID sharing mode, which
 * suits Linux better: the RMID space is divided between all SNC nodes and
 * the RMIDs are renumbered to start from zero in each node when counting
 * operations from tasks. Code that reads the counters must adjust the RMID
 * counter number for the SNC node; logical_rmid_to_physical_rmid() does
 * that.
 */
void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d)
{
	/* Only touch the MSR when an L3 cache spans more than one SNC node. */
	if (snc_nodes_per_l3_cache <= 1)
		return;

	msr_clear_bit(MSR_RMID_SNC_CONFIG, 0);
}
/* CPU models that support MSR_RMID_SNC_CONFIG */ staticconststruct x86_cpu_id snc_cpu_ids[] __initconst = {
X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0),
X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0),
X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0),
X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0),
{}
};
/* * There isn't a simple hardware bit that indicates whether a CPU is running * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the * number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in * the same NUMA node as CPU0. * It is not possible to accurately determine SNC state if the system is * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes * to L3 caches. It will be OK if system is booted with hyperthreading * disabled (since this doesn't affect the ratio).
*/ static __init int snc_get_config(void)
{ struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE); const cpumask_t *node0_cpumask; int cpus_per_node, cpus_per_l3; int ret;
if (!x86_match_cpu(snc_cpu_ids) || !ci) return 1;
cpus_read_lock(); if (num_online_cpus() != num_present_cpus())
pr_warn("Some CPUs offline, SNC detection may be incorrect\n");
cpus_read_unlock();
/* * A reasonable upper limit on the max threshold is the number * of lines tagged per RMID if all RMIDs have the same number of * lines tagged in the LLC. * * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
*/
threshold = resctrl_rmid_realloc_limit / r->num_rmid;
/* * Because num_rmid may not be a power of two, round the value * to the nearest multiple of hw_res->mon_scale so it matches a * value the hardware will measure. mon_scale may not be a power of 2.
*/
resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold);
if (rdt_cpu_has(X86_FEATURE_BMEC)) {
u32 eax, ebx, ecx, edx;
/* Detect list of bandwidth sources that can be tracked */
cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
r->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS;
}
r->mon_capable = true;
return 0;
}
void __init intel_rdt_mbm_apply_quirk(void)
{ int cf_index;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.