if (!checked_chas) { int fd = perf_pmu__event_source_devices_fd(); struct io_dir dir; struct io_dirent64 *dent;
if (fd < 0) return -1;
io_dir__init(&dir, fd);
while ((dent = io_dir__readdir(&dir)) != NULL) { /* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */ if (starts_with(dent->d_name, "uncore_cha_"))
num_chas++;
}
close(fd);
checked_chas = true;
} return num_chas;
}
#define MAX_SNCS 6
/*
 * uncore_cha_snc - compute which sub-NUMA cluster (SNC) a CHA PMU belongs to.
 * @pmu: an uncore CHA PMU whose name has the form "uncore_cha_<N>".
 *
 * CHA SNC numbers are assumed ordered to correspond with the CHA numbers:
 * with num_cha CHAs split evenly across snc_nodes clusters, CHA N belongs
 * to SNC (N / (num_cha / snc_nodes)).
 *
 * Returns the SNC index in [0, MAX_SNCS), or 0 when SNC is disabled or any
 * unexpected condition is met (no CHAs found, unparsable PMU name, index
 * out of range) — 0 means "no cpumask adjustment".
 */
static int uncore_cha_snc(struct perf_pmu *pmu)
{
	unsigned int cha_num;
	int num_cha, chas_per_node, cha_snc;
	int snc_nodes = snc_nodes_per_l3_cache();

	/* Without sub-NUMA clustering there is nothing to compute. */
	if (snc_nodes <= 1)
		return 0;

	num_cha = num_chas();
	if (num_cha <= 0) {
		pr_warning("Unexpected: no CHAs found\n");
		return 0;
	}

	/* Extract the CHA number from the PMU name to compute its SNC. */
	if (sscanf(pmu->name, "uncore_cha_%u", &cha_num) != 1) {
		pr_warning("Unexpected: unable to compute CHA number '%s'\n", pmu->name);
		return 0;
	}
	chas_per_node = num_cha / snc_nodes;
	cha_snc = cha_num / chas_per_node;

	/* Range check cha_snc for unexpected out of bounds. */
	return cha_snc >= MAX_SNCS ? 0 : cha_snc;
}
/*
 * NOTE(review): this chunk appears to be the tail of a function — presumably
 * uncore_cha_imc_compute_cpu_adjust(int pmu_snc), given the call at the
 * caller below — whose header and local declarations (checked_cpu_adjust[],
 * cpu_adjust[], node_path[], node_cpus) were lost in extraction. Confirm
 * against the original source. Code left byte-identical; comments only.
 */
/* Was the adjustment for this SNC already computed on an earlier call? */
if (checked_cpu_adjust[pmu_snc])
	return cpu_adjust[pmu_snc];

/* SNC0 doesn't need an adjust: its CPUs already start the cpumask. */
if (pmu_snc == 0) {
	cpu_adjust[0] = 0;
	checked_cpu_adjust[0] = true;
	return 0;
}

/*
 * Use NUMA topology to compute the first CPU of the NUMA node; we want to
 * adjust CPU 0 to be this, and similarly for other CPUs if there is >1
 * socket.
 */
/* Single-digit node numbers only — node_path has room for one digit. */
assert(pmu_snc >= 0 && pmu_snc <= 9);
/*
 * Shift node0 to be node<pmu_snc> by bumping the digit character.
 * NOTE(review): offset 24 presumably indexes the '0' in a sysfs path such
 * as "devices/system/node/node0/cpulist" — confirm against the (not
 * visible) node_path declaration.
 */
node_path[24] += pmu_snc;
node_cpus = read_sysfs_cpu_map(node_path);
/* The adjustment is the first CPU of the target node (negative on failure). */
cpu_adjust[pmu_snc] = perf_cpu_map__cpu(node_cpus, 0).cpu;
if (cpu_adjust[pmu_snc] < 0) {
	pr_debug("Failed to read valid CPU list from /%s\n", node_path);
	/* Fall back to no adjustment; deliberately NOT cached so a later
	 * call may retry the sysfs read. */
	cpu_adjust[pmu_snc] = 0;
} else {
	checked_cpu_adjust[pmu_snc] = true;
}
perf_cpu_map__put(node_cpus);
return cpu_adjust[pmu_snc];
}
/*
 * NOTE(review): 'staticvoid', 'staticstruct' and 'elseif' below look like
 * whitespace lost in extraction ('static void', 'static struct', 'else if')
 * — confirm against the original source; code tokens are left byte-identical
 * here, only comments were added. The function also appears truncated: its
 * closing brace (and any statements after the loop, e.g. installing the
 * adjusted map into pmu->cpus) are not visible in this chunk.
 */
staticvoid gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
{
	/*
	 * With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
	 * topology. For example, a two socket graniterapids machine may be set
	 * up with 3-way SNC meaning there are 6 NUMA nodes that should be
	 * displayed with --per-node. The cpumask of the CHA and IMC PMUs
	 * reflects per-socket information meaning, for example, uncore_cha_60
	 * on a two socket graniterapids machine with 120 cores per socket will
	 * have a cpumask of "0,120". This cpumask needs adjusting to "40,160"
	 * to reflect that uncore_cha_60 is used for the 2nd SNC of each
	 * socket. Without the adjustment events on uncore_cha_60 will appear in
	 * node 0 and node 3 (in our example 2 socket 3-way set up), but with
	 * the adjustment they will appear in node 1 and node 4. The number of
	 * CHAs is typically larger than the number of cores. The CHA numbers
	 * are assumed to split evenly and in order wrt core numbers. There are
	 * fewer memory IMC PMUs than cores and mapping is handled using lookup
	 * tables.
	 */
	/* Adjusted cpumaps are cached per SNC, separately for CHA and IMC. */
	staticstruct perf_cpu_map *cha_adjusted[MAX_SNCS];
	staticstruct perf_cpu_map *imc_adjusted[MAX_SNCS];
	struct perf_cpu_map **adjusted = cha ? cha_adjusted : imc_adjusted;
	int idx, pmu_snc, cpu_adjust;
	struct perf_cpu cpu;
	bool alloc;

	/*
	 * Cpus from the kernel hold the first CPU of each socket, e.g. "0,120";
	 * anything else is unexpected, so leave the cpumask untouched.
	 */
	if (perf_cpu_map__cpu(pmu->cpus, 0).cpu != 0) {
		pr_debug("Ignoring cpumask adjust for %s as unexpected first CPU\n", pmu->name);
		return;
	}

	/* Which SNC does this PMU serve? SNC0 needs no adjustment. */
	pmu_snc = cha ? uncore_cha_snc(pmu) : uncore_imc_snc(pmu);
	if (pmu_snc == 0) {
		return;
	}

	/* First PMU for this SNC? Then compute and cache the adjusted map. */
	alloc = adjusted[pmu_snc] == NULL;
	if (alloc) {
		/* Hold onto the perf_cpu_map globally to avoid recomputation. */
		cpu_adjust = uncore_cha_imc_compute_cpu_adjust(pmu_snc);
		adjusted[pmu_snc] = perf_cpu_map__empty_new(perf_cpu_map__nr(pmu->cpus));
		if (!adjusted[pmu_snc])
			return;
	}

	/*
	 * Compute the new cpu map values, or if not allocating, assert that
	 * they match expectations. Asserts will be removed to avoid overhead
	 * in NDEBUG builds.
	 */
	perf_cpu_map__for_each_cpu(cpu, idx, pmu->cpus) {
		if (alloc) {
			RC_CHK_ACCESS(adjusted[pmu_snc])->map[idx].cpu = cpu.cpu + cpu_adjust;
		} elseif (idx == 0) {
			/* Recover the adjustment from the cached map's first entry. */
			cpu_adjust = perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu - cpu.cpu;
			assert(uncore_cha_imc_compute_cpu_adjust(pmu_snc) == cpu_adjust);
		} else {
			/* Every subsequent CPU must use the same adjustment. */
			assert(perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu ==
			       cpu.cpu + cpu_adjust);
		}
	}
/*
 * NOTE(review): the following text is German web-page boilerplate that is
 * unrelated to this C file and was most likely appended by the extraction
 * tool; wrapped in a comment (translated) to keep the file well-formed —
 * remove after confirming against the original source.
 *
 * "The information on this web page has been carefully compiled to the best
 *  of our knowledge. However, neither completeness, nor correctness, nor
 *  quality of the information provided is guaranteed.
 *  Remark: the colored syntax highlighting and the measurement are still
 *  experimental."
 */