#ifdefined(CONFIG_X86_64) && defined(CONFIG_SMP) /* * APERF/MPERF frequency ratio computation. * * The scheduler wants to do frequency invariant accounting and needs a <1 * ratio to account for the 'current' frequency, corresponding to * freq_curr / freq_max. * * Since the frequency freq_curr on x86 is controlled by micro-controller and * our P-state setting is little more than a request/hint, we need to observe * the effective frequency 'BusyMHz', i.e. the average frequency over a time * interval after discarding idle time. This is given by: * * BusyMHz = delta_APERF / delta_MPERF * freq_base * * where freq_base is the max non-turbo P-state. * * The freq_max term has to be set to a somewhat arbitrary value, because we * can't know which turbo states will be available at a given point in time: * it all depends on the thermal headroom of the entire package. We set it to * the turbo level with 4 cores active. * * Benchmarks show that's a good compromise between the 1C turbo ratio * (freq_curr/freq_max would rarely reach 1) and something close to freq_base, * which would ignore the entire turbo range (a conspicuous part, making * freq_curr/freq_max always maxed out). * * An exception to the heuristic above is the Atom uarch, where we choose the * highest turbo level for freq_max since Atom's are generally oriented towards * power efficiency. * * Setting freq_max to anything less than the 1C turbo ratio makes the ratio * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
*/
if (slv_set_max_freq_ratio(&base_freq, &turbo_freq)) goto out;
if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) goto out;
if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) goto out;
if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4)) goto out;
if (core_set_max_freq_ratio(&base_freq, &turbo_freq)) goto out;
returnfalse;
out: /* * Some hypervisors advertise X86_FEATURE_APERFMPERF * but then fill all MSR's with zeroes. * Some CPUs have turbo boost but don't declare any turbo ratio * in MSR_TURBO_RATIO_LIMIT.
*/ if (!base_freq || !turbo_freq) {
pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n"); returnfalse;
}
turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq); if (!turbo_ratio) {
pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n"); returnfalse;
}
staticvoid freq_invariance_enable(void)
{ if (static_branch_unlikely(&arch_scale_freq_key)) {
WARN_ON_ONCE(1); return;
}
static_branch_enable_cpuslocked(&arch_scale_freq_key);
register_freq_invariance_syscore_ops();
pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
}
staticvoid __init bp_init_freq_invariance(void)
{ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return;
if (intel_set_max_freq_ratio()) {
guard(cpus_read_lock)();
freq_invariance_enable();
}
}
staticvoid disable_freq_invariance_workfn(struct work_struct *work)
{ int cpu;
static_branch_disable(&arch_scale_freq_key);
/* * Set arch_freq_scale to a default value on all cpus * This negates the effect of scaling
*/
for_each_possible_cpu(cpu)
per_cpu(arch_freq_scale, cpu) = SCHED_CAPACITY_SCALE;
}
/** * arch_enable_hybrid_capacity_scale() - Enable hybrid CPU capacity scaling * * Allocate memory for per-CPU data used by hybrid CPU capacity scaling, * initialize it and set the static key controlling its code paths. * * Must be called before arch_set_cpu_capacity().
*/ bool arch_enable_hybrid_capacity_scale(void)
{ int cpu;
if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) {
WARN_ONCE(1, "Hybrid CPU capacity scaling already enabled"); returntrue;
}
arch_cpu_scale = alloc_percpu(struct arch_hybrid_cpu_scale); if (!arch_cpu_scale) returnfalse;
/** * arch_set_cpu_capacity() - Set scale-invariance parameters for a CPU * @cpu: Target CPU. * @cap: Capacity of @cpu at its maximum frequency, relative to @max_cap. * @max_cap: System-wide maximum CPU capacity. * @cap_freq: Frequency of @cpu corresponding to @cap. * @base_freq: Frequency of @cpu at which MPERF counts. * * The units in which @cap and @max_cap are expressed do not matter, so long * as they are consistent, because the former is effectively divided by the * latter. Analogously for @cap_freq and @base_freq. * * After calling this function for all CPUs, call arch_rebuild_sched_domains() * to let the scheduler know that capacity-aware scheduling can be used going * forward.
*/ void arch_set_cpu_capacity(int cpu, unsignedlong cap, unsignedlong max_cap, unsignedlong cap_freq, unsignedlong base_freq)
{ if (static_branch_likely(&arch_hybrid_cap_scale_key)) {
WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity,
div_u64(cap << SCHED_CAPACITY_SHIFT, max_cap));
WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio,
div_u64(cap_freq << SCHED_CAPACITY_SHIFT, base_freq));
} else {
WARN_ONCE(1, "Hybrid CPU capacity scaling not enabled");
}
}
unsignedlong arch_scale_cpu_capacity(int cpu)
{ if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) return READ_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity);
/* * Discard samples older than the define maximum sample age of 20ms. There * is no point in sending IPIs in such a case. If the scheduler tick was * not running then the CPU is either idle or isolated.
*/ #define MAX_SAMPLE_AGE ((unsignedlong)HZ / 50)
if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) goto fallback;
do {
seq = raw_read_seqcount_begin(&s->seq);
last = s->last_update;
acnt = s->acnt;
mcnt = s->mcnt;
} while (read_seqcount_retry(&s->seq, seq));
/* * Bail on invalid count and when the last update was too long ago, * which covers idle and NOHZ full CPUs.
*/ if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE) goto fallback;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.