/* The next fields are only needed if fast switch cannot be used: */ struct irq_work irq_work; struct kthread_work work; struct mutex work_lock; struct kthread_worker worker; struct task_struct *thread; bool work_in_progress;
/* * Since cpufreq_update_util() is called with rq->lock held for * the @target_cpu, our per-CPU data is fully serialized. * * However, drivers cannot in general deal with cross-CPU * requests, so while get_next_freq() will work, our * sugov_update_commit() call may not for the fast switching platforms. * * Hence stop here for remote requests if they aren't supported * by the hardware, as calculating the frequency is pointless if * we cannot in fact act on it. * * This is needed on the slow switching platforms too to prevent CPUs * going offline from leaving stale IRQ work items behind.
*/ if (!cpufreq_this_cpu_can_update(sg_policy->policy)) returnfalse;
if (unlikely(READ_ONCE(sg_policy->limits_changed))) {
WRITE_ONCE(sg_policy->limits_changed, false);
sg_policy->need_freq_update = true;
/* * The above limits_changed update must occur before the reads * of policy limits in cpufreq_driver_resolve_freq() or a policy * limits update might be missed, so use a memory barrier to * ensure it. * * This pairs with the write memory barrier in sugov_limits().
*/
smp_mb();
returntrue;
} elseif (sg_policy->need_freq_update) { /* ignore_dl_rate_limit() wants a new frequency to be found. */ returntrue;
}
delta_ns = time - sg_policy->last_freq_update_time;
staticbool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, unsignedint next_freq)
{ if (sg_policy->need_freq_update) {
sg_policy->need_freq_update = false; /* * The policy limits have changed, but if the return value of * cpufreq_driver_resolve_freq() after applying the new limits * is still equal to the previously selected frequency, the * driver callback need not be invoked unless the driver * specifically wants that to happen on every update of the * policy limits.
*/ if (sg_policy->next_freq == next_freq &&
!cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS)) returnfalse;
} elseif (sg_policy->next_freq == next_freq) { returnfalse;
}
/** * get_capacity_ref_freq - get the reference frequency that has been used to * correlate frequency and compute capacity for a given cpufreq policy. We use * the CPU managing it for the arch_scale_freq_ref() call in the function. * @policy: the cpufreq policy of the CPU in question. * * Return: the reference CPU frequency to compute a capacity.
*/ static __always_inline unsignedlong get_capacity_ref_freq(struct cpufreq_policy *policy)
{ unsignedint freq = arch_scale_freq_ref(policy->cpu);
if (freq) return freq;
if (arch_scale_freq_invariant()) return policy->cpuinfo.max_freq;
/* * Apply a 25% margin so that we select a higher frequency than * the current one before the CPU is fully busy:
*/ return policy->cur + (policy->cur >> 2);
}
/** * get_next_freq - Compute a new frequency for a given cpufreq policy. * @sg_policy: schedutil policy object to compute the new frequency for. * @util: Current CPU utilization. * @max: CPU capacity. * * If the utilization is frequency-invariant, choose the new frequency to be * proportional to it, that is * * next_freq = C * max_freq * util / max * * Otherwise, approximate the would-be frequency-invariant utilization by * util_raw * (curr_freq / max_freq) which leads to * * next_freq = C * curr_freq * util_raw / max * * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8. * * The lowest driver-supported frequency which is equal or greater than the raw * next_freq (as calculated above) is returned, subject to policy min/max and * cpufreq driver limitations.
*/ staticunsignedint get_next_freq(struct sugov_policy *sg_policy, unsignedlong util, unsignedlong max)
{ struct cpufreq_policy *policy = sg_policy->policy; unsignedint freq;
unsignedlong sugov_effective_cpu_perf(int cpu, unsignedlong actual, unsignedlong min, unsignedlong max)
{ /* Add dvfs headroom to actual utilization */
actual = map_util_perf(actual); /* Actually we don't need to target the max performance */ if (actual < max)
max = actual;
/* * Ensure at least minimum performance while providing more compute * capacity when possible.
*/ return max(min, max);
}
/** * sugov_iowait_reset() - Reset the IO boost status of a CPU. * @sg_cpu: the sugov data for the CPU to boost * @time: the update time from the caller * @set_iowait_boost: true if an IO boost has been requested * * The IO wait boost of a task is disabled after a tick since the last update * of a CPU. If a new IO wait boost is requested after more than a tick, then * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy * efficiency by ignoring sporadic wakeups from IO.
*/ staticbool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, bool set_iowait_boost)
{
s64 delta_ns = time - sg_cpu->last_update;
/* Reset boost only if a tick has elapsed since last request */ if (delta_ns <= TICK_NSEC) returnfalse;
/**
 * sugov_iowait_boost() - Updates the IO boost status of a CPU.
 * @sg_cpu: the sugov data for the CPU to boost
 * @time: the update time from the caller
 * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
 *
 * Each time a task wakes up after an IO operation, the CPU utilization can be
 * boosted to a certain utilization which doubles at each "frequent and
 * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
 * of the maximum OPP.
 *
 * To keep doubling, an IO boost has to be requested at least once per tick,
 * otherwise we restart from the utilization of the minimum OPP.
 */
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
			       unsigned int flags)
{
	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sg_cpu->iowait_boost &&
	    sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
		return;

	/* Boost only tasks waking up after IO */
	if (!set_iowait_boost)
		return;

	/* Ensure boost doubles only one time at each request */
	if (sg_cpu->iowait_boost_pending)
		return;
	sg_cpu->iowait_boost_pending = true;

	/* Double the boost at each request, capped at full capacity scale */
	if (sg_cpu->iowait_boost) {
		sg_cpu->iowait_boost =
			min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
		return;
	}

	/* First wakeup after IO: start with minimum boost */
	sg_cpu->iowait_boost = IOWAIT_BOOST_MIN;
}
/**
 * sugov_iowait_apply() - Apply the IO boost to a CPU.
 * @sg_cpu: the sugov data for the cpu to boost
 * @time: the update time from the caller
 * @max_cap: the max CPU capacity
 *
 * A CPU running a task which has woken up after an IO operation can have its
 * utilization boosted to speed up the completion of those IO operations.
 * The IO boost value is increased each time a task wakes up from IO, in
 * sugov_iowait_boost(), and it's instead decreased by this function,
 * each time an increase has not been requested (!iowait_boost_pending).
 *
 * A CPU which also appears to have been idle for at least one tick has also
 * its IO boost utilization reset.
 *
 * This mechanism is designed to boost frequently IO-waiting tasks, while
 * being more conservative on tasks that do sporadic IO operations.
 */
static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
					unsigned long max_cap)
{
	/* No boost currently required */
	if (!sg_cpu->iowait_boost)
		return 0;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sugov_iowait_reset(sg_cpu, time, false))
		return 0;

	if (!sg_cpu->iowait_boost_pending) {
		/*
		 * No boost pending; reduce the boost value.
		 */
		sg_cpu->iowait_boost >>= 1;
		if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
			sg_cpu->iowait_boost = 0;
			return 0;
		}
	}

	sg_cpu->iowait_boost_pending = false;

	/*
	 * sg_cpu->util is already in capacity scale; convert iowait_boost
	 * into the same scale so we can compare.
	 */
	return (sg_cpu->iowait_boost * max_cap) >> SCHED_CAPACITY_SHIFT;
}
/* * The heuristics in this function is for the fair class. For SCX, the * performance target comes directly from the BPF scheduler. Let's just * follow it.
*/ if (scx_switched_all()) returnfalse;
/* if capped by uclamp_max, always update to be in compliance */ if (uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu))) returnfalse;
/* * Maintain the frequency if the CPU has not been idle recently, as * reduction is likely to be premature.
*/
idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
ret = idle_calls == sg_cpu->saved_idle_calls;
/*
 * Make sugov_should_update_freq() ignore the rate limit when DL
 * has increased the utilization.
 */
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
{
	/*
	 * If the deadline bandwidth now exceeds the previously recorded
	 * minimum, force a frequency re-evaluation on the next update.
	 */
	if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_min)
		sg_cpu->sg_policy->need_freq_update = true;
}
/* Restore cached freq as next_freq has changed */
sg_policy->cached_raw_freq = cached_freq;
}
if (!sugov_update_next_freq(sg_policy, time, next_f)) return;
/* * This code runs under rq->lock for the target CPU, so it won't run * concurrently on two different CPUs for the same target and it is not * necessary to acquire the lock in the fast switch case.
*/ if (sg_policy->policy->fast_switch_enabled) {
cpufreq_driver_fast_switch(sg_policy->policy, next_f);
} else {
raw_spin_lock(&sg_policy->update_lock);
sugov_deferred_update(sg_policy);
raw_spin_unlock(&sg_policy->update_lock);
}
}
/* * Fall back to the "frequency" path if frequency invariance is not * supported, because the direct mapping between the utilization and * the performance levels depends on the frequency invariance.
*/ if (!arch_scale_freq_invariant()) {
sugov_update_single_freq(hook, time, flags); return;
}
max_cap = arch_scale_cpu_capacity(sg_cpu->cpu);
if (!sugov_update_single_common(sg_cpu, time, max_cap, flags)) return;
if (sugov_hold_freq(sg_cpu) && sg_cpu->util < prev_util)
sg_cpu->util = prev_util;
/* * Hold sg_policy->update_lock shortly to handle the case where: * in case sg_policy->next_freq is read here, and then updated by * sugov_deferred_update() just before work_in_progress is set to false * here, we may miss queueing the new update. * * Note: If a work was queued after the update_lock is released, * sugov_work() will just be called again by kthread_work code; and the * request will be proceed before the sugov thread sleeps.
*/
raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
freq = sg_policy->next_freq;
sg_policy->work_in_progress = false;
raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);
ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
get_governor_parent_kobj(policy), "%s",
schedutil_gov.name); if (ret) goto fail;
out: /* * Schedutil is the preferred governor for EAS, so rebuild sched domains * on governor changes to make sure the scheduler knows about them.
*/
em_rebuild_sched_domains();
mutex_unlock(&global_tunables_lock); return 0;
if (!policy->fast_switch_enabled) {
mutex_lock(&sg_policy->work_lock);
cpufreq_policy_apply_limits(policy);
mutex_unlock(&sg_policy->work_lock);
}
/* * The limits_changed update below must take place before the updates * of policy limits in cpufreq_set_policy() or a policy limits update * might be missed, so use a memory barrier to ensure it. * * This pairs with the memory barrier in sugov_should_update_freq().
*/
smp_wmb();
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.