/*
 * Activate @rt_b's period timer if it is not already running.
 *
 * The timer is forwarded to "now" before being started so the first
 * replenishment happens immediately rather than a full period out --
 * see the comment below for why.
 */
static inline void do_start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	raw_spin_lock(&rt_b->rt_runtime_lock);
	if (!rt_b->rt_period_active) {
		rt_b->rt_period_active = 1;
		/*
		 * SCHED_DEADLINE updates the bandwidth, as a run away
		 * RT task with a DL task could hog a CPU. But DL does
		 * not reset the period. If a deadline task was running
		 * without an RT task running, it can cause RT tasks to
		 * throttle when they start up. Kick the timer right away
		 * to update the period.
		 */
		hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
		hrtimer_start_expires(&rt_b->rt_period_timer,
				      HRTIMER_MODE_ABS_PINNED_HARD);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);
}
/*
 * Mark @rq as RT-overloaded in its root domain: set the rq's bit in
 * rto_mask, then bump rto_count.  No-op while the rq is offline.
 */
static inline void rt_set_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
	/*
	 * Make sure the mask is visible before we set
	 * the overload count. That is checked to determine
	 * if we should look at the mask. It would be a shame
	 * if we looked at the mask, but the mask was not
	 * updated yet.
	 *
	 * Matched by the barrier in pull_rt_task().
	 */
	smp_wmb();
	atomic_inc(&rq->rd->rto_count);
}
/*
 * Undo rt_set_overload(): drop rto_count and clear the rq's bit in
 * rto_mask.  No-op while the rq is offline.
 */
static inline void rt_clear_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	/* the order here really doesn't matter */
	atomic_dec(&rq->rd->rto_count);
	cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
}
#ifdef CONFIG_UCLAMP_TASK /* * Verify the fitness of task @p to run on @cpu taking into account the uclamp * settings. * * This check is only important for heterogeneous systems where uclamp_min value * is higher than the capacity of a @cpu. For non-heterogeneous system this * function will always return true. * * The function will return true if the capacity of the @cpu is >= the * uclamp_min and false otherwise. * * Note that uclamp_min will be clamped to uclamp_max if uclamp_min * > uclamp_max.
*/ staticinlinebool rt_task_fits_capacity(struct task_struct *p, int cpu)
{ unsignedint min_cap; unsignedint max_cap; unsignedint cpu_cap;
/* Only heterogeneous systems can benefit from this check */ if (!sched_asym_cpucap_active()) returntrue;
/* * We ran out of runtime, see if we can borrow some from our neighbours.
*/ staticvoid do_balance_runtime(struct rt_rq *rt_rq)
{ struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd; int i, weight;
u64 rt_period;
raw_spin_lock(&iter->rt_runtime_lock); /* * Either all rqs have inf runtime and there's nothing to steal * or __disable_runtime() below sets a specific rq to inf to * indicate its been disabled and disallow stealing.
*/ if (iter->rt_runtime == RUNTIME_INF) goto next;
/* * From runqueues with spare time, take 1/n part of their * spare time, but no more than our period.
*/
diff = iter->rt_runtime - iter->rt_time; if (diff > 0) {
diff = div_u64((u64)diff, weight); if (rt_rq->rt_runtime + diff > rt_period)
diff = rt_period - rt_rq->rt_runtime;
iter->rt_runtime -= diff;
rt_rq->rt_runtime += diff; if (rt_rq->rt_runtime == rt_period) {
raw_spin_unlock(&iter->rt_runtime_lock); break;
}
}
next:
raw_spin_unlock(&iter->rt_runtime_lock);
}
raw_spin_unlock(&rt_b->rt_runtime_lock);
}
/* * Ensure this RQ takes back all the runtime it lend to its neighbours.
*/ staticvoid __disable_runtime(struct rq *rq)
{ struct root_domain *rd = rq->rd;
rt_rq_iter_t iter; struct rt_rq *rt_rq;
raw_spin_lock(&rt_b->rt_runtime_lock);
raw_spin_lock(&rt_rq->rt_runtime_lock); /* * Either we're all inf and nobody needs to borrow, or we're * already disabled and thus have nothing to do, or we have * exactly the right amount of runtime to take out.
*/ if (rt_rq->rt_runtime == RUNTIME_INF ||
rt_rq->rt_runtime == rt_b->rt_runtime) goto balanced;
raw_spin_unlock(&rt_rq->rt_runtime_lock);
/* * Calculate the difference between what we started out with * and what we current have, that's the amount of runtime * we lend and now have to reclaim.
*/
want = rt_b->rt_runtime - rt_rq->rt_runtime;
/* * Greedy reclaim, take back as much as we can.
*/
for_each_cpu(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
s64 diff;
/* * Can't reclaim from ourselves or disabled runqueues.
*/ if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF) continue;
raw_spin_lock(&rt_rq->rt_runtime_lock); /* * We cannot be left wanting - that would mean some runtime * leaked out of the system.
*/
WARN_ON_ONCE(want);
balanced: /* * Disable all the borrow logic by pretending we have inf * runtime - in which case borrowing doesn't make sense.
*/
rt_rq->rt_runtime = RUNTIME_INF;
rt_rq->rt_throttled = 0;
raw_spin_unlock(&rt_rq->rt_runtime_lock);
raw_spin_unlock(&rt_b->rt_runtime_lock);
/* Make rt_rq available for pick_next_task() */
sched_rt_rq_enqueue(rt_rq);
}
}
/*
 * Top up @rt_rq's runtime from its neighbours when it has consumed more
 * time than it currently owns.
 *
 * Caller holds rt_rq->rt_runtime_lock: we drop it around the call to
 * do_balance_runtime() (which takes other locks) and re-take it after.
 */
static void balance_runtime(struct rt_rq *rt_rq)
{
	if (!sched_feat(RT_RUNTIME_SHARE))
		return;

	if (rt_rq->rt_time > rt_rq->rt_runtime) {
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		do_balance_runtime(rt_rq);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
	}
}
staticint do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{ int i, idle = 1, throttled = 0; conststruct cpumask *span;
span = sched_rt_period_mask();
/* * FIXME: isolated CPUs should really leave the root task group, * whether they are isolcpus or were isolated via cpusets, lest * the timer run on a CPU which does not service all runqueues, * potentially leaving other CPUs indefinitely throttled. If * isolation is really required, the user will turn the throttle * off to kill the perturbations it causes anyway. Meanwhile, * this maintains functionality for boot and/or troubleshooting.
*/ if (rt_b == &root_task_group.rt_bandwidth)
span = cpu_online_mask;
/* * When span == cpu_online_mask, taking each rq->lock * can be time-consuming. Try to avoid it when possible.
*/
raw_spin_lock(&rt_rq->rt_runtime_lock); if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF)
rt_rq->rt_runtime = rt_b->rt_runtime;
skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
raw_spin_unlock(&rt_rq->rt_runtime_lock); if (skip) continue;
/* * When we're idle and a woken (rt) task is * throttled wakeup_preempt() will set * skip_update and the time between the wakeup * and this unthrottle will get accounted as * 'runtime'.
*/ if (rt_rq->rt_nr_running && rq->curr == rq->idle)
rq_clock_cancel_skipupdate(rq);
} if (rt_rq->rt_time || rt_rq->rt_nr_running)
idle = 0;
raw_spin_unlock(&rt_rq->rt_runtime_lock);
} elseif (rt_rq->rt_nr_running) {
idle = 0; if (!rt_rq_throttled(rt_rq))
enqueue = 1;
} if (rt_rq->rt_throttled)
throttled = 1;
if (enqueue)
sched_rt_rq_enqueue(rt_rq);
rq_unlock(rq, &rf);
}
if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)) return 1;
if (rt_rq->rt_throttled) return rt_rq_throttled(rt_rq);
if (runtime >= sched_rt_period(rt_rq)) return 0;
balance_runtime(rt_rq);
runtime = sched_rt_runtime(rt_rq); if (runtime == RUNTIME_INF) return 0;
if (rt_rq->rt_time > runtime) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
/* * Don't actually throttle groups that have no runtime assigned * but accrue some time due to boosting.
*/ if (likely(rt_b->rt_runtime)) {
rt_rq->rt_throttled = 1;
printk_deferred_once("sched: RT throttling activated\n");
} else { /* * In case we did anyway, make it go away, * replenishment is a joke, since it will replenish us * with exactly 0 ns.
*/
rt_rq->rt_time = 0;
}
if (rt_rq_throttled(rt_rq)) {
sched_rt_rq_dequeue(rt_rq); return 1;
}
}
if (rt_rq) return rt_rq->highest_prio.curr; #endif
return rt_task_of(rt_se)->prio;
}
/* * Update the current task's runtime statistics. Skip current tasks that * are not in our scheduling class.
*/ staticvoid update_curr_rt(struct rq *rq)
{ struct task_struct *donor = rq->donor;
s64 delta_exec;
if (donor->sched_class != &rt_sched_class) return;
delta_exec = update_curr_common(rq); if (unlikely(delta_exec <= 0)) return;
if (rt_rq->rt_nr_running) {
add_nr_running(rq, rt_rq->rt_nr_running);
rt_rq->rt_queued = 1;
}
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
cpufreq_update_util(rq, 0);
}
/*
 * Propagate a priority increase on @rt_rq to the root domain's cpupri,
 * but only when @prio actually beats the previous highest prio.
 */
static void
inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	/*
	 * Change rq's cpupri only if rt_rq is the top queue.
	 */
	if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && &rq->rt != rt_rq)
		return;

	if (rq->online && prio < prev_prio)
		cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
}
/*
 * Propagate a priority decrease on @rt_rq to the root domain's cpupri,
 * using the (already recomputed) rt_rq->highest_prio.curr value.
 */
static void
dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	/*
	 * Change rq's cpupri only if rt_rq is the top queue.
	 */
	if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && &rq->rt != rt_rq)
		return;

	if (rq->online && rt_rq->highest_prio.curr != prev_prio)
		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
}
/*
 * Account a newly queued entity of priority @prio on @rt_rq: raise
 * highest_prio.curr if needed and notify cpupri.
 */
static void
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
	int prev_prio = rt_rq->highest_prio.curr;

	if (prio < prev_prio)
		rt_rq->highest_prio.curr = prio;

	inc_rt_prio_smp(rt_rq, prio, prev_prio);
}
staticvoid
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{ int prev_prio = rt_rq->highest_prio.curr;
if (rt_rq->rt_nr_running) {
WARN_ON(prio < prev_prio);
/* * This may have been our highest task, and therefore * we may have some re-computation to do
*/ if (prio == prev_prio) { struct rt_prio_array *array = &rt_rq->active;
if (list_empty(array->queue + rt_se_prio(rt_se)))
__clear_bit(rt_se_prio(rt_se), array->bitmap);
rt_se->on_list = 0;
}
staticinlinestruct sched_statistics *
__schedstats_from_rt_se(struct sched_rt_entity *rt_se)
{ /* schedstats is not supported for rt group. */ if (!rt_entity_is_task(rt_se)) return NULL;
/* * Don't enqueue the group if its throttled, or when empty. * The latter is a consequence of the former when a child group * get throttled and the current group doesn't have any other * active members.
*/ if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) { if (rt_se->on_list)
__delist_rt_entity(rt_se, array); return;
}
if (move_entity(flags)) {
WARN_ON_ONCE(rt_se->on_list); if (flags & ENQUEUE_HEAD)
list_add(&rt_se->run_list, queue); else
list_add_tail(&rt_se->run_list, queue);
if (move_entity(flags)) {
WARN_ON_ONCE(!rt_se->on_list);
__delist_rt_entity(rt_se, array);
}
rt_se->on_rq = 0;
dec_rt_tasks(rt_se, rt_rq);
}
/* * Because the prio of an upper entry depends on the lower * entries, we must remove entries top - down.
*/ staticvoid dequeue_rt_stack(struct sched_rt_entity *rt_se, unsignedint flags)
{ struct sched_rt_entity *back = NULL; unsignedint rt_nr_running;
for_each_sched_rt_entity(rt_se) {
rt_se->back = back;
back = rt_se;
}
rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
for (rt_se = back; rt_se; rt_se = rt_se->back) { if (on_rt_rq(rt_se))
__dequeue_rt_entity(rt_se, flags);
}
/*
 * Put task to the head or the end of the run list without the overhead of
 * dequeue followed by enqueue.
 */
static void
requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
{
	if (on_rt_rq(rt_se)) {
		struct rt_prio_array *array = &rt_rq->active;
		struct list_head *queue = array->queue + rt_se_prio(rt_se);

		if (head)
			list_move(&rt_se->run_list, queue);
		else
			list_move_tail(&rt_se->run_list, queue);
	}
}
/* * If the current task on @p's runqueue is an RT task, then * try to see if we can wake this RT task up on another * runqueue. Otherwise simply start this RT task * on its current runqueue. * * We want to avoid overloading runqueues. If the woken * task is a higher priority, then it will stay on this CPU * and the lower prio task should be moved to another CPU. * Even though this will probably make the lower prio task * lose its cache, we do not want to bounce a higher task * around just because it gave up its CPU, perhaps for a * lock? * * For equal prio tasks, we just let the scheduler sort it out. * * Otherwise, just let it ride on the affine RQ and the * post-schedule router will push the preempted task away * * This test is optimistic, if we get it wrong the load-balancer * will have to sort it out. * * We take into account the capacity of the CPU to ensure it fits the * requirement of the task - which is only important on heterogeneous * systems like big.LITTLE.
*/
test = curr &&
unlikely(rt_task(donor)) &&
(curr->nr_cpus_allowed < 2 || donor->prio <= p->prio);
if (test || !rt_task_fits_capacity(p, cpu)) { int target = find_lowest_rq(p);
/* * Bail out if we were forcing a migration to find a better * fitting CPU but our search failed.
*/ if (!test && target != -1 && !rt_task_fits_capacity(p, target)) goto out_unlock;
/* * Don't bother moving it if the destination CPU is * not running a lower priority task.
*/ if (target != -1 &&
p->prio < cpu_rq(target)->rt.highest_prio.curr)
cpu = target;
}
/* * p is migratable, so let's not schedule it and * see if it is pushed or pulled somewhere else.
*/ if (p->nr_cpus_allowed != 1 &&
cpupri_find(&rq->rd->cpupri, p, NULL)) return;
/* * There appear to be other CPUs that can accept * the current task but none can run 'p', so lets reschedule * to try and push the current task away:
*/
requeue_task_rt(rq, p, 1);
resched_curr(rq);
}
staticint balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
{ if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) { /* * This is OK, because current is on_cpu, which avoids it being * picked for load-balance and preemption/IRQs are still * disabled avoiding further scheduler activity on it and we've * not yet started the picking loop.
*/
rq_unpin_lock(rq, rf);
pull_rt_task(rq);
rq_repin_lock(rq, rf);
}
/*
 * Preempt the current task with a newly woken task if needed:
 */
static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct task_struct *donor = rq->donor;

	if (p->prio < donor->prio) {
		resched_curr(rq);
		return;
	}

	/*
	 * If:
	 *
	 * - the newly woken task is of equal priority to the current task
	 * - the newly woken task is non-migratable while current is migratable
	 * - current will be preempted on the next reschedule
	 *
	 * we should check to see if current can readily move to a different
	 * cpu. If so, we will reschedule to allow the push logic to try
	 * to move current somewhere else, making room for our non-migratable
	 * task.
	 */
	if (p->prio == donor->prio && !test_tsk_need_resched(rq->curr))
		check_preempt_equal_prio(rq, p);
}
p->se.exec_start = rq_clock_task(rq); if (on_rt_rq(&p->rt))
update_stats_wait_end_rt(rt_rq, rt_se);
/* The running task is never eligible for pushing */
dequeue_pushable_task(rq, p);
if (!first) return;
/* * If prev task was rt, put_prev_task() has already updated the * utilization. We only care of the case where we start to schedule a * rt task
*/ if (rq->donor->sched_class != &rt_sched_class)
update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
if (on_rt_rq(&p->rt))
update_stats_wait_start_rt(rt_rq, rt_se);
update_curr_rt(rq);
update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
if (task_is_blocked(p)) return; /* * The previous task needs to be made eligible for pushing * if it is still active
*/ if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
}
/* Only try algorithms three times */ #define RT_MAX_TRIES 3
/* * Return the highest pushable rq's task, which is suitable to be executed * on the CPU, NULL otherwise
*/ staticstruct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
{ struct plist_head *head = &rq->rt.pushable_tasks; struct task_struct *p;
staticint find_lowest_rq(struct task_struct *task)
{ struct sched_domain *sd; struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); int ret;
/* Make sure the mask is initialized first */ if (unlikely(!lowest_mask)) return -1;
if (task->nr_cpus_allowed == 1) return -1; /* No other targets possible */
/* * If we're on asym system ensure we consider the different capacities * of the CPUs when searching for the lowest_mask.
*/ if (sched_asym_cpucap_active()) {
ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
task, lowest_mask,
rt_task_fits_capacity);
} else {
ret = cpupri_find(&task_rq(task)->rd->cpupri,
task, lowest_mask);
}
if (!ret) return -1; /* No targets found */
/* * At this point we have built a mask of CPUs representing the * lowest priority tasks in the system. Now we want to elect * the best one based on our affinity and topology. * * We prioritize the last CPU that the task executed on since * it is most likely cache-hot in that location.
*/ if (cpumask_test_cpu(cpu, lowest_mask)) return cpu;
/* * Otherwise, we consult the sched_domains span maps to figure * out which CPU is logically closest to our hot cache data.
*/ if (!cpumask_test_cpu(this_cpu, lowest_mask))
this_cpu = -1; /* Skip this_cpu opt if not among lowest */
rcu_read_lock();
for_each_domain(cpu, sd) { if (sd->flags & SD_WAKE_AFFINE) { int best_cpu;
/* * "this_cpu" is cheaper to preempt than a * remote processor.
*/ if (this_cpu != -1 &&
cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
rcu_read_unlock(); return this_cpu;
}
/* * And finally, if there were no matches within the domains * just give the caller *something* to work with from the compatible * locations.
*/ if (this_cpu != -1) return this_cpu;
cpu = cpumask_any_distribute(lowest_mask); if (cpu < nr_cpu_ids) return cpu;
/* Will lock the rq it finds */ staticstruct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{ struct rq *lowest_rq = NULL; int tries; int cpu;
for (tries = 0; tries < RT_MAX_TRIES; tries++) {
cpu = find_lowest_rq(task);
if ((cpu == -1) || (cpu == rq->cpu)) break;
lowest_rq = cpu_rq(cpu);
if (lowest_rq->rt.highest_prio.curr <= task->prio) { /* * Target rq has tasks of equal or higher priority, * retrying does not release any lock and is unlikely * to yield a different result.
*/
lowest_rq = NULL; break;
}
/* if the prio of this runqueue changed, try again */ if (double_lock_balance(rq, lowest_rq)) { /* * We had to unlock the run queue. In * the mean time, task could have * migrated already or had its affinity changed, * therefore check if the task is still at the * head of the pushable tasks list. * It is possible the task was scheduled, set * "migrate_disabled" and then got preempted, so we must * check the task migration disable flag here too.
*/ if (unlikely(is_migration_disabled(task) ||
!cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
task != pick_next_pushable_task(rq))) {
/* If this rq is still suitable use it. */ if (lowest_rq->rt.highest_prio.curr > task->prio) break;
/* try again */
double_unlock_balance(rq, lowest_rq);
lowest_rq = NULL;
}
return lowest_rq;
}
/* * If the current CPU has more than one RT task, see if the non * running task can migrate over to a CPU that is running a task * of lesser priority.
*/ staticint push_rt_task(struct rq *rq, bool pull)
{ struct task_struct *next_task; struct rq *lowest_rq; int ret = 0;
if (!rq->rt.overloaded) return 0;
next_task = pick_next_pushable_task(rq); if (!next_task) return 0;
retry: /* * It's possible that the next_task slipped in of * higher priority than current. If that's the case * just reschedule current.
*/ if (unlikely(next_task->prio < rq->donor->prio)) {
resched_curr(rq); return 0;
}
if (is_migration_disabled(next_task)) { struct task_struct *push_task = NULL; int cpu;
if (!pull || rq->push_busy) return 0;
/* * Invoking find_lowest_rq() on anything but an RT task doesn't * make sense. Per the above priority check, curr has to * be of higher priority than next_task, so no need to * reschedule when bailing out. * * Note that the stoppers are masqueraded as SCHED_FIFO * (cf. sched_set_stop_task()), so we can't rely on rt_task().
*/ if (rq->donor->sched_class != &rt_sched_class) return 0;
cpu = find_lowest_rq(rq->curr); if (cpu == -1 || cpu == rq->cpu) return 0;
/* * Given we found a CPU with lower priority than @next_task, * therefore it should be running. However we cannot migrate it * to this other CPU, instead attempt to push the current * running task on this CPU away.
*/
push_task = get_push_task(rq); if (push_task) {
preempt_disable();
raw_spin_rq_unlock(rq);
stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
push_task, &rq->push_work);
preempt_enable();
raw_spin_rq_lock(rq);
}
return 0;
}
if (WARN_ON(next_task == rq->curr)) return 0;
/* We might release rq lock */
get_task_struct(next_task);
/* find_lock_lowest_rq locks the rq if found */
lowest_rq = find_lock_lowest_rq(next_task, rq); if (!lowest_rq) { struct task_struct *task; /* * find_lock_lowest_rq releases rq->lock * so it is possible that next_task has migrated. * * We need to make sure that the task is still on the same * run-queue and is also still the next task eligible for * pushing.
*/
task = pick_next_pushable_task(rq); if (task == next_task) { /* * The task hasn't migrated, and is still the next * eligible task, but we failed to find a run-queue * to push it to. Do not retry in this case, since * other CPUs will pull from us when ready.
*/ goto out;
}
if (!task) /* No more tasks, just exit */ goto out;
/*
 * Keep pushing RT tasks away from this rq until push_rt_task() reports
 * it moved nothing (or bailed out).
 */
static void push_rt_tasks(struct rq *rq)
{
	/* push_rt_task() will return true if it moved an RT task */
	while (push_rt_task(rq, false))
		;
}
#ifdef HAVE_RT_PUSH_IPI
/* * When a high priority task schedules out from a CPU and a lower priority * task is scheduled in, a check is made to see if there's any RT tasks * on other CPUs that are waiting to run because a higher priority RT task * is currently running on its CPU. In this case, the CPU with multiple RT * tasks queued on it (overloaded) needs to be notified that a CPU has opened * up that may be able to run one of its non-running queued RT tasks. * * All CPUs with overloaded RT tasks need to be notified as there is currently * no way to know which of these CPUs have the highest priority task waiting * to run. Instead of trying to take a spinlock on each of these CPUs, * which has shown to cause large latency when done on machines with many * CPUs, sending an IPI to the CPUs to have them push off the overloaded * RT tasks waiting to run. * * Just sending an IPI to each of the CPUs is also an issue, as on large * count CPU machines, this can cause an IPI storm on a CPU, especially * if its the only CPU with multiple RT tasks queued, and a large number * of CPUs scheduling a lower priority task at the same time. * * Each root domain has its own IRQ work function that can iterate over * all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT * task must be checked if there's one or many CPUs that are lowering * their priority, there's a single IRQ work iterator that will try to * push off RT tasks that are waiting to run. * * When a CPU schedules a lower priority task, it will kick off the * IRQ work iterator that will jump to each CPU with overloaded RT tasks. * As it only takes the first CPU that schedules a lower priority task * to start the process, the rto_start variable is incremented and if * the atomic result is one, then that CPU will try to take the rto_lock. * This prevents high contention on the lock as the process handles all * CPUs scheduling lower priority tasks. 
* * All CPUs that are scheduling a lower priority task will increment the * rt_loop_next variable. This will make sure that the IRQ work iterator * checks all RT overloaded CPUs whenever a CPU schedules a new lower * priority task, even if the iterator is in the middle of a scan. Incrementing * the rt_loop_next will cause the iterator to perform another scan. *
*/ staticint rto_next_cpu(struct root_domain *rd)
{ int next; int cpu;
/* * When starting the IPI RT pushing, the rto_cpu is set to -1, * rt_next_cpu() will simply return the first CPU found in * the rto_mask. * * If rto_next_cpu() is called with rto_cpu is a valid CPU, it * will return the next CPU found in the rto_mask. * * If there are no more CPUs left in the rto_mask, then a check is made * against rto_loop and rto_loop_next. rto_loop is only updated with * the rto_lock held, but any CPU may increment the rto_loop_next * without any locking.
*/ for (;;) {
/* When rto_cpu is -1 this acts like cpumask_first() */
cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);
rd->rto_cpu = cpu;
if (cpu < nr_cpu_ids) return cpu;
rd->rto_cpu = -1;
/* * ACQUIRE ensures we see the @rto_mask changes * made prior to the @next value observed. * * Matches WMB in rt_set_overload().
*/
next = atomic_read_acquire(&rd->rto_loop_next);
/*
 * Start (or keep alive) the IPI-driven RT push loop for @rq's root
 * domain.  Bumps rto_loop_next so a running iterator rescans, and kicks
 * off a new iteration via irq_work only when none is in flight.
 */
static void tell_cpu_to_push(struct rq *rq)
{
	int cpu = -1;

	/* Keep the loop going if the IPI is currently active */
	atomic_inc(&rq->rd->rto_loop_next);

	/* Only one CPU can initiate a loop at a time */
	if (!rto_start_trylock(&rq->rd->rto_loop_start))
		return;

	raw_spin_lock(&rq->rd->rto_lock);

	/*
	 * The rto_cpu is updated under the lock, if it has a valid CPU
	 * then the IPI is still running and will continue due to the
	 * update to loop_next, and nothing needs to be done here.
	 * Otherwise it is finishing up and an IPI needs to be sent.
	 */
	if (rq->rd->rto_cpu < 0)
		cpu = rto_next_cpu(rq->rd);

	raw_spin_unlock(&rq->rd->rto_lock);

	rto_start_unlock(&rq->rd->rto_loop_start);

	if (cpu >= 0) {
		/* Make sure the rd does not get freed while pushing */
		sched_get_rd(rq->rd);
		irq_work_queue_on(&rq->rd->rto_push_work, cpu);
	}
}
/* Called from hardirq context */ void rto_push_irq_work_func(struct irq_work *work)
{ struct root_domain *rd =
container_of(work, struct root_domain, rto_push_work); struct rq *rq; int cpu;
rq = this_rq();
/* * We do not need to grab the lock to check for has_pushable_tasks. * When it gets updated, a check is made if a push is possible.
*/ if (has_pushable_tasks(rq)) {
raw_spin_rq_lock(rq); while (push_rt_task(rq, true))
;
raw_spin_rq_unlock(rq);
}
raw_spin_lock(&rd->rto_lock);
/* Pass the IPI to the next rt overloaded queue */
cpu = rto_next_cpu(rd);
raw_spin_unlock(&rd->rto_lock);
if (cpu < 0) {
sched_put_rd(rd); return;
}
/* Try the next RT overloaded CPU */
irq_work_queue_on(&rd->rto_push_work, cpu);
} #endif/* HAVE_RT_PUSH_IPI */
/* * Match the barrier from rt_set_overloaded; this guarantees that if we * see overloaded we must also see the rto_mask bit.
*/
smp_rmb();
/* If we are the only overloaded CPU do nothing */ if (rt_overload_count == 1 &&
cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask)) return;
#ifdef HAVE_RT_PUSH_IPI if (sched_feat(RT_PUSH_IPI)) {
tell_cpu_to_push(this_rq); return;
} #endif
for_each_cpu(cpu, this_rq->rd->rto_mask) { if (this_cpu == cpu) continue;
src_rq = cpu_rq(cpu);
/* * Don't bother taking the src_rq->lock if the next highest * task is known to be lower-priority than our current task. * This may look racy, but if this value is about to go * logically higher, the src_rq will push this task away. * And if its going logically lower, we do not care
*/ if (src_rq->rt.highest_prio.next >=
this_rq->rt.highest_prio.curr) continue;
/* * We can potentially drop this_rq's lock in * double_lock_balance, and another CPU could * alter this_rq
*/
push_task = NULL;
double_lock_balance(this_rq, src_rq);
/* * We can pull only a task, which is pushable * on its rq, and no others.
*/
p = pick_highest_pushable_task(src_rq, this_cpu);
/* * Do we have an RT task that preempts * the to-be-scheduled task?
*/ if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
WARN_ON(p == src_rq->curr);
WARN_ON(!task_on_rq_queued(p));
/* * There's a chance that p is higher in priority * than what's currently running on its CPU. * This is just that p is waking up and hasn't * had a chance to schedule. We only pull * p if it is lower in priority than the * current task on the run queue
*/ if (p->prio < src_rq->donor->prio) goto skip;
if (is_migration_disabled(p)) {
push_task = get_push_task(src_rq);
} else {
move_queued_task_locked(src_rq, this_rq, p);
resched = true;
} /* * We continue with the search, just in * case there's an even higher prio task * in another runqueue. (low likelihood * but possible)
*/
}
skip:
double_unlock_balance(this_rq, src_rq);
/*
 * If we are not running and we are not going to reschedule soon, we should
 * try to push tasks away now
 */
static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
	bool need_to_push = !task_on_cpu(rq, p) &&
			    !test_tsk_need_resched(rq->curr) &&
			    p->nr_cpus_allowed > 1 &&
			    (dl_task(rq->donor) || rt_task(rq->donor)) &&
			    (rq->curr->nr_cpus_allowed < 2 ||
			     rq->donor->prio <= p->prio);

	if (need_to_push)
		push_rt_tasks(rq);
}
/* Assumes rq->lock is held */ staticvoid rq_online_rt(struct rq *rq)
{ if (rq->rt.overloaded)
rt_set_overload(rq);
/* * When switch from the rt queue, we bring ourselves to a position * that we might want to pull RT tasks from other runqueues.
*/ staticvoid switched_from_rt(struct rq *rq, struct task_struct *p)
{ /* * If there are other RT tasks then we will reschedule * and the scheduling of the other RT tasks will handle * the balancing. But if we are the last RT task * we may need to handle the pulling of RT tasks * now.
*/ if (!task_on_rq_queued(p) || rq->rt.rt_nr_running) return;
/*
 * When switching a task to RT, we may overload the runqueue
 * with RT tasks. In this case we try to push them off to
 * other runqueues.
 */
static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If we are running, update the avg_rt tracking, as the running time
	 * will now on be accounted into the latter.
	 */
	if (task_current(rq, p)) {
		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
		return;
	}

	/*
	 * If we are not running we may need to preempt the current
	 * running task. If that current running task is also an RT task
	 * then see if we can move to another run queue.
	 */
	if (task_on_rq_queued(p)) {
		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
			rt_queue_push_tasks(rq);
		if (p->prio < rq->donor->prio && cpu_online(cpu_of(rq)))
			resched_curr(rq);
	}
}
/*
 * Priority of the task has changed. This may cause
 * us to initiate a push or pull.
 */
static void
prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
{
	if (!task_on_rq_queued(p))
		return;

	if (task_current_donor(rq, p)) {
		/*
		 * If our priority decreases while running, we
		 * may need to pull tasks to this runqueue.
		 */
		if (oldprio < p->prio)
			rt_queue_pull_task(rq);

		/*
		 * If there's a higher priority task waiting to run
		 * then reschedule.
		 */
		if (p->prio > rq->rt.highest_prio.curr)
			resched_curr(rq);
	} else {
		/*
		 * This task is not running, but if it is
		 * greater than the current running task
		 * then reschedule.
		 */
		if (p->prio < rq->donor->prio)
			resched_curr(rq);
	}
}
/*
 * scheduler tick hitting a task of our scheduling class.
 *
 * NOTE: This function can be called remotely by the tick offload that
 * goes along full dynticks. Therefore no local assumption can be made
 * and everything must be accessed through the @rq and @curr passed in
 * parameters.
 */
static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
{
	struct sched_rt_entity *rt_se = &p->rt;

	/*
	 * RR tasks need a special form of time-slice management.
	 * FIFO tasks have no timeslices.
	 */
	if (p->policy != SCHED_RR)
		return;

	if (--p->rt.time_slice)
		return;

	p->rt.time_slice = sched_rr_timeslice;

	/*
	 * Requeue to the end of queue if we (and all of our ancestors) are not
	 * the only element on the queue
	 */
	for_each_sched_rt_entity(rt_se) {
		if (rt_se->run_list.prev != rt_se->run_list.next) {
			requeue_task_rt(rq, p, 0);
			resched_curr(rq);
			return;
		}
	}
}
/*
 * Report the round-robin timeslice for @task: sched_rr_timeslice for
 * SCHED_RR, 0 otherwise.
 */
static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
{
	/*
	 * Time slice is 0 for SCHED_FIFO tasks
	 */
	if (task->policy == SCHED_RR)
		return sched_rr_timeslice;
	else
		return 0;
}
period = ktime_to_ns(tg->rt_bandwidth.rt_period);
runtime = tg->rt_bandwidth.rt_runtime;
if (tg == d->tg) {
period = d->rt_period;
runtime = d->rt_runtime;
}
/* * Cannot have more runtime than the period.
*/ if (runtime > period && runtime != RUNTIME_INF) return -EINVAL;
/* * Ensure we don't starve existing RT tasks if runtime turns zero.
*/ if (rt_bandwidth_enabled() && !runtime &&
tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg)) return -EBUSY;
if (WARN_ON(!rt_group_sched_enabled() && tg != &root_task_group)) return -EBUSY;
total = to_ratio(period, runtime);
/* * Nobody can have more than the global setting allows.
*/ if (total > to_ratio(global_rt_period(), global_rt_runtime())) return -EINVAL;
/* * The sum of our children's runtime should not exceed our own.
*/
list_for_each_entry_rcu(child, &tg->children, siblings) {
period = ktime_to_ns(child->rt_bandwidth.rt_period);
runtime = child->rt_bandwidth.rt_runtime;
if (child == d->tg) {
period = d->rt_period;
runtime = d->rt_runtime;
}
sum += to_ratio(period, runtime);
}
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.32 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.