// SPDX-License-Identifier: GPL-2.0 /* * This file contains the base functions to manage periodic tick * related events. * * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*/ #include <linux/compiler.h> #include <linux/cpu.h> #include <linux/err.h> #include <linux/hrtimer.h> #include <linux/interrupt.h> #include <linux/nmi.h> #include <linux/percpu.h> #include <linux/profile.h> #include <linux/sched.h> #include <linux/module.h> #include <trace/events/power.h>
#include <asm/irq_regs.h>
#include"tick-internal.h"
/*
 * Tick devices: one struct tick_device instance per CPU.
 */
DEFINE_PER_CPU(struct tick_device, tick_cpu_device);

/*
 * Tick next event: keeps track of the tick time. It is updated by the
 * CPU which handles the tick and is protected by jiffies_lock. There is
 * no requirement to hold the jiffies seqcount for writing when updating
 * it.
 */
ktime_t tick_next_period;
/*
 * tick_do_timer_cpu is a timer core internal variable which holds the CPU NR
 * which is responsible for calling do_timer(), i.e. the timekeeping stuff. This
 * variable has two functions:
 *
 * 1) Prevent a thundering herd issue of a gazillion of CPUs trying to grab the
 *    timekeeping lock all at once. Only the CPU which is assigned to do the
 *    update is handling it.
 *
 * 2) Hand off the duty in the NOHZ idle case by setting the value to
 *    TICK_DO_TIMER_NONE, i.e. a non existing CPU. So the next cpu which looks
 *    at it will take over and keep the time keeping alive.  The handover
 *    procedure also covers cpu hotplug.
 */
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
#ifdef CONFIG_NO_HZ_FULL
/*
 * tick_do_timer_boot_cpu indicates the boot CPU temporarily owns
 * tick_do_timer_cpu and it should be taken over by an eligible secondary
 * when one comes online. A value of -1 means no such handover is pending.
 */
static int tick_do_timer_boot_cpu __read_mostly = -1;
#endif
/*
 * Event handler for periodic ticks
 *
 * Runs the periodic tick work for the current CPU and, for a device that
 * is in oneshot state, keeps programming the next period until the
 * programming call returns zero.
 */
void tick_handle_periodic(struct clock_event_device *dev)
{
	int cpu = smp_processor_id();
	ktime_t expires = dev->next_event;

	tick_periodic(cpu);

	/*
	 * tick_periodic() may have switched this CPU to HIGHRES or NOHZ
	 * mode (update_process_times() -> run_local_timers() ->
	 * hrtimer_run_queues()). In that case the device no longer uses
	 * this handler and there is nothing left to do here.
	 */
	if (IS_ENABLED(CONFIG_TICK_ONESHOT) &&
	    dev->event_handler != tick_handle_periodic)
		return;

	/* Only a device in oneshot state needs the next period programmed */
	if (!clockevent_state_oneshot(dev))
		return;

	/*
	 * Set up the next period for devices which do not have a periodic
	 * mode. A zero return from clockevents_program_event() ends the
	 * handler.
	 */
	expires = ktime_add_ns(expires, TICK_NSEC);
	while (clockevents_program_event(dev, expires, false)) {
		/*
		 * Be careful here: in oneshot mode tick_periodic() must
		 * only be called in this loop when a real hardware
		 * clocksource is in use. Otherwise tick_periodic() bumps
		 * jiffies, which advances time, which can make the loop
		 * trigger again and again without end.
		 */
		if (timekeeping_valid_for_hres())
			tick_periodic(cpu);

		expires = ktime_add_ns(expires, TICK_NSEC);
	}
}
/* * Setup the device for a periodic tick
*/ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
tick_set_periodic_handler(dev, broadcast);
/* Broadcast setup ? */ if (!tick_device_is_functional(dev)) return;
/* * First device setup ?
*/ if (!td->evtdev) { /* * If no cpu took the do_timer update, assign it to * this cpu:
*/ if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
WRITE_ONCE(tick_do_timer_cpu, cpu);
tick_next_period = ktime_get(); #ifdef CONFIG_NO_HZ_FULL /* * The boot CPU may be nohz_full, in which case the * first housekeeping secondary will take do_timer() * from it.
*/ if (tick_nohz_full_cpu(cpu))
tick_do_timer_boot_cpu = cpu;
} elseif (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) {
tick_do_timer_boot_cpu = -1; /* * The boot CPU will stay in periodic (NOHZ disabled) * mode until clocksource_done_booting() called after * smp_init() selects a high resolution clocksource and * timekeeping_notify() kicks the NOHZ stuff alive. * * So this WRITE_ONCE can only race with the READ_ONCE * check in tick_periodic() but this race is harmless.
*/
WRITE_ONCE(tick_do_timer_cpu, cpu); #endif
}
/* * When the device is not per cpu, pin the interrupt to the * current cpu:
*/ if (!cpumask_equal(newdev->cpumask, cpumask))
irq_set_affinity(newdev->irq, cpumask);
/* * When global broadcasting is active, check if the current * device is registered as a placeholder for broadcast mode. * This allows us to handle this x86 misfeature in a generic * way. This function also returns !=0 when we keep the * current active broadcast state for this CPU.
*/ if (tick_device_uses_broadcast(newdev, cpu)) return;
staticbool tick_check_percpu(struct clock_event_device *curdev, struct clock_event_device *newdev, int cpu)
{ if (!cpumask_test_cpu(cpu, newdev->cpumask)) returnfalse; if (cpumask_equal(newdev->cpumask, cpumask_of(cpu))) returntrue; /* Check if irq affinity can be set */ if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq)) returnfalse; /* Prefer an existing cpu local device */ if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) returnfalse; returntrue;
}
staticbool tick_check_preferred(struct clock_event_device *curdev, struct clock_event_device *newdev)
{ /* Prefer oneshot capable device */ if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) { if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT)) returnfalse; if (tick_oneshot_mode_active()) returnfalse;
}
/* * Use the higher rated one, but prefer a CPU local device with a lower * rating than a non-CPU local device
*/ return !curdev ||
newdev->rating > curdev->rating ||
!cpumask_equal(curdev->cpumask, newdev->cpumask);
}
/* * Check whether the new device is a better fit than curdev. curdev * can be NULL !
*/ bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev)
{ if (!tick_check_percpu(curdev, newdev, smp_processor_id())) returnfalse;
return tick_check_preferred(curdev, newdev);
}
/*
 * Decide whether a newly registered device becomes the tick device of
 * the current CPU. Called with clockevents_lock held and interrupts
 * disabled.
 */
void tick_check_new_device(struct clock_event_device *newdev)
{
	int cpu = smp_processor_id();
	struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
	struct clock_event_device *curdev = td->evtdev;

	if (!tick_check_replacement(curdev, newdev)) {
		/*
		 * Not a replacement for the per-CPU device. Can the new
		 * device be used as a broadcast device ?
		 */
		tick_install_broadcast_device(newdev, cpu);
		return;
	}

	/* Without a reference on the owning module the device is not used */
	if (!try_module_get(newdev->owner))
		return;

	/*
	 * Swap the possibly existing device for the new one. If the
	 * current device is the broadcast device, shut it down but do
	 * not give it back to the clockevents layer !
	 */
	if (tick_is_broadcast_device(curdev)) {
		clockevents_shutdown(curdev);
		curdev = NULL;
	}

	clockevents_exchange_device(curdev, newdev);
	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));

	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_oneshot_notify();
}
/** * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode * @state: The target state (enter/exit) * * The system enters/leaves a state, where affected devices might stop * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. * * Called with interrupts disabled, so clockevents_lock is not * required here because the local clock event device cannot go away * under us.
*/ int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{ struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
if (!(td->evtdev->features & CLOCK_EVT_FEAT_C3STOP)) return 0;
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Warn once if the CPU executing this still owns the do_timer() duty,
 * i.e. tick_do_timer_cpu equals the current CPU number.
 */
void tick_assert_timekeeping_handover(void)
{
	WARN_ON_ONCE(tick_do_timer_cpu == smp_processor_id());
}

/*
 * Stop the tick and transfer the timekeeping job away from a dying cpu.
 *
 * @dying_cpu: number of the CPU that goes away
 *
 * Always returns 0.
 */
int tick_cpu_dying(unsigned int dying_cpu)
{
	/*
	 * If the current CPU is the timekeeper, it's the only one that can
	 * safely hand over its duty. Also all online CPUs are in stop
	 * machine, guaranteed not to be idle, therefore there is no
	 * concurrency and it's safe to pick any online successor.
	 */
	if (tick_do_timer_cpu == dying_cpu)
		tick_do_timer_cpu = cpumask_first(cpu_online_mask);

	/* Make sure the CPU won't try to retake the timekeeping duty */
	tick_sched_timer_dying(dying_cpu);

	/* Remove CPU from timer broadcasting */
	tick_offline_cpu(dying_cpu);

	return 0;
}
/* * Shutdown an event device on the outgoing CPU: * * Called by the dying CPU during teardown, with clockevents_lock held * and interrupts disabled.
*/ void tick_shutdown(void)
{ struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *dev = td->evtdev;
/** * tick_suspend_local - Suspend the local tick device * * Called from the local cpu for freeze with interrupts disabled. * * No locks required. Nothing can change the per cpu device.
*/ void tick_suspend_local(void)
{ struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
clockevents_shutdown(td->evtdev);
}
/**
 * tick_resume_local - Resume the local tick device
 *
 * Called from the local CPU for unfreeze or XEN resume magic.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_resume_local(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	/* Queried before the device itself is resumed */
	bool bc = tick_resume_check_broadcast();

	clockevents_tick_resume(td->evtdev);

	/*
	 * When the broadcast check reported true the device is not set up
	 * again here; otherwise restart it according to the tick mode.
	 */
	if (!bc && td->mode == TICKDEV_MODE_PERIODIC)
		tick_setup_periodic(td->evtdev, 0);
	else if (!bc)
		tick_resume_oneshot();

	/*
	 * Make sure hrtimers are up to date and the clockevents device
	 * is reprogrammed correctly when high resolution timers are
	 * enabled.
	 */
	hrtimers_resume_local();
}
/**
 * tick_suspend - Suspend the tick and the broadcast device
 *
 * Called from syscore_suspend() via timekeeping_suspend with only one
 * CPU online and interrupts disabled or from tick_unfreeze() under
 * tick_freeze_lock.
 *
 * NOTE(review): in this file it is tick_freeze(), not tick_unfreeze(),
 * that calls timekeeping_suspend() while holding tick_freeze_lock -
 * confirm which caller the sentence above is meant to name.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_suspend(void)
{
	/* Shut down the tick device of this CPU first ... */
	tick_suspend_local();
	/* ... then hand over to tick_suspend_broadcast() for the broadcast side */
	tick_suspend_broadcast();
}
/**
 * tick_resume - Resume the tick and the broadcast device
 *
 * Called from syscore_resume() via timekeeping_resume with only one
 * CPU online and interrupts disabled.
 *
 * NOTE(review): tick_unfreeze() in this file also calls
 * timekeeping_resume() under tick_freeze_lock; presumably that path ends
 * up here as well - confirm.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_resume(void)
{
	/* Reverse order of tick_suspend(): broadcast side first ... */
	tick_resume_broadcast();
	/* ... then the tick device of this CPU */
	tick_resume_local();
}
/**
 * tick_freeze - Suspend the local tick and (possibly) timekeeping.
 *
 * Counts the caller in under tick_freeze_lock. When the count reaches the
 * number of online CPUs, i.e. this is the last online CPU executing the
 * function, timekeeping is suspended. Otherwise only the local tick is
 * suspended.
 *
 * Call with interrupts disabled.  Must be balanced with %tick_unfreeze().
 * Interrupts must not be enabled before the subsequent %tick_unfreeze().
 */
void tick_freeze(void)
{
	raw_spin_lock(&tick_freeze_lock);

	if (++tick_freeze_depth != num_online_cpus()) {
		/* Other CPUs still have to arrive: stop the local tick only */
		tick_suspend_local();
	} else {
		trace_suspend_resume(TPS("timekeeping_freeze"),
				     smp_processor_id(), true);
		/*
		 * All other CPUs have their interrupts disabled and are
		 * suspended to idle. Other tasks have been frozen so no
		 * scheduling is happening, which means there is no
		 * concurrency in the system at this point. It is therefore
		 * okay to acquire a sleeping lock on PREEMPT_RT, such as a
		 * spinlock: the lock cannot be held by other CPUs or
		 * threads, so acquiring it cannot block.
		 *
		 * Inform lockdep about the situation.
		 */
		lock_map_acquire_try(&tick_freeze_map);
		system_state = SYSTEM_SUSPEND;
		sched_clock_suspend();
		timekeeping_suspend();
		lock_map_release(&tick_freeze_map);
	}

	raw_spin_unlock(&tick_freeze_lock);
}
/** * tick_unfreeze - Resume the local tick and (possibly) timekeeping. * * Check if this is the first CPU executing the function and if so, resume * timekeeping. Otherwise resume the local tick. * * Call with interrupts disabled. Must be balanced with %tick_freeze(). * Interrupts must not be enabled after the preceding %tick_freeze().
*/ void tick_unfreeze(void)
{
raw_spin_lock(&tick_freeze_lock);
if (tick_freeze_depth == num_online_cpus()) { /* * Similar to tick_freeze(). On resumption the first CPU may * acquire uncontended sleeping locks while other CPUs block on * tick_freeze_lock.
*/
lock_map_acquire_try(&tick_freeze_map);
timekeeping_resume();
sched_clock_resume();
lock_map_release(&tick_freeze_map);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.