// SPDX-License-Identifier: GPL-2.0+ /* * 2002-10-15 Posix Clocks & timers * by George Anzinger george@mvista.com * Copyright (C) 2002 2003 by MontaVista Software. * * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug. * Copyright (C) 2004 Boris Hu * * These are all the functions necessary to implement POSIX clocks & timers
*/ #include <linux/compat.h> #include <linux/compiler.h> #include <linux/init.h> #include <linux/jhash.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/memblock.h> #include <linux/nospec.h> #include <linux/posix-clock.h> #include <linux/posix-timers.h> #include <linux/prctl.h> #include <linux/sched/task.h> #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/time.h> #include <linux/time_namespace.h> #include <linux/uaccess.h>
#include"timekeeping.h" #include"posix-timers.h"
/*
 * Timers are managed in a hash table for lockless lookup. The hash key is
 * constructed from current::signal and the timer ID and the timer is
 * matched against current::signal and the timer ID when walking the hash
 * bucket list.
 *
 * This allows checkpoint/restore to reconstruct the exact timer IDs for
 * a process.
 */
struct timer_hash_bucket {
	spinlock_t		lock;	/* Serializes insertion/removal in @head */
	struct hlist_head	head;	/* Timers hashed into this bucket */
};
/* SIGEV_THREAD_ID cannot share a bit with the other SIGEV values. */ #if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD)) #error"SIGEV_THREAD_ID must not share bit with other SIGEV values!" #endif
hlist_for_each_entry_rcu(timer, &bucket->head, t_hash) { /* timer->it_signal can be set concurrently */ if ((READ_ONCE(timer->it_signal) == sig) && (timer->it_id == id)) return timer;
} return NULL;
}
staticinlinestruct signal_struct *posix_sig_owner(conststruct k_itimer *timer)
{ unsignedlong val = (unsignedlong)timer->it_signal;
/* * Mask out bit 0, which acts as invalid marker to prevent * posix_timer_by_id() detecting it as valid.
*/ return (struct signal_struct *)(val & ~1UL);
}
scoped_guard (spinlock, &bucket->lock) { /* * Validate under the lock as this could have raced against * another thread ending up with the same ID, which is * highly unlikely, but possible.
*/ if (!posix_timer_hashed(bucket, sig, id)) { /* * Set the timer ID and the signal pointer to make * it identifiable in the hash table. The signal * pointer has bit 0 set to indicate that it is not * yet fully initialized. posix_timer_hashed() * masks this bit out, but the syscall lookup fails * to match due to it being set. This guarantees * that there can't be duplicate timer IDs handed * out.
*/
timer->it_id = (timer_t)id;
timer->it_signal = (struct signal_struct *)((unsignedlong)sig | 1UL);
hlist_add_head_rcu(&timer->t_hash, &bucket->head); returntrue;
}
} returnfalse;
}
if (unlikely(req_id != TIMER_ANY_ID)) { if (!posix_timer_add_at(timer, sig, req_id)) return -EBUSY;
/* * Move the ID counter past the requested ID, so that after * switching back to normal mode the IDs are outside of the * exact allocated region. That avoids ID collisions on the * next regular timer_create() invocations.
*/
atomic_set(&sig->next_posix_timer_id, req_id + 1); return req_id;
}
for (unsignedint cnt = 0; cnt <= INT_MAX; cnt++) { /* Get the next timer ID and clamp it to positive space */ unsignedint id = atomic_fetch_inc(&sig->next_posix_timer_id) & INT_MAX;
if (posix_timer_add_at(timer, sig, id)) return id;
cond_resched();
} /* POSIX return code when no timer ID could be allocated */ return -EAGAIN;
}
/*
 * The siginfo si_overrun field and the return value of timer_getoverrun(2)
 * are of type int. Clamp the overrun value to INT_MAX.
 */
static inline int timer_overrun_to_int(struct k_itimer *timr)
{
	if (timr->it_overrun_last > (s64)INT_MAX)
		return INT_MAX;
	else
		return (int)timr->it_overrun_last;
}
/* * Check if the timer is still alive or whether it got modified * since the signal was queued. In either case, don't rearm and * drop the signal.
*/ if (timr->it_signal_seq != timr->it_sigqueue_seq || WARN_ON_ONCE(!posixtimer_valid(timr))) returnfalse;
if (!timr->it_interval || WARN_ON_ONCE(timr->it_status != POSIX_TIMER_REQUEUE_PENDING)) returntrue;
/* * This function is called from the signal delivery code. It decides * whether the signal should be dropped and rearms interval timers. The * timer can be unconditionally accessed as there is a reference held on * it.
*/ bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq)
{ struct k_itimer *timr = container_of(timer_sigq, struct k_itimer, sigq); bool ret;
/* * Release siglock to ensure proper locking order versus * timr::it_lock. Keep interrupts disabled.
*/
spin_unlock(¤t->sighand->siglock);
ret = __posixtimer_deliver_signal(info, timr);
/* Drop the reference which was acquired when the signal was queued */
posixtimer_putref(timr);
/* * This function gets called when a POSIX.1b interval timer expires from * the HRTIMER interrupt (soft interrupt on RT kernels). * * Handles CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME and CLOCK_TAI * based timers.
*/ staticenum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
{ struct k_itimer *timr = container_of(timer, struct k_itimer, it.real.timer);
if (!kc) return -EINVAL; if (!kc->timer_create) return -EOPNOTSUPP;
/* Special case for CRIU to restore timers with a given timer ID. */ if (unlikely(current->signal->timer_create_restore_ids)) { if (copy_from_user(&req_id, created_timer_id, sizeof(req_id))) return -EFAULT; /* Valid IDs are 0..INT_MAX */ if ((unsignedint)req_id > INT_MAX) return -EINVAL;
}
new_timer = alloc_posix_timer(); if (unlikely(!new_timer)) return -EAGAIN;
spin_lock_init(&new_timer->it_lock);
/* * Add the timer to the hash table. The timer is not yet valid * after insertion, but has a unique ID allocated.
*/
new_timer_id = posix_timer_add(new_timer, req_id); if (new_timer_id < 0) {
posixtimer_free_timer(new_timer); return new_timer_id;
}
if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) {
error = -EFAULT; goto out;
} /* * After succesful copy out, the timer ID is visible to user space * now but not yet valid because new_timer::signal low order bit is 1. * * Complete the initialization with the clock specific create * callback.
*/
error = kc->timer_create(new_timer); if (error) goto out;
/* * timer::it_lock ensures that __lock_timer() observes a fully * initialized timer when it observes a valid timer::it_signal. * * sighand::siglock is required to protect signal::posix_timers.
*/
scoped_guard (spinlock_irq, &new_timer->it_lock) {
guard(spinlock)(¤t->sighand->siglock); /* * new_timer::it_signal contains the signal pointer with * bit 0 set, which makes it invalid for syscall operations. * Store the unmodified signal pointer to make it valid.
*/
WRITE_ONCE(new_timer->it_signal, current->signal);
hlist_add_head_rcu(&new_timer->list, ¤t->signal->posix_timers);
} /* * After unlocking @new_timer is subject to concurrent removal and * cannot be touched anymore
*/ return 0;
out:
posix_timer_unhash_and_free(new_timer); return error;
}
/* * timer_t could be any type >= int and we want to make sure any * @timer_id outside positive int range fails lookup.
*/ if ((unsignedlonglong)timer_id > INT_MAX) return NULL;
/* * The hash lookup and the timers are RCU protected. * * Timers are added to the hash in invalid state where * timr::it_signal is marked invalid. timer::it_signal is only set * after the rest of the initialization succeeded. * * Timer destruction happens in steps: * 1) Set timr::it_signal marked invalid with timr::it_lock held * 2) Release timr::it_lock * 3) Remove from the hash under hash_lock * 4) Put the reference count. * * The reference count might not drop to zero if timr::sigq is * queued. In that case the signal delivery or flush will put the * last reference count. * * When the reference count reaches zero, the timer is scheduled * for RCU removal after the grace period. * * Holding rcu_read_lock() across the lookup ensures that * the timer cannot be freed. * * The lookup validates locklessly that timr::it_signal == * current::it_signal and timr::it_id == @timer_id. timr::it_id * can't change, but timr::it_signal can become invalid during * destruction, which makes the locked check fail.
*/
guard(rcu)();
timr = posix_timer_by_id(timer_id); if (timr) {
spin_lock_irq(&timr->it_lock); /* * Validate under timr::it_lock that timr::it_signal is * still valid. Pairs with #1 above.
*/ if (timr->it_signal == current->signal) return timr;
spin_unlock_irq(&timr->it_lock);
} return NULL;
}
/* * Get the time remaining on a POSIX.1b interval timer. * * Two issues to handle here: * * 1) The timer has a requeue pending. The return value must appear as * if the timer has been requeued right now. * * 2) The timer is a SIGEV_NONE timer. These timers are never enqueued * into the hrtimer queue and therefore never expired. Emulate expiry * here taking #1 into account.
*/ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
{ conststruct k_clock *kc = timr->kclock;
ktime_t now, remaining, iv; bool sig_none;
sig_none = timr->it_sigev_notify == SIGEV_NONE;
iv = timr->it_interval;
/* interval timer ? */ if (iv) {
cur_setting->it_interval = ktime_to_timespec64(iv);
} elseif (timr->it_status == POSIX_TIMER_DISARMED) { /* * SIGEV_NONE oneshot timers are never queued and therefore * timr->it_status is always DISARMED. The check below * vs. remaining time will handle this case. * * For all other timers there is nothing to update here, so * return.
*/ if (!sig_none) return;
}
now = kc->clock_get_ktime(timr->it_clock);
/* * If this is an interval timer and either has requeue pending or * is a SIGEV_NONE timer move the expiry time forward by intervals, * so expiry is > now.
*/ if (iv && timr->it_status != POSIX_TIMER_ARMED)
timr->it_overrun += kc->timer_forward(timr, now);
remaining = kc->timer_remaining(timr, now); /* * As @now is retrieved before a possible timer_forward() and * cannot be reevaluated by the compiler @remaining is based on the * same @now value. Therefore @remaining is consistent vs. @now. * * Consequently all interval timers, i.e. @iv > 0, cannot have a * remaining time <= 0 because timer_forward() guarantees to move * them forward so that the next timer expiry is > @now.
*/ if (remaining <= 0) { /* * A single shot SIGEV_NONE timer must return 0, when it is * expired! Timers which have a real signal delivery mode * must return a remaining time greater than 0 because the * signal has not yet been delivered.
*/ if (!sig_none)
cur_setting->it_value.tv_nsec = 1;
} else {
cur_setting->it_value = ktime_to_timespec64(remaining);
}
}
/* Get the time remaining on a POSIX.1b interval timer. */
SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
		struct __kernel_itimerspec __user *, setting)
{
	struct itimerspec64 cur_setting;
	int ret = do_timer_gettime(timer_id, &cur_setting);

	/* Copy the result to user space only when the lookup succeeded */
	if (!ret && put_itimerspec64(&cur_setting, setting))
		ret = -EFAULT;

	return ret;
}
int ret = do_timer_gettime(timer_id, &cur_setting); if (!ret) { if (put_old_itimerspec32(&cur_setting, setting))
ret = -EFAULT;
} return ret;
}
#endif
/**
 * sys_timer_getoverrun - Get the number of overruns of a POSIX.1b interval timer
 * @timer_id:	The timer ID which identifies the timer
 *
 * The "overrun count" of a timer is one plus the number of expiration
 * intervals which have elapsed between the first expiry, which queues the
 * signal and the actual signal delivery. On signal delivery the "overrun
 * count" is calculated and cached, so it can be returned directly here.
 *
 * As this is relative to the last queued signal the returned overrun count
 * is meaningless outside of the signal delivery path and even there it
 * does not accurately reflect the current state when user space evaluates
 * it.
 *
 * Returns:
 *	-EINVAL		@timer_id is invalid
 *	1..INT_MAX	The number of overruns related to the last delivered signal
 */
SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
{
	/* Looks the timer up and returns -EINVAL when the lookup fails */
	scoped_timer_get_or_fail(timer_id)
		return timer_overrun_to_int(scoped_timer);
}
mode = absolute ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL; /* * Posix magic: Relative CLOCK_REALTIME timers are not affected by * clock modifications, so they become CLOCK_MONOTONIC based under the * hood. See hrtimer_setup(). Update timr->kclock, so the generic * functions which use timr->kclock->clock_get_*() work. * * Note: it_clock stays unmodified, because the next timer_set() might * use ABSTIME, so it needs to switch back.
*/ if (timr->it_clock == CLOCK_REALTIME)
timr->kclock = absolute ? &clock_realtime : &clock_monotonic;
/*
 * On PREEMPT_RT this prevents priority inversion and a potential livelock
 * against the ksoftirqd thread in case that ksoftirqd gets preempted while
 * executing a hrtimer callback.
 *
 * See the comments in hrtimer_cancel_wait_running(). For PREEMPT_RT=n this
 * just results in a cpu_relax().
 *
 * For POSIX CPU timers with CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n this is
 * just a cpu_relax(). With CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y this
 * prevents spinning on an eventually scheduled out task and a livelock
 * when the task which tries to delete or disarm the timer has preempted
 * the task which runs the expiry in task work context.
 */
static void timer_wait_running(struct k_itimer *timer)
{
	/*
	 * kc->timer_wait_running() might drop RCU lock. So @timer
	 * cannot be touched anymore after the function returns!
	 */
	timer->kclock->timer_wait_running(timer);
}
/*
 * Set up the new interval and reset the signal delivery data
 */
void posix_timer_set_common(struct k_itimer *timer, struct itimerspec64 *new_setting)
{
	/* A zero it_value disarms the timer, so the interval is irrelevant then */
	if (new_setting->it_value.tv_sec || new_setting->it_value.tv_nsec)
		timer->it_interval = timespec64_to_ktime(new_setting->it_interval);
	else
		timer->it_interval = 0;

	/* Reset overrun accounting */
	timer->it_overrun_last = 0;
	timer->it_overrun = -1LL;
}
/* Set a POSIX.1b interval timer. */ int common_timer_set(struct k_itimer *timr, int flags, struct itimerspec64 *new_setting, struct itimerspec64 *old_setting)
{ conststruct k_clock *kc = timr->kclock; bool sigev_none;
ktime_t expires;
if (old_setting)
common_timer_get(timr, old_setting);
/* * Careful here. On SMP systems the timer expiry function could be * active and spinning on timr->it_lock.
*/ if (kc->timer_try_to_cancel(timr) < 0) return TIMER_RETRY;
/*
 * If the deleted timer is on the ignored list, remove it and
 * drop the associated reference.
 */
static inline void posix_timer_cleanup_ignored(struct k_itimer *tmr)
{
	if (!hlist_unhashed(&tmr->ignored_list)) {
		hlist_del_init(&tmr->ignored_list);
		posixtimer_putref(tmr);
	}
}
staticvoid posix_timer_delete(struct k_itimer *timer)
{ /* * Invalidate the timer, remove it from the linked list and remove * it from the ignored list if pending. * * The invalidation must be written with siglock held so that the * signal code observes the invalidated timer::it_signal in * do_sigaction(), which prevents it from moving a pending signal * of a deleted timer to the ignore list. * * The invalidation also prevents signal queueing, signal delivery * and therefore rearming from the signal delivery path. * * A concurrent lookup can still find the timer in the hash, but it * will check timer::it_signal with timer::it_lock held and observe * bit 0 set, which invalidates it. That also prevents the timer ID * from being handed out before this timer is completely gone.
*/
timer->it_signal_seq++;
scoped_guard (spinlock, ¤t->sighand->siglock) { unsignedlong sig = (unsignedlong)timer->it_signal | 1UL;
scoped_timer_get_or_fail(timer_id) {
timer = scoped_timer;
posix_timer_delete(timer);
} /* Remove it from the hash, which frees up the timer ID */
posix_timer_unhash_and_free(timer); return 0;
}
/* * Invoked from do_exit() when the last thread of a thread group exits. * At that point no other task can access the timers of the dying * task anymore.
*/ void exit_itimers(struct task_struct *tsk)
{ struct hlist_head timers; struct hlist_node *next; struct k_itimer *timer;
/* Clear restore mode for exec() */
tsk->signal->timer_create_restore_ids = 0;
if (hlist_empty(&tsk->signal->posix_timers)) return;
/* Protect against concurrent read via /proc/$PID/timers */
scoped_guard (spinlock_irq, &tsk->sighand->siglock)
hlist_move_list(&tsk->signal->posix_timers, &timers);
/* The timers are not longer accessible via tsk::signal */
hlist_for_each_entry_safe(timer, next, &timers, list) {
scoped_guard (spinlock_irq, &timer->it_lock)
posix_timer_delete(timer);
posix_timer_unhash_and_free(timer);
cond_resched();
}
/* * There should be no timers on the ignored list. itimer_delete() has * mopped them up.
*/ if (!WARN_ON_ONCE(!hlist_empty(&tsk->signal->ignored_posix_timers))) return;
if (copy_from_user(&ktx, utx, sizeof(ktx))) return -EFAULT;
err = do_clock_adjtime(which_clock, &ktx);
if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx))) return -EFAULT;
return err;
}
/**
 * sys_clock_getres - Get the resolution of a clock
 * @which_clock:	The clock to get the resolution for
 * @tp:			Pointer to a user space timespec64 for storage
 *
 * POSIX defines:
 *
 * "The clock_getres() function shall return the resolution of any
 * clock. Clock resolutions are implementation-defined and cannot be set by
 * a process. If the argument res is not NULL, the resolution of the
 * specified clock shall be stored in the location pointed to by res. If
 * res is NULL, the clock resolution is not returned. If the time argument
 * of clock_settime() is not a multiple of res, then the value is truncated
 * to a multiple of res."
 *
 * Due to the various hardware constraints the real resolution can vary
 * wildly and even change during runtime when the underlying devices are
 * replaced. The kernel also can use hardware devices with different
 * resolutions for reading the time and for arming timers.
 *
 * The kernel therefore deviates from the POSIX spec in various aspects:
 *
 * 1) The resolution returned to user space
 *
 *    For CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_TAI,
 *    CLOCK_REALTIME_ALARM, CLOCK_BOOTTIME_ALARM and CLOCK_MONOTONIC_RAW
 *    the kernel differentiates only two cases:
 *
 *    I) Low resolution mode:
 *
 *       When high resolution timers are disabled at compile or runtime
 *       the resolution returned is nanoseconds per tick, which represents
 *       the precision at which timers expire.
 *
 *    II) High resolution mode:
 *
 *       When high resolution timers are enabled the resolution returned
 *       is always one nanosecond independent of the actual resolution of
 *       the underlying hardware devices.
 *
 *       For CLOCK_*_ALARM the actual resolution depends on system
 *       state. When system is running the resolution is the same as the
 *       resolution of the other clocks. During suspend the actual
 *       resolution is the resolution of the underlying RTC device which
 *       might be way less precise than the clockevent device used during
 *       running state.
 *
 * For CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE the resolution
 * returned is always nanoseconds per tick.
 *
 * For CLOCK_PROCESS_CPUTIME and CLOCK_THREAD_CPUTIME the resolution
 * returned is always one nanosecond under the assumption that the
 * underlying scheduler clock has a better resolution than nanoseconds
 * per tick.
 *
 * For dynamic POSIX clocks (PTP devices) the resolution returned is
 * always one nanosecond.
 *
 * 2) Effect on sys_clock_settime()
 *
 *    The kernel does not truncate the time which is handed in to
 *    sys_clock_settime(). The kernel internal timekeeping is always using
 *    nanoseconds precision independent of the clocksource device which is
 *    used to read the time from. The resolution of that device only
 *    affects the precision of the time returned by sys_clock_gettime().
 *
 * Returns:
 *	0		Success. @tp contains the resolution
 *	-EINVAL		@which_clock is not a valid clock ID
 *	-EFAULT		Copying the resolution to @tp faulted
 *	-ENODEV		Dynamic POSIX clock is not backed by a device
 *	-EOPNOTSUPP	Dynamic POSIX clock does not support getres()
*/
SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
		struct __kernel_timespec __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 rtn_tp;
	int error;

	if (!kc)
		return -EINVAL;

	error = kc->clock_getres(which_clock, &rtn_tp);

	/* @tp may be NULL: POSIX allows querying without storing the result */
	if (!error && tp && put_timespec64(&rtn_tp, tp))
		error = -EFAULT;

	return error;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.