/**
 * enum timekeeping_adv_mode - Selects why the timekeeper is being advanced
 * @TK_ADV_TICK:	Update timekeeper when a tick has passed
 * @TK_ADV_FREQ:	Update timekeeper on a direct frequency change
 */
enum timekeeping_adv_mode {
	TK_ADV_TICK,
	TK_ADV_FREQ,
};
/*
 * Bundles the sequence counter, the timekeeper, its shadow copy and the
 * lock into one cache line aligned object.
 *
 * The most important data for readout fits into a single 64 byte
 * cache line.
 */
struct tk_data {
	seqcount_raw_spinlock_t	seq;
	struct timekeeper	timekeeper;
	struct timekeeper	shadow_timekeeper;
	raw_spinlock_t		lock;
} ____cacheline_aligned;
/* Flag indicating whether timekeeping is suspended */
int __read_mostly timekeeping_suspended;
/**
 * struct tk_fast - NMI safe timekeeper
 * @seq:	Sequence counter for protecting updates. Its lowest bit
 *		selects the entry of the tk_read_base array.
 * @base:	tk_read_base array with two entries. Access is indexed by
 *		the lowest bit of @seq.
 *
 * See @update_fast_timekeeper() below.
 */
struct tk_fast {
	seqcount_latch_t	seq;
	struct tk_read_base	base[2];
};
/* Cycles value taken at suspend time for the halted fast timekeeper. */
static u64 cycles_at_suspend;
/*
 * Boot time initializer for the fast timekeepers. It allows local_clock()
 * to be utilized during early boot when clocksources are not available.
 * local_clock() returns nanoseconds already so no conversion is required,
 * hence mult=1 and shift=0. When the first proper clocksource is installed
 * then the fast time keepers are updated with the correct values.
 */
#define FAST_TK_INIT						\
	{							\
		.clock		= &dummy_clock,			\
		.mask		= CLOCKSOURCE_MASK(64),		\
		.mult		= 1,				\
		.shift		= 0,				\
	}
/*
 * Multigrain timestamps require tracking the latest fine-grained timestamp
 * that has been issued, and never returning a coarse-grained timestamp that is
 * earlier than that value.
 *
 * mg_floor represents the latest fine-grained time that has been handed out as
 * a file timestamp on the system. This is tracked as a monotonic ktime_t, and
 * converted to a realtime clock value on an as-needed basis.
 *
 * Maintaining mg_floor ensures the multigrain interfaces never issue a
 * timestamp earlier than one that has been previously issued.
 *
 * The exception to this rule is when there is a backward realtime clock jump.
 * If such an event occurs, a timestamp can appear to be earlier than a
 * previous one.
 */
static __cacheline_aligned_in_smp atomic64_t mg_floor;
/*
 * Update the nanoseconds part for the coarse time keepers. They can't rely
 * on xtime_nsec because xtime_nsec could be adjusted by a small negative
 * amount when the multiplication factor of the clock is adjusted, which
 * could cause the coarse clocks to go slightly backwards. See
 * timekeeping_apply_adjustment(). Thus we keep a separate copy for the coarse
 * clockids which only is updated when the clock has been set or we have
 * accumulated time.
 *
 * @tk: timekeeper whose coarse_nsec is refreshed from tkr_mono
 */
static inline void tk_update_coarse_nsecs(struct timekeeper *tk)
{
	/* xtime_nsec is stored shifted; scale it back down by tkr_mono.shift */
	tk->coarse_nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
}
/*
 * tk_update_sleep_time - add a sleep interval to the boot offset
 * @tk:		timekeeper to update
 * @delta:	interval which is added to tk->offs_boot
 *
 * Also refreshes tk->monotonic_to_boot, the timespec64 form of the new
 * boot offset.
 */
static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
{
	/* Paired with READ_ONCE() in ktime_mono_to_any() */
	WRITE_ONCE(tk->offs_boot, ktime_add(tk->offs_boot, delta));
	/*
	 * Timespec representation for VDSO update to avoid 64bit division
	 * on every update.
	 */
	tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
}
/* * tk_clock_read - atomic clocksource read() helper * * This helper is necessary to use in the read paths because, while the * seqcount ensures we don't return a bad value while structures are updated, * it doesn't protect from potential crashes. There is the possibility that * the tkr's clocksource may change between the read reference, and the * clock reference passed to the read function. This can cause crashes if * the wrong clocksource is passed to the wrong read function. * This isn't necessary to use when holding the tk_core.lock or doing * a read of the fast-timekeeper tkrs (which is protected by its own locking * and update logic).
*/ staticinline u64 tk_clock_read(conststruct tk_read_base *tkr)
{ struct clocksource *clock = READ_ONCE(tkr->clock);
return clock->read(clock);
}
/**
 * tk_setup_internals - Set up internals to use clocksource clock.
 *
 * @tk:		The target timekeeper to setup.
 * @clock:		Pointer to clocksource.
 *
 * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
 * pair and interval request.
 *
 * Unless you're the timekeeping code, you should not be using this!
 */
static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
{
	/*
	 * NOTE(review): this body only seeds the mult values and resets the
	 * NTP related state. The interval calculation described in the
	 * header comment is not present here - confirm against the complete
	 * function before relying on it. The locals which were declared but
	 * never used have been dropped.
	 */

	/*
	 * The timekeeper keeps its own mult values for the currently
	 * active clocksource. These value will be adjusted via NTP
	 * to counteract clock drifting.
	 */
	tk->tkr_mono.mult = clock->mult;
	tk->tkr_raw.mult = clock->mult;
	tk->ntp_err_mult = 0;
	tk->skip_second_overflow = 0;
}
staticinline u64 timekeeping_cycles_to_ns(conststruct tk_read_base *tkr, u64 cycles)
{ /* Calculate the delta since the last update_wall_time() */
u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask;
/* * This detects both negative motion and the case where the delta * overflows the multiplication with tkr->mult.
*/ if (unlikely(delta > tkr->clock->max_cycles)) { /* * Handle clocksource inconsistency between CPUs to prevent * time from going backwards by checking for the MSB of the * mask being set in the delta.
*/ if (delta & ~(mask >> 1)) return tkr->xtime_nsec >> tkr->shift;
/**
 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
 * @tkr: Timekeeping readout base from which we take the update
 * @tkf: Pointer to NMI safe timekeeper
 *
 * We want to use this from any context including NMI and tracing /
 * instrumenting the timekeeping code itself.
 *
 * Employ the latch technique; see @write_seqcount_latch.
 *
 * So if a NMI hits the update of base[0] then it will use base[1]
 * which is still consistent. In the worst case this can result in a
 * slightly wrong timestamp (a few nanoseconds). See
 * @ktime_get_mono_fast_ns.
*/ staticvoid update_fast_timekeeper(conststruct tk_read_base *tkr, struct tk_fast *tkf)
{ struct tk_read_base *base = tkf->base;
/* Force readers off to base[1] */
write_seqcount_latch_begin(&tkf->seq);
do {
seq = read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base);
now += timekeeping_get_ns(tkr);
} while (read_seqcount_latch_retry(&tkf->seq, seq));
return now;
}
/**
 * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
 *
 * This timestamp is not guaranteed to be monotonic across an update.
 * The timestamp is calculated by:
 *
 *	now = base_mono + clock_delta * slope
 *
 * So if the update lowers the slope, readers who are forced to the
 * not yet updated second array are still using the old steeper slope.
 *
 *         tmono
 *         ^
 *         |    o  n
 *         |   o n
 *         |  u
 *         | o
 *         |o
 *         |12345678---> reader order
 *
 * o = old slope
 * u = update
 * n = new slope
 *
 * So reader 6 will observe time going backwards versus reader 5.
 *
 * While other CPUs are likely to be able to observe that, the only way
 * for a CPU local observation is when an NMI hits in the middle of
 * the update. Timestamps taken from that NMI context might be ahead
 * of the following timestamps. Callers need to be aware of that and
 * deal with it.
 *
 * Return: the value of __ktime_get_fast_ns() for the monotonic fast
 * timekeeper.
 */
u64 notrace ktime_get_mono_fast_ns(void)
{
	u64 now = __ktime_get_fast_ns(&tk_fast_mono);

	return now;
}
EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
/**
 * ktime_get_raw_fast_ns - Fast NMI safe access to clock monotonic raw
 *
 * Contrary to ktime_get_mono_fast_ns() this is always correct because the
 * conversion factor is not affected by NTP/PTP correction.
 *
 * Return: the value of __ktime_get_fast_ns() for the raw fast timekeeper.
 */
u64 notrace ktime_get_raw_fast_ns(void)
{
	u64 now = __ktime_get_fast_ns(&tk_fast_raw);

	return now;
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
/**
 * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
 *
 * To keep it NMI safe since we're accessing from tracing, we're not using a
 * separate timekeeper with updates to monotonic clock and boot offset
 * protected with seqcounts. This has the following minor side effects:
 *
 * (1) It's possible that a timestamp is taken after the boot offset is
 *     updated but before the timekeeper is updated. If this happens, the new
 *     boot offset is added to the old timekeeping making the clock appear to
 *     update slightly earlier:
 *
 *    CPU 0                                        CPU 1
 *    timekeeping_inject_sleeptime64()
 *    __timekeeping_inject_sleeptime(tk, delta);
 *                                                 timestamp();
 *    timekeeping_update_staged(tkd, TK_CLEAR_NTP...);
 *
 * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
 *     partially updated. Since the tk->offs_boot update is a rare event, this
 *     should be a rare occurrence which postprocessing should be able to
 *     handle.
 *
 * The caveats vs. timestamp ordering as documented for ktime_get_mono_fast_ns()
 * apply as well.
*/
u64 notrace ktime_get_boot_fast_ns(void)
{ struct timekeeper *tk = &tk_core.timekeeper;
/**
 * ktime_get_tai_fast_ns - NMI safe and fast access to tai clock.
 *
 * The same limitations as described for ktime_get_boot_fast_ns() apply. The
 * mono time and the TAI offset are not read atomically which may yield wrong
 * readouts. However, an update of the TAI offset is a rare event e.g., caused
 * by settime or adjtimex with an offset. The user of this function has to deal
 * with the possibility of wrong timestamps in post processing.
*/
u64 notrace ktime_get_tai_fast_ns(void)
{ struct timekeeper *tk = &tk_core.timekeeper;
/** * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime. * * See ktime_get_mono_fast_ns() for documentation of the time stamp ordering.
*/
u64 ktime_get_real_fast_ns(void)
{ struct tk_fast *tkf = &tk_fast_mono; struct tk_read_base *tkr;
u64 baser, delta; unsignedint seq;
/** * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. * @tk: Timekeeper to snapshot. * * It generally is unsafe to access the clocksource after timekeeping has been * suspended, so take a snapshot of the readout base of @tk and use it as the * fast timekeeper's readout base while suspended. It will return the same * number of cycles every time until timekeeping is resumed at which time the * proper readout base for the fast timekeeper will be restored automatically.
*/ staticvoid halt_fast_timekeeper(conststruct timekeeper *tk)
{ staticstruct tk_read_base tkr_dummy; conststruct tk_read_base *tkr = &tk->tkr_mono;
/**
 * pvclock_gtod_unregister_notifier - unregister a pvclock
 * timedata update listener
 * @nb: Pointer to the notifier block to unregister
 *
 * The removal from pvclock_gtod_chain is done with tk_core.lock held.
 *
 * Return: the result of raw_notifier_chain_unregister().
 */
int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
{
	int ret;

	guard(raw_spinlock_irqsave)(&tk_core.lock);
	ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
	return ret;
}
EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
/*
 * tk_update_leap_state - helper to update the next_leap_ktime
 * @tk: timekeeper whose next_leap_ktime is refreshed
 *
 * Fetches the next leap time via ntp_get_next_leap() and, unless it is
 * KTIME_MAX, rebases it by subtracting tk->offs_real.
 */
static inline void tk_update_leap_state(struct timekeeper *tk)
{
	tk->next_leap_ktime = ntp_get_next_leap(tk->id);
	if (tk->next_leap_ktime != KTIME_MAX)
		/* Convert to monotonic time */
		tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
}
/*
 * Leap state update for both shadow and the real timekeeper
 * Separate to spare a full memcpy() of the timekeeper.
 *
 * @tkd: timekeeper data whose shadow and real timekeeper are updated
 *
 * The shadow timekeeper is updated first and its next_leap_ktime is then
 * copied to the real timekeeper, all inside a tkd->seq write section.
 */
static void tk_update_leap_state_all(struct tk_data *tkd)
{
	write_seqcount_begin(&tkd->seq);
	tk_update_leap_state(&tkd->shadow_timekeeper);
	tkd->timekeeper.next_leap_ktime = tkd->shadow_timekeeper.next_leap_ktime;
	write_seqcount_end(&tkd->seq);
}
/* * Update the ktime_t based scalar nsec members of the timekeeper
*/ staticinlinevoid tk_update_ktime_data(struct timekeeper *tk)
{
u64 seconds;
u32 nsec;
/* * The sum of the nanoseconds portions of xtime and * wall_to_monotonic can be greater/equal one second. Take * this into account before updating tk->ktime_sec.
*/
nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); if (nsec >= NSEC_PER_SEC)
seconds++;
tk->ktime_sec = seconds;
/* Update the monotonic raw base */
tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
}
/*
 * Restore the shadow timekeeper from the real timekeeper.
 *
 * @tkd: timekeeper data; tkd->lock must be held by the caller
 */
static void timekeeping_restore_shadow(struct tk_data *tkd)
{
	lockdep_assert_held(&tkd->lock);
	memcpy(&tkd->shadow_timekeeper, &tkd->timekeeper, sizeof(tkd->timekeeper));
}
/* * Block out readers before running the updates below because that * updates VDSO and other time related infrastructure. Not blocking * the readers might let a reader see time going backwards when * reading from the VDSO after the VDSO update and then reading in * the kernel from the timekeeper before that got updated.
*/
write_seqcount_begin(&tkd->seq);
if (action & TK_CLEAR_NTP) {
tk->ntp_error = 0;
ntp_clear(tk->id);
}
if (action & TK_CLOCK_WAS_SET)
tk->clock_was_set_seq++;
/*
 * Update the real timekeeper.
 *
 * We could avoid this memcpy() by switching pointers, but that has
 * the downside that the reader side no longer benefits from
 * the cacheline optimized data layout of the timekeeper and requires
 * another indirection.
*/
memcpy(&tkd->timekeeper, tk, sizeof(*tk));
write_seqcount_end(&tkd->seq);
}
/** * timekeeping_forward_now - update clock to the current time * @tk: Pointer to the timekeeper to update * * Forward the current clock to update its state since the last call to * update_wall_time(). This is useful before significant clock changes, * as it avoids having to deal with this time offset explicitly.
*/ staticvoid timekeeping_forward_now(struct timekeeper *tk)
{
u64 cycle_now, delta;
/** * ktime_get_real_ts64 - Returns the time of day in a timespec64. * @ts: pointer to the timespec to be set * * Returns the time of day in a timespec64 (WARN if suspended).
*/ void ktime_get_real_ts64(struct timespec64 *ts)
{ struct timekeeper *tk = &tk_core.timekeeper; unsignedint seq;
u64 nsecs;
/** * ktime_mono_to_any() - convert monotonic time to any other time * @tmono: time to convert. * @offs: which offset to use
*/
ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
{
ktime_t *offset = offsets[offs]; unsignedint seq;
ktime_t tconv;
if (IS_ENABLED(CONFIG_64BIT)) { /* * Paired with WRITE_ONCE()s in tk_set_wall_to_mono() and * tk_update_sleep_time().
*/ return ktime_add(tmono, READ_ONCE(*offset));
}
do {
seq = read_seqcount_begin(&tk_core.seq);
tconv = ktime_add(tmono, *offset);
} while (read_seqcount_retry(&tk_core.seq, seq));
/** * ktime_get_ts64 - get the monotonic clock in timespec64 format * @ts: pointer to timespec variable * * The function calculates the monotonic clock from the realtime * clock and the wall_to_monotonic offset and stores the result * in normalized timespec64 format in the variable pointed to by @ts.
*/ void ktime_get_ts64(struct timespec64 *ts)
{ struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 tomono; unsignedint seq;
u64 nsec;
/** * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC * * Returns the seconds portion of CLOCK_MONOTONIC with a single non * serialized read. tk->ktime_sec is of type 'unsigned long' so this * works on both 32 and 64 bit systems. On 32 bit systems the readout * covers ~136 years of uptime which should be enough to prevent * premature wrap arounds.
*/
time64_t ktime_get_seconds(void)
{ struct timekeeper *tk = &tk_core.timekeeper;
/** * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME * * Returns the wall clock seconds since 1970. * * For 64bit systems the fast access to tk->xtime_sec is preserved. On * 32bit systems the access must be protected with the sequence * counter to provide "atomic" access to the 64bit tk->xtime_sec * value.
*/
time64_t ktime_get_real_seconds(void)
{ struct timekeeper *tk = &tk_core.timekeeper;
time64_t seconds; unsignedint seq;
if (IS_ENABLED(CONFIG_64BIT)) return tk->xtime_sec;
do {
seq = read_seqcount_begin(&tk_core.seq);
seconds = tk->xtime_sec;
/**
 * __ktime_get_real_seconds - Unprotected access to CLOCK_REALTIME seconds
 *
 * The same as ktime_get_real_seconds() but without the sequence counter
 * protection. This function is used in restricted contexts like the x86 MCE
 * handler and in KGDB. It's unprotected on 32-bit vs. concurrent half
 * completed modification and only to be used for such critical contexts.
 *
 * Returns: Racy snapshot of the CLOCK_REALTIME seconds value
 */
noinstr time64_t __ktime_get_real_seconds(void)
{
	/* Plain read of the core timekeeper, no seqcount loop on purpose */
	return tk_core.timekeeper.xtime_sec;
}
/** * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter * @systime_snapshot: pointer to struct receiving the system time snapshot
*/ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
{ struct timekeeper *tk = &tk_core.timekeeper; unsignedint seq;
ktime_t base_raw;
ktime_t base_real;
ktime_t base_boot;
u64 nsec_raw;
u64 nsec_real;
u64 now;
/**
 * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
 * @history:			Snapshot representing start of history
 * @partial_history_cycles:	Cycle offset into history (fractional part)
 * @total_history_cycles:	Total history length in cycles
 * @discontinuity:		True indicates clock was set on history period
 * @ts:				Cross timestamp that should be adjusted using
 *				partial/total ratio
 *
 * Helper function used by get_device_system_crosststamp() to correct the
 * crosstimestamp corresponding to the start of the current interval to the
 * system counter value (timestamp point) provided by the driver. The
 * total_history_* quantities are the total history starting at the provided
 * reference point and ending at the start of the current interval. The cycle
 * count between the driver timestamp point and the start of the current
 * interval is partial_history_cycles.
 *
 * Return: 0 on success (including the case where either cycle count is 0
 * and nothing is adjusted), otherwise the non-zero value returned by
 * scale64_check_overflow().
 */
static int adjust_historical_crosststamp(struct system_time_snapshot *history,
					 u64 partial_history_cycles,
					 u64 total_history_cycles,
					 bool discontinuity,
					 struct system_device_crosststamp *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	u64 corr_raw, corr_real;
	bool interp_forward;
	int ret;

	if (total_history_cycles == 0 || partial_history_cycles == 0)
		return 0;

	/* Interpolate shortest distance from beginning or end of history */
	interp_forward = partial_history_cycles > total_history_cycles / 2;
	partial_history_cycles = interp_forward ?
		total_history_cycles - partial_history_cycles :
		partial_history_cycles;

	/*
	 * Scale the monotonic raw time delta by:
	 *	partial_history_cycles / total_history_cycles
	 */
	corr_raw = (u64)ktime_to_ns(
		ktime_sub(ts->sys_monoraw, history->raw));
	ret = scale64_check_overflow(partial_history_cycles,
				     total_history_cycles, &corr_raw);
	if (ret)
		return ret;

	/*
	 * If there is a discontinuity in the history, scale monotonic raw
	 *	correction by:
	 *	mult(real)/mult(raw) yielding the realtime correction
	 * Otherwise, calculate the realtime correction similar to monotonic
	 *	raw calculation
	 */
	if (discontinuity) {
		corr_real = mul_u64_u32_div
			(corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult);
	} else {
		corr_real = (u64)ktime_to_ns(
			ktime_sub(ts->sys_realtime, history->real));
		ret = scale64_check_overflow(partial_history_cycles,
					     total_history_cycles, &corr_real);
		if (ret)
			return ret;
	}

	/* Fixup monotonic raw and real time time values */
	if (interp_forward) {
		ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw);
		ts->sys_realtime = ktime_add_ns(history->real, corr_real);
	} else {
		ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw);
		ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real);
	}

	return 0;
}
/* * timestamp_in_interval - true if ts is chronologically in [start, end] * * True if ts occurs chronologically at or after start, and before or at end.
*/ staticbool timestamp_in_interval(u64 start, u64 end, u64 ts)
{ if (ts >= start && ts <= end) returntrue; if (start > end && (ts >= start || ts <= end)) returntrue; returnfalse;
}
/* The timestamp was taken from the time keeper clock source */ if (cs->id == scv->cs_id) returntrue;
/* * Check whether cs_id matches the base clock. Prevent the compiler from * re-evaluating @base as the clocksource might change concurrently.
*/
base = READ_ONCE(cs->base); if (!base || base->id != scv->cs_id) returnfalse;
num = scv->use_nsecs ? cs->freq_khz : base->numerator;
den = scv->use_nsecs ? USEC_PER_SEC : base->denominator;
if (!convert_clock(&scv->cycles, num, den)) returnfalse;
/* * Check whether base_id matches the base clock. Prevent the compiler from * re-evaluating @base as the clocksource might change concurrently.
*/
base = READ_ONCE(cs->base); if (!base || base->id != base_id) returnfalse;
*cycles -= base->offset; if (!convert_clock(cycles, base->denominator, base->numerator)) returnfalse; returntrue;
}
/** * ktime_real_to_base_clock() - Convert CLOCK_REALTIME timestamp to a base clock timestamp * @treal: CLOCK_REALTIME timestamp to convert * @base_id: base clocksource id * @cycles: pointer to store the converted base clock timestamp * * Converts a supplied, future realtime clock value to the corresponding base clock value. * * Return: true if the conversion is successful, false otherwise.
*/ bool ktime_real_to_base_clock(ktime_t treal, enum clocksource_ids base_id, u64 *cycles)
{ struct timekeeper *tk = &tk_core.timekeeper; unsignedint seq;
u64 delta;
do {
seq = read_seqcount_begin(&tk_core.seq); if ((u64)treal < tk->tkr_mono.base_real) returnfalse;
delta = (u64)treal - tk->tkr_mono.base_real; if (!convert_ns_to_cs(&delta)) returnfalse;
*cycles = tk->tkr_mono.cycle_last + delta; if (!convert_cs_to_base(cycles, base_id)) returnfalse;
} while (read_seqcount_retry(&tk_core.seq, seq));
/** * get_device_system_crosststamp - Synchronously capture system/device timestamp * @get_time_fn: Callback to get simultaneous device time and * system counter from the device driver * @ctx: Context passed to get_time_fn() * @history_begin: Historical reference point used to interpolate system * time when counter provided by the driver is before the current interval * @xtstamp: Receives simultaneously captured system and device time * * Reads a timestamp from a device and correlates it to system time
*/ int get_device_system_crosststamp(int (*get_time_fn)
(ktime_t *device_time, struct system_counterval_t *sys_counterval, void *ctx), void *ctx, struct system_time_snapshot *history_begin, struct system_device_crosststamp *xtstamp)
{ struct system_counterval_t system_counterval = {}; struct timekeeper *tk = &tk_core.timekeeper;
u64 cycles, now, interval_start; unsignedint clock_was_set_seq = 0;
ktime_t base_real, base_raw;
u64 nsec_real, nsec_raw;
u8 cs_was_changed_seq; unsignedint seq; bool do_interp; int ret;
do {
seq = read_seqcount_begin(&tk_core.seq); /* * Try to synchronously capture device time and a system * counter value calling back into the device driver
*/
ret = get_time_fn(&xtstamp->device, &system_counterval, ctx); if (ret) return ret;
/* * Verify that the clocksource ID associated with the captured * system counter value is the same as for the currently * installed timekeeper clocksource
*/ if (system_counterval.cs_id == CSID_GENERIC ||
!convert_base_to_cs(&system_counterval)) return -ENODEV;
cycles = system_counterval.cycles;
/* * Check whether the system counter value provided by the * device driver is on the current timekeeping interval.
*/
now = tk_clock_read(&tk->tkr_mono);
interval_start = tk->tkr_mono.cycle_last; if (!timestamp_in_interval(interval_start, now, cycles)) {
clock_was_set_seq = tk->clock_was_set_seq;
cs_was_changed_seq = tk->cs_was_changed_seq;
cycles = interval_start;
do_interp = true;
} else {
do_interp = false;
}
/* * Interpolate if necessary, adjusting back from the start of the * current interval
*/ if (do_interp) {
u64 partial_history_cycles, total_history_cycles; bool discontinuity;
/* * Check that the counter value is not before the provided * history reference and that the history doesn't cross a * clocksource change
*/ if (!history_begin ||
!timestamp_in_interval(history_begin->cycles,
cycles, system_counterval.cycles) ||
history_begin->cs_was_changed_seq != cs_was_changed_seq) return -EINVAL;
partial_history_cycles = cycles - system_counterval.cycles;
total_history_cycles = cycles - history_begin->cycles;
discontinuity =
history_begin->clock_was_set_seq != clock_was_set_seq;
ret = adjust_historical_crosststamp(history_begin,
partial_history_cycles,
total_history_cycles,
discontinuity, xtstamp); if (ret) return ret;
}
/**
 * timekeeping_clocksource_has_base - Check whether the current clocksource
 *				      is based on a given base clock
 * @id:		base clocksource ID
 *
 * Note:	The return value is a snapshot which can become invalid right
 *		after the function returns.
 *
 * Return:	true if the timekeeper clocksource has a base clock with @id,
 *		false otherwise
 */
bool timekeeping_clocksource_has_base(enum clocksource_ids id)
{
	/*
	 * This is a snapshot, so no point in using the sequence
	 * count. Just prevent the compiler from re-evaluating @base as the
	 * clocksource might change concurrently.
	 */
	struct clocksource_base *base = READ_ONCE(tk_core.timekeeper.tkr_mono.clock->base);

	/* No base clock at all: cannot match */
	if (!base)
		return false;

	return base->id == id;
}
EXPORT_SYMBOL_GPL(timekeeping_clocksource_has_base);
/** * do_settimeofday64 - Sets the time of day. * @ts: pointer to the timespec64 variable containing the new time * * Sets the time of day to the new time and update NTP and notify hrtimers
*/ int do_settimeofday64(conststruct timespec64 *ts)
{ struct timespec64 ts_delta, xt;
/** * __timekeeping_inject_offset - Adds or subtracts from the current time. * @tkd: Pointer to the timekeeper to modify * @ts: Pointer to the timespec variable containing the offset * * Adds or subtracts an offset value from the current time.
*/ staticint __timekeeping_inject_offset(struct tk_data *tkd, conststruct timespec64 *ts)
{ struct timekeeper *tks = &tkd->shadow_timekeeper; struct timespec64 tmp;
if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) return -EINVAL;
timekeeping_forward_now(tks);
if (timekeeper_is_core_tk(tks)) { /* Make sure the proposed value is valid */
tmp = timespec64_add(tk_xtime(tks), *ts); if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 ||
!timespec64_valid_settod(&tmp)) {
timekeeping_restore_shadow(tkd); return -EINVAL;
}
/* Get the current time */
now = ktime_add_ns(tkr_mono->base, timekeeping_get_ns(tkr_mono)); /* Add the relative offset change */
offs = ktime_add(tks->offs_aux, timespec64_to_ktime(*ts));
/* Prevent that the resulting time becomes negative */ if (ktime_add(now, offs) < 0) {
timekeeping_restore_shadow(tkd); return -EINVAL;
}
tk_update_aux_offs(tks, offs);
}
staticint timekeeping_inject_offset(conststruct timespec64 *ts)
{ int ret;
scoped_guard (raw_spinlock_irqsave, &tk_core.lock)
ret = __timekeeping_inject_offset(&tk_core, ts);
/* Signal hrtimers about time change */ if (!ret)
clock_was_set(CLOCK_SET_WALL); return ret;
}
/*
 * Indicates whether there is an offset between the system clock and the
 * hardware clock/persistent clock/rtc.
 */
int persistent_clock_is_local;
/* * Adjust the time obtained from the CMOS to be UTC time instead of * local time. * * This is ugly, but preferable to the alternatives. Otherwise we * would either need to write a program to do it in /etc/rc (and risk * confusion if the program gets run more than once; it would also be * hard to make the program warp the clock precisely n hours) or * compile in the timezone information into the kernel. Bad, bad.... * * - TYT, 1992-01-01 * * The best thing to do is to keep the CMOS clock in universal time (UTC) * as real UNIX machines always do it. This avoids all headaches about * daylight saving times and warping kernel clocks.
*/ void timekeeping_warp_clock(void)
{ if (sys_tz.tz_minuteswest != 0) { struct timespec64 adjust;
/*
 * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic
 * @tk:		timekeeper to update
 * @tai_offset:	new TAI offset, used as a seconds value
 *
 * Stores @tai_offset and recomputes tk->offs_tai as tk->offs_real plus
 * @tai_offset seconds.
 */
static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
{
	tk->tai_offset = tai_offset;
	tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0));
}
/* * change_clocksource - Swaps clocksources if a new one is available * * Accumulates current time interval and initializes new clocksource
*/ staticint change_clocksource(void *data)
{ struct clocksource *new = data, *old = NULL;
/* * If the clocksource is in a module, get a module reference. * Succeeds for built-in code (owner == NULL) as well. Abort if the * reference can't be acquired.
*/ if (!try_module_get(new->owner)) return 0;
/* Abort if the device can't be enabled */ if (new->enable && new->enable(new) != 0) {
module_put(new->owner); return 0;
}
timekeeping_forward_now(tks);
old = tks->tkr_mono.clock;
tk_setup_internals(tks, new);
timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
}
tk_aux_update_clocksource();
if (old) { if (old->disable)
old->disable(old);
module_put(old->owner);
}
return 0;
}
/** * timekeeping_notify - Install a new clock source * @clock: pointer to the clock source * * This function is called from clocksource.c after a new, better clock * source has been registered. The caller holds the clocksource_mutex.
*/ int timekeeping_notify(struct clocksource *clock)
{ struct timekeeper *tk = &tk_core.timekeeper;
/** * ktime_get_raw_ts64 - Returns the raw monotonic time in a timespec * @ts: pointer to the timespec64 to be set * * Returns the raw monotonic time (completely un-modified by ntp)
*/ void ktime_get_raw_ts64(struct timespec64 *ts)
{ struct timekeeper *tk = &tk_core.timekeeper; unsignedint seq;
u64 nsecs;
/**
 * ktime_get_clock_ts64 - Returns time of a clock in a timespec
 * @id:		POSIX clock ID of the clock to read
 * @ts:		Pointer to the timespec64 to be set
 *
 * The timestamp is invalidated (@ts->tv_sec is set to -1) if the
 * clock @id is not available. An unhandled @id additionally triggers
 * WARN_ON_ONCE().
 */
void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts)
{
	/* Start out with an invalid time stamp; a readout overwrites it */
	ts->tv_nsec = 0;
	ts->tv_sec = -1;

	switch (id) {
	case CLOCK_REALTIME:
		ktime_get_real_ts64(ts);
		break;
	case CLOCK_MONOTONIC:
		ktime_get_ts64(ts);
		break;
	case CLOCK_MONOTONIC_RAW:
		ktime_get_raw_ts64(ts);
		break;
	case CLOCK_AUX ... CLOCK_AUX_LAST:
		/* Without CONFIG_POSIX_AUX_CLOCKS the stamp stays invalid */
		if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS))
			ktime_get_aux_ts64(id, ts);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}
}
EXPORT_SYMBOL_GPL(ktime_get_clock_ts64);
/**
 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
 *
 * Reads the flags of the current tkr_mono clocksource inside a tk_core.seq
 * read section and retries when an update raced with the read.
 *
 * Return: non-zero when CLOCK_SOURCE_VALID_FOR_HRES is set in the
 * clocksource flags, 0 otherwise.
 */
int timekeeping_valid_for_hres(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	int ret;

	do {
		seq = read_seqcount_begin(&tk_core.seq);

		ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ret;
}
/**
 * timekeeping_max_deferment - Returns max time the clocksource can be deferred
 *
 * Reads max_idle_ns of the current tkr_mono clocksource inside a tk_core.seq
 * read section and retries when an update raced with the read.
 *
 * Return: the max_idle_ns value of the clocksource.
 */
u64 timekeeping_max_deferment(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	u64 ret;

	do {
		seq = read_seqcount_begin(&tk_core.seq);

		ret = tk->tkr_mono.clock->max_idle_ns;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ret;
}
/**
 * read_persistent_clock64 -  Return time from the persistent clock.
 * @ts: Pointer to the storage for the readout value
 *
 * Weak dummy function for arches that do not yet support it.
 * Reads the time from the battery backed persistent clock.
 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
 *
 * XXX - Do be sure to remove it once all arches implement it.
 */
void __weak read_persistent_clock64(struct timespec64 *ts)
{
	/* The dummy has nothing to read: hand back an all-zero time */
	ts->tv_nsec = 0;
	ts->tv_sec = 0;
}
/**
 * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
 *                                        from the boot.
 * @wall_time:	  current time as returned by persistent clock
 * @boot_offset:  offset that is defined as wall_time - boot_time
 *
 * Weak dummy function for arches that do not yet support it.
 *
 * The default function calculates offset based on the current value of
 * local_clock(). This way architectures that support sched_clock() but don't
 * support dedicated boot time clock will provide the best estimate of the
 * boot time.
 */
void __weak __init
read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
				     struct timespec64 *boot_offset)
{
	struct timespec64 since_boot;

	read_persistent_clock64(wall_time);

	/* local_clock() is sampled after the persistent clock readout */
	since_boot = ns_to_timespec64(local_clock());
	*boot_offset = since_boot;
}
/*
 * Flag reflecting whether timekeeping_resume() has injected sleeptime.
 *
 * The flag starts off false and is only set when a suspend reaches
 * timekeeping_suspend(). timekeeping_resume() sets it to false when the
 * timekeeper clocksource is not stopping across suspend and has been
 * used to update sleep time. If the timekeeper clocksource has stopped
 * then the flag stays true and is used by the RTC resume code to decide
 * whether sleeptime must be injected and if so the flag gets false then.
 *
 * If a suspend fails before reaching timekeeping_resume() then the flag
 * stays false and prevents erroneous sleeptime injection.
 */
static bool suspend_timing_needed;
/* Flag indicating whether there is a persistent clock on this platform */
static bool persistent_clock_exists;
/* * timekeeping_init - Initializes the clocksource and common timekeeping values
*/ void __init timekeeping_init(void)
{ struct timespec64 wall_time, boot_offset, wall_to_mono; struct timekeeper *tks = &tk_core.shadow_timekeeper; struct clocksource *clock;
if (timespec64_compare(&wall_time, &boot_offset) < 0)
boot_offset = (struct timespec64){0};
/*
 * We want to set wall_to_mono, so the following is true:
 * wall time + wall_to_mono = boot time
*/
wall_to_mono = timespec64_sub(boot_offset, wall_time);
guard(raw_spinlock_irqsave)(&tk_core.lock);
ntp_init();
clock = clocksource_default_clock(); if (clock->enable)
clock->enable(clock);
tk_setup_internals(tks, clock);
/* time in seconds when suspend began for persistent clock */ staticstruct timespec64 timekeeping_suspend_time;
/** * __timekeeping_inject_sleeptime - Internal function to add sleep interval * @tk: Pointer to the timekeeper to be updated * @delta: Pointer to the delta value in timespec64 format * * Takes a timespec offset measuring a suspend interval and properly * adds the sleep offset to the timekeeping variables.
*/ staticvoid __timekeeping_inject_sleeptime(struct timekeeper *tk, conststruct timespec64 *delta)
{ if (!timespec64_valid_strict(delta)) {
printk_deferred(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " "sleep delta value!\n"); return;
}
tk_xtime_add(tk, delta);
tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
tk_debug_account_sleep_time(delta);
}
#ifdefined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) /* * We have three kinds of time sources to use for sleep time * injection, the preference order is: * 1) non-stop clocksource * 2) persistent clock (ie: RTC accessible when irqs are off) * 3) RTC * * 1) and 2) are used by timekeeping, 3) by RTC subsystem. * If system has neither 1) nor 2), 3) will be used finally. * * * If timekeeping has injected sleeptime via either 1) or 2), * 3) becomes needless, so in this case we don't need to call * rtc_resume(), and this is what timekeeping_rtc_skipresume() * means.
*/ bool timekeeping_rtc_skipresume(void)
{ return !suspend_timing_needed;
}
/*
 * Whether the non-stop clocksource (source 1) can be used is only known
 * in timekeeping_resume(), which runs after rtc_suspend(); a system that
 * relies on 1) therefore cannot skip rtc_suspend().
 *
 * A persistent clock (source 2), when present, will definitely be used,
 * so rtc_suspend() is unnecessary in that case. That is the condition
 * timekeeping_rtc_skipsuspend() reports.
 *
 * Return: the current value of persistent_clock_exists.
 */
bool timekeeping_rtc_skipsuspend(void)
{
	const bool have_persistent_clock = persistent_clock_exists;

	return have_persistent_clock;
}
/**
 * timekeeping_inject_sleeptime64 - Adds suspend interval to timekeeping values
 * @delta: pointer to a timespec64 delta value
 *
 * This hook is for architectures that cannot support read_persistent_clock64
 * because their RTC/persistent clock is only accessible when irqs are enabled,
 * and also don't have an effective nonstop clocksource.
 *
 * This function should only be called by rtc_resume(), and allows
 * a suspend offset to be injected into the timekeeping values.
*/ void timekeeping_inject_sleeptime64(conststruct timespec64 *delta)
{
scoped_guard(raw_spinlock_irqsave, &tk_core.lock) { struct timekeeper *tks = &tk_core.shadow_timekeeper;
/* * After system resumes, we need to calculate the suspended time and * compensate it for the OS time. There are 3 sources that could be * used: Nonstop clocksource during suspend, persistent clock and rtc * device. * * One specific platform may have 1 or 2 or all of them, and the * preference will be: * suspend-nonstop clocksource -> persistent clock -> rtc * The less preferred source will only be tried if there is no better * usable source. The rtc part is handled separately in rtc core code.
*/
cycle_now = tk_clock_read(&tks->tkr_mono);
nsec = clocksource_stop_suspend_timing(clock, cycle_now); if (nsec > 0) {
ts_delta = ns_to_timespec64(nsec);
inject_sleeptime = true;
} elseif (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
inject_sleeptime = true;
}
if (inject_sleeptime) {
suspend_timing_needed = false;
__timekeeping_inject_sleeptime(tks, &ts_delta);
}
/* Re-base the last cycle value */
tks->tkr_mono.cycle_last = cycle_now;
tks->tkr_raw.cycle_last = cycle_now;
/* * On some systems the persistent_clock can not be detected at * timekeeping_init by its return value, so if we see a valid * value returned, update the persistent_clock_exists flag.
*/ if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
persistent_clock_exists = true;
/* * Since we've called forward_now, cycle_last stores the value * just read from the current clocksource. Save this to potentially * use in suspend timing.
*/
curr_clock = tks->tkr_mono.clock;
cycle_now = tks->tkr_mono.cycle_last;
clocksource_start_suspend_timing(curr_clock, cycle_now);
if (persistent_clock_exists) { /* * To avoid drift caused by repeated suspend/resumes, * which each can add ~1 second drift error, * try to compensate so the difference in system time * and persistent_clock time stays close to constant.
*/
delta = timespec64_sub(tk_xtime(tks), timekeeping_suspend_time);
delta_delta = timespec64_sub(delta, old_delta); if (abs(delta_delta.tv_sec) >= 2) { /* * if delta_delta is too large, assume time correction * has occurred and set old_delta to the current delta.
*/
old_delta = delta;
} else { /* Otherwise try to adjust old_system to compensate */
timekeeping_suspend_time =
timespec64_add(timekeeping_suspend_time, delta_delta);
}
}
/*
 * So the following can be confusing.
 *
 * To keep things simple, let's assume mult_adj == 1 for now.
 *
 * When mult_adj != 1, remember that the interval and offset values
 * have been appropriately scaled so the math is the same.
 *
 * The basic idea here is that we're increasing the multiplier
 * by one, this causes the xtime_interval to be incremented by
 * one cycle_interval. This is because:
 *	xtime_interval = cycle_interval * mult
 * So if mult is being incremented by one:
 *	xtime_interval = cycle_interval * (mult + 1)
 * It's the same as:
 *	xtime_interval = (cycle_interval * mult) + cycle_interval
 * Which can be shortened to:
 *	xtime_interval += cycle_interval
 *
 * So offset stores the non-accumulated cycles. Thus the current
 * time (in shifted nanoseconds) is:
 *	now = (offset * adj) + xtime_nsec
 * Now, even though we're adjusting the clock frequency, we have
 * to keep time consistent. In other words, we can't jump back
 * in time, and we also want to avoid jumping forward in time.
 *
 * So given the same offset value, we need the time to be the same
 * both before and after the freq adjustment.
 *	now = (offset * adj_1) + xtime_nsec_1
 *	now = (offset * adj_2) + xtime_nsec_2
 * So:
 *	(offset * adj_1) + xtime_nsec_1 =
 *		(offset * adj_2) + xtime_nsec_2
 * And we know:
 *	adj_2 = adj_1 + 1
 * So:
 *	(offset * adj_1) + xtime_nsec_1 =
 *		(offset * (adj_1+1)) + xtime_nsec_2
 *	(offset * adj_1) + xtime_nsec_1 =
 *		(offset * adj_1) + offset + xtime_nsec_2
 * Canceling the sides:
 *	xtime_nsec_1 = offset + xtime_nsec_2
 * Which gives us:
 *	xtime_nsec_2 = xtime_nsec_1 - offset
 * Which simplifies to:
 *	xtime_nsec -= offset
*/ if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) { /* NTP adjustment caused clocksource mult overflow */
WARN_ON_ONCE(1); return;
}
/*
 * Adjust the timekeeper's multiplier to the correct frequency
 * and also to reduce the accumulated error value.
 *
 * @tk:		timekeeper whose tkr_mono multiplier is re-evaluated
 * @offset:	handed through unchanged to timekeeping_apply_adjustment()
 *
 * The new multiplier is derived from ntp_tick_length(tk->id), bumped by
 * one while tk->ntp_error is positive, and applied as a delta against the
 * current tk->tkr_mono.mult.
 */
static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
{
	u64 ntp_tl = ntp_tick_length(tk->id);
	u32 mult;

	/*
	 * Determine the multiplier from the current NTP tick length.
	 * Avoid expensive division when the tick length doesn't change.
	 */
	if (likely(tk->ntp_tick == ntp_tl)) {
		/* Same tick length: just strip the previous +1 correction */
		mult = tk->tkr_mono.mult - tk->ntp_err_mult;
	} else {
		tk->ntp_tick = ntp_tl;
		mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) -
				 tk->xtime_remainder, tk->cycle_interval);
	}

	/*
	 * If the clock is behind the NTP time, increase the multiplier by 1
	 * to catch up with it. If it's ahead and there was a remainder in the
	 * tick division, the clock will slow down. Otherwise it will stay
	 * ahead until the tick length changes to a non-divisible value.
	 */
	tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0;
	mult += tk->ntp_err_mult;

	timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult);

	/* Warn once if the multiplier drifted beyond the clocksource's maxadj */
	if (unlikely(tk->tkr_mono.clock->maxadj &&
		(abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
			> tk->tkr_mono.clock->maxadj))) {
		printk_once(KERN_WARNING
			"Adjusting %s more than 11%% (%ld vs %ld)\n",
			tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
			(long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
	}

	/*
	 * It may be possible that when we entered this function, xtime_nsec
	 * was very small. Further, if we're slightly speeding the clocksource
	 * in the code above, it's possible the required corrective factor to
	 * xtime_nsec could cause it to underflow.
	 *
	 * Now, since we have already accumulated the second and the NTP
	 * subsystem has been notified via second_overflow(), we need to skip
	 * the next update.
	 */
	if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
		/* Borrow one second (in shifted nanoseconds) from xtime_sec */
		tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC <<
							tk->tkr_mono.shift;
		tk->xtime_sec--;
		tk->skip_second_overflow = 1;
	}
}
/*
 * accumulate_nsecs_to_secs - Accumulates nsecs into secs
 *
 * Helper function that accumulates the nsecs greater than a second
 * from the xtime_nsec field to the xtime_sec field.
 * It also calls into the NTP code to handle leapsecond processing.
*/ staticinlineunsignedint accumulate_nsecs_to_secs(struct timekeeper *tk)
{
u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift; unsignedint clock_set = 0;
while (tk->tkr_mono.xtime_nsec >= nsecps) { int leap;