// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Common time routines among all ppc machines.
 *
 * Written by Cort Dougan (cort@cs.nmt.edu) to merge
 * Paul Mackerras' version and mine for PReP and Pmac.
 * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net).
 * Converted for 64-bit by Mike Corrigan (mikejc@us.ibm.com)
 *
 * First round of bugfixes by Gabriel Paubert (paubert@iram.es)
 * to make clock more stable (2.4.0-test5). The only thing
 * that this code assumes is that the timebases have been synchronized
 * by firmware on SMP and are never stopped (never do sleep
 * on SMP then, nap and doze are OK).
 *
 * Speeded up do_gettimeofday by getting rid of references to
 * xtime (which required locks for consistency). (mikejc@us.ibm.com)
 *
 * TODO (not necessarily in this file):
 * - improve precision and reproducibility of timebase frequency
 *   measurement at boot time.
 * - for astronomical applications: add a new function to get
 *   non ambiguous timestamps even around leap seconds. This needs
 *   a new timestamp format and a good name.
 *
 * 1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *             "A Kernel Model for Precision Timekeeping" by Dave Mills
 */
/*
 * This always puts next_tb beyond now, so the clock event will never fire
 * with the usual comparison, no need for a separate test for stopped.
 */
/* A #define must start its own line; it was fused after the comment above. */
#define DEC_CLOCKEVENT_STOPPED	~0ULL

/* Per-CPU timebase value at which the next decrementer event is due. */
DEFINE_PER_CPU(u64, decrementers_next_tb) = DEC_CLOCKEVENT_STOPPED;
EXPORT_SYMBOL_GPL(decrementers_next_tb);

/* Per-CPU decrementer clock event devices. */
static DEFINE_PER_CPU(struct clock_event_device, decrementers);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Read the SPURR on systems that have it, otherwise the PURR,
 * or if that doesn't exist return the timebase value passed in.
 *
 * @tb: fallback timebase value returned when neither SPURR nor PURR exist.
 */
static inline unsigned long read_spurr(unsigned long tb)
{
	if (cpu_has_feature(CPU_FTR_SPURR))
		return mfspr(SPRN_SPURR);
	if (cpu_has_feature(CPU_FTR_PURR))
		return mfspr(SPRN_PURR);
	return tb;
}
/*
 * Account time for a transition between system, hard irq
 * or soft irq state.
 *
 * Returns the scaled (SPURR-based) system time for the interval; 0 when
 * CONFIG_ARCH_HAS_SCALED_CPUTIME is not enabled.
 */
static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct,
					unsigned long now, unsigned long stime)
{
	unsigned long stime_scaled = 0;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	unsigned long nowscaled, deltascaled;
	unsigned long utime, utime_scaled;

	/*
	 * NOTE(review): the lines computing deltascaled and utime were lost
	 * in this copy — deltascaled and utime were read uninitialized below.
	 * Restored per the upstream implementation: deltascaled is the SPURR
	 * delta since the last snapshot, utime is the unscaled user time
	 * accumulated since the last snapshot.
	 */
	nowscaled = read_spurr(now);
	deltascaled = nowscaled - acct->startspurr;
	acct->startspurr = nowscaled;
	utime = acct->utime - acct->utime_sspurr;
	acct->utime_sspurr = acct->utime;

	/*
	 * Because we don't read the SPURR on every kernel entry/exit,
	 * deltascaled includes both user and system SPURR ticks.
	 * Apportion these ticks to system SPURR ticks and user
	 * SPURR ticks in the same ratio as the system time (delta)
	 * and user time (udelta) values obtained from the timebase
	 * over the same interval. The system ticks get accounted here;
	 * the user ticks get saved up in paca->user_time_scaled to be
	 * used by account_process_tick.
	 */
	stime_scaled = stime;
	utime_scaled = utime;
	if (deltascaled != stime + utime) {
		if (utime) {
			stime_scaled = deltascaled * stime / (stime + utime);
			utime_scaled = deltascaled - stime_scaled;
		} else {
			stime_scaled = deltascaled;
		}
	}
	acct->utime_scaled += utime_scaled;
#endif

	return stime_scaled;
}
/*
 * Account the whole cputime accumulated in the paca
 * Must be called with interrupts disabled.
 * Assumes that vtime_account_kernel/idle() has been called
 * recently (i.e. since the last entry from usermode) so that
 * get_paca()->user_time_scaled is up to date.
 */
void vtime_flush(struct task_struct *tsk)
{
	struct cpu_accounting_data *acct = get_accounting(tsk);

	if (acct->utime)
		account_user_time(tsk, cputime_to_nsecs(acct->utime));

	if (acct->gtime)
		account_guest_time(tsk, cputime_to_nsecs(acct->gtime));

	if (IS_ENABLED(CONFIG_PPC_SPLPAR) && acct->steal_time) {
		account_steal_time(cputime_to_nsecs(acct->steal_time));
		acct->steal_time = 0;
	}

	if (acct->idle_time)
		account_idle_time(cputime_to_nsecs(acct->idle_time));

	if (acct->stime)
		account_system_index_time(tsk, cputime_to_nsecs(acct->stime),
					  CPUTIME_SYSTEM);

	if (acct->hardirq_time)
		account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time),
					  CPUTIME_IRQ);
	if (acct->softirq_time)
		account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time),
					  CPUTIME_SOFTIRQ);

	/*
	 * NOTE(review): the function tail was lost in this copy — without
	 * zeroing the accumulators the same time would be accounted again on
	 * the next flush. Restored the resets; upstream additionally calls
	 * vtime_flush_scaled(tsk, acct) here — confirm against the full file.
	 */
	acct->utime = 0;
	acct->gtime = 0;
	acct->idle_time = 0;
	acct->stime = 0;
	acct->hardirq_time = 0;
	acct->softirq_time = 0;
}
/*
 * Called from the context switch with interrupts disabled, to charge all
 * accumulated times to the current process, and to prepare accounting on
 * the next process.
 */
void vtime_task_switch(struct task_struct *prev)
{
	/* Idle time and kernel time are charged to different buckets. */
	if (is_idle_task(prev))
		vtime_account_idle(prev);
	else
		vtime_account_kernel(prev);
}
spin_begin(); if (tb_invalid) { /* * TB is in error state and isn't ticking anymore. * HMI handler was unable to recover from TB error. * Return immediately, so that kernel won't get stuck here.
*/
spin_cpu_relax();
} else {
start = mftb(); while (mftb() - start < loops)
spin_cpu_relax();
}
spin_end();
}
EXPORT_SYMBOL(__delay);
/*
 * Request an irq_work run on this CPU: set the pending flag, then force an
 * immediate decrementer interrupt so timer_interrupt() notices it.
 */
void arch_irq_work_raise(void)
{
	/*
	 * 64-bit code that uses irq soft-mask can just cause an immediate
	 * interrupt here that gets soft masked, if this is called under
	 * local_irq_disable(). It might be possible to prevent that happening
	 * by noticing interrupts are disabled and setting decrementer pending
	 * to be replayed when irqs are enabled. The problem there is that
	 * tracing can call irq_work_raise, including in code that does low
	 * level manipulations of irq soft-mask state (e.g., trace_hardirqs_on)
	 * which could get tangled up if we're messing with the same state
	 * here.
	 */
	/* Keep flag-set and decrementer-poke on the same CPU. */
	preempt_disable();
	set_irq_work_pending_flag();
	set_dec(1);
	preempt_enable();
}
/*
 * Program the decrementer with @val, then re-check for irq work that may
 * have been raised concurrently and, if so, force an immediate interrupt.
 */
static void set_dec_or_work(u64 val)
{
	set_dec(val);
	/* We may have raced with new irq work */
	if (unlikely(test_irq_work_pending()))
		set_dec(1);
}
if (now >= *next_tb) {
local_paca->irq_happened |= PACA_IRQ_DEC;
} else {
now = *next_tb - now; if (now > decrementer_max)
now = decrementer_max;
set_dec_or_work(now);
}
}
EXPORT_SYMBOL_GPL(timer_rearm_host_dec); #endif
/*
 * timer_interrupt - gets called when the decrementer overflows,
 * with interrupts disabled.
 */
DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
{
	struct clock_event_device *evt = this_cpu_ptr(&decrementers);
	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
	struct pt_regs *old_regs;
	u64 now;

	/*
	 * Some implementations of hotplug will get timer interrupts while
	 * offline, just ignore these.
	 */
	if (unlikely(!cpu_online(smp_processor_id()))) {
		set_dec(decrementer_max);
		return;
	}

	/* Conditionally hard-enable interrupts. */
	if (should_hard_irq_enable(regs)) {
		/*
		 * Ensure a positive value is written to the decrementer, or
		 * else some CPUs will continue to take decrementer exceptions.
		 * When the PPC_WATCHDOG (decrementer based) is configured,
		 * keep this at most 31 bits, which is about 4 seconds on most
		 * systems, which gives the watchdog a chance of catching timer
		 * interrupt hard lockups.
		 */
		if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
			set_dec(0x7fffffff);
		else
			set_dec(decrementer_max);

		do_hard_irq_enable();
	}

/* Directive was fused as "#ifdefined(...)" in this copy — fixed. */
#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
	if (atomic_read(&ppc_n_lost_interrupts) != 0)
		__do_IRQ(regs);
#endif

	old_regs = set_irq_regs(regs);

	trace_timer_interrupt_entry(regs);

	if (test_irq_work_pending()) {
		clear_irq_work_pending();
		mce_run_irq_context_handlers();
		irq_work_run();
	}

	now = get_tb();
	if (now >= *next_tb) {
		/* Event due: run the clockevent handler. */
		evt->event_handler(evt);
		__this_cpu_inc(irq_stat.timer_irqs_event);
	} else {
		/* Spurious/early: re-arm for the remaining ticks. */
		now = *next_tb - now;
		if (now > decrementer_max)
			now = decrementer_max;
		set_dec_or_work(now);
		__this_cpu_inc(irq_stat.timer_irqs_others);
	}

	/*
	 * NOTE(review): the function tail was lost in this copy — old_regs
	 * was saved but never restored. Restored per upstream.
	 */
	trace_timer_interrupt_exit(regs);

	set_irq_regs(old_regs);
}
#ifdef CONFIG_SUSPEND
/* Overrides the weak version in kernel/power/main.c */
void arch_suspend_disable_irqs(void)
{
	if (ppc_md.suspend_disable_irqs)
		ppc_md.suspend_disable_irqs();

	/*
	 * Disable the decrementer, so that it doesn't interfere
	 * with suspending.
	 *
	 * NOTE(review): the tail of this function (and its closing brace and
	 * the matching #endif) was lost in this copy; restored per upstream,
	 * which parks the decrementer before and after disabling interrupts.
	 */
	set_dec_or_work(decrementer_max);
	local_irq_disable();
	set_dec_or_work(decrementer_max);
}
#endif
/* * Scheduler clock - returns current time in nanosec units. * * Note: mulhdu(a, b) (multiply high double unsigned) returns * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b * are 64-bit unsigned numbers.
*/
notrace unsignedlonglong sched_clock(void)
{ return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
}
#ifdef CONFIG_PPC_PSERIES

/*
 * Running clock - attempts to give a view of time passing for a virtualised
 * kernels.
 * Uses the VTB register if available otherwise a next best guess.
 */
unsigned long long running_clock(void)
{
	/*
	 * Don't read the VTB as a host since KVM does not switch in host
	 * timebase into the VTB when it takes a guest off the CPU, reading the
	 * VTB would result in reading 'last switched out' guest VTB.
	 *
	 * Host kernels are often compiled with CONFIG_PPC_PSERIES checked, it
	 * would be unsafe to rely only on the #ifdef above.
	 */
	if (firmware_has_feature(FW_FEATURE_LPAR) &&
	    cpu_has_feature(CPU_FTR_ARCH_207S))
		return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;

	/*
	 * This is a next best approximation without a VTB.
	 * On a host which is running bare metal there should never be any stolen
	 * time and on a host which doesn't do any virtualisation TB *should* equal
	 * VTB so it makes no difference anyway.
	 */
	return local_clock() - kcpustat_this_cpu->cpustat[CPUTIME_STEAL];
}
#endif
/*
 * Look up an unsigned long property @name (@cells wide) on the first "cpu"
 * device-tree node; store it in *@val. Returns 1 if found, 0 otherwise.
 */
static int __init get_freq(char *name, int cells, unsigned long *val)
{
	struct device_node *cpu;
	const __be32 *fp;
	int found = 0;

	/* The cpu node should have timebase and clock frequency properties */
	cpu = of_find_node_by_type(NULL, "cpu");

	if (cpu) {
		fp = of_get_property(cpu, name, NULL);
		if (fp) {
			found = 1;
			*val = of_read_ulong(fp, cells);
		}

		/*
		 * NOTE(review): the tail was lost in this copy — the node
		 * reference taken by of_find_node_by_type() was never dropped
		 * and the function never returned. Restored per upstream.
		 */
		of_node_put(cpu);
	}

	return found;
}
ts->tv_nsec = 0; /* XXX this is a little fragile but will work okay in the short term */ if (first) {
first = 0; if (ppc_md.time_init)
timezone_offset = ppc_md.time_init();
/* get_boot_time() isn't guaranteed to be safe to call late */ if (ppc_md.get_boot_time) {
ts->tv_sec = ppc_md.get_boot_time() - timezone_offset; return;
}
} if (!ppc_md.get_rtc_time) {
ts->tv_sec = 0; return;
}
ppc_md.get_rtc_time(&tm);
/* Set values for KVM, see kvm_emulate_dec() */
decrementer_clockevent.mult = dec->mult;
decrementer_clockevent.shift = dec->shift;
}
/*
 * Enable the large decrementer (LD) on ISA v3.0 (POWER9+) CPUs when the
 * platform reports a decrementer wider than the 32-bit default.
 */
static void enable_large_decrementer(void)
{
	if (!cpu_has_feature(CPU_FTR_ARCH_300))
		return;

	if (decrementer_max <= DECREMENTER_DEFAULT_MAX)
		return;

	/*
	 * If we're running as the hypervisor we need to enable the LD manually
	 * otherwise firmware should have done it for us.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE))
		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD);
}
/*
 * Compute scale factor for sched_clock.
 * The calibrate_decr() function has set tb_ticks_per_sec,
 * which is the timebase frequency.
 * We compute 1e9 * 2^64 / tb_ticks_per_sec and interpret
 * the 128-bit result as a 64.64 fixed-point number.
 * We then shift that number right until it is less than 1.0,
 * giving us the scale factor and shift count to use in
 * sched_clock().
 */
/*
 * NOTE(review): this span appears to be the interior of time_init(); the
 * enclosing function header and surrounding code were lost in this copy.
 * Verify against the full file before touching this region.
 */
div128_by_32(1000000000, 0, tb_ticks_per_sec, &res);
/* Normalize: shift the 64.64 fixed-point value until the high word is 0. */
scale = res.result_low; for (shift = 0; res.result_high != 0; ++shift) {
scale = (scale >> 1) | (res.result_high << 63);
res.result_high >>= 1;
}
/* Publish the scale/shift pair consumed by sched_clock(). */
tb_to_ns_scale = scale;
tb_to_ns_shift = shift; /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
boot_tb = get_tb();
/* If platform provided a timezone (pmac), we correct the time */ if (timezone_offset) {
sys_tz.tz_minuteswest = -timezone_offset / 60;
sys_tz.tz_dsttime = 0;
}
/* We don't need to calibrate delay, we use the CPU timebase for that */
void calibrate_delay(void)
{
	/*
	 * Some generic code (such as spinlock debug) use loops_per_jiffy
	 * as the number of __delay(1) in a jiffy, so make it so.
	 */
	loops_per_jiffy = tb_ticks_per_jiffy;
}
/*
 * NOTE(review): the following trailing text is German web-page boilerplate
 * that was accidentally appended to this file (roughly: "the information on
 * this web page was compiled to the best of our knowledge; no guarantee of
 * completeness, correctness or quality is given; the colored syntax display
 * and the measurement are still experimental"). It is not source code and
 * should be deleted; kept here only as a comment so the file stays valid C.
 */