// SPDX-License-Identifier: GPL-2.0
/*
 * OS Noise Tracer: computes the OS Noise suffered by a running thread.
 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
 *
 * Based on "hwlat_detector" tracer by:
 *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *   With feedback from Clark Williams <williams@redhat.com>
 *
 * And also based on the rtsl tracer presented on:
 *  DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
 *  scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
 *  (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
 *
 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
 */
/*
 * osnoise_instance_registered - check if a tr is already registered
 *
 * Walks the osnoise_instances list inside an RCU read-side critical
 * section and compares each entry's trace_array pointer against @tr.
 *
 * Returns 1 if @tr is on the list, 0 otherwise.
 */
static int osnoise_instance_registered(struct trace_array *tr)
{
	struct osnoise_instance *inst;
	int found = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		if (inst->tr == tr) {
			found = 1;
			/* The answer cannot change anymore, stop walking. */
			break;
		}
	}
	rcu_read_unlock();

	return found;
}
/* * osnoise_register_instance - register a new trace instance * * Register a trace_array *tr in the list of instances running * osnoise/timerlat tracers.
*/ staticint osnoise_register_instance(struct trace_array *tr)
{ struct osnoise_instance *inst;
/* * register/unregister serialization is provided by trace's * trace_types_lock.
*/
lockdep_assert_held(&trace_types_lock);
inst = kmalloc(sizeof(*inst), GFP_KERNEL); if (!inst) return -ENOMEM;
/* * osnoise_unregister_instance - unregister a registered trace instance * * Remove the trace_array *tr from the list of instances running * osnoise/timerlat tracers.
*/ staticvoid osnoise_unregister_instance(struct trace_array *tr)
{ struct osnoise_instance *inst; int found = 0;
/* * register/unregister serialization is provided by trace's * trace_types_lock.
*/
list_for_each_entry_rcu(inst, &osnoise_instances, list,
lockdep_is_held(&trace_types_lock)) { if (inst->tr == tr) {
list_del_rcu(&inst->list);
found = 1; break;
}
}
/* * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU
*/ staticinlinestruct osnoise_variables *this_cpu_osn_var(void)
{ return this_cpu_ptr(&per_cpu_osnoise_var);
}
/* * Protect the interface.
*/ staticstruct mutex interface_lock;
/* * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU
*/ staticinlinestruct timerlat_variables *this_cpu_tmr_var(void)
{ return this_cpu_ptr(&per_cpu_timerlat_var);
}
/* * tlat_var_reset - Reset the values of the given timerlat_variables
*/ staticinlinevoid tlat_var_reset(void)
{ struct timerlat_variables *tlat_var; int cpu;
/* Synchronize with the timerlat interfaces */
mutex_lock(&interface_lock); /* * So far, all the values are initialized as 0, so * zeroing the structure is perfect.
*/
for_each_cpu(cpu, cpu_online_mask) {
tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); if (tlat_var->kthread)
hrtimer_cancel(&tlat_var->timer);
memset(tlat_var, 0, sizeof(*tlat_var));
}
mutex_unlock(&interface_lock);
} #else/* CONFIG_TIMERLAT_TRACER */ #define tlat_var_reset() do {} while (0) #endif/* CONFIG_TIMERLAT_TRACER */
/*
 * osn_var_reset - Reset the osnoise_variables of every online CPU
 *
 * All the fields start out as 0, so clearing each per-cpu structure
 * with memset() brings it back to its initial state.
 */
static inline void osn_var_reset(void)
{
	int cpu;

	for_each_cpu(cpu, cpu_online_mask) {
		struct osnoise_variables *vars;

		vars = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
		memset(vars, 0, sizeof(*vars));
	}
}
/*
 * osn_var_reset_all - Reset the value of all per-cpu tracer variables
 *
 * Resets the osnoise variables first and the timerlat variables next
 * (the latter is a no-op when tlat_var_reset() is the empty macro).
 */
static inline void osn_var_reset_all(void)
{
	osn_var_reset();
	tlat_var_reset();
}
/* * Tells NMIs to call back to the osnoise tracer to record timestamps.
*/ bool trace_osnoise_callback_enabled;
/* * Tracer data.
*/ staticstruct osnoise_data {
u64 sample_period; /* total sampling period */
u64 sample_runtime; /* active sampling portion of period */
u64 stop_tracing; /* stop trace in the internal operation (loop/irq) */
u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */ #ifdef CONFIG_TIMERLAT_TRACER
u64 timerlat_period; /* timerlat period */
u64 print_stack; /* print IRQ stack if total > */ int timerlat_tracer; /* timerlat tracer */ #endif bool tainted; /* infor users and developers about a problem */
} osnoise_data = {
.sample_period = DEFAULT_SAMPLE_PERIOD,
.sample_runtime = DEFAULT_SAMPLE_RUNTIME,
.stop_tracing = 0,
.stop_tracing_total = 0, #ifdef CONFIG_TIMERLAT_TRACER
.print_stack = 0,
.timerlat_period = DEFAULT_TIMERLAT_PERIOD,
.timerlat_tracer = 0, #endif
};
/*
 * timerlat_softirq_exit - Tell if a softirq exit must be traced by timerlat
 *
 * Returns 1 when this CPU's timerlat variables have tracing_thread set.
 * Otherwise clears the softirq arrival_time/delta_start of @osn_var and
 * returns 0.
 */
static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
	struct timerlat_variables *tlat = this_cpu_tmr_var();

	if (tlat->tracing_thread)
		return 1;

	/*
	 * If the timerlat is enabled, but the irq handler did
	 * not run yet enabling timerlat_tracer, do not trace.
	 */
	osn_var->softirq.arrival_time = 0;
	osn_var->softirq.delta_start = 0;

	return 0;
}
staticinlineint timerlat_thread_exit(struct osnoise_variables *osn_var)
{ struct timerlat_variables *tlat_var = this_cpu_tmr_var(); /* * If the timerlat is enabled, but the irq handler did * not run yet enabling timerlat_tracer, do not trace.
*/ if (!tlat_var->tracing_thread) {
osn_var->thread.delta_start = 0;
osn_var->thread.arrival_time = 0; return 0;
} return 1;
} #else/* CONFIG_TIMERLAT_TRACER */ staticinlinebool timerlat_enabled(void)
{ returnfalse;
}
/* * Record an osnoise_sample on all osnoise instances and fire trace event.
*/ staticvoid record_osnoise_sample(struct osnoise_sample *sample)
{ struct osnoise_instance *inst; struct trace_buffer *buffer;
/*
 * Storage for one saved stack trace: two integer bookkeeping fields and
 * room for up to MAX_CALLS entries.
 *
 * Stack trace will take place only at IRQ level, so, no need
 * to control nesting here.
 */
struct trace_stack {
	int		stack_size;
	int		nr_entries;
	unsigned long	calls[MAX_CALLS];
};
/* * timerlat_save_stack - save a stack trace without printing * * Save the current stack trace without printing. The * stack will be printed later, after the end of the measurement.
*/ staticvoid timerlat_save_stack(int skip)
{ unsignedint size, nr_entries; struct trace_stack *fstack;
}
rcu_read_unlock();
preempt_enable_notrace();
} #else/* CONFIG_STACKTRACE */ #define timerlat_dump_stack(u64 latency) do {} while (0) #define timerlat_save_stack(a) do {} while (0) #endif/* CONFIG_STACKTRACE */ #endif/* CONFIG_TIMERLAT_TRACER */
/*
 * Macros to encapsulate the time capturing infrastructure.
 *
 * time_get()     - read the current time from trace_clock_local()
 * time_to_us(x)  - divide @x by NSEC_PER_USEC using div_u64()
 * time_sub(a, b) - plain difference (a) - (b)
 */
#define time_get()	trace_clock_local()
#define time_to_us(x)	div_u64(x, NSEC_PER_USEC)
#define time_sub(a, b)	((a) - (b))
/*
 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
 *
 * If an IRQ is preempted by an NMI, its delta_start is pushed forward
 * to discount the NMI interference. A delta_start of zero is left alone.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (!osn_var->irq.delta_start)
		return;

	osn_var->irq.delta_start += duration;
}
#ifndef CONFIG_PREEMPT_RT
/*
 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
 *
 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
 * forward to discount the interference. A delta_start of zero is left
 * alone.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (!osn_var->softirq.delta_start)
		return;

	osn_var->softirq.delta_start += duration;
}
#else /* CONFIG_PREEMPT_RT */
#define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
#endif
/*
 * cond_move_thread_delta_start - Forward the delta_start of a running thread
 *
 * If a noisy thread is preempted by a softirq, IRQ or NMI, its delta_start
 * is pushed forward to discount the interference. A delta_start of zero is
 * left alone.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
	if (!osn_var->thread.delta_start)
		return;

	osn_var->thread.delta_start += duration;
}
/* * get_int_safe_duration - Get the duration of a window * * The irq, softirq and thread varaibles need to have its duration without * the interference from higher priority interrupts. Instead of keeping a * variable to discount the interrupt interference from these variables, the * starting time of these variables are pushed forward with the interrupt's * duration. In this way, a single variable is used to: * * - Know if a given window is being measured. * - Account its duration. * - Discount the interference. * * To avoid getting inconsistent values, e.g.,: * * now = time_get() * ---> interrupt! * delta_start -= int duration; * <--- * duration = now - delta_start; * * result: negative duration if the variable duration before the * interrupt was smaller than the interrupt execution. * * A counter of interrupts is used. If the counter increased, try * to capture an interference safe duration.
*/ staticinline s64
get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
{
u64 int_counter, now;
s64 duration;
do {
int_counter = local_read(&osn_var->int_counter); /* synchronize with interrupts */
barrier();
now = time_get();
duration = (now - *delta_start);
/* synchronize with interrupts */
barrier();
} while (int_counter != local_read(&osn_var->int_counter));
/* * This is an evidence of race conditions that cause * a value to be "discounted" too much.
*/ if (duration < 0)
osnoise_taint("Negative duration!\n");
*delta_start = 0;
return duration;
}
/* * * set_int_safe_time - Save the current time on *time, aware of interference * * Get the time, taking into consideration a possible interference from * higher priority interrupts. * * See get_int_safe_duration() for an explanation.
*/ static u64
set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
{
u64 int_counter;
do {
int_counter = local_read(&osn_var->int_counter); /* synchronize with interrupts */
barrier();
*time = time_get();
/* synchronize with interrupts */
barrier();
} while (int_counter != local_read(&osn_var->int_counter));
/* * trace_osnoise_callback - NMI entry/exit callback * * This function is called at the entry and exit NMI code. The bool enter * distinguishes between either case. This function is used to note a NMI * occurrence, compute the noise caused by the NMI, and to remove the noise * it is potentially causing on other interference variables.
*/ void trace_osnoise_callback(bool enter)
{ struct osnoise_variables *osn_var = this_cpu_osn_var();
u64 duration;
if (!osn_var->sampling) return;
/* * Currently trace_clock_local() calls sched_clock() and the * generic version is not NMI safe.
*/ if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { if (enter) {
osn_var->nmi.delta_start = time_get();
local_inc(&osn_var->int_counter);
} else {
duration = time_get() - osn_var->nmi.delta_start;
/*
 * osnoise_trace_irq_entry - Note the starting of an IRQ
 *
 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
 * it is safe to use a single variable (osn_var->irq) to save the statistics.
 * The arrival_time is used to report... the arrival time. The delta_start
 * is used to compute the duration at the IRQ exit handler. See
 * cond_move_irq_delta_start().
 *
 * Nothing is recorded while this CPU is not sampling. @id is not used here.
 */
void osnoise_trace_irq_entry(int id)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();

	if (osn_var->sampling) {
		/*
		 * This value will be used in the report, but not to compute
		 * the execution time, so it is safe to get it unsafe.
		 */
		osn_var->irq.arrival_time = time_get();

		set_int_safe_time(osn_var, &osn_var->irq.delta_start);
		osn_var->irq.count++;

		local_inc(&osn_var->int_counter);
	}
}
/* * osnoise_irq_exit - Note the end of an IRQ, sava data and trace * * Computes the duration of the IRQ noise, and trace it. Also discounts the * interference from other sources of noise could be currently being accounted.
*/ void osnoise_trace_irq_exit(int id, constchar *desc)
{ struct osnoise_variables *osn_var = this_cpu_osn_var();
s64 duration;
/*
 * trace_irqentry_callback - Callback to the irq:irq_entry traceevent
 *
 * Used to note the starting of an IRQ occurrence: forwards @irq to
 * osnoise_trace_irq_entry(). @data and @action are not used.
 */
static void trace_irqentry_callback(void *data, int irq,
				    struct irqaction *action)
{
	osnoise_trace_irq_entry(irq);
}
/*
 * trace_irqexit_callback - Callback to the irq:irq_exit traceevent
 *
 * Used to note the end of an IRQ occurrence: forwards @irq and the name
 * of @action to osnoise_trace_irq_exit(). @data and @ret are not used.
 */
static void trace_irqexit_callback(void *data, int irq,
				   struct irqaction *action, int ret)
{
	osnoise_trace_irq_exit(irq, action->name);
}
/*
 * arch specific register function.
 *
 * Weak default that does nothing and reports success (0).
 */
int __weak osnoise_arch_register(void)
{
	return 0;
}
/* * hook_irq_events - Hook IRQ handling events * * This function hooks the IRQ related callbacks to the respective trace * events.
*/ staticint hook_irq_events(void)
{ int ret;
ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL); if (ret) goto out_err;
ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL); if (ret) goto out_unregister_entry;
ret = osnoise_arch_register(); if (ret) goto out_irq_exit;
/*
 * unhook_irq_events - Unhook IRQ handling events
 *
 * This function unhooks the IRQ related callbacks from the respective trace
 * events: the arch specific part goes first, then the irq_handler_exit
 * probe, then the irq_handler_entry probe.
 */
static void unhook_irq_events(void)
{
	osnoise_arch_unregister();

	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
}
#ifndef CONFIG_PREEMPT_RT /* * trace_softirq_entry_callback - Note the starting of a softirq * * Save the starting time of a softirq. As softirqs are non-preemptive to * other softirqs, it is safe to use a single variable (ons_var->softirq) * to save the statistics. The arrival_time is used to report... the * arrival time. The delta_start is used to compute the duration at the * softirq exit handler. See cond_move_softirq_delta_start().
*/ staticvoid trace_softirq_entry_callback(void *data, unsignedint vec_nr)
{ struct osnoise_variables *osn_var = this_cpu_osn_var();
if (!osn_var->sampling) return; /* * This value will be used in the report, but not to compute * the execution time, so it is safe to get it unsafe.
*/
osn_var->softirq.arrival_time = time_get();
set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
osn_var->softirq.count++;
local_inc(&osn_var->int_counter);
}
/* * trace_softirq_exit_callback - Note the end of an softirq * * Computes the duration of the softirq noise, and trace it. Also discounts the * interference from other sources of noise could be currently being accounted.
*/ staticvoid trace_softirq_exit_callback(void *data, unsignedint vec_nr)
{ struct osnoise_variables *osn_var = this_cpu_osn_var();
s64 duration;
if (!osn_var->sampling) return;
if (unlikely(timerlat_enabled())) if (!timerlat_softirq_exit(osn_var)) return;
/* * hook_softirq_events - Hook softirq handling events * * This function hooks the softirq related callbacks to the respective trace * events.
*/ staticint hook_softirq_events(void)
{ int ret;
ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL); if (ret) goto out_err;
ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL); if (ret) goto out_unreg_entry;
/* * unhook_softirq_events - Unhook softirq handling events * * This function hooks the softirq related callbacks to the respective trace * events.
*/ staticvoid unhook_softirq_events(void)
{
unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
} #else/* CONFIG_PREEMPT_RT */ /* * softirq are threads on the PREEMPT_RT mode.
*/ staticint hook_softirq_events(void)
{ return 0;
} staticvoid unhook_softirq_events(void)
{
} #endif
/* * thread_entry - Record the starting of a thread noise window * * It saves the context switch time for a noisy thread, and increments * the interference counters.
*/ staticvoid
thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
{ if (!osn_var->sampling) return; /* * The arrival time will be used in the report, but not to compute * the execution time, so it is safe to get it unsafe.
*/
osn_var->thread.arrival_time = time_get();
/* * thread_exit - Report the end of a thread noise window * * It computes the total noise from a thread, tracing if needed.
*/ staticvoid
thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
{
s64 duration;
if (!osn_var->sampling) return;
if (unlikely(timerlat_enabled())) if (!timerlat_thread_exit(osn_var)) return;
#ifdef CONFIG_TIMERLAT_TRACER
/*
 * osnoise_stop_exception - Stop tracing and the tracer.
 * @msg: text describing the exception, appended to the messages
 * @cpu: the CPU the exception refers to, reported in the messages
 *
 * For every registered osnoise instance: print a note to the instance's
 * trace buffer, panic if the OSN_PANIC_ON_STOP option is set, and turn
 * tracing off for that instance.
 */
static __always_inline void osnoise_stop_exception(char *msg, int cpu)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;

		/*
		 * Report the CPU handed in by the caller: it is the CPU the
		 * exception is about, which is not necessarily the one
		 * executing this function.
		 */
		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
				       "stop tracing hit on cpu %d due to exception: %s\n",
				       cpu,
				       msg);

		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
			panic("tracer hit on cpu %d due to exception: %s\n",
			      cpu,
			      msg);

		tracer_tracing_off(tr);
	}
	rcu_read_unlock();
}
/* * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler * * his function is hooked to the sched:sched_migrate_task trace event, and monitors * timerlat user-space thread migration.
*/ staticvoid trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu)
{ struct osnoise_variables *osn_var; long cpu = task_cpu(p);
/*
 * register_migration_monitor - Hook the sched_migrate_task probe if needed
 *
 * Returns 0 when nothing had to be done or the probe was registered,
 * otherwise the error from register_trace_sched_migrate_task().
 */
static int register_migration_monitor(void)
{
	int err;

	/*
	 * Timerlat thread migration check is only required when running timerlat in user-space.
	 * Thus, enable callback only if timerlat is set with no workload.
	 */
	if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options))
		return 0;

	/* Already hooked: complain once and leave things as they are. */
	if (WARN_ON_ONCE(monitor_enabled))
		return 0;

	err = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
	if (err)
		return err;

	monitor_enabled = true;

	return 0;
}
staticvoid unregister_migration_monitor(void)
{ if (!monitor_enabled) return;
unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
monitor_enabled = false;
} #else staticint register_migration_monitor(void)
{ return 0;
} staticvoid unregister_migration_monitor(void) {} #endif /* * trace_sched_switch - sched:sched_switch trace event handler * * This function is hooked to the sched:sched_switch trace event, and it is * used to record the beginning and to report the end of a thread noise window.
*/ staticvoid
trace_sched_switch_callback(void *data, bool preempt, struct task_struct *p, struct task_struct *n, unsignedint prev_state)
{ struct osnoise_variables *osn_var = this_cpu_osn_var(); int workload = test_bit(OSN_WORKLOAD, &osnoise_options);
if ((p->pid != osn_var->pid) || !workload)
thread_exit(osn_var, p);
if ((n->pid != osn_var->pid) || !workload)
thread_entry(osn_var, n);
}
/* * hook_thread_events - Hook the instrumentation for thread noise * * Hook the osnoise tracer callbacks to handle the noise from other * threads on the necessary kernel events.
*/ staticint hook_thread_events(void)
{ int ret;
ret = register_trace_sched_switch(trace_sched_switch_callback, NULL); if (ret) return -EINVAL;
ret = register_migration_monitor(); if (ret) goto out_unreg;
/*
 * unhook_thread_events - unhook the instrumentation for thread noise
 *
 * Unhook the osnoise tracer callbacks that handle the noise from other
 * threads: the sched_switch probe first, the migration monitor next.
 */
static void unhook_thread_events(void)
{
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);

	unregister_migration_monitor();
}
/*
 * save_osn_sample_stats - Save the osnoise_sample statistics
 *
 * Save the osnoise_sample statistics before the sampling phase. These
 * values will be used later to compute the diff between the statistics
 * before and after the osnoise sampling.
 *
 * Copies the nmi, irq, softirq and thread counters from @osn_var to @s.
 */
static void
save_osn_sample_stats(struct osnoise_variables *osn_var,
		      struct osnoise_sample *s)
{
	s->thread_count  = osn_var->thread.count;
	s->softirq_count = osn_var->softirq.count;
	s->irq_count     = osn_var->irq.count;
	s->nmi_count     = osn_var->nmi.count;
}
/*
 * diff_osn_sample_stats - Compute the osnoise_sample statistics
 *
 * After a sample period, compute the difference on the osnoise_sample
 * statistics. The struct osnoise_sample *s contains the statistics saved via
 * save_osn_sample_stats() before the osnoise sampling.
 *
 * Each counter in @s is replaced by (current value in @osn_var) minus
 * (value previously saved in @s).
 */
static void
diff_osn_sample_stats(struct osnoise_variables *osn_var,
		      struct osnoise_sample *s)
{
	s->thread_count  = osn_var->thread.count  - s->thread_count;
	s->softirq_count = osn_var->softirq.count - s->softirq_count;
	s->irq_count     = osn_var->irq.count     - s->irq_count;
	s->nmi_count     = osn_var->nmi.count     - s->nmi_count;
}
rcu_read_lock();
list_for_each_entry_rcu(inst, &osnoise_instances, list) {
tr = inst->tr;
trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, "stop tracing hit on cpu %d\n", smp_processor_id());
if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
panic("tracer hit stop condition on CPU %d\n", smp_processor_id());
tracer_tracing_off(tr);
}
rcu_read_unlock();
}
/* * osnoise_has_tracing_on - Check if there is at least one instance on
*/ static __always_inline int osnoise_has_tracing_on(void)
{ struct osnoise_instance *inst; int trace_is_on = 0;
/* * run_osnoise - Sample the time and look for osnoise * * Used to capture the time, looking for potential osnoise latency repeatedly. * Different from hwlat_detector, it is called with preemption and interrupts * enabled. This allows irqs, softirqs and threads to run, interfering on the * osnoise sampling thread, as they would do with a regular thread.
*/ staticint run_osnoise(void)
{ bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options); struct osnoise_variables *osn_var = this_cpu_osn_var();
u64 start, sample, last_sample;
u64 last_int_count, int_count;
s64 noise = 0, max_noise = 0;
s64 total, last_total = 0; struct osnoise_sample s; bool disable_preemption; unsignedint threshold;
u64 runtime, stop_in;
u64 sum_noise = 0; int hw_count = 0; int ret = -1;
/* * Disabling preemption is only required if IRQs are enabled, * and the options is set on.
*/
disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);
/* * Considers the current thread as the workload.
*/
osn_var->pid = current->pid;
/* * Save the current stats for the diff
*/
save_osn_sample_stats(osn_var, &s);
/* * if threshold is 0, use the default value of 1 us.
*/
threshold = tracing_thresh ? : 1000;
/* * Apply PREEMPT and IRQ disabled options.
*/ if (disable_irq)
local_irq_disable();
if (disable_preemption)
preempt_disable();
/* * Make sure NMIs see sampling first
*/
osn_var->sampling = true;
barrier();
/* * Transform the *_us config to nanoseconds to avoid the * division on the main loop.
*/
runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;
if (osnoise_data.stop_tracing) if (noise > stop_in)
osnoise_stop_tracing();
}
/* * In some cases, notably when running on a nohz_full CPU with * a stopped tick PREEMPT_RCU or PREEMPT_LAZY have no way to * account for QSs. This will eventually cause unwarranted * noise as RCU forces preemption as the means of ending the * current grace period. We avoid this by calling * rcu_momentary_eqs(), which performs a zero duration EQS * allowing RCU to end the current grace period. This call * shouldn't be wrapped inside an RCU critical section. * * Normally QSs for other cases are handled through cond_resched(). * For simplicity, however, we call rcu_momentary_eqs() for all * configurations here.
*/ if (!disable_irq)
local_irq_disable();
rcu_momentary_eqs();
if (!disable_irq)
local_irq_enable();
/* * For the non-preemptive kernel config: let threads runs, if * they so wish, unless set not do to so.
*/ if (!disable_irq && !disable_preemption)
cond_resched();
last_sample = sample;
last_int_count = int_count;
} while (total < runtime && !kthread_should_stop());
/* * Finish the above in the view for interrupts.
*/
barrier();
osn_var->sampling = false;
/* * Make sure sampling data is no longer updated.
*/
barrier();
/* * Return to the preemptive state.
*/ if (disable_preemption)
preempt_enable();
/* * differently from hwlat_detector, the osnoise tracer can run * without a pause because preemption is on.
*/ if (!interval) { /* Let synchronize_rcu_tasks() make progress */
cond_resched_tasks_rcu_qs(); return;
}
while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) { if (kthread_should_stop()) break;
}
}
/*
 * osnoise_migration_pending - checks if the task needs to migrate
 *
 * osnoise/timerlat threads are per-cpu. If there is a pending request to
 * migrate the thread away from the current CPU, something bad has happened.
 * Play the good citizen and leave.
 *
 * Returns 0 if it is safe to continue, 1 otherwise.
 */
static inline int osnoise_migration_pending(void)
{
	int pending = current->migration_pending ? 1 : 0;

	if (pending) {
		/*
		 * If migration is pending, there is a task waiting for the
		 * tracer to enable migration. The tracer does not allow migration,
		 * thus: taint and leave to unblock the blocked thread.
		 */
		osnoise_taint("migration requested to osnoise threads, leaving.");

		/*
		 * Unset this thread from the threads managed by the interface.
		 * The tracers are responsible for cleaning their env before
		 * exiting.
		 */
		mutex_lock(&interface_lock);
		this_cpu_osn_var()->kthread = NULL;
		cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask);
		mutex_unlock(&interface_lock);
	}

	return pending;
}
/* * osnoise_main - The osnoise detection kernel thread * * Calls run_osnoise() function to measure the osnoise for the configured runtime, * every period.
*/ staticint osnoise_main(void *data)
{ unsignedlong flags;
/* * This thread was created pinned to the CPU using PF_NO_SETAFFINITY. * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread. * * To work around this limitation, disable migration and remove the * flag.
*/
migrate_disable();
raw_spin_lock_irqsave(¤t->pi_lock, flags);
current->flags &= ~(PF_NO_SETAFFINITY);
raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
while (!kthread_should_stop()) { if (osnoise_migration_pending()) break;
/* skip a period if tracing is off on all instances */ if (!osnoise_has_tracing_on()) {
osnoise_sleep(true); continue;
}
/* * I am not sure if the timer was armed for this CPU. So, get * the timerlat struct from the timer itself, not from this * CPU.
*/
tlat = container_of(timer, struct timerlat_variables, timer);
now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
/* * Enable the osnoise: events for thread an softirq.
*/
tlat->tracing_thread = true;
osn_var->thread.arrival_time = time_get();
/* * A hardirq is running: the timer IRQ. It is for sure preempting * a thread, and potentially preempting a softirq. * * At this point, it is not interesting to know the duration of the * preempted thread (and maybe softirq), but how much time they will * delay the beginning of the execution of the timer thread. * * To get the correct (net) delay added by the softirq, its delta_start * is set as the IRQ one. In this way, at the return of the IRQ, the delta * start of the sofitrq will be zeroed, accounting then only the time * after that. * * The thread follows the same principle. However, if a softirq is * running, the thread needs to receive the softirq delta_start. The * reason being is that the softirq will be the last to be unfolded, * resseting the thread delay to zero. * * The PREEMPT_RT is a special case, though. As softirqs run as threads * on RT, moving the thread is enough.
*/ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
&osn_var->softirq.delta_start);
if (osnoise_data.stop_tracing) { if (time_to_us(diff) >= osnoise_data.stop_tracing) {
/* * At this point, if stop_tracing is set and <= print_stack, * print_stack is set and would be printed in the thread handler. * * Thus, print the stack trace as it is helpful to define the * root cause of an IRQ latency.
*/ if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
timerlat_save_stack(0);
timerlat_dump_stack(time_to_us(diff));
}
if (osnoise_data.print_stack)
timerlat_save_stack(0);
return HRTIMER_NORESTART;
}
/* * wait_next_period - Wait for the next period for timerlat
*/ staticint wait_next_period(struct timerlat_variables *tlat)
{
ktime_t next_abs_period, now;
u64 rel_period = osnoise_data.timerlat_period * 1000;
now = hrtimer_cb_get_time(&tlat->timer);
next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
/* * Save the next abs_period.
*/
tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
/* * If the new abs_period is in the past, skip the activation.
*/ while (ktime_compare(now, next_abs_period) > 0) {
next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
}
/* * Make the thread RT, that is how cyclictest is usually used.
*/
sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
/* * This thread was created pinned to the CPU using PF_NO_SETAFFINITY. * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread. * * To work around this limitation, disable migration and remove the * flag.
*/
migrate_disable();
raw_spin_lock_irqsave(¤t->pi_lock, flags);
current->flags &= ~(PF_NO_SETAFFINITY);
raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL); if (kthread) { if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) &&
!WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
kthread_stop(kthread);
} elseif (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) { /* * This is a user thread waiting on the timerlat_fd. We need * to close all users, and the best way to guarantee this is * by killing the thread. NOTE: this is a purpose specific file.
*/
kill_pid(kthread->thread_pid, SIGKILL, 1);
put_task_struct(kthread);
}
} else { /* if no workload, just return */ if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { /* * This is set in the osnoise tracer case.
*/
per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
barrier();
}
}
}
/* * stop_per_cpu_kthread - Stop per-cpu threads * * Stop the osnoise sampling htread. Use this on unload and at system * shutdown.
*/ staticvoid stop_per_cpu_kthreads(void)
{ int cpu;
/* * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads * * This starts the kernel thread that will look for osnoise on many * cpus.
*/ staticint start_per_cpu_kthreads(void)
{ struct cpumask *current_mask = &save_cpumask; int retval = 0; int cpu;
if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { if (timerlat_enabled()) return 0;
}
cpus_read_lock(); /* * Run only on online CPUs in which osnoise is allowed to run.
*/
cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
for_each_possible_cpu(cpu) { if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) { struct task_struct *kthread;
kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL); if (!WARN_ON(!kthread))
kthread_stop(kthread);
}
}
/* * osnoise_cpu_init - CPU hotplug online callback function
*/ staticint osnoise_cpu_init(unsignedint cpu)
{
schedule_work_on(cpu, &osnoise_hotplug_work); return 0;
}
/*
 * osnoise_cpu_die - CPU hotplug offline callback function
 *
 * Calls stop_kthread() for @cpu. Always returns 0.
 */
static int osnoise_cpu_die(unsigned int cpu)
{
	stop_kthread(cpu);

	return 0;
}
staticvoid osnoise_init_hotplug_support(void)
{ int ret;
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
osnoise_cpu_init, osnoise_cpu_die); if (ret < 0)
pr_warn(BANNER "Error to init cpu hotplug support\n");
/** * osnoise_options_write - Write function for "options" entry * @filp: The active open file structure * @ubuf: The user buffer that contains the value to write * @cnt: The maximum number of bytes to write to "file" * @ppos: The current position in @file * * Writing the option name sets the option, writing the "NO_" * prefix in front of the option name disables it. * * Writing "DEFAULTS" resets the option values to the default ones.
*/ static ssize_t osnoise_options_write(struct file *filp, constchar __user *ubuf,
size_t cnt, loff_t *ppos)
{ int running, option, enable, retval; char buf[256], *option_str;
if (cnt >= 256) return -EINVAL;
if (copy_from_user(buf, ubuf, cnt)) return -EFAULT;
/* * trace_types_lock is taken to avoid concurrency on start/stop.
*/
mutex_lock(&trace_types_lock);
running = osnoise_has_registered_instances(); if (running)
stop_per_cpu_kthreads();
mutex_lock(&interface_lock); /* * avoid CPU hotplug operations that might read options.
*/
cpus_read_lock();
retval = cnt;
if (enable) { if (option == OSN_DEFAULTS)
osnoise_options = OSN_DEFAULT_OPTIONS; else
set_bit(option, &osnoise_options);
} else { if (option == OSN_DEFAULTS)
retval = -EINVAL; else
clear_bit(option, &osnoise_options);
}
if (running)
start_per_cpu_kthreads();
mutex_unlock(&trace_types_lock);
return retval;
}
/* * osnoise_cpus_read - Read function for reading the "cpus" file * @filp: The active open file structure * @ubuf: The userspace provided buffer to read value into * @count: The maximum number of bytes to read * @ppos: The current "file" position * * Prints the "cpus" output into the user-provided buffer.
*/ static ssize_t
osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
loff_t *ppos)
{ char *mask_str __free(kfree) = NULL; int len;
guard(mutex)(&interface_lock);
len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
mask_str = kmalloc(len, GFP_KERNEL); if (!mask_str) return -ENOMEM;
len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)); if (len >= count) return -EINVAL;
/* * osnoise_cpus_write - Write function for "cpus" entry * @filp: The active open file structure * @ubuf: The user buffer that contains the value to write * @count: The maximum number of bytes to write to "file" * @ppos: The current position in @filp * * This function provides a write implementation for the "cpus" * interface to the osnoise trace. By default, it lists all CPUs, * in this way, allowing osnoise threads to run on any online CPU * of the system. It serves to restrict the execution of osnoise to the * set of CPUs written via this interface. Why not use "tracing_cpumask"? * Because the user might be interested in tracing what is running on * other CPUs. For instance, one might run osnoise in one HT CPU * while observing what is running on the sibling HT CPU.
*/ static ssize_t
osnoise_cpus_write(struct file *filp, constchar __user *ubuf, size_t count,
loff_t *ppos)
{
cpumask_var_t osnoise_cpumask_new; int running, err; char *buf __free(kfree) = NULL;
if (count < 1) return 0;
buf = kmalloc(count + 1, GFP_KERNEL); if (!buf) return -ENOMEM;
if (copy_from_user(buf, ubuf, count)) return -EFAULT;
buf[count] = '\0';
if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) return -ENOMEM;
err = cpulist_parse(buf, osnoise_cpumask_new); if (err) goto err_free;
/* * trace_types_lock is taken to avoid concurrency on start/stop.
*/
mutex_lock(&trace_types_lock);
running = osnoise_has_registered_instances(); if (running)
stop_per_cpu_kthreads();
mutex_lock(&interface_lock); /* * osnoise_cpumask is read by CPU hotplug operations.
*/
cpus_read_lock();
/* * This file is accessible only if timerlat is enabled, and * NO_OSNOISE_WORKLOAD is set.
*/ if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) {
mutex_unlock(&interface_lock); return -EINVAL;
}
migrate_disable();
osn_var = this_cpu_osn_var();
/* * The osn_var->pid holds the single access to this file.
*/ if (osn_var->pid) {
mutex_unlock(&interface_lock);
migrate_enable(); return -EBUSY;
}
/* * timerlat tracer is a per-cpu tracer. Check if the user-space too * is pinned to a single CPU. The tracer later monitors if the task * migrates and then disables the tracer if it does. However, it is * worth doing this basic acceptance test to avoid an obviously wrong * setup.
*/ if (current->nr_cpus_allowed > 1 || cpu != smp_processor_id()) {
mutex_unlock(&interface_lock);
migrate_enable(); return -EPERM;
}
/* * From now on, it is good to go.
*/
file->private_data = inode->i_cdev;
/* * timerlat_fd_read - Read function for "timerlat_fd" file * @file: The active open file structure * @ubuf: The userspace provided buffer to read value into * @count: The maximum number of bytes to read * @ppos: The current "file" position * * Prints 1 on timerlat, the number of interferences on osnoise, -1 on error.
*/ static ssize_t
timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
loff_t *ppos)
{ long cpu = (long) file->private_data; struct osnoise_variables *osn_var; struct timerlat_variables *tlat; struct timerlat_sample s;
s64 diff;
u64 now;
migrate_disable();
tlat = this_cpu_tmr_var();
/* * While in user-space, the thread is migratable. There is nothing * we can do about it. * So, if the thread is running on another CPU, stop the machinery.
*/ if (cpu == smp_processor_id()) { if (tlat->uthread_migrate) {
migrate_enable(); return -EINVAL;
}
} else {
per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
osnoise_taint("timerlat user thread migrate\n");
osnoise_stop_tracing();
migrate_enable(); return -EINVAL;
}
osn_var = this_cpu_osn_var();
/* * The timerlat in user-space runs in a different order: * the read() starts from the execution of the previous occurrence, * sleeping for the next occurrence. * * So, skip if we are entering on read() before the first wakeup * from timerlat IRQ:
*/ if (likely(osn_var->sampling)) {
now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
diff = now - tlat->abs_period;
/* * it was not a timer firing, but some other signal?
*/ if (diff < 0) goto out;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.