// SPDX-License-Identifier: GPL-2.0 /* * ring buffer based function tracer * * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com> * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> * * Originally taken from the RT patch by: * Arnaldo Carvalho de Melo <acme@redhat.com> * * Based on code from the latency_tracer, that is: * Copyright (C) 2004-2006 Ingo Molnar * Copyright (C) 2004 Nadia Yvette Chambers
*/ #include <linux/ring_buffer.h> #include <linux/utsname.h> #include <linux/stacktrace.h> #include <linux/writeback.h> #include <linux/kallsyms.h> #include <linux/security.h> #include <linux/seq_file.h> #include <linux/irqflags.h> #include <linux/debugfs.h> #include <linux/tracefs.h> #include <linux/pagemap.h> #include <linux/hardirq.h> #include <linux/linkage.h> #include <linux/uaccess.h> #include <linux/cleanup.h> #include <linux/vmalloc.h> #include <linux/ftrace.h> #include <linux/module.h> #include <linux/percpu.h> #include <linux/splice.h> #include <linux/kdebug.h> #include <linux/string.h> #include <linux/mount.h> #include <linux/rwsem.h> #include <linux/slab.h> #include <linux/ctype.h> #include <linux/init.h> #include <linux/panic_notifier.h> #include <linux/poll.h> #include <linux/nmi.h> #include <linux/fs.h> #include <linux/trace.h> #include <linux/sched/clock.h> #include <linux/sched/rt.h> #include <linux/fsnotify.h> #include <linux/irq_work.h> #include <linux/workqueue.h> #include <linux/sort.h> #include <linux/io.h> /* vmap_page_range() */ #include <linux/fs_context.h>
#include <asm/setup.h> /* COMMAND_LINE_SIZE */
#include"trace.h" #include"trace_output.h"
#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest will lurk into the ring-buffer to count the
 * entries inserted during the selftest although some concurrent
 * insertions into the ring-buffer such as trace_printk could occur
 * at the same time, giving false positive or negative results.
*/ staticbool __read_mostly tracing_selftest_running;
/* * If boot-time tracing including tracers/events via kernel cmdline * is running, we do not want to run SELFTEST.
*/ bool __read_mostly tracing_selftest_disabled;
void __init disable_tracing_selftest(constchar *reason)
{ if (!tracing_selftest_disabled) {
tracing_selftest_disabled = true;
pr_info("Ftrace startup test is disabled due to %s\n", reason);
}
} #else #define tracing_selftest_running 0 #define tracing_selftest_disabled 0 #endif
/* Pipe tracepoints to printk */ staticstruct trace_iterator *tracepoint_print_iter; int tracepoint_printk; staticbool tracepoint_printk_stop_on_boot __initdata; staticbool traceoff_after_boot __initdata; static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
/* For tracers that don't implement custom flags */ staticstruct tracer_opt dummy_tracer_opt[] = {
{ }
};
/* * To prevent the comm cache from being overwritten when no * tracing is active, only save the comm when a trace event * occurred.
*/
DEFINE_PER_CPU(bool, trace_taskinfo_save);
/* * Kill all tracing for good (never come back). * It is initialized to 1 but will turn to zero if the initialization * of the tracer is successful. But that is the only place that sets * this back to zero.
*/ staticint tracing_disabled = 1;
cpumask_var_t __read_mostly tracing_buffer_mask;
#define MAX_TRACER_SIZE		100
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting it to a
 * serial console.
 *
 * It is default off, but you can enable it with either specifying
 * "ftrace_dump_on_oops" in the kernel command line, or setting
 * /proc/sys/kernel/ftrace_dump_on_oops
 * Set 1 if you want to dump buffers of all CPUs
 * Set 2 if you want to dump the buffer of the CPU that triggered oops
 * Set instance name if you want to dump the specific trace instance
 * Multiple instance dump is also supported, and instances are separated
 * by commas.
*/ /* Set to string format zero to disable by default */ char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
/* When set, tracing will stop when a WARN*() is hit */ staticint __disable_trace_on_warning;
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module			*mod;	/* owning module, NULL if built in */
	unsigned long			length;	/* number of saved maps in this array */
};

union trace_eval_map_item;

struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * than "mod" or "eval_string"
	 */
	union trace_eval_map_item	*next;	/* next array of saved eval_map items */
	const char			*end;	/* points to NULL */
};

/* Serializes updates to the saved eval map arrays */
static DEFINE_MUTEX(trace_eval_mutex);

/*
 * The trace_eval_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved eval_map items.
 */
union trace_eval_map_item {
	struct trace_eval_map		map;
	struct trace_eval_map_head	head;
	struct trace_eval_map_tail	tail;
};
/*
 * Parse the "alloc_snapshot" boot parameter.
 *
 * "alloc_snapshot=<name>" appends "<name>\t" to boot_snapshot_info so the
 * named instance(s) get a snapshot buffer allocated at boot.  A bare
 * "alloc_snapshot" requests a snapshot buffer (and an expanded main ring
 * buffer) for the top level instance.
 */
static int __init boot_alloc_snapshot(char *str)
{
	char *slot = boot_snapshot_info + boot_snapshot_index;
	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
	int ret;

	if (str[0] == '=') {
		str++;
		/*
		 * Reserve room for the trailing '\t' in addition to the NUL
		 * terminator.  With the previous "strlen(str) >= left" check
		 * a string of exactly left - 1 bytes would pass, snprintf()
		 * would truncate the '\t', yet boot_snapshot_index would be
		 * advanced by the untruncated length, desyncing the index
		 * from what was actually stored.
		 */
		if (strlen(str) + 1 >= left)
			return -1;

		ret = snprintf(slot, left, "%s\t", str);
		boot_snapshot_index += ret;
	} else {
		allocate_snapshot = true;
		/* We also need the main ring buffer expanded */
		trace_set_ring_buffer_expanded(NULL);
	}
	return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);
/* Insert @export at the head of the RCU-protected @list. */
static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	/* Link the new node to the current head before publishing it */
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included into the list.
	 */
	rcu_assign_pointer(*list, export);
}
/* trace_options that are only supported by global_trace */ #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
/* trace_flags that are default zero for instances */ #define ZEROED_TRACE_FLAGS \
(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
TRACE_ITER_COPY_MARKER)
/* * The global_trace is the descriptor that holds the top-level tracing * buffers for the live tracing.
*/ staticstruct trace_array global_trace = {
.trace_flags = TRACE_DEFAULT_FLAGS,
};
/* List of trace_arrays interested in the top level trace_marker */ static LIST_HEAD(marker_copies);
/*
 * Tell whether @tr may safely use the binary trace_printk format.
 *
 * The binary format of trace_printk can cause a crash if used by a
 * buffer that came from another boot, so boot-mapped ring buffers must
 * fall back to the non-binary variant.
 */
static __always_inline bool printk_binsafe(struct trace_array *tr)
{
	if (tr->flags & TRACE_ARRAY_FL_BOOT)
		return false;

	return true;
}
staticvoid update_printk_trace(struct trace_array *tr)
{ if (printk_trace == tr) return;
/* Returns true if the status of tr changed */ staticbool update_marker_trace(struct trace_array *tr, int enabled)
{
lockdep_assert_held(&event_mutex);
if (enabled) { if (!list_empty(&tr->marker_list)) returnfalse;
/** * trace_array_put - Decrement the reference counter for this trace array. * @this_tr : pointer to the trace array * * NOTE: Use this when we no longer need the trace array returned by * trace_array_get_by_name(). This ensures the trace array can be later * destroyed. *
*/ void trace_array_put(struct trace_array *this_tr)
{ if (!this_tr) return;
/*
 * Common open-time checks for tracefs files: honor lockdown, reject
 * opens once tracing has been permanently disabled, and take a
 * reference on @tr (when given).  Returns 0 on success or -errno.
 */
int tracing_check_open_get_tr(struct trace_array *tr)
{
	int ret = security_locked_down(LOCKDOWN_TRACEFS);

	if (ret)
		return ret;

	if (tracing_disabled)
		return -ENODEV;

	/* @tr may be NULL for files not bound to an instance */
	if (tr && trace_array_get(tr) < 0)
		return -ENODEV;

	return 0;
}
/**
 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 * @filtered_pids: The list of pids to check
 * @search_pid: The PID to find in @filtered_pids
 *
 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 */
bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids,
			     pid_t search_pid)
{
	return trace_pid_list_is_set(filtered_pids, search_pid);
}
/** * trace_ignore_this_task - should a task be ignored for tracing * @filtered_pids: The list of pids to check * @filtered_no_pids: The list of pids not to be traced * @task: The task that should be ignored if not filtered * * Checks if @task should be traced or not from @filtered_pids. * Returns true if @task should *NOT* be traced. * Returns false if @task should be traced.
*/ bool
trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct trace_pid_list *filtered_no_pids, struct task_struct *task)
{ /* * If filtered_no_pids is not empty, and the task's pid is listed * in filtered_no_pids, then return true. * Otherwise, if filtered_pids is empty, that means we can * trace all tasks. If it has content, then only trace pids * within filtered_pids.
*/
/**
 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 * @pid_list: The list to modify
 * @self: The current task for fork or NULL for exit
 * @task: The task to add or remove
 *
 * If adding a task, if @self is defined, the task is only added if @self
 * is also included in @pid_list. This happens on fork and tasks should
 * only be added when the parent is listed. If @self is NULL, then the
 * @task pid will be removed from the list, which would happen on exit
 * of a task.
 */
void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
				  struct task_struct *self,
				  struct task_struct *task)
{
	if (!pid_list)
		return;

	if (self) {
		/* Fork: only add @task when the forking task is itself listed */
		if (!trace_find_filtered_pid(pid_list, self->pid))
			return;
		trace_pid_list_set(pid_list, task->pid);
	} else {
		/* Exit: drop @task from the list */
		trace_pid_list_clear(pid_list, task->pid);
	}
}
/**
 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 * @pid_list: The pid list to show
 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
 * @pos: The position of the file
 *
 * This is used by the seq_file "next" operation to iterate the pids
 * listed in a trace_pid_list structure.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
{
	long cur = (unsigned long)v;
	unsigned int next;

	(*pos)++;

	/* @v already holds the previous pid + 1, i.e. the first candidate */
	if (trace_pid_list_next(pid_list, cur, &next) < 0)
		return NULL;

	/* Encode as pid + 1 so that pid 0 is distinguishable from NULL */
	return (void *)((unsigned long)next + 1);
}
/**
 * trace_pid_start - Used for seq_file to start reading pid lists
 * @pid_list: The pid list to show
 * @pos: The position of the file
 *
 * This is used by seq_file "start" operation to start the iteration
 * of listing pids.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
{
	unsigned long pid;
	unsigned int first;
	loff_t l = 0;

	if (trace_pid_list_first(pid_list, &first) < 0)
		return NULL;

	/*
	 * Walk forward until reaching *pos.  Values carried through the
	 * iterator are pid + 1 so that zero can be the exit value.
	 */
	pid = (unsigned long)first + 1;
	while (pid && l < *pos)
		pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l);

	return (void *)pid;
}
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	unsigned long shown_pid = (unsigned long)v - 1;

	seq_printf(m, "%lu\n", shown_pid);
	return 0;
}
/* 128 should be much more than enough */ #define PID_BUF_SIZE 127
if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1)) return -ENOMEM;
/* * Always recreate a new array. The write is an all or nothing * operation. Always create a new array when adding new pids by * the user. If the operation fails, then the current list is * not modified.
*/
pid_list = trace_pid_list_alloc(); if (!pid_list) {
trace_parser_put(&parser); return -ENOMEM;
}
if (filtered_pids) { /* copy the current bits to the new max */
ret = trace_pid_list_first(filtered_pids, &pid); while (!ret) {
ret = trace_pid_list_set(pid_list, pid); if (ret < 0) goto out;
ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
nr_pids++;
}
}
ret = 0; while (cnt > 0) {
pos = 0;
ret = trace_get_user(&parser, ubuf, cnt, &pos); if (ret < 0) break;
read += ret;
ubuf += ret;
cnt -= ret;
if (!trace_parser_loaded(&parser)) break;
ret = -EINVAL; if (kstrtoul(parser.buffer, 0, &val)) break;
pid = (pid_t)val;
if (trace_pid_list_set(pid_list, pid) < 0) {
ret = -1; break;
}
nr_pids++;
trace_parser_clear(&parser);
ret = 0;
}
out:
trace_parser_put(&parser);
if (ret < 0) {
trace_pid_list_free(pid_list); return ret;
}
if (!nr_pids) { /* Cleared the list of pids */
trace_pid_list_free(pid_list);
pid_list = NULL;
}
*new_pid_list = pid_list;
return read;
}
static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
{
u64 ts;
/* Early boot up does not have a buffer yet */ if (!buf->buffer) return trace_clock_local();
/**
 * tracing_is_enabled - Show if global_trace has been enabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" to be used in fast paths such as for
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	return global_trace.buffer_disabled == 0;
}
/* * trace_buf_size is the size in bytes that is allocated * for a buffer. Note, the number of bytes is always rounded * to page size. * * This number is purposely set to a low number of 16384. * If the dump on oops happens, it will be much appreciated * to not have to wait for all that output. Anyway this can be * boot time and run time configurable.
*/ #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
/* trace_types holds a link list of available tracers. */ staticstruct tracer *trace_types __read_mostly;
/* * trace_types_lock is used to protect the trace_types list.
*/
DEFINE_MUTEX(trace_types_lock);
/* * serialize the access of the ring buffer * * ring buffer serializes readers, but it is low level protection. * The validity of the events (which returns by ring_buffer_peek() ..etc) * are not protected by ring buffer. * * The content of events may become garbage if we allow other process consumes * these events concurrently: * A) the page of the consumed events may become a normal page * (not reader page) in ring buffer, and this page will be rewritten * by events producer. * B) The page of the consumed events may become a page for splice_read, * and this page will be returned to system. * * These primitives allow multi process access to different cpu ring buffer * concurrently. * * These primitives don't distinguish read-only and read-consume access. * Multi read-only access are also serialized.
*/
/*
 * Serialize reader access: a single-cpu reader takes the shared rwsem
 * plus that cpu's mutex; RING_BUFFER_ALL_CPUS takes the rwsem exclusively
 * so it excludes all per-cpu readers at once.
 */
static inline void trace_access_lock(int cpu)
{
	if (cpu != RING_BUFFER_ALL_CPUS) {
		/* First block any trace_access_lock(RING_BUFFER_ALL_CPUS) */
		down_read(&all_cpu_access_lock);
		/* Then serialize access to this @cpu's ring buffer */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	} else {
		/* Gain it for accessing the whole ring buffer */
		down_write(&all_cpu_access_lock);
	}
}
/* Re-enable recording into @tr's ring buffer and clear the mirror flag. */
void tracer_tracing_on(struct trace_array *tr)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;

	if (buffer)
		ring_buffer_record_on(buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
}
/**
 * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
 * disabled with tracing_off.
 */
void tracing_on(void)
{
	tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);
/* If this is the temp buffer, we need to commit fully */ if (this_cpu_read(trace_buffered_event) == event) { /* Length is in event->array[0] */
ring_buffer_write(buffer, event->array[0], &event->array[1]); /* Release the temp buffer */
this_cpu_dec(trace_buffered_event_cnt); /* ring_buffer_unlock_commit() enables preemption */
preempt_enable_notrace();
} else
ring_buffer_unlock_commit(buffer);
}
int __trace_array_puts(struct trace_array *tr, unsignedlong ip, constchar *str, int size)
{ struct ring_buffer_event *event; struct trace_buffer *buffer; struct print_entry *entry; unsignedint trace_ctx; int alloc;
if (!(tr->trace_flags & TRACE_ITER_PRINTK)) return 0;
if (unlikely(tracing_selftest_running && tr == &global_trace)) return 0;
/**
 * __trace_puts - write a constant string into the trace buffer.
 * @ip: The address of the caller
 * @str: The constant string to write
 * @size: The size of the string.
 *
 * Thin wrapper writing into the current printk_trace instance.
 * Returns whatever __trace_array_puts() reports (0 when printk
 * tracing is disabled for that instance).
 */
int __trace_puts(unsigned long ip, const char *str, int size)
{
	return __trace_array_puts(printk_trace, ip, str, size);
}
EXPORT_SYMBOL_GPL(__trace_puts);
/** * __trace_bputs - write the pointer to a constant string into trace buffer * @ip: The address of the caller * @str: The constant string to write to the buffer to
*/ int __trace_bputs(unsignedlong ip, constchar *str)
{ struct trace_array *tr = READ_ONCE(printk_trace); struct ring_buffer_event *event; struct trace_buffer *buffer; struct bputs_entry *entry; unsignedint trace_ctx; int size = sizeof(struct bputs_entry);
if (!printk_binsafe(tr)) return __trace_puts(ip, str, strlen(str));
if (!(tr->trace_flags & TRACE_ITER_PRINTK)) return 0;
if (unlikely(tracing_selftest_running || tracing_disabled)) return 0;
if (in_nmi()) {
trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
trace_array_puts(tr, "*** snapshot is being ignored ***\n"); return;
}
if (!tr->allocated_snapshot) {
trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
trace_array_puts(tr, "*** stopping trace here! ***\n");
tracer_tracing_off(tr); return;
}
/* Note, snapshot can not be used when the tracer uses it */ if (tracer->use_max_tr) {
trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); return;
}
if (tr->mapped) {
trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); return;
}
/** * tracing_snapshot - take a snapshot of the current buffer. * * This causes a swap between the snapshot buffer and the current live * tracing buffer. You can use this to take snapshots of the live * trace when some condition is triggered, but continue to trace. * * Note, make sure to allocate the snapshot with either * a tracing_snapshot_alloc(), or by doing it manually * with: echo 1 > /sys/kernel/tracing/snapshot * * If the snapshot buffer is not allocated, it will stop tracing. * Basically making a permanent snapshot.
*/ void tracing_snapshot(void)
{ struct trace_array *tr = &global_trace;
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr: The tracing instance to snapshot
 * @cond_data: The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional - the snapshot will only happen if the
 * cond_snapshot.update() implementation receiving the cond_data
 * returns true, which means that the trace array's cond_snapshot
 * update() operation used the cond_data to determine whether the
 * snapshot should be taken, and if it was, presumably saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
/** * tracing_cond_snapshot_data - get the user data associated with a snapshot * @tr: The tracing instance * * When the user enables a conditional snapshot using * tracing_snapshot_cond_enable(), the user-defined cond_data is saved * with the snapshot. This accessor is used to retrieve it. * * Should not be called from cond_snapshot.update(), since it takes * the tr->max_lock lock, which the code calling * cond_snapshot.update() has already done. * * Returns the cond_data associated with the trace array's snapshot.
*/ void *tracing_cond_snapshot_data(struct trace_array *tr)
{ void *cond_data = NULL;
/*
 * Allocate the snapshot (max) buffer for @tr if it does not exist yet,
 * matching the sub-buffer order and size of the main buffer.
 * Returns 0 on success (or if already allocated), negative errno on failure.
 */
int tracing_alloc_snapshot_instance(struct trace_array *tr)
{
	int order;
	int ret;

	if (tr->allocated_snapshot)
		return 0;

	/* Make the snapshot buffer have the same order as main buffer */
	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
	if (ret < 0)
		return ret;

	/* Allocate the spare buffer, sized like the main buffer */
	ret = resize_buffer_duplicate_size(&tr->max_buffer,
					   &tr->array_buffer,
					   RING_BUFFER_ALL_CPUS);
	if (ret < 0)
		return ret;

	tr->allocated_snapshot = true;
	return 0;
}
/* Release the snapshot buffer's memory by shrinking it to a minimum. */
static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer; instead, we shrink it, because
	 * the max_tr ring buffer has some state (e.g. ring->clock) that
	 * we want to preserve.
	 */
	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->max_buffer, 1);
	tracing_reset_online_cpus(&tr->max_buffer);
	tr->allocated_snapshot = false;
}
/*
 * Arm the snapshot for @tr, allocating the snapshot buffer if needed.
 * Caller must hold trace_types_lock (see tracing_arm_snapshot()).
 */
static int tracing_arm_snapshot_locked(struct trace_array *tr)
{
	int ret;

	ret = tracing_alloc_snapshot_instance(tr);
	if (ret) {
		/*
		 * NOTE(review): this undoes a tr->snapshot increment that is
		 * presumably taken earlier under snapshot_trigger_lock; the
		 * increment is not visible in this excerpt — confirm against
		 * the full function body.
		 */
		spin_lock(&tr->snapshot_trigger_lock);
		tr->snapshot--;
		spin_unlock(&tr->snapshot_trigger_lock);
	}

	return ret;
}
/* Lock-taking wrapper around tracing_arm_snapshot_locked(). */
int tracing_arm_snapshot(struct trace_array *tr)
{
	int ret;

	/* guard() drops trace_types_lock automatically on return */
	guard(mutex)(&trace_types_lock);
	ret = tracing_arm_snapshot_locked(tr);
	return ret;
}
/** * tracing_alloc_snapshot - allocate snapshot buffer. * * This only allocates the snapshot buffer if it isn't already * allocated - it doesn't also take a snapshot. * * This is meant to be used in cases where the snapshot buffer needs * to be set up for events that can't sleep but need to be able to * trigger a snapshot.
*/ int tracing_alloc_snapshot(void)
{ struct trace_array *tr = &global_trace; int ret;
ret = tracing_alloc_snapshot_instance(tr);
WARN_ON(ret < 0);
/** * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer. * * This is similar to tracing_snapshot(), but it will allocate the * snapshot buffer if it isn't already allocated. Use this only * where it is safe to sleep, as the allocation may sleep. * * This causes a swap between the snapshot buffer and the current live * tracing buffer. You can use this to take snapshots of the live * trace when some condition is triggered, but continue to trace.
*/ void tracing_snapshot_alloc(void)
{ int ret;
ret = tracing_alloc_snapshot(); if (ret < 0) return;
/** * tracing_snapshot_cond_enable - enable conditional snapshot for an instance * @tr: The tracing instance * @cond_data: User data to associate with the snapshot * @update: Implementation of the cond_snapshot update function * * Check whether the conditional snapshot for the given instance has * already been enabled, or if the current tracer is already using a * snapshot; if so, return -EBUSY, else create a cond_snapshot and * save the cond_data and update function inside. * * Returns 0 if successful, error otherwise.
*/ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
cond_update_fn_t update)
{ struct cond_snapshot *cond_snapshot __free(kfree) =
kzalloc(sizeof(*cond_snapshot), GFP_KERNEL); int ret;
/* * The cond_snapshot can only change to NULL without the * trace_types_lock. We don't care if we race with it going * to NULL, but we want to make sure that it's not set to * something other than NULL when we get here, which we can * do safely with only holding the trace_types_lock and not * having to take the max_lock.
*/ if (tr->cond_snapshot) return -EBUSY;
ret = tracing_arm_snapshot_locked(tr); if (ret) return ret;
/**
 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
 * @tr: The tracing instance
 *
 * Check whether the conditional snapshot for the given instance is
 * enabled; if so, free the cond_snapshot associated with it,
 * otherwise return -EINVAL.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	int ret = 0;

	/*
	 * NOTE(review): only the return of @ret is visible here; the
	 * cond_snapshot teardown described above does not appear in this
	 * excerpt — verify against the full source.
	 */
	return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); #else void tracing_snapshot(void)
{
WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot); void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond); int tracing_alloc_snapshot(void)
{
WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used"); return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); void tracing_snapshot_alloc(void)
{ /* Give warning */
tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); void *tracing_cond_snapshot_data(struct trace_array *tr)
{ return NULL;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data); int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
{ return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable); int tracing_snapshot_cond_disable(struct trace_array *tr)
{ returnfalse;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); #define free_snapshot(tr) do { } while (0) #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; }) #endif/* CONFIG_TRACER_SNAPSHOT */
/* Stop recording into @tr's ring buffer and set the mirror flag. */
void tracer_tracing_off(struct trace_array *tr)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;

	if (buffer)
		ring_buffer_record_off(buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
}
/** * tracer_tracing_disable() - temporary disable the buffer from write * @tr: The trace array to disable its buffer for * * Expects trace_tracing_enable() to re-enable tracing. * The difference between this and tracer_tracing_off() is that this * is a counter and can nest, whereas, tracer_tracing_off() can * be called multiple times and a single trace_tracing_on() will * enable it.
*/ void tracer_tracing_disable(struct trace_array *tr)
{ if (WARN_ON_ONCE(!tr->array_buffer.buffer)) return;
/** * tracer_tracing_enable() - counter part of tracer_tracing_disable() * @tr: The trace array that had tracer_tracincg_disable() called on it * * This is called after tracer_tracing_disable() has been called on @tr, * when it's safe to re-enable tracing.
*/ void tracer_tracing_enable(struct trace_array *tr)
{ if (WARN_ON_ONCE(!tr->array_buffer.buffer)) return;
/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);
/*
 * Called from the WARN*() path: when the "traceoff on warning" knob is
 * set, note the reason in the trace and stop the buffers.
 */
void disable_trace_on_warning(void)
{
	if (!__disable_trace_on_warning)
		return;

	trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
			       "Disabling tracing due to warning\n");
	tracing_off();
}
/**
 * tracer_tracing_is_on - show real state of ring buffer enabled
 * @tr : the trace array to know if ring buffer is enabled
 *
 * Shows real state of the ring buffer if it is enabled or not.
 */
bool tracer_tracing_is_on(struct trace_array *tr)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;

	/* Before the buffer exists, fall back to the mirror flag */
	if (!buffer)
		return !tr->buffer_disabled;

	return ring_buffer_record_is_set_on(buffer);
}
/**
 * tracing_is_on - show state of ring buffers enabled
 *
 * Returns the accurate on/off state of the top level trace buffer.
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);
if (!str) return 0;
buf_size = memparse(str, &str); /* * nr_entries can not be zero and the startup * tests require some buffer space. Therefore * ensure we have at least 4096 bytes of buffer.
*/
trace_buf_size = max(4096UL, buf_size); return 1;
}
__setup("trace_buf_size=", set_buf_size);
/*
 * Parse the "tracing_thresh=" boot parameter.  The value is scaled by
 * 1000 before being stored in tracing_thresh (presumably microseconds
 * on the command line to the internal unit — confirm against docs).
 */
static int __init set_tracing_thresh(char *str)
{
	unsigned long threshold;

	if (!str)
		return 0;
	if (kstrtoul(str, 0, &threshold) < 0)
		return 0;

	tracing_thresh = threshold * 1000;
	return 1;
}
__setup("tracing_thresh=", set_tracing_thresh);
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};
/* * trace_get_user - reads the user input string separated by space * (matched by isspace(ch)) * * For each string found the 'struct trace_parser' is updated, * and the function returns. * * Returns number of bytes read. * * See kernel/trace/trace.h for 'struct trace_parser' details.
*/ int trace_get_user(struct trace_parser *parser, constchar __user *ubuf,
size_t cnt, loff_t *ppos)
{ char ch;
size_t read = 0;
ssize_t ret;
if (!*ppos)
trace_parser_clear(parser);
ret = get_user(ch, ubuf++); if (ret) goto fail;
read++;
cnt--;
/* * The parser is not finished with the last write, * continue reading the user input without skipping spaces.
*/ if (!parser->cont) { /* skip white space */ while (cnt && isspace(ch)) {
ret = get_user(ch, ubuf++); if (ret) goto fail;
read++;
cnt--;
}
parser->idx = 0;
/* only spaces were written */ if (isspace(ch) || !ch) {
*ppos += read; return read;
}
}
/* read the non-space input */ while (cnt && !isspace(ch) && ch) { if (parser->idx < parser->size - 1)
parser->buffer[parser->idx++] = ch; else {
ret = -EINVAL; goto fail;
}
ret = get_user(ch, ubuf++); if (ret) goto fail;
read++;
cnt--;
}
/* We either got finished input or we have to wait for another call. */ if (isspace(ch) || !ch) {
parser->buffer[parser->idx] = 0;
parser->cont = false;
} elseif (parser->idx < parser->size - 1) {
parser->cont = true;
parser->buffer[parser->idx++] = ch; /* Make sure the parsed string always terminates with '\0'. */
parser->buffer[parser->idx] = 0;
} else {
ret = -EINVAL; goto fail;
}
/*
 * latency_fsnotify - queue a latency-file change notification for @tr
 * @tr: the trace array whose latency file changed
 *
 * Defers the actual fsnotify work to irq_work context: calling
 * queue_work(&tr->fsnotify_work) directly from here could deadlock,
 * since this may be reached from __schedule() or do_idle().
 */
void latency_fsnotify(struct trace_array *tr)
{
	/* Nothing to notify if the fsnotify workqueue was never set up. */
	if (!fsnotify_wq)
		return;

	irq_work_queue(&tr->fsnotify_irqwork);
}
/*
 * NOTE(review): extraction appears to have dropped lines here — the locals
 * trace_buf, max_buf and data are declared but never used in the visible
 * code, which suggests the latency-metadata copies between the two buffers
 * are missing; "staticvoid" has also lost its separating space. Compare
 * against the upstream kernel tree; only comments were added here.
 */
/* * Copy the new maximum trace into the separate maximum-trace * structure. (this way the maximum trace is permanently saved, * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
*/ staticvoid
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{ struct array_buffer *trace_buf = &tr->array_buffer; struct array_buffer *max_buf = &tr->max_buffer; struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
/* Record which task caused the new max latency. */
strscpy(max_data->comm, tsk->comm);
max_data->pid = tsk->pid; /* * If tsk == current, then use current_uid(), as that does not use * RCU. The irq tracer can be called out of RCU scope.
*/ if (tsk == current)
max_data->uid = current_uid(); else
max_data->uid = task_uid(tsk);
/* record this tasks comm */
tracing_record_cmdline(tsk);
/* Notify watchers of the latency file that a new max was recorded. */
latency_fsnotify(tr);
}
/*
 * NOTE(review): this function looks truncated by extraction — tr->max_lock
 * is taken via arch_spin_lock() but never released in the visible code, the
 * documented buffer flip (and a call into __update_max_tr()) is absent, and
 * the @tsk/@cond_data parameters are unused here. Restore the missing tail
 * from the upstream kernel tree; only comments were added in this block.
 */
/** * update_max_tr - snapshot all trace buffers from global_trace to max_tr * @tr: tracer * @tsk: the task with the latency * @cpu: The cpu that initiated the trace. * @cond_data: User data associated with a conditional snapshot * * Flip the buffers between the @tr and the max_tr and record information * about which task was the cause of this latency.
*/ void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, void *cond_data)
{ if (tr->stop_count) return;
WARN_ON_ONCE(!irqs_disabled());
if (!tr->allocated_snapshot) { /* Only the nop tracer should hit this when disabling */
WARN_ON_ONCE(tr->current_trace != &nop_trace); return;
}
arch_spin_lock(&tr->max_lock);
/* Inherit the recordable setting from array_buffer */ if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
ring_buffer_record_on(tr->max_buffer.buffer); else
ring_buffer_record_off(tr->max_buffer.buffer);
/* Any waiters on the old snapshot buffer need to wake up */
ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
}
/*
 * NOTE(review): the lines from "ret = ring_buffer_wait(iter->..." onward
 * reference iter, full, pwait and wait_pipe_cond, none of which are declared
 * in this function — they appear to be a fragment of a different (pipe-wait)
 * helper spliced in by extraction. The genuine tail of this function
 * (releasing tr->max_lock, and presumably a __update_max_tr() call) is
 * missing; as written the max_lock taken below is never dropped. Restore
 * from the upstream kernel tree; only comments were added here.
 */
/** * update_max_tr_single - only copy one trace over, and reset the rest * @tr: tracer * @tsk: task with the latency * @cpu: the cpu of the buffer to copy. * * Flip the trace of a single CPU buffer between the @tr and the max_tr.
*/ void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{ int ret;
if (tr->stop_count) return;
WARN_ON_ONCE(!irqs_disabled()); if (!tr->allocated_snapshot) { /* Only the nop tracer should hit this when disabling */
WARN_ON_ONCE(tr->current_trace != &nop_trace); return;
}
arch_spin_lock(&tr->max_lock);
/* Swap only the requested CPU's buffer with the snapshot buffer. */
ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
if (ret == -EBUSY) { /* * We failed to swap the buffer due to a commit taking * place on this CPU. We fail to record, but we reset * the max trace buffer (no one writes directly to it) * and flag that it failed. * Another reason is resize is in progress.
*/
trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_, "Failed to swap buffers due to commit or resize in progress\n");
}
WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
wait_pipe_cond, &pwait);
#ifdef CONFIG_TRACER_MAX_TRACE /* * Make sure this is still the snapshot buffer, as if a snapshot were * to happen, this would now be the main buffer.
*/ if (iter->snapshot)
iter->array_buffer = &iter->tr->max_buffer; #endif return ret;
}
/*
 * NOTE(review): fragment — the function header and local declarations
 * (this looks like the body of a tracer-selftest runner taking a
 * "struct tracer *type", with locals ret, saved_tracer and tr, but the
 * opening lines are missing from this chunk). Extraction has also merged
 * preprocessor directives onto code lines ("#ifdef ... if (...)",
 * "} #endif"), which will not preprocess as written. Restore from the
 * upstream kernel tree; only comments were added here.
 */
if (!type->selftest || tracing_selftest_disabled) return 0;
/* * If a tracer registers early in boot up (before scheduling is * initialized and such), then do not run its selftests yet. * Instead, run it a little later in the boot process.
*/ if (!selftests_can_run) return save_selftest(type);
if (!tracing_is_on()) {
pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
type->name); return 0;
}
/* * Run a selftest on this tracer. * Here we reset the trace buffer, and set the current * tracer to be this tracer. The tracer can then run some * internal tracing to verify that everything is in order. * If we fail, we do not register this tracer.
*/
tracing_reset_online_cpus(&tr->array_buffer);
tr->current_trace = type;
#ifdef CONFIG_TRACER_MAX_TRACE if (type->use_max_tr) { /* If we expanded the buffers, make sure the max is expanded too */ if (tr->ring_buffer_expanded)
ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
RING_BUFFER_ALL_CPUS);
tr->allocated_snapshot = true;
} #endif
/* the test is responsible for initializing and enabling */
pr_info("Testing tracer %s: ", type->name);
ret = type->selftest(type, tr); /* the test is responsible for resetting too */
tr->current_trace = saved_tracer; if (ret) {
printk(KERN_CONT "FAILED!\n"); /* Add the warning after printing 'FAILED' */
WARN_ON(1); return -1;
} /* Only reset on passing, to avoid touching corrupted buffers */
tracing_reset_online_cpus(&tr->array_buffer);
#ifdef CONFIG_TRACER_MAX_TRACE if (type->use_max_tr) {
tr->allocated_snapshot = false;
/* Shrink the max buffer again */ if (tr->ring_buffer_expanded)
ring_buffer_resize(tr->max_buffer.buffer, 1,
RING_BUFFER_ALL_CPUS);
} #endif
printk(KERN_CONT "PASSED\n"); return 0;
}
/*
 * do_run_tracer_selftest - run a single tracer's selftest with bookkeeping
 * @type: the tracer whose selftest should be run
 *
 * Wraps run_tracer_selftest() so the tracing_selftest_running flag is set
 * for the duration of the test, and yields the CPU first.
 *
 * Returns whatever run_tracer_selftest() returns.
 */
static int do_run_tracer_selftest(struct tracer *type)
{
	int rc;

	/*
	 * Tests can take a long time, especially if they are run one after the
	 * other, as does happen during bootup when all the tracers are
	 * registered. This could cause the soft lockup watchdog to trigger.
	 */
	cond_resched();

	tracing_selftest_running = true;
	rc = run_tracer_selftest(type);
	tracing_selftest_running = false;

	return rc;
}
/*
 * NOTE(review): this function appears truncated by extraction — the trailing
 * "return 0;" and closing brace are missing (the next visible line starts a
 * new function's kernel-doc), and any initcall registration that follows is
 * not visible in this chunk. Restore from the upstream kernel tree; only
 * comments were added here.
 */
/*
 * Run all tracer selftests that were postponed because they registered
 * before the scheduler (and friends) were usable, removing any tracer
 * that fails from the available-tracers list.
 */
static __init int init_trace_selftests(void)
{ struct trace_selftests *p, *n; struct tracer *t, **last; int ret;
selftests_can_run = true;
guard(mutex)(&trace_types_lock);
if (list_empty(&postponed_selftests)) return 0;
pr_info("Running postponed tracer tests:\n");
tracing_selftest_running = true;
list_for_each_entry_safe(p, n, &postponed_selftests, list) { /* This loop can take minutes when sanitizers are enabled, so * lets make sure we allow RCU processing.
*/
cond_resched();
ret = run_tracer_selftest(p->type); /* If the test fails, then warn and remove from available_tracers */ if (ret < 0) {
WARN(1, "tracer: %s failed selftest, disabling\n",
p->type->name);
/* Unlink the failing tracer from the singly-linked trace_types list. */
last = &trace_types; for (t = trace_types; t; t = t->next) { if (t == p->type) {
*last = t->next; break;
}
last = &t->next;
}
}
list_del(&p->list);
kfree(p);
}
tracing_selftest_running = false;
/*
 * NOTE(review): this function appears truncated/garbled by extraction — the
 * "out:" label targeted by the goto statements (and the matching
 * mutex_unlock of trace_types_lock) is missing, the code that actually links
 * @type into the trace_types list is absent, and the visible "return 0"
 * after the strncmp check would return with trace_types_lock still held.
 * There is also no visible NULL check on default_bootup_tracer before the
 * strncmp — presumably an earlier guard was dropped. Restore from the
 * upstream kernel tree; only comments were added here.
 */
/** * register_tracer - register a tracer with the ftrace system. * @type: the plugin for the tracer * * Register a new plugin tracer.
*/ int __init register_tracer(struct tracer *type)
{ struct tracer *t; int ret = 0;
if (!type->name) {
pr_info("Tracer must have a name\n"); return -1;
}
if (strlen(type->name) >= MAX_TRACER_SIZE) {
pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); return -1;
}
if (security_locked_down(LOCKDOWN_TRACEFS)) {
pr_warn("Can not register tracer %s due to lockdown\n",
type->name); return -EPERM;
}
mutex_lock(&trace_types_lock);
/* Reject duplicate registrations by name. */
for (t = trace_types; t; t = t->next) { if (strcmp(type->name, t->name) == 0) { /* already found */
pr_info("Tracer %s already registered\n",
type->name);
ret = -1; goto out;
}
}
if (!type->set_flag)
type->set_flag = &dummy_set_flag; if (!type->flags) { /*allocate a dummy tracer_flags*/
type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL); if (!type->flags) {
ret = -ENOMEM; goto out;
}
type->flags->val = 0;
type->flags->opts = dummy_tracer_opt;
} else if (!type->flags->opts)
type->flags->opts = dummy_tracer_opt;
/* store the tracer for __set_tracer_option */
type->flags->trace = type;
ret = do_run_tracer_selftest(type); if (ret < 0) goto out;
if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) return 0;
printk(KERN_INFO "Starting tracer '%s'\n", type->name); /* Do we want this tracer to start on bootup? */
tracing_set_tracer(&global_trace, type->name);
default_bootup_tracer = NULL;
apply_trace_boot_options();
/* disable other selftests, since this will break it. */
disable_tracing_selftest("running a tracer");
/*
 * NOTE(review): fragment — the function header is missing from this chunk.
 * The decrement of tr->stop_count and the ring_buffer_record_enable() calls
 * suggest this is the body of a "start" counterpart (presumably
 * tracing_start_tr(struct trace_array *tr), with a local
 * "struct trace_buffer *buffer") — confirm against the upstream kernel
 * tree. Only comments were added here.
 */
guard(raw_spinlock_irqsave)(&tr->start_lock); if (--tr->stop_count) { if (WARN_ON_ONCE(tr->stop_count < 0)) { /* Someone screwed up their debugging */
tr->stop_count = 0;
} return;
}
/* Prevent the buffers from switching */
arch_spin_lock(&tr->max_lock);
buffer = tr->array_buffer.buffer; if (buffer)
ring_buffer_record_enable(buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
buffer = tr->max_buffer.buffer; if (buffer)
ring_buffer_record_enable(buffer); #endif
arch_spin_unlock(&tr->max_lock);
}
/*
 * NOTE(review): this block looks garbled by extraction — the kernel-doc and
 * signature say tracing_start(), but the body that follows increments
 * tr->stop_count and calls ring_buffer_record_disable(), i.e. it STOPS
 * recording (consistent with a tracing_stop_tr()-style helper, not with
 * starting). The opening brace after "void tracing_start(void)" is also
 * missing, and the body references tr/buffer that this signature does not
 * declare. Restore both functions from the upstream kernel tree; only
 * comments were added here.
 */
/** * tracing_start - quick start of the tracer * * If tracing is enabled but was stopped by tracing_stop, * this will start the tracer back up.
*/ void tracing_start(void)
guard(raw_spinlock_irqsave)(&tr->start_lock); if (tr->stop_count++) return;
/* Prevent the buffers from switching */
arch_spin_lock(&tr->max_lock);
buffer = tr->array_buffer.buffer; if (buffer)
ring_buffer_record_disable(buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
buffer = tr->max_buffer.buffer; if (buffer)
ring_buffer_record_disable(buffer); #endif
arch_spin_unlock(&tr->max_lock);
}
/**
 * tracing_stop - quick stop of the tracer
 *
 * Light weight way to stop tracing. Use in conjunction with
 * tracing_start.
 */
void tracing_stop(void)
{
	/*
	 * Plain call instead of "return tracing_stop_tr(...)": a return
	 * statement with an expression in a function returning void is an
	 * ISO C constraint violation (C11 6.8.6.4p1) that GCC only accepts
	 * as an extension.
	 */
	tracing_stop_tr(&global_trace);
}
/*
 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
 * simplifies those functions and keeps them in sync.
 */
enum print_line_t trace_handle_return(struct trace_seq *s)
{
	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}
EXPORT_SYMBOL_GPL(trace_handle_return);
/*
 * NOTE(review): the German text below is website boilerplate (a disclaimer
 * roughly saying "the information on this web page was compiled carefully
 * to the best of our knowledge; no guarantee of completeness, correctness
 * or quality is given; note: the colored syntax display and the measurement
 * are still experimental"). It leaked into the file during extraction, is
 * not part of the kernel source, and should be removed. It is fenced inside
 * this comment so it cannot be parsed as code.
 *
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereit gestellten Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */