// SPDX-License-Identifier: GPL-2.0 /* * builtin-record.c * * Builtin record command: Record the profile of a workload * (or a CPU, or a PID) into the perf.data output file - for * later analysis via perf report.
*/ #include"builtin.h"
aio_errno = aio_error(cblock); if (aio_errno == EINPROGRESS) return 0;
written = aio_ret = aio_return(cblock); if (aio_ret < 0) { if (aio_errno != EINTR)
pr_err("failed to write perf data, error: %m\n");
written = 0;
}
rem_size = cblock->aio_nbytes - written;
if (rem_size == 0) {
cblock->aio_fildes = -1; /* * md->refcount is incremented in record__aio_pushfn() for * every aio write request started in record__aio_push() so * decrement it because the request is now complete.
*/
perf_mmap__put(&md->core);
rc = 1;
} else { /* * aio write request may require restart with the * remainder if the kernel didn't write whole * chunk at once.
*/
rem_off = cblock->aio_offset + written;
rem_buf = (void *)(cblock->aio_buf + written);
record__aio_write(cblock, cblock->aio_fildes,
rem_buf, rem_size, rem_off);
rc = 0;
}
do {
do_suspend = 0; for (i = 0; i < md->aio.nr_cblocks; ++i) { if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) { if (sync_all)
aiocb[i] = NULL; else return i;
} else { /* * Started aio write is not complete yet * so it has to be waited before the * next allocation.
*/
aiocb[i] = &cblocks[i];
do_suspend = 1;
}
} if (!do_suspend) return -1;
while (aio_suspend((conststruct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) { if (!(errno == EAGAIN || errno == EINTR))
pr_err("failed to sync perf data, error: %m\n");
}
} while (1);
}
/*
 * map->core.base data pointed to by buf is copied into a free map->aio.data[]
 * buffer to release space in the kernel buffer as fast as possible, calling
 * perf_mmap__consume() from the perf_mmap__push() function.
 *
 * That lets the kernel proceed with storing more profiling data into
 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
 *
 * Copying can be done in two steps in case the chunk of profiling data
 * crosses the upper bound of the kernel buffer. In this case we first move
 * part of the data from map->start till the upper bound and then the remainder
 * from the beginning of the kernel buffer till the end of the data chunk.
*/
if (!aio->size) { /* * Increment map->refcount to guard map->aio.data[] buffer * from premature deallocation because map object can be * released earlier than aio write request started on * map->aio.data[] buffer is complete. * * perf_mmap__put() is done at record__aio_complete() * after started aio request completion or at record__aio_push() * if the request failed to start.
*/
perf_mmap__get(&map->core);
}
aio->size += size;
return size;
}
/*
 * Push the pending data of one mmap to the trace file with an asynchronous
 * write.
 *
 * Waits in record__aio_sync() for a map->aio.data[] buffer, lets
 * perf_mmap__push() fill it through record__aio_pushfn(), then starts the
 * write at file offset *off with record__aio_write(). When the write was
 * started (record__aio_write() returned 0), *off and rec->bytes_written are
 * advanced by the copied size and the switch-output size trigger is checked.
 *
 * A non-zero perf_mmap__push() result is returned unchanged
 * (> 0: no data, < 0: error).
 *
 * NOTE(review): in this view the statements that follow the if/else on the
 * record__aio_write() result use names (event, compressed, padding, bf, pad)
 * that are not declared anywhere in this function. They look like the tail
 * of a different function, so lines appear to be missing here - confirm
 * against the complete file before relying on this block.
 */
staticint record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{ int ret, idx; int trace_fd = rec->session->data->file.fd; struct record_aio aio = { .rec = rec, .size = 0 };
/* * Call record__aio_sync() to wait till map->aio.data[] buffer * becomes available after previous aio write operation.
*/
idx = record__aio_sync(map, false);
aio.data = map->aio.data[idx];
ret = perf_mmap__push(map, &aio, record__aio_pushfn); if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ return ret;
/* Counted once per push that delivered data, before the write is started. */
rec->samples++;
ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); if (!ret) {
*off += aio.size;
rec->bytes_written += aio.size; if (switch_output_size(rec))
trigger_hit(&switch_output_trigger);
} else { /* * Decrement map->refcount incremented in record__aio_pushfn() * back if record__aio_write() operation failed to start, otherwise * map->refcount is decremented in record__aio_complete() after * aio write operation finishes successfully.
*/
perf_mmap__put(&map->core);
}
/* * The record from `zstd_compress` is not 8 bytes aligned, which would cause asan * error. We make it aligned here.
*/
event->data_size = compressed - sizeof(struct perf_record_compressed2);
event->header.size = PERF_ALIGN(compressed, sizeof(u64));
padding = event->header.size - compressed; return record__write(rec, map, bf, compressed) ||
record__write(rec, map, &pad, padding);
}
done = 1; #ifdef HAVE_EVENTFD_SUPPORT if (done_fd >= 0) {
u64 tmp = 1; int orig_errno = errno;
/* * It is possible for this signal handler to run after done is * checked in the main loop, but before the perf counter fds are * polled. If this happens, the poll() will continue to wait * even though done is set, and will only break out if either * another signal is received, or the counters are ready for * read. To ensure the poll() doesn't sleep when done is set, * use an eventfd (done_fd) to wake up the poll().
*/ if (write(done_fd, &tmp, sizeof(tmp)) < 0)
pr_err("failed to signal wakeup fd, error: %m\n");
/*
 * Read an AUX area tracing snapshot and update auxtrace_snapshot_trigger.
 *
 * The trigger is put into the error state when either
 * record__auxtrace_read_snapshot_all() returns a negative value or
 * auxtrace_record__snapshot_finish() returns non-zero; otherwise it is
 * marked ready.
 *
 * @rec:     record session
 * @on_exit: forwarded unchanged to auxtrace_record__snapshot_finish()
 */
static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");

	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
		return;
	}

	/* The finish step is only attempted after a successful read. */
	if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) {
		trigger_error(&auxtrace_snapshot_trigger);
		return;
	}

	trigger_ready(&auxtrace_snapshot_trigger);
}
/*
 * Take the AUX area snapshot that is read with on_exit set to true.
 *
 * Nothing is done (and 0 is returned) when auxtrace_snapshot_trigger is
 * already in the error state. If no snapshot has been started yet
 * (auxtrace_record__snapshot_started is zero) one is started first.
 *
 * Returns 0 on success, -1 when starting the snapshot fails or when the
 * trigger is in the error state after the read.
 */
static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started) {
		if (auxtrace_record__snapshot_start(rec->itr))
			return -1;
	}

	record__read_auxtrace_snapshot(rec, true);

	return trigger_is_error(&auxtrace_snapshot_trigger) ? -1 : 0;
}
staticint record__auxtrace_init(struct record *rec)
{ int err;
if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
&& record__threads_enabled(rec)) {
pr_err("AUX area tracing options are not available in parallel streaming mode.\n"); return -EINVAL;
}
if (!rec->itr) {
rec->itr = auxtrace_record__init(rec->evlist, &err); if (err) return err;
}
err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
rec->opts.auxtrace_snapshot_opts); if (err) return err;
err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
rec->opts.auxtrace_sample_opts); if (err) return err;
err = auxtrace_parse_aux_action(rec->evlist); if (err) return err;
return auxtrace_parse_filters(rec->evlist);
}
#else
/*
 * Definition used on the #else side of the surrounding preprocessor
 * conditional: both arguments are ignored and 0 is returned.
 */
static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}
/*
 * If a non-dummy evsel exists, system_wide sideband is needed to
 * help parse sample information.
 * For example, the PERF_EVENT_MMAP event helps parse symbols,
 * and the PERF_EVENT_COMM event helps parse the task executable name.
*/
evlist__for_each_entry(evlist, evsel) { if (!evsel__is_dummy_event(evsel)) returntrue;
}
/* * For initial_delay, system wide or a hybrid system, we need to add * tracking event so that we can track PERF_RECORD_MMAP to cover the * delay of waiting or event synthesis.
*/ if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
perf_pmus__num_core_pmus() > 1) {
/* * User space tasks can migrate between CPUs, so when tracing * selected CPUs, sideband for all CPUs is still needed.
*/ if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
system_wide = true;
evsel = evlist__findnew_tracking_event(evlist, system_wide); if (!evsel) return -ENOMEM;
/* * Enable the tracking event when the process is forked for * initial_delay, immediately for system wide.
*/ if (opts->target.initial_delay && !evsel->immediate &&
!target__has_cpu(&opts->target))
evsel->core.attr.enable_on_exec = 1; else
evsel->immediate = 1;
}
return 0;
}
staticbool record__kcore_readable(struct machine *machine)
{ char kcore[PATH_MAX]; int fd;
/*
 * Release everything hanging off rec->thread_data.
 *
 * For each of the rec->nr_threads entries the pipes are closed, the maps and
 * overwrite_maps arrays are freed and the pollfd array is torn down; finally
 * the rec->thread_data array itself is freed through zfree(). Does nothing
 * when rec->thread_data is NULL.
 */
static void record__free_thread_data(struct record *rec)
{
	struct record_thread *td;
	int idx;

	if (!rec->thread_data)
		return;

	for (idx = 0; idx < rec->nr_threads; idx++) {
		td = &rec->thread_data[idx];

		record__thread_data_close_pipes(td);
		zfree(&td->maps);
		zfree(&td->overwrite_maps);
		fdarray__exit(&td->pollfd);
	}

	zfree(&rec->thread_data);
}
/*
 * NOTE(review): the signature and the first statement belong to
 * record__map_thread_evlist_pollfd_indexes(), but the loop that follows uses
 * i, err, e_entries and t_entries, none of which is declared in the visible
 * text, and the two index parameters are never used. Lines appear to be
 * missing between the first statement and the loop - confirm against the
 * complete file before relying on this block.
 *
 * What the visible loop does: it walks rec->index_map[] and, for every
 * (evlist_pollfd_index, thread_pollfd_index) pair, checks that the two
 * pollfd entries agree on fd and events. On a mismatch it logs an error,
 * records -EINVAL in err and moves on to the next pair; otherwise it copies
 * revents from the thread entry to the evlist entry. err is returned.
 */
staticint record__map_thread_evlist_pollfd_indexes(struct record *rec, int evlist_pollfd_index, int thread_pollfd_index)
{
size_t x = rec->index_map_cnt;
for (i = 0; i < rec->index_map_cnt; i++) { int e_pos = rec->index_map[i].evlist_pollfd_index; int t_pos = rec->index_map[i].thread_pollfd_index;
if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
e_entries[e_pos].events != t_entries[t_pos].events) {
pr_err("Thread and evlist pollfd index mismatch\n");
/* Keep checking the remaining pairs; the error is reported at the end. */
err = -EINVAL; continue;
}
e_entries[e_pos].revents = t_entries[t_pos].revents;
} return err;
}
/*
 * Duplicate the evlist pollfd entries flagged fdarray_flag__non_perf_event
 * into thread_data->pollfd and register each (evlist index, thread index)
 * pair through record__map_thread_evlist_pollfd_indexes().
 *
 * Returns 0 on success, or the negative value reported by the failing
 * helper.
 */
static int record__dup_non_perf_events(struct record *rec, struct evlist *evlist,
				       struct record_thread *thread_data)
{
	struct fdarray *evlist_fds = &evlist->core.pollfd;
	int pos;

	for (pos = 0; pos < evlist_fds->nr; pos++) {
		int thread_pos, err;

		if ((evlist_fds->priv[pos].flags & fdarray_flag__non_perf_event) == 0)
			continue;

		/* On success the return value is used as the thread pollfd index. */
		thread_pos = fdarray__dup_entry_from(&thread_data->pollfd, pos, evlist_fds);
		if (thread_pos < 0) {
			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
			return thread_pos;
		}

		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
			  thread_data, thread_pos, evlist_fds->entries[pos].fd);

		err = record__map_thread_evlist_pollfd_indexes(rec, pos, thread_pos);
		if (err < 0) {
			pr_err("Failed to map thread and evlist pollfd indexes\n");
			return err;
		}
	}

	return 0;
}
/*
 * Allocate and initialize rec->thread_data[] (rec->nr_threads entries).
 *
 * All pipes are initialized first so that the error path can call
 * record__free_thread_data() on every entry. Each entry then gets its maps
 * and pollfd set up. Entry 0 is given the caller's tid and a duplicate of
 * the evlist's non-perf-event descriptors; every other entry gets tid -1,
 * a pair of communication pipes and the read end of its msg pipe added to
 * its pollfd (the resulting position is kept in ctlfd_pos).
 *
 * Returns 0 on success. On failure everything allocated so far is released
 * and the failing helper's result (or -ENOMEM for the array itself) is
 * returned.
 */
static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	struct record_thread *threads;
	int idx, ret;

	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
	if (!rec->thread_data) {
		pr_err("Failed to allocate thread data\n");
		return -ENOMEM;
	}
	threads = rec->thread_data;

	for (idx = 0; idx < rec->nr_threads; idx++)
		record__thread_data_init_pipes(&threads[idx]);

	for (idx = 0; idx < rec->nr_threads; idx++) {
		struct record_thread *td = &threads[idx];

		td->rec = rec;
		td->mask = &rec->thread_masks[idx];

		ret = record__thread_data_init_maps(td, evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] maps\n", idx);
			goto out_free;
		}

		ret = record__thread_data_init_pollfd(td, evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] pollfd\n", idx);
			goto out_free;
		}

		if (idx == 0) {
			/* Entry 0 is set up for the calling thread. */
			td->tid = gettid();
			ret = record__dup_non_perf_events(rec, evlist, td);
			if (ret < 0)
				goto out_free;
			td->ctlfd_pos = -1; /* Not used */
			continue;
		}

		td->tid = -1;
		ret = record__thread_data_open_pipes(td);
		if (ret) {
			pr_err("Failed to open thread[%d] communication pipes\n", idx);
			goto out_free;
		}

		ret = fdarray__add(&td->pollfd, td->pipes.msg[0],
				   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
		if (ret < 0) {
			pr_err("Failed to add descriptor to thread[%d] pollfd\n", idx);
			goto out_free;
		}
		td->ctlfd_pos = ret;

		/* The pointer logged here is the base of the array, as before. */
		pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
			  threads, td->ctlfd_pos, td->pipes.msg[0]);
	}

	return 0;

out_free:
	record__free_thread_data(rec);

	return ret;
}
staticint record__mmap_evlist(struct record *rec, struct evlist *evlist)
{ int i, ret; struct record_opts *opts = &rec->opts; bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
opts->auxtrace_sample_mode; char msg[512];
if (opts->affinity != PERF_AFFINITY_SYS)
cpu__setup_cpunode_map();
if (evlist__mmap_ex(evlist, opts->mmap_pages,
opts->auxtrace_mmap_pages,
auxtrace_overwrite,
opts->nr_cblocks, opts->affinity,
opts->mmap_flush, opts->comp_level) < 0) { if (errno == EPERM) {
pr_err("Permission error mapping pages.\n" "Consider increasing " "/proc/sys/kernel/perf_event_mlock_kb,\n" "or try again with a smaller value of -m/--mmap_pages.\n" "(current value: %u,%u)\n",
opts->mmap_pages, opts->auxtrace_mmap_pages); return -errno;
} else {
pr_err("failed to mmap with %d (%s)\n", errno,
str_error_r(errno, msg, sizeof(msg))); if (errno) return -errno; else return -EINVAL;
}
}
if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack)) return -1;
ret = record__alloc_thread_data(rec, evlist); if (ret) return ret;
if (record__threads_enabled(rec)) {
ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps); if (ret) {
pr_err("Failed to create data directory: %s\n", strerror(-ret)); return ret;
} for (i = 0; i < evlist->core.nr_mmaps; i++) { if (evlist->mmap)
evlist->mmap[i].file = &rec->data.dir.files[i]; if (evlist->overwrite_mmap)
evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
}
}
return 0;
}
/*
 * Map the session's own evlist: shorthand for record__mmap_evlist() applied
 * to rec->evlist. Returns that function's result.
 */
static int record__mmap(struct record *rec)
{
	struct evlist *evlist = rec->evlist;

	return record__mmap_evlist(rec, evlist);
}
if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
pr_warning( "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" "Samples in kernel functions may not be resolved if a suitable vmlinux\n" "file is not found in the buildid cache or in the vmlinux path.\n\n" "Samples in kernel modules won't be resolved at all.\n\n" "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" "even with a suitable vmlinux or kallsyms file.\n\n");
}
if (evlist__apply_filters(evlist, &pos, &opts->target)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
pos->filter ?: "BPF", evsel__name(pos), errno,
str_error_r(errno, msg, sizeof(msg)));
rc = -1; goto out;
}
/*
 * Run the recorded events through perf_session__process_events().
 *
 * Returns 0 without processing anything when the output holds no data
 * (perf_data__size() is 0); otherwise returns the result of
 * perf_session__process_events().
 */
static int process_buildids(struct record *rec)
{
	if (!perf_data__size(&rec->data))
		return 0;

	/*
	 * Processing loads the kernel map and replaces dso->long_name with
	 * the real pathname that was found. Prefer the vmlinux path, e.g.
	 *   /lib/modules/3.16.4/build/vmlinux
	 * over the build-id path in the debug directory, e.g.
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * With --buildid-all every DSO is marked regardless of hits, so the
	 * samples need not be processed - unless timestamp_boundary is on,
	 * in which case all samples still have to be walked to learn the
	 * timestamps of the first and last one.
	 */
	if (rec->buildid_all) {
		if (!rec->timestamp_boundary)
			rec->tool.sample = process_event_sample_stub;
	}

	return perf_session__process_events(rec->session);
}
/*
 * Synthesize the module and kernel mmap events of one guest machine.
 *
 * Used as the callback handed to machines__process_guests().
 *
 * @machine: guest machine to describe
 * @data:    the struct perf_tool the synthesized events are delivered with
 *
 * Failures are only logged; the function has no return value.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	struct perf_tool *tool = data;
	int err;

	/*
	 * For a guest kernel, when processing the record and report
	 * subcommands, the module mmaps are arranged before the guest kernel
	 * mmap and trigger a dso preload, because guest module symbols are by
	 * default loaded from the guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
	 * address is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		/* Name the step that failed: this is the module step. */
		pr_err("Couldn't record guest kernel [%d]'s module information.\n",
		       machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
if (record__aio_enabled(rec))
record__aio_set_pos(trace_fd, off);
/* * Mark the round finished in case we wrote * at least one event. * * No need for round events in directory mode, * because per-cpu maps and files have data * sorted by kernel.
*/ if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
if (overwrite)
evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out: return rc;
}
staticint record__mmap_read_all(struct record *rec, bool synch)
{ int err;
err = record__mmap_read_evlist(rec, rec->evlist, false, synch); if (err) return err;
staticint record__synthesize(struct record *rec, bool tail);
/*
 * Close out the current output and rotate to the next one.
 *
 * Visible sequence: wait for outstanding aio writes, write the
 * finished-init marker and the tail synthesized events (plus the workload
 * events when no target is given), reset rec->samples, finish the output and
 * fetch a timestamp string. When rec->switch_output.num_files is set, the
 * file names are kept in a ring: the slot after cur_file (wrapping to 0) is
 * reused, the file it previously named is removed, and new_filename is
 * stored there. Otherwise new_filename is freed. Unless called at exit, the
 * tracking events are synthesized again.
 *
 * Returns -EINVAL when the timestamp cannot be obtained, otherwise fd.
 *
 * NOTE(review): in this view nothing assigns fd, nothing gives new_filename
 * a value other than NULL, and data is never used. The step that actually
 * switches the file appears to be missing between the timestamp fetch and
 * the num_files handling - confirm against the complete file.
 */
staticint
record__switch_output(struct record *rec, bool at_exit)
{ struct perf_data *data = &rec->data; char *new_filename = NULL; int fd, err;
/* Same Size: "2015122520103046"*/ char timestamp[] = "InvalidTimestamp";
record__aio_mmap_read_sync(rec);
write_finished_init(rec, true);
record__synthesize(rec, true); if (target__none(&rec->opts.target))
record__synthesize_workload(rec, true);
rec->samples = 0;
record__finish_output(rec);
err = fetch_current_timestamp(timestamp, sizeof(timestamp)); if (err) {
pr_err("Failed to get current timestamp\n"); return -EINVAL;
}
if (rec->switch_output.num_files) { int n = rec->switch_output.cur_file + 1;
/* Wrap around: the filenames[] slots are reused in a ring. */
if (n >= rec->switch_output.num_files)
n = 0;
rec->switch_output.cur_file = n; if (rec->switch_output.filenames[n]) {
remove(rec->switch_output.filenames[n]);
zfree(&rec->switch_output.filenames[n]);
}
rec->switch_output.filenames[n] = new_filename;
} else {
free(new_filename);
}
/* Output tracking events */ if (!at_exit) {
record__synthesize(rec, false);
/* * In 'perf record --switch-output' without -a, * record__synthesize() in record__switch_output() won't * generate tracking events because there's no thread_map * in evlist. Which causes newly created perf.data doesn't * contain map and comm information. * Create a fake thread_map and directly call * perf_event__synthesize_thread_map() for those events.
*/ if (target__none(&rec->opts.target))
record__synthesize_workload(rec, false);
write_finished_init(rec, false);
} return fd;
}
staticvoid __record__save_lost_samples(struct record *rec, struct evsel *evsel, struct perf_record_lost_samples *lost, int cpu_idx, int thread_idx, u64 lost_count,
u16 misc_flag)
{ struct perf_sample_id *sid; struct perf_sample sample; int id_hdr_size;
/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 *
 * The handler stores the integer carried in info->si_value as
 * workload_exec_errno and then flags both done and child_finished.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	const int child_errno = info->si_value.sival_int;

	workload_exec_errno = child_errno;
	done = 1;
	child_finished = 1;
}
if (data->is_pipe) {
err = perf_event__synthesize_for_pipe(tool, session, data,
process_synthesized_event); if (err < 0) goto out;
rec->bytes_written += err;
}
err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
process_synthesized_event, machine); if (err) goto out;
/* Synthesize id_index before auxtrace_info */
err = perf_event__synthesize_id_index(tool,
process_synthesized_event,
session->evlist, machine); if (err) goto out;
if (rec->opts.full_auxtrace) {
err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
session, process_synthesized_event); if (err) goto out;
}
if (!evlist__exclude_kernel(rec->evlist)) {
err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
machine);
WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" "Check /proc/kallsyms permission or run as root.\n");
err = perf_event__synthesize_modules(tool, process_synthesized_event,
machine);
WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" "Check /proc/modules permission or run as root.\n");
}
if (perf_guest) {
machines__process_guests(&session->machines,
perf_event__synthesize_guest_os, tool);
}
err = perf_event__synthesize_extra_attr(&rec->tool,
rec->evlist,
process_synthesized_event,
data->is_pipe); if (err) goto out;
staticint record__setup_sb_evlist(struct record *rec)
{ struct record_opts *opts = &rec->opts;
if (rec->sb_evlist != NULL) { /* * We get here if --switch-output-event populated the * sb_evlist, so associate a callback that will send a SIGUSR2 * to the main thread.
*/
evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
rec->thread_id = pthread_self();
} #ifdef HAVE_LIBBPF_SUPPORT if (!opts->no_bpf_event) { if (rec->sb_evlist == NULL) {
rec->sb_evlist = evlist__new();
if (rec->sb_evlist == NULL) {
pr_err("Couldn't create side band evlist.\n."); return -1;
}
}
if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) {
pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); return -1;
}
} #endif if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
opts->no_bpf_event = true;
}
if (rec->opts.kcore &&
!record__kcore_readable(&session->machines.host)) {
pr_err("ERROR: kcore is not readable.\n"); return -1;
}
if (record__init_clock(rec)) return -1;
record__init_features(rec);
if (forks) {
err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
workload_exec_failed_signal); if (err < 0) {
pr_err("Couldn't run the workload!\n");
status = err; goto out_delete_session;
}
}
/* * If we have just single event and are sending data * through pipe, we need to force the ids allocation, * because we synthesize event name through the pipe * and need the id for that.
*/ if (data->is_pipe && rec->evlist->core.nr_entries == 1)
rec->opts.sample_id = true;
if (rec->timestamp_filename && perf_data__is_pipe(data)) {
rec->timestamp_filename = false;
pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
}
/* * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE * and hybrid_merge is false.
*/
evlist__uniquify_evsel_names(rec->evlist, &stat_config);
/* Debug message used by test scripts */
pr_debug3("perf record opening and mmapping events\n"); if (record__open(rec) != 0) {
err = -1; goto out_free_threads;
} /* Debug message used by test scripts */
pr_debug3("perf record done opening and mmapping events\n");
env->comp_mmap_len = session->evlist->core.mmap_len;
if (rec->opts.kcore) {
err = record__kcore_copy(&session->machines.host, data); if (err) {
pr_err("ERROR: Failed to copy kcore\n"); goto out_free_threads;
}
}
/* * Normally perf_session__new would do this, but it doesn't have the * evlist.
*/ if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
rec->tool.ordered_events = false;
}
if (evlist__nr_groups(rec->evlist) == 0)
perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
if (data->is_pipe) {
err = perf_header__write_pipe(fd); if (err < 0) goto out_free_threads;
} else {
err = perf_session__write_header(session, rec->evlist, fd, false); if (err < 0) goto out_free_threads;
}
if (!evlist__needs_bpf_sb_event(rec->evlist))
opts->no_bpf_event = true;
err = record__setup_sb_evlist(rec); if (err) goto out_free_threads;
err = record__synthesize(rec, false); if (err < 0) goto out_free_threads;
if (rec->realtime_prio) { struct sched_param param;
param.sched_priority = rec->realtime_prio; if (sched_setscheduler(0, SCHED_FIFO, ¶m)) {
pr_err("Could not set realtime priority.\n");
err = -1; goto out_free_threads;
}
}
if (record__start_threads(rec)) goto out_free_threads;
/* * When perf is starting the traced process, all the events * (apart from group members) have enable_on_exec=1 set, * so don't spoil it by prematurely enabling them.
*/ if (!target__none(&opts->target) && !opts->target.initial_delay)
evlist__enable(rec->evlist);
/*
 * offcpu-time does not call execve, so enable_on_exec wouldn't work
 * when recording a workload; enable it manually.
*/ if (rec->off_cpu)
evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);
/* * Let the child rip
*/ if (forks) { struct machine *machine = &session->machines.host; union perf_event *event;
pid_t tgid;
/* * Some H/W events are generated before COMM event * which is emitted during exec(), so perf script * cannot see a correct process name for those events. * Synthesize COMM event to prevent it.
*/
tgid = perf_event__synthesize_comm(tool, event,
rec->evlist->workload.pid,
process_synthesized_event,
machine);
free(event);
/* * Must write FINISHED_INIT so it will be seen after all other * synthesized user events, but before any regular events.
*/
err = write_finished_init(rec, false); if (err < 0) goto out_child;
for (;;) { unsignedlonglong hits = thread->samples;
/* * rec->evlist->bkw_mmap_state is possible to be * BKW_MMAP_EMPTY here: when done == true and * hits != rec->samples in previous round. * * evlist__toggle_bkw_mmap ensure we never * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
*/ if (trigger_is_hit(&switch_output_trigger) || done || draining)
evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
if (auxtrace_record__snapshot_started) {
auxtrace_record__snapshot_started = 0; if (!trigger_is_error(&auxtrace_snapshot_trigger))
record__read_auxtrace_snapshot(rec, false); if (trigger_is_error(&auxtrace_snapshot_trigger)) {
pr_err("AUX area tracing snapshot failed\n");
err = -1; goto out_child;
}
}
if (trigger_is_hit(&switch_output_trigger)) { /* * If switch_output_trigger is hit, the data in * overwritable ring buffer should have been collected, * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. * * If SIGUSR2 raise after or during record__mmap_read_all(), * record__mmap_read_all() didn't collect data from * overwritable ring buffer. Read again.
*/ if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) continue;
trigger_ready(&switch_output_trigger);
/* * Reenable events in overwrite ring buffer after * record__mmap_read_all(): we should have collected * data from it.
*/
evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
if (!quiet)
fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
record__waking(rec));
thread->waking = 0;
fd = record__switch_output(rec, false); if (fd < 0) {
pr_err("Failed to switch to new file\n");
trigger_error(&switch_output_trigger);
err = fd; goto out_child;
}
/* re-arm the alarm */ if (rec->switch_output.time)
alarm(rec->switch_output.time);
}
if (hits == thread->samples) { if (done || draining) break;
err = fdarray__poll(&thread->pollfd, -1); /* * Propagate error, only if there's any. Ignore positive * number of returned events and interrupt error.
*/ if (err > 0 || (err < 0 && errno == EINTR))
err = 0;
thread->waking++;
err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); if (err) goto out_child;
}
if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { switch (cmd) { case EVLIST_CTL_CMD_SNAPSHOT:
hit_auxtrace_snapshot_trigger(rec);
evlist__ctlfd_ack(rec->evlist); break; case EVLIST_CTL_CMD_STOP:
done = 1; break; case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: case EVLIST_CTL_CMD_ENABLE: case EVLIST_CTL_CMD_DISABLE: case EVLIST_CTL_CMD_EVLIST: case EVLIST_CTL_CMD_PING: default: break;
}
}
err = event_enable_timer__process(rec->evlist->eet); if (err < 0) goto out_child; if (err) {
err = 0;
done = 1;
}
/* * When perf is starting the traced process, at the end events * die with the process and we wait for that. Thus no need to * disable events in this case.
*/ if (done && !disabled && !target__none(&opts->target)) {
trigger_off(&auxtrace_snapshot_trigger);
evlist__disable(rec->evlist);
disabled = true;
}
}
if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
env->comp_ratio = ratio + 0.5;
}
if (forks) { int exit_status;
if (!child_finished)
kill(rec->evlist->workload.pid, SIGTERM);
wait(&exit_status);
if (err < 0)
status = err; elseif (WIFEXITED(exit_status))
status = WEXITSTATUS(exit_status); elseif (WIFSIGNALED(exit_status))
signr = WTERMSIG(exit_status);
} else
status = err;
if (rec->off_cpu)
rec->bytes_written += off_cpu_write(rec->session);
record__read_lost_samples(rec);
record__synthesize(rec, true); /* this will be recalculated during process_buildids() */
rec->samples = 0;
if (!err) { if (!rec->timestamp_filename) {
record__finish_output(rec);
} else {
fd = record__switch_output(rec, true); if (fd < 0) {
status = fd; goto out_delete_session;
}
}
}
ret = parse_callchain_record_opt(arg, callchain); if (!ret) { /* Enable data address sampling for DWARF unwind. */ if (callchain->record_mode == CALLCHAIN_DWARF)
record->sample_address = true;
callchain_debug(callchain);
}
return ret;
}
int record_parse_callchain_opt(conststruct option *opt, constchar *arg, int unset)
{ return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}
int record_callchain_opt(conststruct option *opt, constchar *arg __maybe_unused, int unset __maybe_unused)
{ struct callchain_param *callchain = opt->value;
callchain->enabled = true;
if (callchain->record_mode == CALLCHAIN_NONE)
callchain->record_mode = CALLCHAIN_FP;
/* * If we're using --switch-output-events, then we imply its * --switch-output=signal, as we'll send a SIGUSR2 from the side band * thread to its parent.
*/ if (rec->switch_output_event_set) { if (record__threads_enabled(rec)) {
pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); return 0;
} goto do_signal;
}
if (!s->set) return 0;
if (record__threads_enabled(rec)) {
pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); return 0;
}
if (!strcmp(s->str, "signal")) {
do_signal:
s->signal = true;
pr_debug("switch-output with SIGUSR2 signal\n"); goto enabled;
}
val = parse_tag_value(s->str, tags_size); if (val != (unsignedlong) -1) {
s->size = val;
pr_debug("switch-output with %s size threshold\n", s->str); goto enabled;
}
val = parse_tag_value(s->str, tags_time); if (val != (unsignedlong) -1) {
s->time = val;
pr_debug("switch-output with %s time threshold (%lu seconds)\n",
s->str, s->time); goto enabled;
}
staticint build_id__process_mmap(conststruct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine)
{ /* * We already have the kernel maps, put in place via perf_session__create_kernel_maps() * no need to add them twice.
*/ if (!(event->header.misc & PERF_RECORD_MISC_USER)) return 0; return perf_event__process_mmap(tool, event, sample, machine);
}
staticint build_id__process_mmap2(conststruct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine)
{ /* * We already have the kernel maps, put in place via perf_session__create_kernel_maps() * no need to add them twice.
*/ if (!(event->header.misc & PERF_RECORD_MISC_USER)) return 0;
/* * XXX Ideally would be local to cmd_record() and passed to a record__new * because we need to have access to it in record__exit, that is called * after cmd_record() exits, but since record_options need to be accessible to * builtin-script, leave it here. * * At least we don't ouch it in all the other functions here directly. * * Just say no to tons of global variables, sigh.
*/ staticstruct record record = {
.opts = {
.sample_time = true,
.mmap_pages = UINT_MAX,
.user_freq = UINT_MAX,
.user_interval = ULLONG_MAX,
.freq = 4000,
.target = {
.uses_mmap = true,
.default_per_cpu = true,
},
.mmap_flush = MMAP_FLUSH_DEFAULT,
.nr_threads_synthesize = 1,
.ctl_fd = -1,
.ctl_fd_ack = -1,
.synth = PERF_SYNTH_ALL,
.off_cpu_thresh_ns = OFFCPU_THRESH,
},
.buildid_mmap = true,
};
/* * XXX Will stay a global variable till we fix builtin-script.c to stop messing * with it and switch to use the library functions in perf_evlist that came * from builtin-record.c, i.e. use record_opts, * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', * using pipes, etc.
*/ staticstruct option __record_options[] = {
OPT_CALLBACK('e', "event", &parse_events_option_args, "event", "event selector. use 'perf list' to list available events",
parse_events_option),
OPT_CALLBACK(0, "filter", &record.evlist, "filter", "event filter", parse_filter),
OPT_BOOLEAN(0, "latency", &record.latency, "Enable data collection for latency profiling.\n" "\t\t\t Use perf report --latency for latency-centric profile."),
OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
NULL, "don't record events from perf itself",
exclude_perf),
OPT_STRING('p', "pid", &record.opts.target.pid, "pid", "record events on existing process id"),
OPT_STRING('t', "tid", &record.opts.target.tid, "tid", "record events on existing thread id"),
OPT_INTEGER('r', "realtime", &record.realtime_prio, "collect data with this RT SCHED_FIFO priority"),
OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, "collect data without buffering"),
OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, "collect raw sample records from all opened counters"),
OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, "system-wide collection from all CPUs"),
OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", "list of cpus to monitor"),
OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
OPT_STRING('o', "output", &record.data.path, "file", "output file name"),
OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
&record.opts.no_inherit_set, "child tasks do not inherit counters"),
OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, "synthesize non-sample events at the end of output"),
OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, "Fail if the specified frequency can't be used"),
OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", "profile at this frequency",
record__parse_freq),
OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", "number of mmap data pages and AUX area tracing mmap pages",
record__parse_mmap_pages),
OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
record__mmap_flush_parse),
OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
NULL, "enables call-graph recording" ,
&record_callchain_opt),
OPT_CALLBACK(0, "call-graph", &record.opts, "record_mode[,record_size]", record_callchain_help,
&record_parse_callchain_opt),
OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"),
OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, "per thread counts"),
OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, "Record the sample physical addresses"),
OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, "Record the sampled data address data page size"),
OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, "Record the sampled code address (ip) page size"),
OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src, "Record the data source for memory operations"),
OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, "Record the sample identifier"),
OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
&record.opts.sample_time_set, "Record the sample timestamps"),
OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, "Record the sample period"),
OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, "don't sample"),
OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
&record.no_buildid_cache_set, "do not update the buildid cache"),
OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
&record.no_buildid_set, "do not collect buildids in perf.data"),
OPT_CALLBACK('G', "cgroup", &record.evlist, "name", "monitor event in cgroup name only",
parse_cgroups),
OPT_CALLBACK('D', "delay", &record, "ms", "ms to wait before starting measurement after program start (-1: start with events disabled), " "or ranges of time to enable events e.g. '-D 10-20,30-40'",
record__parse_event_enable_time),
OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, "branch any", "sample any taken branches",
parse_branch_stack),
OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, "branch filter mask", "branch stack filter modes",
parse_branch_stack),
OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, "sample by weight (on special events only)"),
OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, "sample transaction flags (special events only)"),
OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, "use per-thread mmaps"),
OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", "sample selected machine registers on interrupt," " use '-I?' to list register names", parse_intr_regs),
OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", "sample selected machine registers in user space," " use '--user-regs=?' to list register names", parse_user_regs),
OPT_BOOLEAN(0, "running-time", &record.opts.running_time, "Record running/enabled time of read (:S) events"),
OPT_CALLBACK('k', "clockid", &record.opts, "clockid", "clockid to use for events, see clock_gettime()",
parse_clockid),
OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, "opts", "AUX area tracing Snapshot Mode", ""),
OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, "opts", "sample AUX area", ""),
OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, "per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, "Record namespaces events"),
OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, "Record cgroup events"),
OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
&record.opts.record_switch_events_set, "Record context switch events"),
OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, "Configure all used events to run in kernel space.",
PARSE_OPT_EXCLUSIVE),
OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, "Configure all used events to run in user space.",
PARSE_OPT_EXCLUSIVE),
OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, "collect kernel callchains"),
OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, "collect user callchains"),
OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"),
OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, "Record build-id of all DSOs regardless of hits"),
OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set, "Record build-id in mmap events and skip build-id processing."),
OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"),
OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, "Record timestamp boundary (time of first/last samples)"),
OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
&record.switch_output.set, "signal or size[BKMG] or time[smhd]", "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", "signal"),
OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
&record.switch_output_event_set, "switch output event", "switch output event selector. use 'perf list' to list available events",
parse_events_option_new_evlist),
OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, "Limit number of switch output generated files"),
OPT_BOOLEAN(0, "dry-run", &dry_run, "Parse options then exit"), #ifdef HAVE_AIO_SUPPORT
OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
&nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
record__aio_parse), #endif
OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
record__parse_affinity), #ifdef HAVE_ZSTD_SUPPORT
OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
record__parse_comp_level), #endif
OPT_CALLBACK(0, "max-size", &record.output_max_size, "size", "Limit the maximum size of the output file", parse_output_max_size),
OPT_UINTEGER(0, "num-thread-synthesize",
&record.opts.nr_threads_synthesize, "number of threads to run for event synthesis"), #ifdef HAVE_LIBPFM
OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", "libpfm4 event selector. use 'perf list' to list available events",
parse_libpfm_events_option), #endif
OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" "\t\t\t 'snapshot': AUX area tracing snapshot).\n" "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
parse_control_option),
OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", "Fine-tune event synthesis: default=all", parse_record_synth_option),
OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
&record.debuginfod.set, "debuginfod urls", "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", "system"),
OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", "write collected trace data into several data files using parallel threads",
record__parse_threads),
OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin", "BPF filter action"),
OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms", "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)",
record__parse_off_cpu_thresh),
OPT_END()
};
perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) { /* Return ENODEV is input cpu is greater than max cpu */ if ((unsignedlong)cpu.cpu > mask->nbits) return -ENODEV;
__set_bit(cpu.cpu, mask->bits);
}
staticint record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, constchar **maps_spec, constchar **affinity_spec,
u32 nr_spec)
{
u32 s; int ret = 0, t = 0; struct mmap_cpu_mask cpus_mask; struct thread_mask thread_mask, full_mask, *thread_masks;
ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); if (ret) {
pr_err("Failed to allocate CPUs mask\n"); return ret;
}
ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); if (ret) {
pr_err("Failed to init cpu mask\n"); goto out_free_cpu_mask;
}
ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); if (ret) {
pr_err("Failed to allocate full mask\n"); goto out_free_cpu_mask;
}
ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); if (ret) {
pr_err("Failed to allocate thread mask\n"); goto out_free_full_and_cpu_masks;
}
for (s = 0; s < nr_spec; s++) {
ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); if (ret) {
pr_err("Failed to initialize maps thread mask\n"); goto out_free;
}
ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); if (ret) {
pr_err("Failed to initialize affinity thread mask\n"); goto out_free;
}
/* ignore invalid CPUs but do not allow empty masks */ if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
cpus_mask.bits, thread_mask.maps.nbits)) {
pr_err("Empty maps mask: %s\n", maps_spec[s]);
ret = -EINVAL; goto out_free;
} if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
cpus_mask.bits, thread_mask.affinity.nbits)) {
pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
ret = -EINVAL; goto out_free;
}
/* do not allow intersection with other masks (full_mask) */ if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
thread_mask.maps.nbits)) {
pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
ret = -EINVAL; goto out_free;
} if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
thread_mask.affinity.nbits)) {
pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
ret = -EINVAL; goto out_free;
}
/*
 * Initialize the thread masks from the CPU topology's core CPU lists.
 *
 * A fresh cpu_topology is created, its core_cpus_list array (with
 * core_cpus_lists entries) is passed to record__init_thread_masks_spec() as
 * both the maps spec and the affinity spec, and the topology is deleted
 * again before returning.
 *
 * Returns -ENOMEM if the topology cannot be created, otherwise the result of
 * record__init_thread_masks_spec().
 */
static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
					     topo->core_cpus_list, topo->core_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}
/*
 * Initialize the thread masks from the CPU topology's package CPU lists.
 *
 * A fresh cpu_topology is created, its package_cpus_list array (with
 * package_cpus_lists entries) is passed to record__init_thread_masks_spec()
 * as both the maps spec and the affinity spec, and the topology is deleted
 * again before returning.
 *
 * Returns -ENOMEM if the topology cannot be created, otherwise the result of
 * record__init_thread_masks_spec().
 */
static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
					     topo->package_cpus_list, topo->package_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}
staticint record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
{
u32 s; int ret; constchar **spec; struct numa_topology *topo;
topo = numa_topology__new(); if (!topo) {
pr_err("Failed to allocate NUMA topology\n"); return -ENOMEM;
}
spec = zalloc(topo->nr * sizeof(char *)); if (!spec) {
pr_err("Failed to allocate NUMA spec\n");
ret = -ENOMEM; goto out_delete_topo;
} for (s = 0; s < topo->nr; s++)
spec[s] = topo->nodes[s].cpus;
ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
spec = strtok_r(user_spec, ":", &spec_ptr); if (spec == NULL) break;
pr_debug2("threads_spec[%d]: %s\n", t, spec);
mask = strtok_r(spec, "/", &mask_ptr); if (mask == NULL) break;
pr_debug2(" maps mask: %s\n", mask);
tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); if (!tmp_spec) {
pr_err("Failed to reallocate maps spec\n");
ret = -ENOMEM; goto out_free;
}
maps_spec = tmp_spec;
maps_spec[nr_spec] = dup_mask = strdup(mask); if (!maps_spec[nr_spec]) {
pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
ret = -ENOMEM; goto out_free;
}
mask = strtok_r(NULL, "/", &mask_ptr); if (mask == NULL) {
pr_err("Invalid thread maps or affinity specs\n");
ret = -EINVAL; goto out_free;
}
pr_debug2(" affinity mask: %s\n", mask);
tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); if (!tmp_spec) {
pr_err("Failed to reallocate affinity spec\n");
ret = -ENOMEM; goto out_free;
}
affinity_spec = tmp_spec;
affinity_spec[nr_spec] = strdup(mask); if (!affinity_spec[nr_spec]) {
pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
ret = -ENOMEM; goto out_free;
}
dup_mask = NULL;
nr_spec++;
}
ret = record__init_thread_masks_spec(rec, cpus, (constchar **)maps_spec,
(constchar **)affinity_spec, nr_spec);
out_free:
free(dup_mask); for (s = 0; s < nr_spec; s++) { if (maps_spec)
free(maps_spec[s]); if (affinity_spec)
free(affinity_spec[s]);
}
free(affinity_spec);
free(maps_spec);
return ret;
}
/*
 * Set up the single thread mask used when parallel streaming is not enabled.
 *
 * One thread mask sized by cpu__max_cpu().cpu is allocated, its maps mask is
 * filled from @cpus and rec->nr_threads is set to 1.
 *
 * Returns 0 on success, the error from record__alloc_thread_masks() if the
 * allocation fails, or -ENODEV if the maps mask cannot be initialized from
 * @cpus.
 */
static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;

	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
		return -ENODEV;

	rec->nr_threads = 1;

	return 0;
}
/*
 * Initialize rec's thread masks according to the selected threads spec.
 *
 * Without parallel streaming (record__threads_enabled() is false) the default
 * single-mask setup is used. Otherwise --per-thread is rejected with -EINVAL
 * and the initializer matching rec->opts.threads_spec is invoked on the
 * evlist's all_cpus map.
 *
 * Returns the selected initializer's result; an unrecognised threads_spec
 * value leaves the masks untouched and returns 0.
 */
static int record__init_thread_masks(struct record *rec)
{
	int ret = 0;
	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;

	if (!record__threads_enabled(rec))
		return record__init_thread_default_masks(rec, cpus);

	if (evlist__per_thread(rec->evlist)) {
		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
		return -EINVAL;
	}

	switch (rec->opts.threads_spec) {
	case THREAD_SPEC__CPU:
		ret = record__init_thread_cpu_masks(rec, cpus);
		break;
	case THREAD_SPEC__CORE:
		ret = record__init_thread_core_masks(rec, cpus);
		break;
	case THREAD_SPEC__PACKAGE:
		ret = record__init_thread_package_masks(rec, cpus);
		break;
	case THREAD_SPEC__NUMA:
		ret = record__init_thread_numa_masks(rec, cpus);
		break;
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}
int cmd_record(int argc, constchar **argv)
{ int err; struct record *rec = &record; char errbuf[BUFSIZ];
setlocale(LC_ALL, "");
#ifndef HAVE_BPF_SKEL # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true); # undef set_nobuild #endif
/* Disable eager loading of kernel symbols that adds overhead to perf record. */
symbol_conf.lazy_load_kernel_maps = true;
rec->opts.affinity = PERF_AFFINITY_SYS;
rec->evlist = evlist__new(); if (rec->evlist == NULL) return -ENOMEM;
err = perf_config(perf_record_config, rec); if (err) return err;
argc = parse_options(argc, argv, record_options, record_usage,
PARSE_OPT_STOP_AT_NON_OPTION); if (quiet)
perf_quiet_option();
err = symbol__validate_sym_arguments(); if (err) return err;
perf_debuginfod_setup(&record.debuginfod);
/* Make system wide (-a) the default target. */ if (!argc && target__none(&rec->opts.target))
rec->opts.target.system_wide = true;
if (nr_cgroups && !rec->opts.target.system_wide) {
usage_with_options_msg(record_usage, record_options, "cgroup monitoring only available in system-wide mode");
}
if (record.latency) { /* * There is no fundamental reason why latency profiling * can't work for system-wide mode, but exact semantics * and details are to be defined. * See the following thread for details: * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
*/ if (record.opts.target.system_wide) {
pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
err = -EINVAL; goto out_opts;
}
record.opts.record_switch_events = true;
}
if (!rec->buildid_mmap) {
pr_debug("Disabling build id in synthesized mmap2 events.\n");
symbol_conf.no_buildid_mmap2 = true;
} elseif (rec->buildid_mmap_set) { /* * Explicitly passing --buildid-mmap disables buildid processing * and cache generation.
*/
rec->no_buildid = true;
} if (rec->buildid_mmap && !perf_can_record_build_id()) {
pr_warning("Missing support for build id in kernel mmap events.\n" "Disable this warning with --no-buildid-mmap\n");
rec->buildid_mmap = false;
} if (rec->buildid_mmap) { /* Enable perf_event_attr::build_id bit. */
rec->opts.build_id = true;
}
if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
pr_err("Kernel has no cgroup sampling support.\n");
err = -EINVAL; goto out_opts;
}
if (rec->opts.kcore)
rec->opts.text_poke = true;
if (rec->opts.kcore || record__threads_enabled(rec))
rec->data.is_dir = true;
if (record__threads_enabled(rec)) { if (rec->opts.affinity != PERF_AFFINITY_SYS) {
pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n"); goto out_opts;
} if (record__aio_enabled(rec)) {
pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n"); goto out_opts;
}
}
if (rec->opts.comp_level != 0) {
pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
rec->no_buildid = true;
}
if (rec->opts.record_switch_events &&
!perf_can_record_switch_events()) {
ui__error("kernel does not support recording context switch events\n");
parse_options_usage(record_usage, record_options, "switch-events", 0);
err = -EINVAL; goto out_opts;
}
if (rec->switch_output.time) {
signal(SIGALRM, alarm_sig_handler);
alarm(rec->switch_output.time);
}
if (rec->switch_output.num_files) {
rec->switch_output.filenames = calloc(rec->switch_output.num_files, sizeof(char *)); if (!rec->switch_output.filenames) {
err = -EINVAL; goto out_opts;
}
}
if (rec->timestamp_filename && record__threads_enabled(rec)) {
rec->timestamp_filename = false;
pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
}
/* For backward compatibility, -d implies --mem-info */ if (rec->opts.sample_address)
rec->opts.sample_data_src = true;
/* * Allow aliases to facilitate the lookup of symbols for address * filters. Refer to auxtrace_parse_filters().
*/
symbol_conf.allow_aliases = true;
symbol__init(NULL);
err = record__auxtrace_init(rec); if (err) goto out;
if (dry_run) goto out;
err = -ENOMEM;
if (rec->no_buildid_cache || rec->no_buildid) {
disable_buildid_cache();
} elseif (rec->switch_output.enabled) { /* * In 'perf record --switch-output', disable buildid * generation by default to reduce data file switching * overhead. Still generate buildid if they are required * explicitly using * * perf record --switch-output --no-no-buildid \ * --no-no-buildid-cache * * Following code equals to: * * if ((rec->no_buildid || !rec->no_buildid_set) && * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) * disable_buildid_cache();
*/ bool disable = true;
if (rec->no_buildid_set && !rec->no_buildid)
disable = false; if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
disable = false; if (disable) {
rec->no_buildid = true;
rec->no_buildid_cache = true;
disable_buildid_cache();
}
}
if (record.opts.overwrite)
record.opts.tail_synthesize = true;
if (rec->evlist->core.nr_entries == 0) {
err = parse_event(rec->evlist, "cycles:P"); if (err) goto out;
}
if (rec->opts.target.tid && !rec->opts.no_inherit_set)
rec->opts.no_inherit = true;
if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
arch__add_leaf_frame_record_opts(&rec->opts);
err = -ENOMEM; if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) { if (rec->opts.target.pid != NULL) {
pr_err("Couldn't create thread/CPU maps: %s\n",
errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf))); goto out;
} else
usage_with_options(record_usage, record_options);
}
err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); if (err) goto out;
/* * We take all buildids when the file contains * AUX area tracing data because we do not decode the * trace because it would take too long.
*/ if (rec->opts.full_auxtrace)
rec->buildid_all = true;
if (rec->opts.text_poke) {
err = record__config_text_poke(rec->evlist); if (err) {
pr_err("record__config_text_poke failed, error %d\n", err); goto out;
}
}
if (rec->off_cpu) {
err = record__config_off_cpu(rec); if (err) {
pr_err("record__config_off_cpu failed, error %d\n", err); goto out;
}
}
if (record_opts__config(&rec->opts)) {
err = -EINVAL; goto out;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.