// SPDX-License-Identifier: GPL-2.0-only
/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:
 */
/* Check that leader matches cpus with each member. */ if (leader == evsel) continue; if (perf_cpu_map__equal(leader->core.cpus, evsel->core.cpus)) continue;
/* If there's mismatch disable the group and warn user. */ if (warned_leader != leader) { char buf[200];
pr_warning("WARNING: grouped events cpus do not match.\n" "Events with CPUs not matching the leader will " "be removed from the group.\n");
evsel__group_desc(leader, buf, sizeof(buf));
pr_warning(" %s\n", buf);
warned_leader = leader;
} if (verbose > 0) { char buf[200];
staticint read_single_counter(struct evsel *counter, int cpu_map_idx, int thread)
{ int err = evsel__read_counter(counter, cpu_map_idx, thread);
/* * Reading user and system time will fail when the process * terminates. Use the wait4 values in that case.
*/ if (err && cpu_map_idx == 0 &&
(evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME ||
evsel__tool_event(counter) == TOOL_PMU__EVENT_SYSTEM_TIME)) {
u64 val, *start_time; struct perf_counts_values *count =
perf_counts(counter->counts, cpu_map_idx, thread);
/* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode
*/ staticint read_counter_cpu(struct evsel *counter, int cpu_map_idx)
{ int nthreads = perf_thread_map__nr(evsel_list->core.threads); int thread;
/* * The leader's group read loads data into its group members * (via evsel__read_counter()) and sets their count->loaded.
*/ if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) &&
read_single_counter(counter, cpu_map_idx, thread)) {
counter->counts->scaled = -1;
perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0;
perf_counts(counter->counts, cpu_map_idx, thread)->run = 0; return -1;
}
/* * If we don't have tracee (attaching to task or cpu), counters may * still be running. To get accurate group ratios, we must stop groups * from counting before reading their constituent counters.
*/ if (!target__none(&target)) {
evlist__for_each_entry(evsel_list, counter)
bpf_counter__disable(counter); if (!all_counters_use_bpf)
evlist__disable(evsel_list);
}
}
/*
 * Set (from signal context) to the child's errno when the forked workload
 * fails to exec; sig_atomic_t + volatile make the flag async-signal-safe.
 */
static volatile sig_atomic_t workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
					void *ucontext __maybe_unused)
{
	/* The child passes its errno in the signal's sigval payload. */
	workload_exec_errno = info->si_value.sival_int;
}
if (evlist__ctlfd_process(evlist, &cmd) > 0) { switch (cmd) { case EVLIST_CTL_CMD_ENABLE:
fallthrough; case EVLIST_CTL_CMD_DISABLE: if (interval)
process_interval(); break; case EVLIST_CTL_CMD_SNAPSHOT: case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: case EVLIST_CTL_CMD_EVLIST: case EVLIST_CTL_CMD_STOP: case EVLIST_CTL_CMD_PING: default: break;
}
}
}
staticvoid compute_tts(struct timespec *time_start, struct timespec *time_stop, int *time_to_sleep)
{ int tts = *time_to_sleep; struct timespec time_diff;
staticint dispatch_events(bool forks, int timeout, int interval, int *times)
{ int child_exited = 0, status = 0; int time_to_sleep, sleep_time; struct timespec time_start, time_stop;
staticenum counter_recovery stat_handle_error(struct evsel *counter)
{ char msg[BUFSIZ]; /* * PPC returns ENXIO for HW counters until 2.6.37 * (behavior changed with commit b0a873e).
*/ if (errno == EINVAL || errno == ENOSYS ||
errno == ENOENT || errno == ENXIO) { if (verbose > 0)
ui__warning("%s event is not supported by the kernel.\n",
evsel__name(counter));
counter->supported = false; /* * errored is a sticky flag that means one of the counter's * cpu event had a problem and needs to be reexamined.
*/
counter->errored = true;
if ((evsel__leader(counter) != counter) ||
!(counter->core.leader->nr_members > 1)) return COUNTER_SKIP;
} elseif (evsel__fallback(counter, &target, errno, msg, sizeof(msg))) { if (verbose > 0)
ui__warning("%s\n", msg); return COUNTER_RETRY;
} elseif (target__has_per_thread(&target) && errno != EOPNOTSUPP &&
evsel_list->core.threads &&
evsel_list->core.threads->err_thread != -1) { /* * For global --per-thread case, skip current * error thread.
*/ if (!thread_map__remove(evsel_list->core.threads,
evsel_list->core.threads->err_thread)) {
evsel_list->core.threads->err_thread = -1; return COUNTER_RETRY;
}
} elseif (counter->skippable) { if (verbose > 0)
ui__warning("skipping event %s that kernel failed to open .\n",
evsel__name(counter));
counter->supported = false;
counter->errored = true; return COUNTER_SKIP;
}
if (errno == EOPNOTSUPP) { if (verbose > 0) {
ui__warning("%s event is not supported by the kernel.\n",
evsel__name(counter));
}
counter->supported = false;
counter->errored = true;
/* * bperf calls evsel__open_per_cpu() in bperf__load(), so * no need to call it again here.
*/ if (target.use_bpf) break;
if (counter->reset_group || counter->errored) continue; if (evsel__is_bperf(counter)) continue;
try_again: if (create_perf_stat_counter(counter, &stat_config, &target,
evlist_cpu_itr.cpu_map_idx) < 0) {
/* * Weak group failed. We cannot just undo this here * because earlier CPUs might be in group mode, and the kernel * doesn't support mixing group and non group reads. Defer * it to later. * Don't close here because we're in the wrong affinity.
*/ if ((errno == EINVAL || errno == EBADF) &&
evsel__leader(counter) != counter &&
counter->weak_group) {
evlist__reset_weak_group(evsel_list, counter, false);
assert(counter->reset_group);
second_pass = true; continue;
}
switch (stat_handle_error(counter)) { case COUNTER_FATAL:
err = -1; goto err_out; case COUNTER_RETRY: goto try_again; case COUNTER_SKIP: continue; default: break;
}
}
counter->supported = true;
}
if (second_pass) { /* * Now redo all the weak group after closing them, * and also close errored counters.
*/
/* First close errored or weak retry */
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
counter = evlist_cpu_itr.evsel;
if (!counter->reset_group && !counter->errored) continue;
if (evlist__apply_filters(evsel_list, &counter, &target)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
counter->filter, evsel__name(counter), errno,
str_error_r(errno, msg, sizeof(msg))); return -1;
}
if (STAT_RECORD) { int fd = perf_data__fd(&perf_stat.data);
/* * Closing a group leader splits the group, and as we only disable * group leaders, results in remaining events becoming enabled. To * avoid arbitrary skew, we must read all counters before closing any * group leaders.
*/ if (read_counters() == 0)
process_counters();
/* * We need to keep evsel_list alive, because it's processed * later the evsel_list will be closed after.
*/ if (!STAT_RECORD)
evlist__close(evsel_list);
return WEXITSTATUS(status);
err_out: if (forks)
evlist__cancel_workload(evsel_list);
affinity__cleanup(affinity); return err;
}
/* * Returns -1 for fatal errors which signifies to not continue * when in repeat mode. * * Returns < -1 error codes when stat record is used. These * result in the stat information being displayed, but writing * to the file fails and is non fatal.
*/ staticint run_perf_stat(int argc, constchar **argv, int run_idx)
{ int ret;
if (pre_cmd) {
ret = system(pre_cmd); if (ret) return ret;
}
if (sync_run)
sync();
ret = __run_perf_stat(argc, argv, run_idx); if (ret) return ret;
if (post_cmd) {
ret = system(post_cmd); if (ret) return ret;
}
return ret;
}
staticvoid print_counters(struct timespec *ts, int argc, constchar **argv)
{ /* Do not print anything if we record to the pipe. */ if (STAT_RECORD && perf_stat.data.is_pipe) return; if (quiet) return;
signr = signo; /* * render child_pid harmless * won't send SIGTERM to a random * process in case of race condition * and fast PID recycling
*/
child_pid = -1;
}
staticvoid sig_atexit(void)
{
sigset_t set, oset;
/* * avoid race condition with SIGCHLD handler * in skip_signal() which is modifying child_pid * goal is to avoid send SIGTERM to a random * process
*/
sigemptyset(&set);
sigaddset(&set, SIGCHLD);
sigprocmask(SIG_BLOCK, &set, &oset);
staticint parse_stat_cgroups(conststruct option *opt, constchar *str, int unset)
{ if (stat_config.cgroup_list) {
pr_err("--cgroup and --for-each-cgroup cannot be used together\n"); return -1;
}
/* * If no string is specified, aggregate based on the topology of * Last Level Cache (LLC). Since the LLC level can change from * architecture to architecture, set level greater than * MAX_CACHE_LVL which will be interpreted as LLC.
*/ if (str == NULL) {
level = MAX_CACHE_LVL + 1; goto out;
}
/* * The format to specify cache level is LX or lX where X is the * cache level.
*/ if (strlen(str) != 2 || (str[0] != 'l' && str[0] != 'L')) {
pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n",
MAX_CACHE_LVL,
MAX_CACHE_LVL); return -EINVAL;
}
level = atoi(&str[1]); if (level < 1) {
pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n",
MAX_CACHE_LVL,
MAX_CACHE_LVL); return -EINVAL;
}
if (level > MAX_CACHE_LVL) {
pr_err("perf only supports max cache level of %d.\n" "Consider increasing MAX_CACHE_LVL\n", MAX_CACHE_LVL); return -EINVAL;
}
out:
opt_aggr_mode->cache = true;
*aggr_level = level; return 0;
}
/**
 * cpu__get_cache_id_from_map - Calculate the cache instance ID from the map in
 * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
 * Cache instance ID is the first CPU reported in the shared_cpu_list file.
 */
static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
{
	int id;
	struct perf_cpu_map *cpu_map = perf_cpu_map__new(map);

	/*
	 * If the map contains no CPU, consider the current CPU to
	 * be the first online CPU in the cache domain else use the
	 * first online CPU of the cache domain as the ID.
	 */
	id = perf_cpu_map__min(cpu_map).cpu;
	if (id == -1)
		id = cpu.cpu;

	/* Free the perf_cpu_map used to find the cache ID */
	perf_cpu_map__put(cpu_map);
	return id;
}
/**
 * cpu__get_cache_details - Returns 0 if successful in populating the
 * cache level and cache id. Cache level is read from
 * /sys/devices/system/cpu/cpuX/cache/indexY/level where as cache instance ID
 * is the first CPU reported by
 * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
 */
static int cpu__get_cache_details(struct perf_cpu cpu, struct perf_cache *cache)
{
	int ret = 0;
	u32 cache_level = stat_config.aggr_level;
	struct cpu_cache_level caches[MAX_CACHE_LVL];
	u32 i = 0, caches_cnt = 0;

	/*
	 * Initialize the output so *cache is well-defined even when no
	 * matching level is found in the loop below.
	 */
	cache->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
	cache->cache = -1;

	ret = build_caches_for_cpu(cpu.cpu, caches, &caches_cnt);
	if (ret) {
		/*
		 * If caches_cnt is not 0, cpu_cache_level data
		 * was allocated when building the topology.
		 * Free the allocated data before returning.
		 */
		if (caches_cnt)
			goto free_caches;

		return ret;
	}

	if (!caches_cnt)
		return -1;

	/*
	 * Save the data for the highest level if no
	 * level was specified by the user.
	 */
	if (cache_level > MAX_CACHE_LVL) {
		int max_level_index = 0;

		for (i = 1; i < caches_cnt; ++i) {
			if (caches[i].level > caches[max_level_index].level)
				max_level_index = i;
		}

		/*
		 * Record the highest cache level found; without these
		 * assignments max_level_index would be computed for nothing.
		 */
		cache->cache_lvl = caches[max_level_index].level;
		cache->cache = cpu__get_cache_id_from_map(cpu, caches[max_level_index].map);

		/* Reset i to 0 to free entire caches[] */
		i = 0;
		goto free_caches;
	}

	for (i = 0; i < caches_cnt; ++i) {
		if (caches[i].level == cache_level) {
			cache->cache_lvl = cache_level;
			cache->cache = cpu__get_cache_id_from_map(cpu, caches[i].map);
		}

		cpu_cache_level__free(&caches[i]);
	}

free_caches:
	/*
	 * Free all the allocated cpu_cache_level data.
	 */
	while (i < caches_cnt)
		cpu_cache_level__free(&caches[i++]);

	return ret;
}
/** * aggr_cpu_id__cache - Create an aggr_cpu_id with cache instache ID, cache * level, die and socket populated with the cache instache ID, cache level, * die and socket for cpu. The function signature is compatible with * aggr_cpu_id_get_t.
*/ staticstruct aggr_cpu_id aggr_cpu_id__cache(struct perf_cpu cpu, void *data)
{ int ret; struct aggr_cpu_id id; struct perf_cache cache;
id = aggr_cpu_id__die(cpu, data); if (aggr_cpu_id__is_empty(&id)) return id;
ret = cpu__get_cache_details(cpu, &cache); if (ret) return id;
static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
{ switch (aggr_mode) { case AGGR_SOCKET: return aggr_cpu_id__socket; case AGGR_DIE: return aggr_cpu_id__die; case AGGR_CLUSTER: return aggr_cpu_id__cluster; case AGGR_CACHE: return aggr_cpu_id__cache; case AGGR_CORE: return aggr_cpu_id__core; case AGGR_NODE: return aggr_cpu_id__node; case AGGR_NONE: return aggr_cpu_id__cpu; case AGGR_GLOBAL: return aggr_cpu_id__global; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: default: return NULL;
}
}
static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
{ switch (aggr_mode) { case AGGR_SOCKET: return perf_stat__get_socket_cached; case AGGR_DIE: return perf_stat__get_die_cached; case AGGR_CLUSTER: return perf_stat__get_cluster_cached; case AGGR_CACHE: return perf_stat__get_cache_id_cached; case AGGR_CORE: return perf_stat__get_core_cached; case AGGR_NODE: return perf_stat__get_node_cached; case AGGR_NONE: return perf_stat__get_cpu_cached; case AGGR_GLOBAL: return perf_stat__get_global_cached; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: default: return NULL;
}
}
/*
 * Initialize the aggregation map for the configured mode.
 * NOTE(review): this excerpt appears truncated by extraction — `get_id` is
 * computed but never used below, and `staticint` is a fused-keyword artifact;
 * confirm against the full upstream builtin-stat.c before relying on this.
 */
staticint perf_stat_init_aggr_mode(void)
{ int nr;
aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode);
/* NOTE(review): upstream code consuming get_id (building stat_config.aggr_map)
 * seems to have been lost here — verify. */
/* * The evsel_list->cpus is the base we operate on, * taking the highest cpu number to be the size of * the aggregation translate cpumap.
*/
nr = perf_cpu_map__max(evsel_list->core.all_cpus).cpu + 1;
stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr); return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
}
if (cpu.cpu != -1) { /* * die_id is relative to socket, so start * with the socket ID and then add die to * make a unique ID.
*/
id.socket = env->cpu[cpu.cpu].socket_id;
id.die = env->cpu[cpu.cpu].die_id;
}
for (i = caches_cnt - 1; i > -1; --i) { struct perf_cpu_map *cpu_map; int map_contains_cpu;
/* * If user has not specified a level, find the fist level with * the cpu in the map. Since building the map is expensive, do * this only if levels match.
*/ if (cache_level <= MAX_CACHE_LVL && caches[i].level != cache_level) continue;
if (cpu.cpu != -1) { /* * core_id is relative to socket, die and cluster, we need a * global id. So we set socket, die id, cluster id and core id.
*/
id.socket = env->cpu[cpu.cpu].socket_id;
id.die = env->cpu[cpu.cpu].die_id;
id.cluster = env->cpu[cpu.cpu].cluster_id;
id.core = env->cpu[cpu.cpu].core_id;
}
if (cpu.cpu != -1) { /* * core_id is relative to socket and die, * we need a global id. So we set * socket, die id and core id
*/
id.socket = env->cpu[cpu.cpu].socket_id;
id.die = env->cpu[cpu.cpu].die_id;
id.core = env->cpu[cpu.cpu].core_id;
id.cpu = cpu;
}
static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
{ switch (aggr_mode) { case AGGR_SOCKET: return perf_env__get_socket_aggr_by_cpu; case AGGR_DIE: return perf_env__get_die_aggr_by_cpu; case AGGR_CLUSTER: return perf_env__get_cluster_aggr_by_cpu; case AGGR_CACHE: return perf_env__get_cache_aggr_by_cpu; case AGGR_CORE: return perf_env__get_core_aggr_by_cpu; case AGGR_NODE: return perf_env__get_node_aggr_by_cpu; case AGGR_GLOBAL: return perf_env__get_global_aggr_by_cpu; case AGGR_NONE: return perf_env__get_cpu_aggr_by_cpu; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: default: return NULL;
}
}
static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
{ switch (aggr_mode) { case AGGR_SOCKET: return perf_stat__get_socket_file; case AGGR_DIE: return perf_stat__get_die_file; case AGGR_CLUSTER: return perf_stat__get_cluster_file; case AGGR_CACHE: return perf_stat__get_cache_file; case AGGR_CORE: return perf_stat__get_core_file; case AGGR_NODE: return perf_stat__get_node_file; case AGGR_GLOBAL: return perf_stat__get_global_file; case AGGR_NONE: return perf_stat__get_cpu_file; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: default: return NULL;
}
}
/* * Add default events, if there were no attributes specified or * if -d/--detailed, -d -d or -d -d -d is used:
*/ staticint add_default_events(void)
{ constchar *pmu = parse_events_option_args.pmu_filter ?: "all"; struct parse_events_error err; struct evlist *evlist = evlist__new(); struct evsel *evsel; int ret = 0;
if (!evlist) return -ENOMEM;
parse_events_error__init(&err);
/* Set attrs if no event is selected and !null_run: */ if (stat_config.null_run) goto out;
if (transaction_run) { /* Handle -T as -M transaction. Once platform specific metrics * support has been added to the json files, all architectures * will use this approach. To determine transaction support * on an architecture test for such a metric name.
*/ if (!metricgroup__has_metric_or_groups(pmu, "transaction")) {
pr_err("Missing transaction metrics\n");
ret = -1; goto out;
}
ret = metricgroup__parse_groups(evlist, pmu, "transaction",
stat_config.metric_no_group,
stat_config.metric_no_merge,
stat_config.metric_no_threshold,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
stat_config.hardware_aware_grouping); goto out;
}
if (smi_cost) { int smi;
if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
pr_err("freeze_on_smi is not supported.\n");
ret = -1; goto out;
}
if (!smi) { if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
pr_err("Failed to set freeze_on_smi.\n");
ret = -1; goto out;
}
smi_reset = true;
}
if (!metricgroup__has_metric_or_groups(pmu, "smi")) {
pr_err("Missing smi metrics\n");
ret = -1; goto out;
}
if (!force_metric_only)
stat_config.metric_only = true;
if (!force_metric_only)
stat_config.metric_only = true;
if (!max_level) {
pr_err("Topdown requested but the topdown metric groups aren't present.\n" "(See perf list the metric groups have names like TopdownL1)\n");
ret = -1; goto out;
} if (stat_config.topdown_level > max_level) {
pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level);
ret = -1; goto out;
} elseif (!stat_config.topdown_level) {
stat_config.topdown_level = 1;
} if (!stat_config.interval && !stat_config.metric_only) {
fprintf(stat_config.output, "Topdown accuracy may decrease when measuring long periods.\n" "Please print the result regularly, e.g. -I1000\n");
}
str[8] = stat_config.topdown_level + '0'; if (metricgroup__parse_groups(evlist,
pmu, str, /*metric_no_group=*/false, /*metric_no_merge=*/false, /*metric_no_threshold=*/true,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
stat_config.hardware_aware_grouping) < 0) {
ret = -1; goto out;
}
}
if (!stat_config.topdown_level)
stat_config.topdown_level = 1;
if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) { /* No events so add defaults. */ if (target__has_cpu(&target))
ret = parse_events(evlist, "cpu-clock", &err); else
ret = parse_events(evlist, "task-clock", &err); if (ret) goto out;
ret = parse_events(evlist, "context-switches," "cpu-migrations," "page-faults," "instructions," "cycles," "stalled-cycles-frontend," "stalled-cycles-backend," "branches," "branch-misses",
&err); if (ret) goto out;
/* * Add TopdownL1 metrics if they exist. To minimize * multiplexing, don't request threshold computation.
*/ if (metricgroup__has_metric_or_groups(pmu, "Default")) { struct evlist *metric_evlist = evlist__new();
if (!metric_evlist) {
ret = -ENOMEM; goto out;
} if (metricgroup__parse_groups(metric_evlist, pmu, "Default", /*metric_no_group=*/false, /*metric_no_merge=*/false, /*metric_no_threshold=*/true,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
stat_config.hardware_aware_grouping) < 0) {
ret = -1; goto out;
}
/* Detailed events get appended to the event list: */
if (!ret && detailed_run >= 1) { /* * Detailed stats (-d), covering the L1 and last level data * caches:
*/
ret = parse_events(evlist, "L1-dcache-loads," "L1-dcache-load-misses," "LLC-loads," "LLC-load-misses",
&err);
} if (!ret && detailed_run >= 2) { /* * Very detailed stats (-d -d), covering the instruction cache * and the TLB caches:
*/
ret = parse_events(evlist, "L1-icache-loads," "L1-icache-load-misses," "dTLB-loads," "dTLB-load-misses," "iTLB-loads," "iTLB-load-misses",
&err);
} if (!ret && detailed_run >= 3) { /* * Very, very detailed stats (-d -d -d), adding prefetch events:
*/
ret = parse_events(evlist, "L1-dcache-prefetches," "L1-dcache-prefetch-misses",
&err);
}
out: if (!ret) {
evlist__for_each_entry(evlist, evsel) { /* * Make at least one event non-skippable so fatal errors are visible. * 'cycles' always used to be default and non-skippable, so use that.
*/ if (strcmp("cycles", evsel__name(evsel)))
evsel->skippable = true;
}
}
parse_events_error__exit(&err);
evlist__splice_list_tail(evsel_list, &evlist->core.entries);
metricgroup__copy_metric_events(evsel_list, /*cgrp=*/NULL,
&evsel_list->metric_events,
&evlist->metric_events);
evlist__delete(evlist); return ret;
}
/* Usage text for "perf stat record" option parsing.
 * NOTE: the "<options>" placeholder was eaten by HTML extraction; restored. */
static const char * const stat_record_usage[] = {
	"perf stat record [<options>]",
	NULL,
};
staticvoid init_features(struct perf_session *session)
{ int feat;
for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
perf_header__set_feat(&session->header, feat);
ret = perf_session__process_events(session); if (ret) return ret;
perf_session__delete(session); return 0;
}
staticvoid setup_system_wide(int forks)
{ /* * Make system wide (-a) the default target if * no target was specified and one of following * conditions is met: * * - there's no workload specified * - there is workload specified but all requested * events are system wide events
*/ if (!target__none(&target)) return;
if (!forks)
target.system_wide = true; else { struct evsel *counter;
int cmd_stat(int argc, constchar **argv)
{ struct opt_aggr_mode opt_mode = {}; struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"),
OPT_CALLBACK('e', "event", &parse_events_option_args, "event", "event selector. use 'perf list' to list available events",
parse_events_option),
OPT_CALLBACK(0, "filter", &evsel_list, "filter", "event filter", parse_filter),
OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit, "child tasks do not inherit counters"),
OPT_STRING('p', "pid", &target.pid, "pid", "stat events on existing process id"),
OPT_STRING('t', "tid", &target.tid, "tid", "stat events on existing thread id"), #ifdef HAVE_BPF_SKEL
OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id", "stat events on existing bpf program id"),
OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf, "use bpf program to count events"),
OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path", "path to perf_event_attr map"), #endif
OPT_BOOLEAN('a', "all-cpus", &target.system_wide, "system-wide collection from all CPUs"),
OPT_BOOLEAN(0, "scale", &stat_config.scale, "Use --no-scale to disable counter scaling for multiplexing"),
OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"),
OPT_INTEGER('r', "repeat", &stat_config.run_count, "repeat command and print average + stddev (max: 100, forever: 0)"),
OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table, "display details about each run (only with -r option)"),
OPT_BOOLEAN('n', "null", &stat_config.null_run, "null run - dont start any counters"),
OPT_INCR('d', "detailed", &detailed_run, "detailed run - start a lot of events"),
OPT_BOOLEAN('S', "sync", &sync_run, "call sync() before starting a run"),
OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, "print large numbers with thousands\' separators",
stat__set_big_num),
OPT_STRING('C', "cpu", &target.cpu_list, "cpu", "list of cpus to monitor in system-wide"),
OPT_BOOLEAN('A', "no-aggr", &opt_mode.no_aggr, "disable aggregation across CPUs or PMUs"),
OPT_BOOLEAN(0, "no-merge", &opt_mode.no_aggr, "disable aggregation the same as -A or -no-aggr"),
OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge, "Merge identical named hybrid events"),
OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator", "print counts with custom separator"),
OPT_BOOLEAN('j', "json-output", &stat_config.json_output, "print counts in JSON format"),
OPT_CALLBACK('G', "cgroup", &evsel_list, "name", "monitor event in cgroup name only", parse_stat_cgroups),
OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name", "expand events for each cgroup"),
OPT_STRING('o', "output", &output_name, "file", "output file name"),
OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
OPT_INTEGER(0, "log-fd", &output_fd, "log output to fd, instead of stderr"),
OPT_STRING(0, "pre", &pre_cmd, "command", "command to run prior to the measured command"),
OPT_STRING(0, "post", &post_cmd, "command", "command to run after to the measured command"),
OPT_UINTEGER('I', "interval-print", &stat_config.interval, "print counts at regular interval in ms " "(overhead is possible for values <= 100ms)"),
OPT_INTEGER(0, "interval-count", &stat_config.times, "print counts for fixed number of times"),
OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear, "clear screen in between new interval"),
OPT_UINTEGER(0, "timeout", &stat_config.timeout, "stop workload and print counts after a timeout period in ms (>= 10ms)"),
OPT_BOOLEAN(0, "per-socket", &opt_mode.socket, "aggregate counts per processor socket"),
OPT_BOOLEAN(0, "per-die", &opt_mode.die, "aggregate counts per processor die"),
OPT_BOOLEAN(0, "per-cluster", &opt_mode.cluster, "aggregate counts per processor cluster"),
OPT_CALLBACK_OPTARG(0, "per-cache", &opt_mode, &stat_config.aggr_level, "cache level", "aggregate count at this cache level (Default: LLC)",
parse_cache_level),
OPT_BOOLEAN(0, "per-core", &opt_mode.core, "aggregate counts per physical processor core"),
OPT_BOOLEAN(0, "per-thread", &opt_mode.thread, "aggregate counts per thread"),
OPT_BOOLEAN(0, "per-node", &opt_mode.node, "aggregate counts per numa node"),
OPT_INTEGER('D', "delay", &target.initial_delay, "ms to wait before starting measurement after program start (-1: start with events disabled)"),
OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL, "Only print computed metrics. No raw values", enable_metric_only),
OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group, "don't group metric events, impacts multiplexing"),
OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge, "don't try to share events between metrics in a group"),
OPT_BOOLEAN(0, "metric-no-threshold", &stat_config.metric_no_threshold, "disable adding events for the metric threshold calculation"),
OPT_BOOLEAN(0, "topdown", &topdown_run, "measure top-down statistics"), #ifdef HAVE_ARCH_X86_64_SUPPORT
/*
 * NOTE(review): removed a German website disclaimer footer that was
 * accidentally captured during extraction — it is not part of the source.
 */