/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * Performance events: * * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra * * Data type definitions, declarations, prototypes. * * Started by: Thomas Gleixner and Ingo Molnar * * For licencing details see kernel-base/COPYING
*/ #ifndef _UAPI_LINUX_PERF_EVENT_H #define _UAPI_LINUX_PERF_EVENT_H
/*
 * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
 *
 * PERF_TYPE_HARDWARE:			0xEEEEEEEE000000AA
 *					AA: hardware event ID
 *					EEEEEEEE: PMU type ID
 *
 * PERF_TYPE_HW_CACHE:			0xEEEEEEEE00DDCCBB
 *					BB: hardware cache ID
 *					CC: hardware cache op ID
 *					DD: hardware cache op result ID
 *					EEEEEEEE: PMU type ID
 *
 * If the PMU type ID is 0, PERF_TYPE_RAW will be applied.
 */

/* Bit offset of the PMU type ID (the EEEEEEEE part) inside attr.config. */
#define PERF_PMU_TYPE_SHIFT		32

/* Mask selecting the low 32 bits of attr.config (the event encoding). */
#define PERF_HW_EVENT_MASK		0xffffffff
/*
 * Generalized performance event event_id types, used by the
 * attr.event_id parameter of the sys_perf_event_open()
 * syscall:
 */
enum perf_hw_id {
	/*
	 * Common hardware events, generalized by the kernel:
	 */
	PERF_COUNT_HW_CPU_CYCLES		= 0,
	PERF_COUNT_HW_INSTRUCTIONS		= 1,
	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
	PERF_COUNT_HW_CACHE_MISSES		= 3,
	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
	PERF_COUNT_HW_BRANCH_MISSES		= 5,
	PERF_COUNT_HW_BUS_CYCLES		= 6,
	PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	= 7,
	PERF_COUNT_HW_STALLED_CYCLES_BACKEND	= 8,
	PERF_COUNT_HW_REF_CPU_CYCLES		= 9,
};
/*
 * Special "software" events provided by the kernel, even if the hardware
 * does not support performance events. These events measure various
 * physical and SW events of the kernel (and allow the profiling of them as
 * well):
 */
enum perf_sw_ids {
	PERF_COUNT_SW_CPU_CLOCK			= 0,
	PERF_COUNT_SW_TASK_CLOCK		= 1,
	PERF_COUNT_SW_PAGE_FAULTS		= 2,
	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
	PERF_COUNT_SW_DUMMY			= 9,
	PERF_COUNT_SW_BPF_OUTPUT		= 10,
	PERF_COUNT_SW_CGROUP_SWITCHES		= 11,
};
/*
 * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set.
 *
 * If the user does not pass priv level information via branch_sample_type,
 * the kernel uses the event's priv level. Branch and event priv levels do
 * not have to match. Branch priv level is checked for permissions.
 *
 * The branch types can be combined, however BRANCH_ANY covers all types
 * of branches and therefore it supersedes all the other types.
 */
enum perf_branch_sample_type_shift {
	PERF_SAMPLE_BRANCH_USER_SHIFT		=  0, /* user branches */
	PERF_SAMPLE_BRANCH_KERNEL_SHIFT		=  1, /* kernel branches */
	PERF_SAMPLE_BRANCH_HV_SHIFT		=  2, /* hypervisor branches */

	PERF_SAMPLE_BRANCH_ANY_SHIFT		=  3, /* any branch types */
	PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT	=  4, /* any call branch */
	PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT	=  5, /* any return branch */
	PERF_SAMPLE_BRANCH_IND_CALL_SHIFT	=  6, /* indirect calls */
	PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT	=  7, /* transaction aborts */
	PERF_SAMPLE_BRANCH_IN_TX_SHIFT		=  8, /* in transaction */
	PERF_SAMPLE_BRANCH_NO_TX_SHIFT		=  9, /* not in transaction */
	PERF_SAMPLE_BRANCH_COND_SHIFT		= 10, /* conditional branches */
};
/*
 * 'struct perf_event_attr' bundles the attributes that define a
 * performance event: for the most part hardware related configuration
 * details, together with a lot of behavioral switches and values that
 * are implemented by the kernel.
 */
struct perf_event_attr {

	/* Major type: hardware/software/tracepoint/etc. */
	__u32	type;

	/* Size of the attr structure, for forward/backwards compatibility. */
	__u32	size;

	/* Type specific configuration information. */
	__u64	config;

	/* Period or frequency; see the 'freq' flag further down. */
	union {
		__u64	sample_period;
		__u64	sample_freq;
	};

	__u64	sample_type;
	__u64	read_format;

	/*
	 * Flag word: the bit-fields below share one 64-bit storage unit
	 * (their widths, including __reserved_1, add up to 64).
	 */
	__u64	disabled : 1;			/* off by default */
	__u64	inherit : 1;			/* children inherit it */
	__u64	pinned : 1;			/* must always be on PMU */
	__u64	exclusive : 1;			/* only group on PMU */
	__u64	exclude_user : 1;		/* don't count user */
	__u64	exclude_kernel : 1;		/* ditto kernel */
	__u64	exclude_hv : 1;			/* ditto hypervisor */
	__u64	exclude_idle : 1;		/* don't count when idle */
	__u64	mmap : 1;			/* include mmap data */
	__u64	comm : 1;			/* include comm data */
	__u64	freq : 1;			/* use freq, not period */
	__u64	inherit_stat : 1;		/* per task counts */
	__u64	enable_on_exec : 1;		/* next exec enables */
	__u64	task : 1;			/* trace fork/exit */
	__u64	watermark : 1;			/* wakeup_watermark */

	/*
	 * precise_ip:
	 *
	 *  0 - SAMPLE_IP can have arbitrary skid
	 *  1 - SAMPLE_IP must have constant skid
	 *  2 - SAMPLE_IP requested to have 0 skid
	 *  3 - SAMPLE_IP must have 0 skid
	 *
	 * See also PERF_RECORD_MISC_EXACT_IP
	 */
	__u64	precise_ip : 2;			/* skid constraint */
	__u64	mmap_data : 1;			/* non-exec mmap data */
	__u64	sample_id_all : 1;		/* sample_type all events */
	__u64	exclude_host : 1;		/* don't count in host */
	__u64	exclude_guest : 1;		/* don't count in guest */
	__u64	exclude_callchain_kernel : 1;	/* exclude kernel callchains */
	__u64	exclude_callchain_user : 1;	/* exclude user callchains */
	__u64	mmap2 : 1;			/* include mmap with inode data */
	__u64	comm_exec : 1;			/* flag comm events that are due to an exec */
	__u64	use_clockid : 1;		/* use @clockid for time fields */
	__u64	context_switch : 1;		/* context switch data */
	__u64	write_backward : 1;		/* write ring buffer from end to beginning */
	__u64	namespaces : 1;			/* include namespaces data */
	__u64	ksymbol : 1;			/* include ksymbol events */
	__u64	bpf_event : 1;			/* include BPF events */
	__u64	aux_output : 1;			/* generate AUX records instead of events */
	__u64	cgroup : 1;			/* include cgroup events */
	__u64	text_poke : 1;			/* include text poke events */
	__u64	build_id : 1;			/* use build ID in mmap2 events */
	__u64	inherit_thread : 1;		/* children only inherit if cloned with CLONE_THREAD */
	__u64	remove_on_exec : 1;		/* event is removed from task on exec */
	__u64	sigtrap : 1;			/* send synchronous SIGTRAP on event */
	__u64	__reserved_1 : 26;

	union {
		__u32	wakeup_events;		/* wake up every n events */
		__u32	wakeup_watermark;	/* bytes before wakeup */
	};

	__u32	bp_type;

	union {
		__u64	bp_addr;
		__u64	kprobe_func;		/* for perf_kprobe */
		__u64	uprobe_path;		/* for perf_uprobe */
		__u64	config1;		/* extension of config */
	};

	union {
		__u64	bp_len;
		__u64	kprobe_addr;		/* when kprobe_func == NULL */
		__u64	probe_offset;		/* for perf_[k,u]probe */
		__u64	config2;		/* extension of config1 */
	};

	__u64	branch_sample_type;		/* enum perf_branch_sample_type */

	/*
	 * Defines set of user regs to dump on samples.
	 * See asm/perf_regs.h for details.
	 */
	__u64	sample_regs_user;

	/* Defines size of the user stack to dump on samples. */
	__u32	sample_stack_user;

	__s32	clockid;

	/*
	 * Defines set of regs to dump for each sample
	 * state captured on:
	 *  - precise = 0: PMU interrupt
	 *  - precise > 0: sampled instruction
	 *
	 * See asm/perf_regs.h for details.
	 */
	__u64	sample_regs_intr;

	/* Wakeup watermark for AUX area */
	__u32	aux_watermark;

	/*
	 * Max number of frame pointers in a callchain, should be
	 * lower than /proc/sys/kernel/perf_event_max_stack.
	 *
	 * Max number of entries of branch stack should be lower
	 * than the hardware limit.
	 */
	__u16	sample_max_stack;
	__u16	__reserved_2;
	__u32	aux_sample_size;

	/* aux_action overlays the individual AUX control bits. */
	union {
		__u32	aux_action;
		struct {
			__u32	aux_start_paused : 1;	/* start AUX area tracing paused */
			__u32	aux_pause : 1;		/* on overflow, pause AUX area tracing */
			__u32	aux_resume : 1;		/* on overflow, resume AUX area tracing */
			__u32	__reserved_3 : 29;
		};
	};

	/*
	 * User provided data if sigtrap=1, passed back to user via
	 * siginfo_t::si_perf_data, e.g. to permit user to identify the event.
	 * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be
	 * truncated accordingly on 32 bit architectures.
	 */
	__u64	sig_data;

	__u64	config3;			/* extension of config2 */
};
/*
 * Argument structure for the PERF_EVENT_IOC_QUERY_BPF command, which
 * queries the BPF programs attached to the same perf tracepoint as the
 * given perf event.
 */
struct perf_event_query_bpf {
	/* Length of the ids[] array at the end of this structure. */
	__u32	ids_len;

	/* Set by the kernel to indicate the number of available programs. */
	__u32	prog_cnt;

	/* User provided buffer to store program ids. */
	__u32	ids[];
};
/*
 * Layout of the page that can be mapped via mmap.
 */
struct perf_event_mmap_page {
	__u32	version;		/* version number of this structure */
	__u32	compat_version;		/* lowest version this is compat with */

	/*
	 * Bits needed to read the HW events in user-space.
	 *
	 *   u32 seq, time_mult, time_shift, index, width;
	 *   u64 count, enabled, running;
	 *   u64 cyc, time_offset;
	 *   s64 pmc = 0;
	 *
	 *   do {
	 *     seq = pc->lock;
	 *     barrier();
	 *
	 *     enabled = pc->time_enabled;
	 *     running = pc->time_running;
	 *
	 *     if (pc->cap_user_time && enabled != running) {
	 *       cyc = rdtsc();
	 *       time_offset = pc->time_offset;
	 *       time_mult   = pc->time_mult;
	 *       time_shift  = pc->time_shift;
	 *     }
	 *
	 *     index = pc->index;
	 *     count = pc->offset;
	 *     if (pc->cap_user_rdpmc && index) {
	 *       width = pc->pmc_width;
	 *       pmc = rdpmc(index - 1);
	 *     }
	 *
	 *     barrier();
	 *   } while (pc->lock != seq);
	 *
	 * NOTE: for obvious reason this only works on self-monitoring
	 *       processes.
	 */
	__u32	lock;			/* seqlock for synchronization */
	__u32	index;			/* hardware event identifier */
	__s64	offset;			/* add to hardware event value */
	__u64	time_enabled;		/* time event active */
	__u64	time_running;		/* time event on CPU */

	/* 'capabilities' overlays the individual cap_* bits. */
	union {
		__u64	capabilities;
		struct {
			__u64	cap_bit0 : 1;			/* Always 0, deprecated, see commit 860f085b74e9 */
			__u64	cap_bit0_is_deprecated : 1;	/* Always 1, signals that bit 0 is zero */
			__u64	cap_user_rdpmc : 1;		/* The RDPMC instruction can be used to read counts */
			__u64	cap_user_time : 1;		/* The time_{shift,mult,offset} fields are used */
			__u64	cap_user_time_zero : 1;		/* The time_zero field is used */
			__u64	cap_user_time_short : 1;	/* the time_{cycle,mask} fields are used */
			__u64	cap_____res : 58;
		};
	};

	/*
	 * If cap_user_rdpmc this field provides the bit-width of the value
	 * read using the rdpmc() or equivalent instruction. This can be used
	 * to sign extend the result like:
	 *
	 *   pmc <<= 64 - width;
	 *   pmc >>= 64 - width; // signed shift right
	 *   count += pmc;
	 */
	__u16	pmc_width;

	/*
	 * If cap_user_time the below fields can be used to compute the time
	 * delta since time_enabled (in ns) using RDTSC or similar.
	 *
	 *   u64 quot, rem;
	 *   u64 delta;
	 *
	 *   quot = (cyc >> time_shift);
	 *   rem = cyc & (((u64)1 << time_shift) - 1);
	 *   delta = time_offset + quot * time_mult +
	 *              ((rem * time_mult) >> time_shift);
	 *
	 * Where time_offset, time_mult, time_shift and cyc are read in the
	 * seqcount loop described above. This delta can then be added to
	 * enabled and possible running (if index), improving the scaling:
	 *
	 *   enabled += delta;
	 *   if (index)
	 *     running += delta;
	 *
	 *   quot = count / running;
	 *   rem  = count % running;
	 *   count = quot * enabled + (rem * enabled) / running;
	 */
	__u16	time_shift;
	__u32	time_mult;
	__u64	time_offset;

	/*
	 * If cap_user_time_zero, the hardware clock (e.g. TSC) can be
	 * calculated from sample timestamps.
	 *
	 *   time = timestamp - time_zero;
	 *   quot = time / time_mult;
	 *   rem  = time % time_mult;
	 *   cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
	 *
	 * And vice versa:
	 *
	 *   quot = cyc >> time_shift;
	 *   rem  = cyc & (((u64)1 << time_shift) - 1);
	 *   timestamp = time_zero + quot * time_mult +
	 *               ((rem * time_mult) >> time_shift);
	 */
	__u64	time_zero;

	__u32	size;			/* Header size up to __reserved[] fields. */
	__u32	__reserved_1;

	/*
	 * If cap_user_time_short, the hardware clock is less than 64bit wide
	 * and we must compute the 'cyc' value, as used by cap_user_time, as:
	 *
	 *   cyc = time_cycles + ((cyc - time_cycles) & time_mask)
	 *
	 * NOTE: this form is explicitly chosen such that cap_user_time_short
	 *       is a correction on top of cap_user_time, and code that doesn't
	 *       know about cap_user_time_short still works under the assumption
	 *       the counter doesn't wrap.
	 */
	__u64	time_cycles;
	__u64	time_mask;

	/* Hole for extension of the self monitor capabilities */
	__u8	__reserved[116*8];	/* align to 1k. */

	/*
	 * Control data for the mmap() data buffer.
	 *
	 * User-space reading the @data_head value should issue an smp_rmb(),
	 * after reading this value.
	 *
	 * When the mapping is PROT_WRITE the @data_tail value should be
	 * written by user-space to reflect the last read data, after issuing
	 * an smp_mb() to separate the data read from the ->data_tail store.
	 * In this case the kernel will not over-write unread data.
	 *
	 * See perf_output_put_handle() for the data ordering.
	 *
	 * data_{offset,size} indicate the location and size of the perf record
	 * buffer within the mmapped area.
	 */
	__u64	data_head;		/* head in the data section */
	__u64	data_tail;		/* user-space written tail */
	__u64	data_offset;		/* where the buffer starts */
	__u64	data_size;		/* data buffer size */

	/*
	 * AUX area is defined by aux_{offset,size} fields that should be set
	 * by the user-space, so that
	 *
	 *   aux_offset >= data_offset + data_size
	 *
	 * prior to mmap()ing it. Size of the mmap()ed area should be aux_size.
	 *
	 * Ring buffer pointers aux_{head,tail} have the same semantics as
	 * data_{head,tail} and same ordering rules apply.
	 */
	__u64	aux_head;
	__u64	aux_tail;
	__u64	aux_offset;
	__u64	aux_size;
};
/*
 * The current state of perf_event_header::misc bits usage:
 * ('|' used bit, '-' unused bit)
 *
 *  012         CDEF
 *  |||---------||||
 *
 *  Where:
 *    0-2     CPUMODE_MASK
 *
 *    C       PROC_MAP_PARSE_TIMEOUT
 *    D       MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT
 *    E       MMAP_BUILD_ID / EXACT_IP / SWITCH_OUT_PREEMPT
 *    F       (reserved)
 */

/*
 * Indicates that /proc/PID/maps parsing are truncated by time out.
 */
#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT	(1 << 12)

/*
 * Following PERF_RECORD_MISC_* are used on different
 * events, so can reuse the same bit position:
 *
 *   PERF_RECORD_MISC_MMAP_DATA  - PERF_RECORD_MMAP* events
 *   PERF_RECORD_MISC_COMM_EXEC  - PERF_RECORD_COMM event
 *   PERF_RECORD_MISC_FORK_EXEC  - PERF_RECORD_FORK event (perf internal)
 *   PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events
 */
#define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
#define PERF_RECORD_MISC_COMM_EXEC		(1 << 13)
#define PERF_RECORD_MISC_FORK_EXEC		(1 << 13)
#define PERF_RECORD_MISC_SWITCH_OUT		(1 << 13)

/*
 * These PERF_RECORD_MISC_* flags below are safely reused
 * for the following events:
 *
 *   PERF_RECORD_MISC_EXACT_IP           - PERF_RECORD_SAMPLE of precise events
 *   PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events
 *   PERF_RECORD_MISC_MMAP_BUILD_ID      - PERF_RECORD_MMAP2 event
 *
 *
 * PERF_RECORD_MISC_EXACT_IP:
 *   Indicates that the content of PERF_SAMPLE_IP points to
 *   the actual instruction that triggered the event. See also
 *   perf_event_attr::precise_ip.
 *
 * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT:
 *   Indicates that thread was preempted in TASK_RUNNING state.
 *
 * PERF_RECORD_MISC_MMAP_BUILD_ID:
 *   Indicates that mmap2 event carries build ID data.
 */
#define PERF_RECORD_MISC_EXACT_IP		(1 << 14)
#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT	(1 << 14)
#define PERF_RECORD_MISC_MMAP_BUILD_ID		(1 << 14)

/*
 * Reserve the last bit to indicate some extended misc field
 */
#define PERF_RECORD_MISC_EXT_RESERVED		(1 << 15)
NR_NAMESPACES, /* number of available namespaces */
};
enum perf_event_type {

	/*
	 * When perf_event_attr.sample_id_all is set, every event type also
	 * carries the sample_type selected fields that say where/when
	 * (identity) the event took place (TID, TIME, ID, STREAM_ID, CPU,
	 * IDENTIFIER), as described for PERF_RECORD_SAMPLE. They are stashed
	 * just after the perf_event_header and the fields already present
	 * for the existing fields, i.e. at the end of the payload. That way
	 * a newer perf.data file will be supported by older perf tools, with
	 * these new optional fields being ignored.
	 *
	 * struct sample_id {
	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
	 *	{ u64			time;     } && PERF_SAMPLE_TIME
	 *	{ u64			id;       } && PERF_SAMPLE_ID
	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
	 *	{ u64			id;	  } && PERF_SAMPLE_IDENTIFIER
	 * } && perf_event_attr::sample_id_all
	 *
	 * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. The
	 * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed
	 * relative to header.size.
	 */

	/*
	 * The MMAP events record the PROT_EXEC mappings so that we can
	 * correlate user-space IPs to code. They have the following
	 * structure:
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	u32				pid, tid;
	 *	u64				addr;
	 *	u64				len;
	 *	u64				pgoff;
	 *	char				filename[];
	 *	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_MMAP			= 1,

	/*
	 * Records a context switch in or out (flagged by
	 * PERF_RECORD_MISC_SWITCH_OUT). See also
	 * PERF_RECORD_SWITCH_CPU_WIDE.
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_SWITCH			= 14,

	/*
	 * CPU-wide version of PERF_RECORD_SWITCH with next_prev_pid and
	 * next_prev_tid that are the next (switching out) or previous
	 * (switching in) pid/tid.
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				next_prev_pid;
	 *	u32				next_prev_tid;
	 *	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_SWITCH_CPU_WIDE		= 15,

	/*
	 * Records changes to kernel text i.e. self-modified code. 'old_len'
	 * is the number of old bytes, 'new_len' is the number of new bytes.
	 * Either 'old_len' or 'new_len' may be zero to indicate, for example,
	 * the addition or removal of a trampoline. 'bytes' contains the old
	 * bytes followed immediately by the new bytes.
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *	u64				addr;
	 *	u16				old_len;
	 *	u16				new_len;
	 *	u8				bytes[];
	 *	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_TEXT_POKE			= 20,

	/*
	 * Data written to the AUX area by hardware due to aux_output, may
	 * need to be matched to the event by an architecture-specific
	 * hardware ID. This records the hardware ID, but requires sample_id
	 * to provide the event ID. e.g. Intel PT uses this record to
	 * disambiguate PEBS-via-PT records from multiple events.
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *	u64				hw_id;
	 *	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_AUX_OUTPUT_HW_ID		= 21,

	PERF_RECORD_MAX,			/* non-ABI */
};
enum perf_record_ksymbol_type {
	PERF_RECORD_KSYMBOL_TYPE_UNKNOWN	= 0,
	PERF_RECORD_KSYMBOL_TYPE_BPF		= 1,

	/*
	 * Out of line code such as kprobe-replaced instructions or optimized
	 * kprobes or ftrace trampolines.
	 */
	PERF_RECORD_KSYMBOL_TYPE_OOL		= 2,

	PERF_RECORD_KSYMBOL_TYPE_MAX		/* non-ABI */
};
/**
 * PERF_RECORD_AUX::flags bits
 */
#define PERF_AUX_FLAG_TRUNCATED			0x0001	/* Record was truncated to fit */
#define PERF_AUX_FLAG_OVERWRITE			0x0002	/* Snapshot from overwrite mode */
#define PERF_AUX_FLAG_PARTIAL			0x0004	/* Record contains gaps */
#define PERF_AUX_FLAG_COLLISION			0x0008	/* Sample collided with another */
#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK	0xff00	/* PMU specific trace format type */

/* CoreSight PMU AUX buffer formats */
#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT	0x0000 /* Default for backward compatibility */
#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW		0x0100 /* Raw format of the source */
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.