/* To enable MSR tracing please use the generic trace points. */
/*
 *          |   NHM/WSM    |      SNB     |
 * register -------------------------------
 *          |  HT  | no HT |  HT  | no HT |
 *-----------------------------------------
 * offcore  | core | core  | cpu  | core  |
 * lbr_sel  | core | core  | cpu  | core  |
 * ld_lat   | cpu  | core  | cpu  | core  |
 *-----------------------------------------
 *
 * Given that there is a small number of shared regs,
 * we can pre-allocate their slot in the per-cpu
 * per-core reg tables.
 */
enum extra_reg_type {
EXTRA_REG_NONE = -1, /* not used */
/*
 * Per register state.
 */
struct er_account {
raw_spinlock_t lock; /* per-core: protect structure */
u64 config; /* extra MSR config */
u64 reg; /* extra MSR number */
atomic_t ref; /* reference count */
};
/*
 * Per core/cpu state
 *
 * Used to coordinate shared registers between HT threads or
 * among events on a single PMU.
 */
struct intel_shared_regs {
	struct er_account	regs[EXTRA_REG_MAX];
	int			refcnt;		/* per-core: #HT threads */
	unsigned		core_id;	/* per-core: core id */
};
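/*
 * Illustrative sketch (hypothetical helper, not the in-tree code): claiming
 * one er_account slot for an event's extra MSR value. Users that share the
 * slot must agree on the same config; the atomic reference count tracks how
 * many events currently depend on it.
 */
static inline bool sketch_get_extra_reg(struct er_account *era, u64 config)
{
	unsigned long flags;
	bool ok = false;

	raw_spin_lock_irqsave(&era->lock, flags);
	/* Slot is free, or an existing user programmed the same value: share it. */
	if (!atomic_read(&era->ref) || era->config == config) {
		era->config = config;
		atomic_inc(&era->ref);
		ok = true;
	}
	raw_spin_unlock_irqrestore(&era->lock, flags);

	return ok;
}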
enum intel_excl_state_type {
INTEL_EXCL_UNUSED = 0, /* counter is unused */
INTEL_EXCL_SHARED = 1, /* counter can be used by both threads */
INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
};
struct intel_excl_states {
	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
	bool sched_started; /* true if scheduling has started */
};
struct intel_excl_cntrs {
raw_spinlock_t lock;
struct intel_excl_states states[2];
union {
u16 has_exclusive[2];
u32 exclusive_present;
};
	int		refcnt;		/* per-core: #HT threads */
	unsigned	core_id;	/* per-core: core id */
};
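/*
 * Illustrative sketch (hypothetical helper, simplified from the scheduler's
 * exclusive-counter handling): with HT, a counter is only usable on thread
 * @tid if the sibling thread has left it unused or marked it shareable.
 */
static inline bool sketch_excl_counter_usable(struct intel_excl_cntrs *excl,
					      int tid, int idx)
{
	struct intel_excl_states *sibling = &excl->states[tid ^ 1];

	return sibling->state[idx] == INTEL_EXCL_UNUSED ||
	       sibling->state[idx] == INTEL_EXCL_SHARED;
}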
struct cpu_hw_events {
	/*
	 * Generic x86 PMC bits
	 */
	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long		dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int			enabled;
	int			n_events; /* the # of events in the below arrays */
	int			n_added;  /* the # last events in the below arrays;
					     they've never been enabled yet */
	int			n_txn;    /* the # last events in the below arrays;
					     added in the current transaction */
	int			n_txn_pair;
	int			n_txn_metric;
	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
u64 tags[X86_PMC_IDX_MAX];
	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
	struct event_constraint	*event_constraint[X86_PMC_IDX_MAX];
	int			n_excl; /* the number of exclusive events */
	int			n_late_setup; /* the number of events that need late setup */
	unsigned int		txn_flags;
	int			is_fake;
	/*
	 * Intel DebugStore bits
	 */
	struct debug_store	*ds;
	void			*ds_pebs_vaddr;
	void			*ds_bts_vaddr;
	u64			pebs_enabled;
	int			n_pebs;
	int			n_large_pebs;
	int			n_pebs_via_pt;
	int			pebs_output;
/* Current super set of events hardware configuration */
u64 pebs_data_cfg;
	u64			active_pebs_data_cfg;
	int			pebs_record_size;
#define __EVENT_CONSTRAINT(c, n, m, w, o, f) \
__EVENT_CONSTRAINT_RANGE(c, c, n, m, w, o, f)
#define EVENT_CONSTRAINT(c, n, m) \
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
/*
 * The constraint_match() function only works for 'simple' event codes
 * and not for extended (AMD64_EVENTSEL_EVENT) event codes.
 */
#define EVENT_CONSTRAINT_RANGE(c, e, n, m)	\
__EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0)
#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
0, PERF_X86_EVENT_EXCL)
/*
 * The overlap flag marks event constraints with overlapping counter
 * masks. This is the case if the counter mask of such an event is not
 * a subset of any other counter mask of a constraint with an equal or
 * higher weight, e.g.:
 *
 *  c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
 *  c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
 *  c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
 *
 * The event scheduler may not select the correct counter in the first
 * cycle because it needs to know which subsequent events will be
 * scheduled. It may fail to schedule the events then. So we set the
 * overlap flag for such constraints to give the scheduler a hint which
 * events to select for counter rescheduling.
 *
 * Care must be taken as the rescheduling algorithm is O(n!) which
 * will increase scheduling cycles for an over-committed system
 * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
 * and their counter masks must be kept at a minimum.
 */
#define EVENT_CONSTRAINT_OVERLAP(c, n, m)	\
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
/*
 * Constraint on the Event code.
 */
#define INTEL_EVENT_CONSTRAINT(c, n)	\
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
/*
 * Constraint on a range of Event codes
 */
#define INTEL_EVENT_CONSTRAINT_RANGE(c, e, n)	\
EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT)
/*
 * Constraint on the Event code + UMask + fixed-mask
 *
 * filter mask to validate fixed counter events.
 * the following filters disqualify for fixed counters:
 *  - inv
 *  - edge
 *  - cnt-mask
 *  - in_tx
 *  - in_tx_checkpointed
 * The other filters are supported by fixed counters.
 * The any-thread option is supported starting with v3.
 */
#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
#define FIXED_EVENT_CONSTRAINT(c, n)	\
EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
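/*
 * For example (illustrative only; the real tables live in the per-model
 * code), instructions retired (event code 0x00c0) maps to fixed counter 0
 * and unhalted core cycles (0x003c) to fixed counter 1. The counter bitmask
 * is 1ULL << (32 + n), i.e. fixed counters sit above bit 32 of the mask:
 *
 *  static struct event_constraint sketch_fixed_constraints[] = {
 *	FIXED_EVENT_CONSTRAINT(0x00c0, 0),
 *	FIXED_EVENT_CONSTRAINT(0x003c, 1),
 *	EVENT_CONSTRAINT_END
 *  };
 */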
/*
 * The special metric counters do not actually exist. They are calculated from
 * the combination of the FxCtr3 + MSR_PERF_METRICS.
 *
 * The special metric counters are mapped to a dummy offset for the scheduler.
 * Sharing the same metric between multiple users without multiplexing is not
 * allowed, even though the hardware supports that in principle.
*/
/*
 * Constraint on the Event code + UMask
 */
#define INTEL_UEVENT_CONSTRAINT(c, n)	\
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
/* Constraint on specific umask bit only + event */
#define INTEL_UBIT_EVENT_CONSTRAINT(c, n)	\
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))
/* Like UEVENT_CONSTRAINT, but match flags too */
#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n)	\
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
/* Event constraint, but match on all event flags too. */
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n)	\
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
#define INTEL_FLAGS_EVENT_CONSTRAINT_RANGE(c, e, n) \
EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
/* Check only flags, but allow all event/umask */
#define INTEL_ALL_EVENT_CONSTRAINT(code, n)	\
EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
/* Check flags and event code, and set the HSW store flag */
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n)	\
__EVENT_CONSTRAINT(code, n, \
ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
/* Check flags and event code, and set the HSW load flag */
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n)	\
__EVENT_CONSTRAINT(code, n, \
ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
/* Check flags and event code/umask, and set the HSW store flag */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n)	\
__EVENT_CONSTRAINT(code, n, \
INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
/* Check flags and event code/umask, and set the HSW load flag */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n)	\
__EVENT_CONSTRAINT(code, n, \
INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
/* Check flags and event code/umask, and set the HSW N/A flag */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n)	\
__EVENT_CONSTRAINT(code, n, \
INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
/*
 * We define the end marker as having a weight of -1
 * to enable blacklisting of events using a counter bitmask
 * of zero and thus a weight of zero.
 * The end marker has a weight that cannot possibly be
 * obtained from counting the bits in the bitmask.
 */
#define EVENT_CONSTRAINT_END { .weight = -1 }
/*
 * Check for end marker with weight == -1
 */
#define for_each_event_constraint(e, c)	\
	for ((e) = (c); (e)->weight != -1; (e)++)
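/*
 * For example (illustrative; sketch_fixed_constraints is the hypothetical
 * table from above), a constraint lookup walks the table until the
 * weight == -1 sentinel placed by EVENT_CONSTRAINT_END stops the loop:
 *
 *  struct event_constraint *c;
 *
 *  for_each_event_constraint(c, sketch_fixed_constraints) {
 *	if (constraint_match(c, event->hw.config))
 *		return c;
 *  }
 */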
/*
 * Extra registers for specific events.
 *
 * Some events need large masks and require external MSRs.
 * Those extra MSRs end up being shared for all events on
 * a PMU and sometimes between PMUs of sibling HT threads.
 * In either case, the kernel needs to handle conflicting
 * accesses to those extra, shared, regs. The data structure
 * to manage those registers is stored in cpu_hw_event.
 */
struct extra_reg {
	unsigned int		event;
	unsigned int		msr;
u64 config_mask;
	u64			valid_mask;
	int			idx;  /* per_xxx->regs[] reg index */
	bool			extra_msr_access;
};
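/*
 * For example (illustrative only; the event code, MSR number and index are
 * assumptions loosely modeled on the offcore-response extra register), a
 * table entry ties an event to the companion MSR that has to be programmed
 * whenever the event is scheduled:
 *
 *  static struct extra_reg sketch_extra_regs[] = {
 *	{
 *		.event		  = 0x01b7,
 *		.msr		  = 0x01a6,
 *		.config_mask	  = INTEL_ARCH_EVENT_MASK,
 *		.valid_mask	  = ~0ULL,
 *		.idx		  = EXTRA_REG_RSP_0,
 *		.extra_msr_access = true,
 *	},
 *  };
 */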
/* The following are only used for matching */
hybrid_big_small = hybrid_big | hybrid_small,
hybrid_small_tiny = hybrid_small | hybrid_tiny,
hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny,
};
	/*
	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
	 */
	int		(*check_period) (struct perf_event *event, u64 period);
int (*aux_output_match) (struct perf_event *event);
	void		(*filter)(struct pmu *pmu, int cpu, bool *ret);

	/*
	 * Hybrid support
	 *
	 * Most PMU capabilities are the same among different hybrid PMUs.
	 * The global x86_pmu saves the architecture capabilities, which
	 * are available for all PMUs. The hybrid_pmu only includes the
	 * unique capabilities.
	 */
	int				num_hybrid_pmus;
	struct x86_hybrid_pmu		*hybrid_pmu;
	enum intel_cpu_type		(*get_hybrid_cpu_type)	(void);
};
struct x86_perf_task_context_opt {
	int lbr_callstack_users;
	int lbr_stack_state;
	int log_id;
};
struct x86_perf_task_context {
	u64 lbr_sel;
	int tos;
	int valid_lbrs;
	struct x86_perf_task_context_opt opt;
	struct lbr_entry lbr[MAX_LBR_ENTRIES];
};
/*
 * Add padding to guarantee the 64-byte alignment of the state buffer.
 *
 * The structure is dynamically allocated. The size of the LBR state may vary
 * based on the number of LBR registers.
 *
 * Do not put anything after the LBR state.
 */
struct x86_perf_task_context_arch_lbr_xsave {
	struct x86_perf_task_context_opt	opt;
int x86_perf_event_set_period(struct perf_event *event);
/*
 * Generalized hw caching related hw_event table, filled
 * in on a per model basis. A value of 0 means
 * 'not supported', -1 means 'hw_event makes no sense on
 * this CPU', any other value means the raw hw_event
 * ID.
*/
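	/* Program the event's companion (extra) MSR, if any, before enabling it. */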
if (hwc->extra_reg.reg)
wrmsrq(hwc->extra_reg.reg, hwc->extra_reg.config);
	/*
	 * Add an enabled Merge event on the next counter if a large
	 * increment event is being enabled on this counter.
	 */
	if (is_counter_pair(hwc))
wrmsrq(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en);
int perf_assign_events(struct event_constraint **constraints, int n,
			int wmin, int wmax, int gpmax, int *assign);

int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
void x86_pmu_stop(struct perf_event *event, int flags);
/*
 * Not all PMUs provide the right context information to place the reported IP
 * into full context. Specifically segment registers are typically not
 * supplied.
 *
 * Assuming the address is a linear address (it is for IBS), we fake the CS and
 * vm86 mode using the known zero-based code segment and 'fix up' the registers
 * to reflect this.
 *
 * Intel PEBS/LBR appear to typically provide the effective address, nothing
 * much we can do about that but pray and treat it like a linear address.
 */
static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
{
	regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
	if (regs->flags & X86_VM_MASK)
regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
regs->ip = ip;
}
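/*
 * Illustrative sketch (hypothetical caller): a driver whose hardware only
 * reports a linear instruction pointer would fix up the sampled registers
 * with set_linear_ip() before handing the sample to perf.
 */
static inline void sketch_fixup_sample_ip(struct pt_regs *regs, unsigned long hw_ip)
{
	/* Fake up CS and adjust flags so the reported IP resolves correctly. */
	set_linear_ip(regs, hw_ip);
}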
int common_branch_type(int type);
int branch_type(unsigned long from, unsigned long to, int abort);
int branch_type_fused(unsigned long from, unsigned long to, int abort,
		      int *offset);