staticbool pcidrv_registered; struct pci_driver *uncore_pci_driver; /* The PCI driver for the device which the uncore doesn't own. */ struct pci_driver *uncore_pci_sub_driver; /* pci bus to socket mapping */
DEFINE_RAW_SPINLOCK(pci2phy_map_lock); struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); struct pci_extra_dev *uncore_extra_pci_dev; int __uncore_max_dies;
/* mask of cpus that collect uncore events */ static cpumask_t uncore_cpu_mask;
/* * The unsigned check also catches the '-1' return value for non * existent mappings in the topology map.
*/ return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
}
/* * generic get constraint function for shared match/mask registers.
*/ struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
{ struct intel_uncore_extra_reg *er; struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; unsignedlong flags; bool ok = false;
/* * reg->alloc can be set due to existing state, so for fake box we * need to ignore this, otherwise we might fail to allocate proper * fake state for this extra reg constraint.
*/ if (reg1->idx == EXTRA_REG_NONE ||
(!uncore_box_is_fake(box) && reg1->alloc)) return NULL;
er = &box->shared_regs[reg1->idx];
raw_spin_lock_irqsave(&er->lock, flags); if (!atomic_read(&er->ref) ||
(er->config1 == reg1->config && er->config2 == reg2->config)) {
atomic_inc(&er->ref);
er->config1 = reg1->config;
er->config2 = reg2->config;
ok = true;
}
raw_spin_unlock_irqrestore(&er->lock, flags);
if (ok) { if (!uncore_box_is_fake(box))
reg1->alloc = 1; return NULL;
}
/* * Only put constraint if extra reg was actually allocated. Also * takes care of event which do not use an extra shared reg. * * Also, if this is a fake box we shouldn't touch any event state * (reg->alloc) and we don't care about leaving inconsistent box * state either since it will be thrown out.
*/ if (uncore_box_is_fake(box) || !reg1->alloc) return;
er = &box->shared_regs[reg1->idx];
atomic_dec(&er->ref);
reg1->alloc = 0;
}
/* * The overflow interrupt is unavailable for SandyBridge-EP, is broken * for SandyBridge. So we use hrtimer to periodically poll the counter * to avoid overflow.
*/ staticenum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{ struct intel_uncore_box *box; struct perf_event *event; int bit;
/* * handle boxes with an active event list as opposed to active * counters
*/
list_for_each_entry(event, &box->active_list, active_entry) {
uncore_perf_event_update(box, event);
}
/* set default hrtimer timeout */
box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
INIT_LIST_HEAD(&box->active_list);
return box;
}
/*
 * Using uncore_pmu_event_init pmu event_init callback
 * as a detection point for uncore events.
 */
static int uncore_pmu_event_init(struct perf_event *event);
/*
 * Assign hardware counters to the first @n events in box->event_list.
 *
 * Fast path: keep each event on the counter it already occupies when its
 * constraint still allows it and the counter is unused.  Otherwise fall
 * back to the generic perf constraint solver.  On failure (or when only
 * validating, @assign == NULL) all constraints taken here are released.
 * Returns 0 on success, -EINVAL if the events cannot all be scheduled.
 */
static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
	struct event_constraint *c;
	int i, wmin, wmax, ret = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
		c = uncore_get_event_constraint(box, box->event_list[i]);
		box->event_constraint[i] = c;
		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
	}

	/* fastpath, try to reuse previous register */
	for (i = 0; i < n; i++) {
		hwc = &box->event_list[i]->hw;
		c = box->event_constraint[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

		/* not already used */
		if (test_bit(hwc->idx, used_mask))
			break;

		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}

	/* slow path */
	if (i != n)
		ret = perf_assign_events(box->event_constraint, n,
					 wmin, wmax, n, assign);

	if (!assign || ret) {
		for (i = 0; i < n; i++)
			uncore_put_event_constraint(box, box->event_list[i]);
	}
	return ret ? -EINVAL : 0;
}
void uncore_pmu_event_start(struct perf_event *event, int flags)
{ struct intel_uncore_box *box = uncore_event_to_box(event); int idx = event->hw.idx;
if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX)) return;
/* * Free running counter is read-only and always active. * Use the current counter value as start point. * There is no overflow interrupt for free running counter. * Use hrtimer to periodically poll the counter to avoid overflow.
*/ if (uncore_pmc_freerunning(event->hw.idx)) {
list_add_tail(&event->active_entry, &box->active_list);
local64_set(&event->hw.prev_count,
uncore_read_counter(box, event)); if (box->n_active++ == 0)
uncore_pmu_start_hrtimer(box); return;
}
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) return;
if (box->n_active == 0)
uncore_pmu_cancel_hrtimer(box);
}
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { /* * Drain the remaining delta count out of a event * that we are disabling:
*/
uncore_perf_event_update(box, event);
hwc->state |= PERF_HES_UPTODATE;
}
}
int uncore_pmu_event_add(struct perf_event *event, int flags)
{ struct intel_uncore_box *box = uncore_event_to_box(event); struct hw_perf_event *hwc = &event->hw; int assign[UNCORE_PMC_IDX_MAX]; int i, n, ret;
if (!box) return -ENODEV;
/* * The free funning counter is assigned in event_init(). * The free running counter event and free running counter * are 1:1 mapped. It doesn't need to be tracked in event_list.
*/ if (uncore_pmc_freerunning(hwc->idx)) { if (flags & PERF_EF_START)
uncore_pmu_event_start(event, 0); return 0;
}
ret = n = uncore_collect_events(box, event, false); if (ret < 0) return ret;
/* Remove @event from its box: stop it and drop it from the event list. */
void uncore_pmu_event_del(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int i;

	uncore_pmu_event_stop(event, PERF_EF_UPDATE);

	/*
	 * The event for free running counter is not tracked by event_list.
	 * It doesn't need to force event->hw.idx = -1 to reassign the counter.
	 * Because the event and the free running counter are 1:1 mapped.
	 */
	if (uncore_pmc_freerunning(event->hw.idx))
		return;

	for (i = 0; i < box->n_events; i++) {
		if (event == box->event_list[i]) {
			uncore_put_event_constraint(box, event);

			/* Compact the list over the removed slot. */
			for (++i; i < box->n_events; i++)
				box->event_list[i - 1] = box->event_list[i];

			--box->n_events;
			break;
		}
	}

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
}
/*
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int uncore_validate_group(struct intel_uncore_pmu *pmu,
				struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;
	struct intel_uncore_box *fake_box;
	int ret = -EINVAL, n;

	/* The free running counter is always active. */
	if (uncore_pmc_freerunning(event->hw.idx))
		return 0;

	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
	if (!fake_box)
		return -ENOMEM;

	fake_box->pmu = pmu;
	/*
	 * the event is not yet connected with its
	 * siblings therefore we must first collect
	 * existing siblings, then add the new event
	 * before we can simulate the scheduling
	 */
	n = uncore_collect_events(fake_box, leader, true);
	if (n < 0)
		goto out;

	fake_box->n_events = n;
	n = uncore_collect_events(fake_box, event, false);
	if (n < 0)
		goto out;

	fake_box->n_events = n;

	/* Dry-run the scheduler (assign == NULL) on the fake box. */
	ret = uncore_assign_events(fake_box, NULL, n);
out:
	kfree(fake_box);
	return ret;
}
if (event->attr.type != event->pmu->type) return -ENOENT;
pmu = uncore_event_to_pmu(event); /* no device found for this pmu */ if (!pmu->registered) return -ENOENT;
/* Sampling not supported yet */ if (hwc->sample_period) return -EINVAL;
/* * Place all uncore events for a particular physical package * onto a single cpu
*/ if (event->cpu < 0) return -EINVAL;
box = uncore_pmu_to_box(pmu, event->cpu); if (!box || box->cpu < 0) return -EINVAL;
event->cpu = box->cpu;
event->pmu_private = box;
if (event->attr.config == UNCORE_FIXED_EVENT) { /* no fixed counter */ if (!pmu->type->fixed_ctl) return -EINVAL; /* * if there is only one fixed counter, only the first pmu * can access the fixed counter
*/ if (pmu->type->single_fixed && pmu->pmu_idx > 0) return -EINVAL;
/* fixed counters have event field hardcoded to zero */
hwc->config = 0ULL;
} elseif (is_freerunning_event(event)) {
hwc->config = event->attr.config; if (!check_valid_freerunning_event(box, event)) return -EINVAL;
event->hw.idx = UNCORE_PMC_IDX_FREERUNNING; /* * The free running counter event and free running counter * are always 1:1 mapped. * The free running counter is always active. * Assign the free running counter here.
*/
event->hw.event_base = uncore_freerunning_counter(box, event);
} else {
hwc->config = event->attr.config &
(pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32)); if (pmu->type->ops->hw_config) {
ret = pmu->type->ops->hw_config(box, event); if (ret) return ret;
}
}
if (event->group_leader != event)
ret = uncore_validate_group(pmu, event); else
ret = 0;
/* * No uncore block name in discovery table. * Use uncore_type_&typeid_&boxid as name.
*/ if (!type->name) {
uncore_get_alias_name(pmu->name, pmu); return;
}
if (type->num_boxes == 1) { if (strlen(type->name) > 0)
sprintf(pmu->name, "uncore_%s", type->name); else
sprintf(pmu->name, "uncore");
} else { /* * Use the box ID from the discovery table if applicable.
*/
sprintf(pmu->name, "uncore_%s_%d", type->name,
uncore_get_box_id(type, pmu));
}
}
staticint uncore_pmu_register(struct intel_uncore_pmu *pmu)
{ int ret;
err: for (i = 0; i < type->num_boxes; i++)
kfree(pmus[i].boxes);
kfree(pmus);
return -ENOMEM;
}
/*
 * Initialize every uncore type in the NULL-terminated @types array.
 * Returns the first error encountered, or 0 on success.
 */
static int __init
uncore_types_init(struct intel_uncore_type **types)
{
	int ret;

	for (; *types; types++) {
		ret = uncore_type_init(*types);
		if (ret)
			return ret;
	}
	return 0;
}
/*
 * Get the die information of a PCI device.
 * @pdev: The PCI device.
 * @die: The die id which the device maps to.
 */
static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
{
	*die = uncore_pcibus_to_dieid(pdev->bus);
	if (*die < 0)
		return -EINVAL;

	return 0;
}
/* * Find the PMU of a PCI device. * @pdev: The PCI device. * @ids: The ID table of the available PCI devices with a PMU. * If NULL, search the whole uncore_pci_uncores.
*/ staticstruct intel_uncore_pmu *
uncore_pci_find_dev_pmu(struct pci_dev *pdev, conststruct pci_device_id *ids)
{ struct intel_uncore_pmu *pmu = NULL; struct intel_uncore_type *type;
kernel_ulong_t data; unsignedint devfn;
if (!ids) return uncore_pci_find_dev_pmu_from_types(pdev);
while (ids && ids->vendor) { if ((ids->vendor == pdev->vendor) &&
(ids->device == pdev->device)) {
data = ids->driver_data;
devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
UNCORE_PCI_DEV_FUNC(data)); if (devfn == pdev->devfn) {
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)]; break;
}
}
ids++;
} return pmu;
}
/* * Register the PMU for a PCI device * @pdev: The PCI device. * @type: The corresponding PMU type of the device. * @pmu: The corresponding PMU of the device. * @die: The die id which the device maps to.
*/ staticint uncore_pci_pmu_register(struct pci_dev *pdev, struct intel_uncore_type *type, struct intel_uncore_pmu *pmu, int die)
{ struct intel_uncore_box *box; int ret;
if (WARN_ON_ONCE(pmu->boxes[die] != NULL)) return -EINVAL;
box = uncore_alloc_box(type, NUMA_NO_NODE); if (!box) return -ENOMEM;
pmu->boxes[die] = box; if (atomic_inc_return(&pmu->activeboxes) > 1) return 0;
/* First active box registers the pmu */
ret = uncore_pmu_register(pmu); if (ret) {
pmu->boxes[die] = NULL;
uncore_box_exit(box);
kfree(box);
} return ret;
}
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
/* * Some platforms, e.g. Knights Landing, use a common PCI device ID * for multiple instances of an uncore PMU device type. We should check * PCI slot and func to indicate the uncore box.
*/ if (id->driver_data & ~0xffff) { struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver);
pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table); if (pmu == NULL) return -ENODEV;
} else { /* * for performance monitoring unit with multiple boxes, * each box has a different function id.
*/
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
}
ret = uncore_pci_pmu_register(pdev, type, pmu, die);
pci_set_drvdata(pdev, pmu->boxes[die]);
return ret;
}
/* * Unregister the PMU of a PCI device * @pmu: The corresponding PMU is unregistered. * @die: The die id which the device maps to.
*/ staticvoid uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
{ struct intel_uncore_box *box = pmu->boxes[die];
while (ids && ids->vendor) {
pci_sub_dev = NULL;
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)]; /* * Search the available device, and register the * corresponding PMU.
*/ while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
ids->device, pci_sub_dev))) {
devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
UNCORE_PCI_DEV_FUNC(ids->driver_data)); if (devfn != pci_sub_dev->devfn) continue;
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; if (!pmu) continue;
if (uncore_pci_get_dev_die_info(pci_sub_dev, &die)) continue;
/*
 * Tear down PCI uncore support: bus notifiers or the PCI driver,
 * the PCI uncore types, and the extra-dev / bus-map allocations.
 */
static void uncore_pci_exit(void)
{
	if (pcidrv_registered) {
		pcidrv_registered = false;
		if (uncore_pci_sub_driver)
			bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier);
		if (uncore_pci_driver)
			pci_unregister_driver(uncore_pci_driver);
		else
			bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier);
		uncore_types_exit(uncore_pci_uncores);
		kfree(uncore_extra_pci_dev);
		uncore_free_pcibus_map();
	}
}
staticbool uncore_die_has_box(struct intel_uncore_type *type, int die, unsignedint pmu_idx)
{ if (!type->boxes) returntrue;
if (intel_uncore_find_discovery_unit_id(type->boxes, die, pmu_idx) < 0) returnfalse;
returntrue;
}
staticvoid uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, int new_cpu)
{ struct intel_uncore_pmu *pmu = type->pmus; struct intel_uncore_box *box; int i, die;
die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu); for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[die]; if (!box) continue;
/* Migrate the event collection context of every type from @old_cpu to @new_cpu. */
static void uncore_change_context(struct intel_uncore_type **uncores,
				  int old_cpu, int new_cpu)
{
	for (; *uncores; uncores++)
		uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
}
/*
 * Drop one reference on every box of every type for die @id; tear a box
 * down when its last reference goes away.
 */
static void uncore_box_unref(struct intel_uncore_type **types, int id)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i;

	for (; *types; types++) {
		type = *types;
		pmu = type->pmus;
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			box = pmu->boxes[id];
			if (box && box->cpu >= 0 && atomic_dec_return(&box->refcnt) == 0)
				uncore_box_exit(box);
		}
	}
}
staticint uncore_event_cpu_offline(unsignedint cpu)
{ int die, target;
/* Check if exiting cpu is used for collecting uncore events */ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) goto unref; /* Find a new cpu to collect uncore events */
target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
/* Migrate uncore events to the new target */ if (target < nr_cpu_ids)
cpumask_set_cpu(target, &uncore_cpu_mask); else
target = -1;
staticint uncore_box_ref(struct intel_uncore_type **types, int id, unsignedint cpu)
{ struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; int i, ret;
ret = allocate_boxes(types, id, cpu); if (ret) return ret;
for (; *types; types++) {
type = *types;
pmu = type->pmus; for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[id]; if (box && box->cpu >= 0 && atomic_inc_return(&box->refcnt) == 1)
uncore_box_init(box);
}
} return 0;
}
staticint uncore_event_cpu_online(unsignedint cpu)
{ int die, target, msr_ret, mmio_ret;
die = topology_logical_die_id(cpu);
msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu); if (msr_ret && mmio_ret) return -ENOMEM;
/* * Check if there is an online cpu in the package * which collects uncore events already.
*/
target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); if (target < nr_cpu_ids) return 0;
cpumask_set_cpu(cpu, &uncore_cpu_mask);
if (!msr_ret)
uncore_change_context(uncore_msr_uncores, -1, cpu); if (!mmio_ret)
uncore_change_context(uncore_mmio_uncores, -1, cpu);
uncore_change_context(uncore_pci_uncores, -1, cpu); return 0;
}
/* Register one PMU per box of @type; stop at the first failure. */
static int __init type_pmu_register(struct intel_uncore_type *type)
{
	int i, ret;

	for (i = 0; i < type->num_boxes; i++) {
		ret = uncore_pmu_register(&type->pmus[i]);
		if (ret)
			return ret;
	}
	return 0;
}
ret = uncore_types_init(types); if (ret) goto err;
for (; *types; types++) {
ret = type_pmu_register(*types); if (ret) goto err;
} return 0;
err:
uncore_types_exit(uncore_mmio_uncores);
uncore_mmio_uncores = empty_uncore; return ret;
}
/* Per-platform init hooks, selected via x86_match_cpu() driver_data. */
struct intel_uncore_init_fun {
	void	(*cpu_init)(void);
	int	(*pci_init)(void);
	void	(*mmio_init)(void);
	/* Discovery table is required */
	bool	use_discovery;
	/* The units in the discovery table should be ignored. */
	int	*uncore_units_ignore;
};
id = x86_match_cpu(intel_uncore_match); if (!id) { if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL))
uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init; else return -ENODEV;
} else {
uncore_init = (struct intel_uncore_init_fun *)id->driver_data; if (uncore_no_discover && uncore_init->use_discovery) return -ENODEV; if (uncore_init->use_discovery &&
!intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore)) return -ENODEV;
}
if (uncore_init->pci_init) {
pret = uncore_init->pci_init(); if (!pret)
pret = uncore_pci_init();
}
if (uncore_init->cpu_init) {
uncore_init->cpu_init();
cret = uncore_cpu_init();
}
if (uncore_init->mmio_init) {
uncore_init->mmio_init();
mret = uncore_mmio_init();
}
if (cret && pret && mret) {
ret = -ENODEV; goto free_discovery;
}
/* Install hotplug callbacks to setup the targets for each package */
ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, "perf/x86/intel/uncore:online",
uncore_event_cpu_online,
uncore_event_cpu_offline); if (ret) goto err; return 0;
The information on this web page has been compiled carefully and to the best
of our knowledge. However, no guarantee is given as to the completeness,
correctness, or quality of the information provided.
Note:
The colored syntax highlighting and the measurement are still experimental.