// SPDX-License-Identifier: GPL-2.0-only
/*
 * Driver for the L3 cache PMUs in Qualcomm Technologies chips.
 *
 * The driver supports a distributed cache architecture where the overall
 * cache for a socket is comprised of multiple slices each with its own PMU.
 * Access to each individual PMU is provided even though all CPUs share all
 * the slices. User space needs to aggregate to individual counts to provide
 * a global picture.
 *
 * See Documentation/admin-guide/perf/qcom_l3_pmu.rst for more details.
 *
 * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
 */
/* Number of counters on each PMU */
#define L3_NUM_COUNTERS  8

/* Mask for the event type field within perf_event_attr.config and EVTYPE reg */
#define L3_EVTYPE_MASK   0xFF

/*
 * Bit position of the 'long counter' flag within perf_event_attr.config.
 * Reserve some space between the event type and this flag to allow expansion
 * in the event type field.
 */
#define L3_EVENT_LC_BIT  32
/*
 * Decoding of settings from perf_event_attr
 *
 * The config format for perf events is:
 *   - config: bits 0-7: event type
 *             bit  32:  HW counter size requested, 0: 32 bits, 1: 64 bits
 */
/*
 * Type used to group hardware counter operations
 *
 * Used to implement two types of hardware counters, standard (32bits) and
 * long (64bits). The hardware supports counter chaining which we use to
 * implement long counters. This support is exposed via the 'lc' flag field
 * in perf_event_attr.config.
 */
struct l3cache_event_ops {
	/* Called to start event monitoring */
	void (*start)(struct perf_event *event);
	/* Called to stop event monitoring */
	void (*stop)(struct perf_event *event, int flags);
	/* Called to update the perf_event */
	void (*update)(struct perf_event *event);
};
/*
 * Implementation of long counter operations
 *
 * 64bit counters are implemented by chaining two of the 32bit physical
 * counters. The PMU only supports chaining of adjacent even/odd pairs
 * and for simplicity the driver always configures the odd counter to
 * count the overflows of the lower-numbered even counter. Note that since
 * the resulting hardware counter is 64bits no IRQs are required to maintain
 * the software counter which is also 64bits.
 */
/*
 * NOTE(review): the statements below reference 'gang', 'idx', 'evsel',
 * 'event' and 'l3pmu' but no enclosing function definition is visible in
 * this chunk -- they appear to be the interior of the long-counter start
 * routine with its header line lost during extraction. Code is left
 * byte-identical; restore the enclosing function before compiling.
 */
/* Set the odd counter to count the overflows of the even counter */
gang = readl_relaxed(l3pmu->regs + L3_M_BC_GANG);
gang |= GANG_EN(idx + 1);
writel_relaxed(gang, l3pmu->regs + L3_M_BC_GANG);
/* Initialize the hardware counters and reset prev_count*/
local64_set(&event->hw.prev_count, 0);
/* Zero both physical counters of the even/odd pair before use */
writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1));
writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
/* * Set the event types, the upper half must use zero and the lower * half the actual event type
*/
writel_relaxed(EVSEL(0), l3pmu->regs + L3_HML3_PM_EVTYPE(idx + 1));
writel_relaxed(EVSEL(evsel), l3pmu->regs + L3_HML3_PM_EVTYPE(idx));
/*
 * Implementation of standard counter operations
 *
 * 32bit counters use a single physical counter and a hardware feature that
 * asserts the overflow IRQ on the toggling of the most significant bit in
 * the counter. This feature allows the counters to be left free-running
 * without needing the usual reprogramming required to properly handle races
 * during concurrent calls to update.
 */
/* Retrieve the appropriate operations for the given event */ static conststruct l3cache_event_ops *l3cache_event_get_ops(struct perf_event *event)
{ if (event_uses_long_counter(event)) return &event_ops_long; else return &event_ops_std;
}
/*
 * Top level PMU functions.
 */
/*
 * One-time hardware initialisation of a PMU slice.
 *
 * NOTE(review): only the initial reset write is visible in this chunk; the
 * original routine presumably programs further counter-control registers --
 * confirm against the full source before relying on this body.
 */
static inline void qcom_l3_cache__init(struct l3cache_pmu *l3pmu)
{
	/*
	 * Use writel (not the _relaxed variant) for the first programming
	 * command to ensure the basic counter unit is stopped before
	 * proceeding.
	 */
	writel(BC_SATROLL_CR_RESET, l3pmu->regs + L3_M_BC_SATROLL_CR);

	/* Order the reset write against all subsequent device accesses */
	wmb();
}
/*
 * NOTE(review): this span is corrupted. qcom_l3_cache__validate_event_group
 * (a bool function -- note the 'staticbool'/'returnfalse' token-join
 * artifacts) is truncated: its sibling-iteration/counter-accounting body and
 * closing brace are missing. The lines that follow the first early return
 * use int error codes (-ENOENT/-EINVAL) and reference 'hwc'/'l3pmu', so they
 * belong to the event-init routine whose header is not visible here. Code is
 * left byte-identical pending reconstruction from the original source.
 */
/* * We must NOT create groups containing events from multiple hardware PMUs, * although mixing different software and hardware PMUs is allowed.
*/ staticbool qcom_l3_cache__validate_event_group(struct perf_event *event)
{ struct perf_event *leader = event->group_leader; struct perf_event *sibling; int counters = 0;
if (leader->pmu != event->pmu && !is_software_event(leader)) returnfalse;
/* NOTE(review): lines below appear spliced in from the event-init routine */
/* * Is the event for this PMU?
*/ if (event->attr.type != event->pmu->type) return -ENOENT;
/* * Sampling not supported since these events are not core-attributable.
*/ if (hwc->sample_period) return -EINVAL;
/* * Task mode not available, we run the counters as socket counters, * not attributable to any CPU and therefore cannot attribute per-task.
*/ if (event->cpu < 0) return -EINVAL;
/* Validate the group */ if (!qcom_l3_cache__validate_event_group(event)) return -EINVAL;
hwc->idx = -1;
/* * Many perf core operations (eg. events rotation) operate on a * single CPU context. This is obvious for CPU PMUs, where one * expects the same sets of events being observed on all CPUs, * but can lead to issues for off-core PMUs, like this one, where * each event could be theoretically assigned to a different CPU. * To mitigate this, we enforce CPU assignment to one designated * processor (the one described in the "cpumask" attribute exported * by the PMU device). perf user space tools honor this and avoid * opening more than one copy of the events.
*/
event->cpu = cpumask_first(&l3pmu->cpumask);
/*
 * NOTE(review): this span is corrupted. qcom_l3_cache__event_add (note the
 * 'staticint' token-join artifact) is truncated after the counter
 * allocation -- the counter programming, ops->start call and return are
 * missing. From the ioremap line onward the statements reference 'pdev',
 * 'memrc', 'ret' and 'name', which belong to the platform probe routine
 * whose header is not visible here. Code is left byte-identical pending
 * reconstruction from the original source.
 */
staticint qcom_l3_cache__event_add(struct perf_event *event, int flags)
{ struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int order = event_uses_long_counter(event) ? 1 : 0; int idx;
/* * Try to allocate a counter.
*/
idx = bitmap_find_free_region(l3pmu->used_mask, L3_NUM_COUNTERS, order); if (idx < 0) /* The counters are all in use. */ return -EAGAIN;
/* NOTE(review): lines below appear spliced in from the probe routine */
/* * Add sysfs attributes * * We export: * - formats, used by perf user space and other tools to configure events * - events, used by perf user space and other tools to create events * symbolically, e.g.: * perf stat -a -e l3cache_0_0/event=read-miss/ ls * perf stat -a -e l3cache_0_0/event=0x21/ ls * - cpumask, used by perf user space and other tools to know on which CPUs * to open the events
*/
l3pmu->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &memrc); if (IS_ERR(l3pmu->regs)) return PTR_ERR(l3pmu->regs);
qcom_l3_cache__init(l3pmu);
ret = platform_get_irq(pdev, 0); if (ret <= 0) return ret;
ret = devm_request_irq(&pdev->dev, ret, qcom_l3_cache__handle_irq, 0,
name, l3pmu); if (ret) {
dev_err(&pdev->dev, "Request for IRQ failed for slice @%pa\n",
&memrc->start); return ret;
}
/* Add this instance to the list used by the offline callback */
ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, &l3pmu->node); if (ret) {
dev_err(&pdev->dev, "Error %d registering hotplug", ret); return ret;
}
ret = perf_pmu_register(&l3pmu->pmu, name, -1); if (ret < 0) {
dev_err(&pdev->dev, "Failed to register L3 cache PMU (%d)\n", ret); return ret;
}
/*
 * NOTE(review): this module-init routine is truncated (note the 'staticint'
 * token-join artifact) -- the platform_driver_register() call and the
 * closing brace that normally follow the hotplug setup are not present in
 * this chunk. Code is left byte-identical pending reconstruction.
 */
staticint __init register_qcom_l3_cache_pmu_driver(void)
{ int ret;
/* Install a hook to update the reader CPU in case it goes offline */
ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, "perf/qcom/l3cache:online",
qcom_l3_cache_pmu_online_cpu,
qcom_l3_cache_pmu_offline_cpu); if (ret) return ret;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.