// SPDX-License-Identifier: GPL-2.0
/*
 * Energy Model of devices
 *
 * Copyright (c) 2018-2021, Arm ltd.
 * Written by: Quentin Perret, Arm ltd.
 * Improvements provided by: Lukasz Luba, Arm ltd.
 */
/* kref release callback: last reference dropped, free the table via RCU. */
static void em_release_table_kref(struct kref *kref)
{
	/* It was the last owner of this table so we can free */
	kfree_rcu(container_of(kref, struct em_perf_table, kref), rcu);
}
/**
 * em_table_free() - Handles safe free of the EM table when needed
 * @table : EM table which is going to be freed
 *
 * No return values.
 */
void em_table_free(struct em_perf_table *table)
{
	/* Drop our reference; the release callback frees the memory. */
	kref_put(&table->kref, em_release_table_kref);
}
/** * em_table_alloc() - Allocate a new EM table * @pd : EM performance domain for which this must be done * * Allocate a new EM table and initialize its kref to indicate that it * has a user. * Returns allocated table or NULL.
*/ struct em_perf_table *em_table_alloc(struct em_perf_domain *pd)
{ struct em_perf_table *table; int table_size;
table = kzalloc(sizeof(*table) + table_size, GFP_KERNEL); if (!table) return NULL;
kref_init(&table->kref);
return table;
}
/*
 * Fill the 'performance' column of a CPU device's EM table by scaling
 * each state's frequency linearly against the CPU's capacity.
 */
static void em_init_performance(struct device *dev, struct em_perf_domain *pd,
				struct em_perf_state *table, int nr_states)
{
	u64 fmax, max_cap;
	int i, cpu;

	/* This is needed only for CPUs and EAS skip other devices */
	if (!_is_cpu_device(dev))
		return;

	cpu = cpumask_first(em_span_cpus(pd));

	/*
	 * Calculate the performance value for each frequency with
	 * linear relationship. The final CPU capacity might not be ready at
	 * boot time, but the EM will be updated a bit later with correct one.
	 */
	fmax = (u64) table[nr_states - 1].frequency;
	max_cap = (u64) arch_scale_cpu_capacity(cpu);
	for (i = 0; i < nr_states; i++)
		table[i].performance = div64_u64(max_cap * table[i].frequency,
						 fmax);
}
/*
 * NOTE(review): this definition is truncated in this extract — the loop
 * body and the function's closing brace are missing, and keywords are
 * fused (staticint/conststruct/unsignedlong) by lost whitespace.
 * Restore this function from the original source before building.
 */
staticint em_compute_costs(struct device *dev, struct em_perf_state *table, conststruct em_data_callback *cb, int nr_states, unsignedlong flags)
{ unsignedlong prev_cost = ULONG_MAX; int i, ret;
/* This is needed only for CPUs and EAS skip other devices */ if (!_is_cpu_device(dev)) return 0;
/* Compute the cost of each performance state. */ for (i = nr_states - 1; i >= 0; i--) { unsignedlong power_res, cost;
/**
 * em_dev_compute_costs() - Calculate cost values for new runtime EM table
 * @dev : Device for which the EM table is to be updated
 * @table : The new EM table that is going to get the costs calculated
 * @nr_states : Number of performance states
 *
 * Calculate the em_perf_state::cost values for new runtime EM table. The
 * values are used for EAS during task placement. It also calculates and sets
 * the efficiency flag for each performance state. When the function finish
 * successfully the EM table is ready to be updated and used by EAS.
 *
 * Return 0 on success or a proper error in case of failure.
 */
int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
			 int nr_states)
{
	/* Runtime tables have no driver callback and no extra flags. */
	return em_compute_costs(dev, table, NULL, nr_states, 0);
}
/*
 * NOTE(review): this definition is truncated — the body stops after
 * fetching the PD; the table swap, unlock and closing brace were lost
 * in extraction. Restore from the original source before building.
 */
/** * em_dev_update_perf_domain() - Update runtime EM table for a device * @dev : Device for which the EM is to be updated * @new_table : The new EM table that is going to be used from now * * Update EM runtime modifiable table for the @dev using the provided @table. * * This function uses a mutex to serialize writers, so it must not be called * from a non-sleeping context. * * Return 0 on success or an error code on failure.
*/ int em_dev_update_perf_domain(struct device *dev, struct em_perf_table *new_table)
{ struct em_perf_table *old_table; struct em_perf_domain *pd;
if (!dev) return -EINVAL;
/* Serialize update/unregister or concurrent updates */
mutex_lock(&em_pd_mutex);
if (!dev->em_pd) {
mutex_unlock(&em_pd_mutex); return -EINVAL;
}
pd = dev->em_pd;
/*
 * NOTE(review): this span is corrupted — em_create_perf_table() is cut
 * short and spliced (from the CPU-count check onward, which references
 * num_cpus/cpus not declared here) with the body of another function,
 * apparently em_create_pd(). Keywords are also fused (staticint,
 * conststruct, unsignedlong). Restore both functions from the original
 * source before building.
 */
staticint em_create_perf_table(struct device *dev, struct em_perf_domain *pd, struct em_perf_state *table, conststruct em_data_callback *cb, unsignedlong flags)
{ unsignedlong power, freq, prev_freq = 0; int nr_states = pd->nr_perf_states; int i, ret;
/* Build the list of performance states for this performance domain */ for (i = 0, freq = 0; i < nr_states; i++, freq++) { /* * active_power() is a driver callback which ceils 'freq' to * lowest performance state of 'dev' above 'freq' and updates * 'power' and 'freq' accordingly.
*/
ret = cb->active_power(dev, &power, &freq); if (ret) {
dev_err(dev, "EM: invalid perf. state: %d\n",
ret); return -EINVAL;
}
/* * We expect the driver callback to increase the frequency for * higher performance states.
*/ if (freq <= prev_freq) {
dev_err(dev, "EM: non-increasing freq: %lu\n",
freq); return -EINVAL;
}
/* * The power returned by active_state() is expected to be * positive and be in range.
*/ if (!power || power > EM_MAX_POWER) {
dev_err(dev, "EM: invalid power: %lu\n",
power); return -EINVAL;
}
if (_is_cpu_device(dev)) {
num_cpus = cpumask_weight(cpus);
/* Prevent max possible energy calculation to not overflow */ if (num_cpus > EM_MAX_NUM_CPUS) {
dev_err(dev, "EM: too many CPUs, overflow possible\n"); return -EINVAL;
}
pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL); if (!pd) return -ENOMEM;
/*
 * Push the EM's inefficiency markings into the CPUFreq frequency table of
 * this perf domain's policy; if any were installed, also flag the domain
 * so the EM itself skips inefficient states.
 */
static void
em_cpufreq_update_efficiencies(struct device *dev, struct em_perf_state *table)
{
	struct em_perf_domain *pd = dev->em_pd;
	struct cpufreq_policy *policy;
	int found = 0;
	int i, cpu;

	if (!_is_cpu_device(dev))
		return;

	/* Try to get a CPU which is active and in this PD */
	cpu = cpumask_first_and(em_span_cpus(pd), cpu_active_mask);
	if (cpu >= nr_cpu_ids) {
		dev_warn(dev, "EM: No online CPU for CPUFreq policy\n");
		return;
	}

	policy = cpufreq_cpu_get(cpu);
	if (!policy) {
		dev_warn(dev, "EM: Access to CPUFreq policy failed\n");
		return;
	}

	for (i = 0; i < pd->nr_perf_states; i++) {
		if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT))
			continue;

		if (!cpufreq_table_set_inefficient(policy, table[i].frequency))
			found++;
	}

	cpufreq_cpu_put(policy);

	if (!found)
		return;

	/*
	 * Efficiencies have been installed in CPUFreq, inefficient frequencies
	 * will be skipped. The EM can do the same.
	 */
	pd->flags |= EM_PERF_DOMAIN_SKIP_INEFFICIENCIES;
}
/** * em_pd_get() - Return the performance domain for a device * @dev : Device to find the performance domain for * * Returns the performance domain to which @dev belongs, or NULL if it doesn't * exist.
*/ struct em_perf_domain *em_pd_get(struct device *dev)
{ if (IS_ERR_OR_NULL(dev)) return NULL;
/** * em_cpu_get() - Return the performance domain for a CPU * @cpu : CPU to find the performance domain for * * Returns the performance domain to which @cpu belongs, or NULL if it doesn't * exist.
*/ struct em_perf_domain *em_cpu_get(int cpu)
{ struct device *cpu_dev;
cpu_dev = get_cpu_device(cpu); if (!cpu_dev) return NULL;
/**
 * em_dev_register_perf_domain() - Register the Energy Model (EM) for a device
 * @dev : Device for which the EM is to register
 * @nr_states : Number of performance states to register
 * @cb : Callback functions providing the data of the Energy Model
 * @cpus : Pointer to cpumask_t, which in case of a CPU device is
 *		obligatory. It can be taken from i.e. 'policy->cpus'. For other
 *		type of devices this should be set to NULL.
 * @microwatts : Flag indicating that the power values are in micro-Watts or
 *		in some other scale. It must be set properly.
 *
 * Create Energy Model tables for a performance domain using the callbacks
 * defined in cb.
 *
 * The @microwatts is important to set with correct value. Some kernel
 * sub-systems might rely on this flag and check if all devices in the EM are
 * using the same scale.
 *
 * If multiple clients register the same performance domain, all but the first
 * registration will be ignored.
 *
 * Return 0 on success
 */
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
				const struct em_data_callback *cb,
				const cpumask_t *cpus, bool microwatts)
{
	int ret = em_dev_register_pd_no_update(dev, nr_states, cb, cpus, microwatts);

	if (_is_cpu_device(dev))
		em_check_capacity_update();

	/* NOTE(review): restored the missing return lost in extraction. */
	return ret;
}
/*
 * NOTE(review): this definition is truncated — everything after the
 * em_create_pd() call (efficiency update, debugfs creation, the 'unlock'
 * label, mutex_unlock and return) and the closing brace are missing, and
 * keywords are fused (unsignedint/conststruct/unsignedlong/elseif) by
 * lost whitespace. Restore from the original source before building.
 */
/** * em_dev_register_pd_no_update() - Register a perf domain for a device * @dev : Device to register the PD for * @nr_states : Number of performance states in the new PD * @cb : Callback functions for populating the energy model * @cpus : CPUs to include in the new PD (mandatory if @dev is a CPU device) * @microwatts : Whether or not the power values in the EM will be in uW * * Like em_dev_register_perf_domain(), but does not trigger a CPU capacity * update after registering the PD, even if @dev is a CPU device.
*/ int em_dev_register_pd_no_update(struct device *dev, unsignedint nr_states, conststruct em_data_callback *cb, const cpumask_t *cpus, bool microwatts)
{ struct em_perf_table *em_table; unsignedlong cap, prev_cap = 0; unsignedlong flags = 0; int cpu, ret;
if (!dev || !nr_states || !cb) return -EINVAL;
/* * Use a mutex to serialize the registration of performance domains and * let the driver-defined callback functions sleep.
*/
mutex_lock(&em_pd_mutex);
if (dev->em_pd) {
ret = -EEXIST; goto unlock;
}
if (_is_cpu_device(dev)) { if (!cpus) {
dev_err(dev, "EM: invalid CPU mask\n");
ret = -EINVAL; goto unlock;
}
for_each_cpu(cpu, cpus) { if (em_cpu_get(cpu)) {
dev_err(dev, "EM: exists for CPU%d\n", cpu);
ret = -EEXIST; goto unlock;
} /* * All CPUs of a domain must have the same * micro-architecture since they all share the same * table.
*/
cap = arch_scale_cpu_capacity(cpu); if (prev_cap && prev_cap != cap) {
dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n",
cpumask_pr_args(cpus));
ret = -EINVAL; goto unlock;
}
prev_cap = cap;
}
}
if (microwatts)
flags |= EM_PERF_DOMAIN_MICROWATTS; elseif (cb->get_cost)
flags |= EM_PERF_DOMAIN_ARTIFICIAL;
/* * EM only supports uW (exception is artificial EM). * Therefore, check and force the drivers to provide * power in uW.
*/ if (!microwatts && !(flags & EM_PERF_DOMAIN_ARTIFICIAL)) {
dev_err(dev, "EM: only supports uW power values\n");
ret = -EINVAL; goto unlock;
}
ret = em_create_pd(dev, nr_states, cb, cpus, flags); if (ret) goto unlock;
/*
 * NOTE(review): this definition is truncated — the body stops after the
 * debugfs removal; the PD teardown, mutex_unlock and closing brace were
 * lost in extraction. Restore from the original source before building.
 */
/** * em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device * @dev : Device for which the EM is registered * * Unregister the EM for the specified @dev (but not a CPU device).
*/ void em_dev_unregister_perf_domain(struct device *dev)
{ if (IS_ERR_OR_NULL(dev) || !dev->em_pd) return;
if (_is_cpu_device(dev)) return;
/* * The mutex separates all register/unregister requests and protects * from potential clean-up/setup issues in the debugfs directories. * The debugfs directory name is the same as device's name.
*/
mutex_lock(&em_pd_mutex);
em_debug_remove_pd(dev);
/*
 * NOTE(review): orphaned fragment — this is the tail of some EM-update
 * function (it references pd, em_table, dev and ret declared elsewhere,
 * and ends with a 'free_em_table' cleanup label), but its beginning was
 * lost in extraction. Restore the enclosing function from the original
 * source before building.
 */
if (!em_is_artificial(pd)) {
ret = em_compute_costs(dev, em_table->state, NULL,
pd->nr_perf_states, pd->flags); if (ret) goto free_em_table;
}
ret = em_dev_update_perf_domain(dev, em_table); if (ret) goto free_em_table;
/* * This is one-time-update, so give up the ownership in this updater. * The EM framework has incremented the usage counter and from now * will keep the reference (then free the memory when needed).
*/
free_em_table:
em_table_free(em_table); return ret;
}
/*
 * NOTE(review): this definition is truncated — only the declarations
 * survive; the body and closing brace were lost in extraction, and
 * keywords are fused (staticvoid/unsignedint/unsignedlong). Restore
 * from the original source before building.
 */
/* * Adjustment of CPU performance values after boot, when all CPUs capacites * are correctly calculated.
*/ staticvoid em_adjust_new_capacity(unsignedint cpu, struct device *dev, struct em_perf_domain *pd)
{ unsignedlong cpu_capacity = arch_scale_cpu_capacity(cpu); struct em_perf_table *em_table; struct em_perf_state *table; unsignedlong em_max_perf;
/**
 * em_adjust_cpu_capacity() - Adjust the EM for a CPU after a capacity update.
 * @cpu: Target CPU.
 *
 * Adjust the existing EM for @cpu after a capacity update under the assumption
 * that the capacity has been updated in the same way for all of the CPUs in
 * the same perf domain.
 */
void em_adjust_cpu_capacity(unsigned int cpu)
{
	struct device *dev = get_cpu_device(cpu);
	struct em_perf_domain *pd;

	pd = em_pd_get(dev);
	if (pd)
		em_adjust_new_capacity(cpu, dev, pd);
}
/*
 * NOTE(review): this definition is truncated — the loop body stops after
 * the PD lookup; the capacity comparison, the cpu_done_mask update, the
 * free_cpumask_var() call and the closing brace were lost in extraction
 * ('staticvoid' is also a fused keyword). Restore from the original
 * source before building.
 */
staticvoid em_check_capacity_update(void)
{
cpumask_var_t cpu_done_mask; int cpu, failed_cpus = 0;
if (!zalloc_cpumask_var(&cpu_done_mask, GFP_KERNEL)) {
pr_warn("no free memory\n"); return;
}
/* Check if CPUs capacity has changed than update EM */
for_each_possible_cpu(cpu) { struct cpufreq_policy *policy; struct em_perf_domain *pd; struct device *dev;
if (cpumask_test_cpu(cpu, cpu_done_mask)) continue;
policy = cpufreq_cpu_get(cpu); if (!policy) {
failed_cpus++; continue;
}
cpufreq_cpu_put(policy);
dev = get_cpu_device(cpu);
pd = em_pd_get(dev); if (!pd || em_is_artificial(pd)) continue;
/*
 * NOTE(review): this definition is truncated — 'em_table' is read before
 * any visible assignment (its allocation/duplication step is missing),
 * and the function's tail (table update, cost recomputation, closing
 * brace) was lost in extraction. Restore from the original source before
 * building.
 */
/** * em_dev_update_chip_binning() - Update Energy Model after the new voltage * information is present in the OPPs. * @dev : Device for which the Energy Model has to be updated. * * This function allows to update easily the EM with new values available in * the OPP framework and DT. It can be used after the chip has been properly * verified by device drivers and the voltages adjusted for the 'chip binning'.
*/ int em_dev_update_chip_binning(struct device *dev)
{ struct em_perf_table *em_table; struct em_perf_domain *pd; int i, ret;
if (IS_ERR_OR_NULL(dev)) return -EINVAL;
pd = em_pd_get(dev); if (!pd) {
dev_warn(dev, "Couldn't find Energy Model\n"); return -EINVAL;
}
/* Update power values which might change due to new voltage in OPPs */ for (i = 0; i < pd->nr_perf_states; i++) { unsignedlong freq = em_table->state[i].frequency; unsignedlong power;
ret = dev_pm_opp_calc_power(dev, &power, &freq); if (ret) {
em_table_free(em_table); return ret;
}
/** * em_update_performance_limits() - Update Energy Model with performance * limits information. * @pd : Performance Domain with EM that has to be updated. * @freq_min_khz : New minimum allowed frequency for this device. * @freq_max_khz : New maximum allowed frequency for this device. * * This function allows to update the EM with information about available * performance levels. It takes the minimum and maximum frequency in kHz * and does internal translation to performance levels. * Returns 0 on success or -EINVAL when failed.
*/ int em_update_performance_limits(struct em_perf_domain *pd, unsignedlong freq_min_khz, unsignedlong freq_max_khz)
{ struct em_perf_state *table; int min_ps = -1; int max_ps = -1; int i;
for (i = 0; i < pd->nr_perf_states; i++) { if (freq_min_khz == table[i].frequency)
min_ps = i; if (freq_max_khz == table[i].frequency)
max_ps = i;
}
rcu_read_unlock();
/* Only update when both are found and sane */ if (min_ps < 0 || max_ps < 0 || max_ps < min_ps) return -EINVAL;
/* Guard simultaneous updates and make them atomic */
mutex_lock(&em_pd_mutex);
pd->min_perf_state = min_ps;
pd->max_perf_state = max_ps;
mutex_unlock(&em_pd_mutex);
/* * When called from the cpufreq_register_driver() path, the * cpu_hotplug_lock is already held, so use a work item to * avoid nested locking in rebuild_sched_domains().
*/
schedule_work(&rebuild_sd_work);
}
Messung V0.5
• Dauer der Verarbeitung: 0.12 Sekunden
(vorverarbeitet)
•
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.