/**
 * struct sample -	Store performance sample
 * @core_avg_perf:	Ratio of APERF/MPERF which is the actual average
 *			performance during last sample period
 * @busy_scaled:	Scaled busy value which is used to calculate next
 *			P state. This can be different than core_avg_perf
 *			to account for cpu idle period
 * @aperf:		Difference of actual performance frequency clock count
 *			read from APERF MSR between last and current sample
 * @mperf:		Difference of maximum performance frequency clock count
 *			read from MPERF MSR between last and current sample
 * @tsc:		Difference of time stamp counter between last and
 *			current sample
 * @time:		Current time from scheduler
 *
 * This structure is used in the cpudata structure to store performance sample
 * data for choosing next P State.
 */
struct sample {
	int32_t core_avg_perf;
	int32_t busy_scaled;
	u64 aperf;
	u64 mperf;
	u64 tsc;
	u64 time;
};
/**
 * struct pstate_data - Store P state data
 * @current_pstate:	Current requested P state
 * @min_pstate:		Min P state possible for this platform
 * @max_pstate:		Max P state possible for this platform
 * @max_pstate_physical: This is physical Max P state for a processor
 *			This can be higher than the max_pstate which can
 *			be limited by platform thermal design power limits
 * @perf_ctl_scaling:	PERF_CTL P-state to frequency scaling factor
 * @scaling:		Scaling factor between performance and frequency
 * @turbo_pstate:	Max Turbo P state possible for this platform
 * @min_freq:		@min_pstate frequency in cpufreq units
 * @max_freq:		@max_pstate frequency in cpufreq units
 * @turbo_freq:		@turbo_pstate frequency in cpufreq units
 *
 * Stores the per cpu model P state limits and current P state.
 */
struct pstate_data {
	int current_pstate;
	int min_pstate;
	int max_pstate;
	int max_pstate_physical;
	int perf_ctl_scaling;
	int scaling;
	int turbo_pstate;
	unsigned int min_freq;
	unsigned int max_freq;
	unsigned int turbo_freq;
};
/**
 * struct vid_data -	Stores voltage information data
 * @min:		VID data for this platform corresponding to
 *			the lowest P state
 * @max:		VID data corresponding to the highest P State.
 * @turbo:		VID data for turbo P state
 * @ratio:		Ratio of (vid max - vid min) /
 *			(max P state - Min P State)
 *
 * Stores the voltage data for DVFS (Dynamic Voltage and Frequency Scaling)
 * This data is used in Atom platforms, where in addition to target P state,
 * the voltage data needs to be specified to select next P State.
 */
struct vid_data {
	int min;
	int max;
	int turbo;
	int32_t ratio;
};
/**
 * struct global_params - Global parameters, mostly tunable via sysfs.
 * @no_turbo:		Whether or not to use turbo P-states.
 * @turbo_disabled:	Whether or not turbo P-states are available at all,
 *			based on the MSR_IA32_MISC_ENABLE value and whether or
 *			not the maximum reported turbo P-state is different from
 *			the maximum reported non-turbo one.
 * @min_perf_pct:	Minimum capacity limit in percent of the maximum turbo
 *			P-state capacity.
 * @max_perf_pct:	Maximum capacity limit in percent of the maximum turbo
 *			P-state capacity.
 */
struct global_params {
	bool no_turbo;
	bool turbo_disabled;
	int max_perf_pct;
	int min_perf_pct;
};
/** * struct cpudata - Per CPU instance data storage * @cpu: CPU number for this instance data * @policy: CPUFreq policy value * @update_util: CPUFreq utility callback information * @update_util_set: CPUFreq utility callback is set * @iowait_boost: iowait-related boost fraction * @last_update: Time of the last update. * @pstate: Stores P state limits for this CPU * @vid: Stores VID limits for this CPU * @last_sample_time: Last Sample time * @aperf_mperf_shift: APERF vs MPERF counting frequency difference * @prev_aperf: Last APERF value read from APERF MSR * @prev_mperf: Last MPERF value read from MPERF MSR * @prev_tsc: Last timestamp counter (TSC) value * @sample: Storage for storing last Sample data * @min_perf_ratio: Minimum capacity in terms of PERF or HWP ratios * @max_perf_ratio: Maximum capacity in terms of PERF or HWP ratios * @acpi_perf_data: Stores ACPI perf information read from _PSS * @valid_pss_table: Set to true for valid ACPI _PSS entries found * @epp_powersave: Last saved HWP energy performance preference * (EPP) or energy performance bias (EPB), * when policy switched to performance * @epp_policy: Last saved policy used to set EPP/EPB * @epp_default: Power on default HWP energy performance * preference/bias * @epp_cached: Cached HWP energy-performance preference value * @hwp_req_cached: Cached value of the last HWP Request MSR * @hwp_cap_cached: Cached value of the last HWP Capabilities MSR * @last_io_update: Last time when IO wake flag was set * @capacity_perf: Highest perf used for scale invariance * @sched_flags: Store scheduler flags for possible cross CPU update * @hwp_boost_min: Last HWP boosted min performance * @suspended: Whether or not the driver has been suspended. * @pd_registered: Set when a perf domain is registered for this CPU. * @hwp_notify_work: workqueue for HWP notifications. * * This structure stores per CPU instance data for all CPUs.
*/ struct cpudata { int cpu;
/**
 * struct pstate_funcs - Per CPU model specific callbacks
 * @get_max:		Callback to get maximum non turbo effective P state
 * @get_max_physical:	Callback to get maximum non turbo physical P state
 * @get_min:		Callback to get minimum P state
 * @get_turbo:		Callback to get turbo P state
 * @get_scaling:	Callback to get frequency scaling factor
 * @get_cpu_scaling:	Get frequency scaling factor for a given cpu
 * @get_aperf_mperf_shift: Callback to get the APERF vs MPERF frequency difference
 * @get_val:		Callback to convert P state to actual MSR write value
 * @get_vid:		Callback to get VID data for Atom platforms
 *
 * Core and Atom CPU models have different way to get P State limits. This
 * structure is used to store those callbacks.
 */
struct pstate_funcs {
	int (*get_max)(int cpu);
	int (*get_max_physical)(int cpu);
	int (*get_min)(int cpu);
	int (*get_turbo)(int cpu);
	int (*get_scaling)(void);
	int (*get_cpu_scaling)(int cpu);
	int (*get_aperf_mperf_shift)(void);
	u64 (*get_val)(struct cpudata*, int pstate);
	void (*get_vid)(struct cpudata *);
};
/*
 * Decide whether ACPI _PPC performance limits should be enforced: always on
 * server PM profiles, otherwise only when the acpi_ppc flag is set.
 */
static bool intel_pstate_get_ppc_enable_status(void)
{
	if (intel_pstate_acpi_pm_profile_server())
		return true;

	return acpi_ppc;
}
#ifdef CONFIG_ACPI_CPPC_LIB
/* The work item is needed to avoid CPU hotplug locking issues */
static void intel_pstste_sched_itmt_work_fn(struct work_struct *work)
{
	/*
	 * Enable ITMT support from process context; calling it directly from
	 * the CPU online path would run under the CPU hotplug locks.
	 * NOTE: the "pstste" spelling is the function's actual upstream name.
	 */
	sched_set_itmt_support();
}
ret = cppc_get_perf_caps(cpu, &cppc_perf); /* * If CPPC is not available, fall back to MSR_HWP_CAPABILITIES bits [8:0]. * * Also, on some systems with overclocking enabled, CPPC.highest_perf is * hardcoded to 0xff, so CPPC.highest_perf cannot be used to enable ITMT. * Fall back to MSR_HWP_CAPABILITIES then too.
*/ if (ret || cppc_perf.highest_perf == CPPC_MAX_PERF)
cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
/* * The priorities can be set regardless of whether or not * sched_set_itmt_support(true) has been called and it is valid to * update them at any time after it has been called.
*/
sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);
if (max_highest_perf <= min_highest_perf) { if (cppc_perf.highest_perf > max_highest_perf)
max_highest_perf = cppc_perf.highest_perf;
if (cppc_perf.highest_perf < min_highest_perf)
min_highest_perf = cppc_perf.highest_perf;
if (max_highest_perf > min_highest_perf) { /* * This code can be run during CPU online under the * CPU hotplug locks, so sched_set_itmt_support() * cannot be called from here. Queue up a work item * to invoke it.
*/
schedule_work(&sched_itmt_work);
}
}
}
staticint intel_pstate_get_cppc_guaranteed(int cpu)
{ struct cppc_perf_caps cppc_perf; int ret;
ret = cppc_get_perf_caps(cpu, &cppc_perf); if (ret) return ret;
if (cppc_perf.guaranteed_perf) return cppc_perf.guaranteed_perf;
/* * Compute the perf-to-frequency scaling factor for the given CPU if * possible, unless it would be 0.
*/ if (!cppc_get_perf_caps(cpu, &cppc_perf) &&
cppc_perf.nominal_perf && cppc_perf.nominal_freq) return div_u64(cppc_perf.nominal_freq * KHZ_PER_MHZ,
cppc_perf.nominal_perf);
staticvoid intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{ struct cpudata *cpu; int ret; int i;
if (hwp_active) {
intel_pstate_set_itmt_prio(policy->cpu); return;
}
if (!intel_pstate_get_ppc_enable_status()) return;
cpu = all_cpu_data[policy->cpu];
ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
policy->cpu); if (ret) return;
/* * Check if the control value in _PSS is for PERF_CTL MSR, which should * guarantee that the states returned by it map to the states in our * list directly.
*/ if (cpu->acpi_perf_data.control_register.space_id !=
ACPI_ADR_SPACE_FIXED_HARDWARE) goto err;
/* * If there is only one entry _PSS, simply ignore _PSS and continue as * usual without taking _PSS into account
*/ if (cpu->acpi_perf_data.state_count < 2) goto err;
pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu); for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n",
(i == cpu->acpi_perf_data.state ? '*' : ' '), i,
(u32) cpu->acpi_perf_data.states[i].core_frequency,
(u32) cpu->acpi_perf_data.states[i].power,
(u32) cpu->acpi_perf_data.states[i].control);
}
cpu->valid_pss_table = true;
pr_debug("_PPC limits will be enforced\n");
/**
 * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels.
 * @cpu: Target CPU.
 *
 * On hybrid processors, HWP may expose more performance levels than there are
 * P-states accessible through the PERF_CTL interface. If that happens, the
 * scaling factor between HWP performance levels and CPU frequency will be less
 * than the scaling factor between P-state values and CPU frequency.
 *
 * In that case, adjust the CPU parameters used in computations accordingly.
 */
static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
{
	/*
	 * NOTE(review): perf_ctl_max_phys, perf_ctl_turbo and scaling are not
	 * used in the body visible in this chunk — this function looks
	 * truncated; confirm against the full source before relying on it.
	 */
	int perf_ctl_max_phys = cpu->pstate.max_pstate_physical;
	int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
	int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu);
	int scaling = cpu->pstate.scaling;
	int freq;

	freq = cpu->pstate.min_pstate * perf_ctl_scaling;
	cpu->pstate.min_freq = freq;
	/*
	 * Cast the min P-state value retrieved via pstate_funcs.get_min() to
	 * the effective range of HWP performance levels.
	 */
	cpu->pstate.min_pstate = intel_pstate_freq_to_hwp(cpu, freq);
}
if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { /* * When hwp_req_data is 0, means that caller didn't read * MSR_HWP_REQUEST, so need to read and get EPP.
*/ if (!hwp_req_data) {
epp = rdmsrq_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
&hwp_req_data); if (epp) return epp;
}
epp = (hwp_req_data >> 24) & 0xff;
} else { /* When there is no EPP present, HWP uses EPB settings */
epp = intel_pstate_get_epb(cpu_data);
}
return epp;
}
staticint intel_pstate_set_epb(int cpu, s16 pref)
{
u64 epb; int ret;
if (!boot_cpu_has(X86_FEATURE_EPB)) return -ENXIO;
ret = rdmsrq_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); if (ret) return ret;
if (boot_cpu_has(X86_FEATURE_HWP_EPP)) { if (epp == epp_values[EPP_INDEX_PERFORMANCE]) return EPP_INDEX_PERFORMANCE; if (epp == epp_values[EPP_INDEX_BALANCE_PERFORMANCE]) return EPP_INDEX_BALANCE_PERFORMANCE; if (epp == epp_values[EPP_INDEX_BALANCE_POWERSAVE]) return EPP_INDEX_BALANCE_POWERSAVE; if (epp == epp_values[EPP_INDEX_POWERSAVE]) return EPP_INDEX_POWERSAVE;
*raw_epp = epp; return 0;
} elseif (boot_cpu_has(X86_FEATURE_EPB)) { /* * Range: * 0x00-0x03 : Performance * 0x04-0x07 : Balance performance * 0x08-0x0B : Balance power * 0x0C-0x0F : Power * The EPB is a 4 bit value, but our ranges restrict the * value which can be set. Here only using top two bits * effectively.
*/
index = (epp >> 2) + 1;
}
return index;
}
/*
 * Write the given EPP value into the EPP field (bits 31:24) of the HWP
 * Request MSR for @cpu and keep the cached MSR value and epp_cached in sync.
 * Returns 0 on success or a negative error code from the MSR write.
 */
static int intel_pstate_set_epp(struct cpudata *cpu, u32 epp)
{
	int ret;

	/*
	 * Use the cached HWP Request MSR value, because in the active mode the
	 * register itself may be updated by intel_pstate_hwp_boost_up() or
	 * intel_pstate_hwp_boost_down() at any time.
	 */
	u64 value = READ_ONCE(cpu->hwp_req_cached);

	value &= ~GENMASK_ULL(31, 24);
	value |= (u64)epp << 24;
	/*
	 * The only other updater of hwp_req_cached in the active mode,
	 * intel_pstate_hwp_set(), is called under the same lock as this
	 * function, so it cannot run in parallel with the update below.
	 */
	WRITE_ONCE(cpu->hwp_req_cached, value);
	ret = wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
	if (!ret)
		cpu->epp_cached = epp;

	return ret;
}
/*
 * Apply an energy-performance preference selected by index (or a raw EPP
 * value when @use_raw is set) to @cpu_data, via EPP on HWP_EPP-capable CPUs
 * and via EPB otherwise. Returns 0 on success or a negative error code.
 */
static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
					      int pref_index, bool use_raw,
					      u32 raw_epp)
{
	int epp = -EINVAL;
	int ret;

	/* Index 0 means "default": start from the power-on default EPP. */
	if (!pref_index)
		epp = cpu_data->epp_default;

	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		if (use_raw)
			epp = raw_epp;
		else if (epp == -EINVAL)
			epp = epp_values[pref_index];

		/*
		 * To avoid confusion, refuse to set EPP to any values different
		 * from 0 (performance) if the current policy is "performance",
		 * because those values would be overridden.
		 */
		if (epp > 0 && cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
			return -EBUSY;

		ret = intel_pstate_set_epp(cpu_data, epp);
	} else {
		if (epp == -EINVAL)
			epp = (pref_index - 1) << 2;
		ret = intel_pstate_set_epb(cpu_data->cpu, epp);
	}

	return ret;
}
static ssize_t show_energy_performance_available_preferences( struct cpufreq_policy *policy, char *buf)
{ int i = 0; int ret = 0;
while (energy_perf_strings[i] != NULL)
ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);
ret = sscanf(buf, "%20s", str_preference); if (ret != 1) return -EINVAL;
ret = match_string(energy_perf_strings, -1, str_preference); if (ret < 0) { if (!boot_cpu_has(X86_FEATURE_HWP_EPP)) return ret;
ret = kstrtouint(buf, 10, &epp); if (ret) return ret;
if (epp > 255) return -EINVAL;
raw = true;
}
/* * This function runs with the policy R/W semaphore held, which * guarantees that the driver pointer will not change while it is * running.
*/ if (!intel_pstate_driver) return -EAGAIN;
mutex_lock(&intel_pstate_limits_lock);
if (intel_pstate_driver == &intel_pstate) {
ret = intel_pstate_set_energy_pref_index(cpu, ret, raw, epp);
} else { /* * In the passive mode the governor needs to be stopped on the * target CPU before the EPP update and restarted after it, * which is super-heavy-weight, so make sure it is worth doing * upfront.
*/ if (!raw)
epp = ret ? epp_values[ret] : cpu->epp_default;
if (cpu->epp_cached != epp) { int err;
cpufreq_stop_governor(policy);
ret = intel_pstate_set_epp(cpu, epp);
err = cpufreq_start_governor(policy); if (!ret)
ret = err;
} else {
ret = 0;
}
}
staticstruct cpudata *hybrid_max_perf_cpu __read_mostly; /* * Protects hybrid_max_perf_cpu, the capacity_perf fields in struct cpudata, * and the x86 arch scale-invariance information from concurrent updates.
*/ static DEFINE_MUTEX(hybrid_capacity_lock);
staticint hybrid_active_power(struct device *dev, unsignedlong *power, unsignedlong *freq)
{ /* * Create "utilization bins" of 0-40%, 40%-60%, 60%-80%, and 80%-100% * of the maximum capacity such that two CPUs of the same type will be * regarded as equally attractive if the utilization of each of them * falls into the same bin, which should prevent tasks from being * migrated between them too often. * * For this purpose, return the "frequency" of 2 for the first * performance level and otherwise leave the value set by the caller.
*/ if (!*freq)
*freq = 2;
/* No power information. */
*power = EM_MAX_POWER;
/* * The smaller the perf-to-frequency scaling factor, the larger the IPC * ratio between the given CPU and the least capable CPU in the system. * Regard that IPC ratio as the primary cost component and assume that * the scaling factors for different CPU types will differ by at least * 5% and they will not be above INTEL_PSTATE_CORE_SCALING. * * Add the freq value to the cost, so that the cost of running on CPUs * of the same type in different "utilization bins" is different.
*/
*cost = div_u64(100ULL * INTEL_PSTATE_CORE_SCALING, pstate->scaling) + freq; /* * Increase the cost slightly for CPUs able to access L3 to avoid * touching it in case some other CPUs of the same type can do the work * without it.
*/ if (cacheinfo) { unsignedint i;
/* Check if L3 cache is there. */ for (i = 0; i < cacheinfo->num_leaves; i++) { if (cacheinfo->info_list[i].level == 3) {
*cost += 2; break;
}
}
}
/* * Registering EM perf domains without enabling asymmetric CPU capacity * support is not really useful and one domain should not be registered * more than once.
*/ if (!hybrid_max_perf_cpu || cpudata->pd_registered) returnfalse;
cpu_dev = get_cpu_device(cpu); if (!cpu_dev) returnfalse;
if (em_dev_register_pd_no_update(cpu_dev, HYBRID_EM_STATE_COUNT, &cb,
cpumask_of(cpu), false)) returnfalse;
/* * During initialization, CPU performance at full capacity needs * to be determined.
*/ if (!hybrid_max_perf_cpu)
hybrid_get_capacity_perf(cpu);
/* * If hybrid_max_perf_cpu is not NULL at this point, it is * being replaced, so don't take it into account when looking * for the new one.
*/ if (cpu == hybrid_max_perf_cpu) continue;
if (max_perf_cpu) {
hybrid_max_perf_cpu = max_perf_cpu;
hybrid_set_capacity_of_cpus();
} else {
pr_info("Found no CPUs with nonzero maximum performance\n"); /* Revert to the flat CPU capacity structure. */
for_each_online_cpu(cpunum)
hybrid_clear_cpu_capacity(cpunum);
}
}
__hybrid_refresh_cpu_capacity_scaling(); /* * Perf domains are not registered before setting hybrid_max_perf_cpu, * so register them all after setting up CPU capacity scaling.
*/
hybrid_register_all_perf_domains();
}
/*
 * Set up (or, when @refresh is set, re-evaluate) hybrid CPU capacity scaling.
 */
static void hybrid_init_cpu_capacity_scaling(bool refresh)
{
	/* Bail out if enabling capacity-aware scheduling is prohibited. */
	if (no_cas)
		return;

	/*
	 * If hybrid_max_perf_cpu is set at this point, the hybrid CPU capacity
	 * scaling has been enabled already and the driver is just changing the
	 * operation mode.
	 */
	if (refresh) {
		hybrid_refresh_cpu_capacity_scaling();
		return;
	}

	/*
	 * On hybrid systems, use asym capacity instead of ITMT, but because
	 * the capacity of SMT threads is not deterministic even approximately,
	 * do not do that when SMT is in use.
	 */
	if (hwp_is_hybrid && !sched_smt_active() &&
	    arch_enable_hybrid_capacity_scale()) {
		hybrid_refresh_cpu_capacity_scaling();
		/*
		 * Disabling ITMT causes sched domains to be rebuilt to disable
		 * asym packing and enable asym capacity and EAS.
		 */
		sched_clear_itmt_support();
	}
}
/* * The maximum performance of the CPU may have changed, but assume * that the performance of the other CPUs has not changed.
*/
max_cap_perf = hybrid_max_perf_cpu->capacity_perf;
intel_pstate_get_hwp_cap(cpu);
hybrid_get_capacity_perf(cpu); /* Should hybrid_max_perf_cpu be replaced by this CPU? */ if (cpu->capacity_perf > max_cap_perf) {
hybrid_max_perf_cpu = cpu;
hybrid_set_capacity_of_cpus(); goto unlock;
}
/* If this CPU is hybrid_max_perf_cpu, should it be replaced? */ if (cpu == hybrid_max_perf_cpu && cpu->capacity_perf < max_cap_perf) {
hybrid_update_cpu_capacity_scaling(); goto unlock;
}
hybrid_set_cpu_capacity(cpu); /* * If the CPU was offline to start with and it is going online for the * first time, a perf domain needs to be registered for it if hybrid * capacity scaling has been enabled already. In that case, sched * domains need to be rebuilt to take the new perf domain into account.
*/ if (hybrid_register_perf_domain(cpu->cpu))
em_rebuild_sched_domains();
max = cpu_data->max_perf_ratio;
min = cpu_data->min_perf_ratio;
if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
min = max;
rdmsrq_on_cpu(cpu, MSR_HWP_REQUEST, &value);
value &= ~HWP_MIN_PERF(~0L);
value |= HWP_MIN_PERF(min);
value &= ~HWP_MAX_PERF(~0L);
value |= HWP_MAX_PERF(max);
if (cpu_data->epp_policy == cpu_data->policy) goto skip_epp;
cpu_data->epp_policy = cpu_data->policy;
if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
epp = intel_pstate_get_epp(cpu_data, value);
cpu_data->epp_powersave = epp; /* If EPP read was failed, then don't try to write */ if (epp < 0) goto skip_epp;
epp = 0;
} else { /* skip setting EPP, when saved value is invalid */ if (cpu_data->epp_powersave < 0) goto skip_epp;
/* * No need to restore EPP when it is not zero. This * means: * - Policy is not changed * - user has manually changed * - Error reading EPB
*/
epp = intel_pstate_get_epp(cpu_data, value); if (epp) goto skip_epp;
epp = cpu_data->epp_powersave;
} if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
value &= ~GENMASK_ULL(31, 24);
value |= (u64)epp << 24;
} else {
intel_pstate_set_epb(cpu, epp);
}
skip_epp:
WRITE_ONCE(cpu_data->hwp_req_cached, value);
wrmsrq_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
/*
 * Prepare @cpu for going offline: disable HWP interrupts, restore the cached
 * EPP into the cached HWP request, force the HWP request MSR to the lowest
 * performance level, and drop the CPU from hybrid capacity scaling.
 */
static void intel_pstate_hwp_offline(struct cpudata *cpu)
{
	u64 value = READ_ONCE(cpu->hwp_req_cached);
	int min_perf;

	intel_pstate_disable_hwp_interrupt(cpu);

	if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
		/*
		 * In case the EPP has been set to "performance" by the
		 * active mode "performance" scaling algorithm, replace that
		 * temporary value with the cached EPP one.
		 */
		value &= ~GENMASK_ULL(31, 24);
		value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached);
		/*
		 * However, make sure that EPP will be set to "performance" when
		 * the CPU is brought back online again and the "performance"
		 * scaling algorithm is still in effect.
		 */
		cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN;
	}

	/*
	 * Clear the desired perf field in the cached HWP request value to
	 * prevent nonzero desired values from being leaked into the active
	 * mode.
	 */
	value &= ~HWP_DESIRED_PERF(~0L);
	WRITE_ONCE(cpu->hwp_req_cached, value);

	value &= ~GENMASK_ULL(31, 0);
	min_perf = HWP_LOWEST_PERF(READ_ONCE(cpu->hwp_cap_cached));

	/* Set hwp_max = hwp_min */
	value |= HWP_MAX_PERF(min_perf);
	value |= HWP_MIN_PERF(min_perf);

	/* Set EPP to min */
	if (boot_cpu_has(X86_FEATURE_HWP_EPP))
		value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);

	wrmsrq_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);

	mutex_lock(&hybrid_capacity_lock);

	if (!hybrid_max_perf_cpu) {
		mutex_unlock(&hybrid_capacity_lock);
		return;
	}

	/* The reference CPU is going away: pick a replacement. */
	if (hybrid_max_perf_cpu == cpu)
		hybrid_update_cpu_capacity_scaling();

	mutex_unlock(&hybrid_capacity_lock);

	/* Reset the capacity of the CPU going offline to the initial value. */
	hybrid_clear_cpu_capacity(cpu->cpu);
}
/* Only restore if the system default is changed */ if (power_ctl_ee_state == POWER_CTL_EE_ENABLE)
set_power_ctl_ee_state(true); elseif (power_ctl_ee_state == POWER_CTL_EE_DISABLE)
set_power_ctl_ee_state(false);
if (cpu->suspended && hwp_active) {
mutex_lock(&intel_pstate_limits_lock);
/* Re-enable HWP, because "online" has not done that. */
intel_pstate_hwp_reenable(cpu);
mutex_unlock(&intel_pstate_limits_lock);
}
cpu->suspended = false;
return 0;
}
staticvoid intel_pstate_update_policies(void)
{ int cpu;
/* * If per cpu limits are enforced there are no global limits, so * return without creating max/min_perf_pct attributes
*/ if (per_cpu_limits) return;
if (intel_pstate_update_max_freq(cpudata)) { /* * The driver will not be unregistered while this function is * running, so update the capacity without acquiring the driver * lock.
*/
hybrid_update_capacity(cpudata);
}
if (cpu_feature_enabled(X86_FEATURE_HWP_HIGHEST_PERF_CHANGE))
interrupt_mask |= HWP_HIGHEST_PERF_CHANGE_REQ;
/* wrmsrq_on_cpu has to be outside spinlock as this can result in IPC */
wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, interrupt_mask);
wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
}
}
/* * If the EPP is set by firmware, which means that firmware enabled HWP * - Is equal or less than 0x80 (default balance_perf EPP) * - But less performance oriented than performance EPP * then use this as new balance_perf EPP.
*/ if (hwp_forced && cpudata->epp_default <= HWP_EPP_BALANCE_PERFORMANCE &&
cpudata->epp_default > HWP_EPP_PERFORMANCE) {
epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = cpudata->epp_default; return;
}
/* * If this CPU gen doesn't call for change in balance_perf * EPP return.
*/ if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) return;
/* * Use hard coded value per gen to update the balance_perf * and default EPP.
*/
cpudata->epp_default = epp_values[EPP_INDEX_BALANCE_PERFORMANCE];
intel_pstate_set_epp(cpudata, cpudata->epp_default);
}
staticvoid intel_pstate_hwp_enable(struct cpudata *cpudata)
{ /* First disable HWP notification interrupt till we activate again */ if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
wrmsrq_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
/* * Return the hybrid scaling factor for P-cores and use the * default core scaling for E-cores.
*/ if (cpu_type == INTEL_CPU_TYPE_CORE) return hybrid_scaling_factor;
if (cpu_type == INTEL_CPU_TYPE_ATOM) return core_get_scaling();
}
/* Use core scaling on non-hybrid systems. */ if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) return core_get_scaling();
/* * The system is hybrid, but the hybrid scaling factor is not known or * the CPU type is not one of the above, so use CPPC to compute the * scaling factor for this CPU.
*/ return intel_pstate_cppc_get_scaling(cpu);
}
/*
 * Request P-state @pstate for @cpu: record it, emit the frequency tracepoint
 * and write the PERF_CTL MSR on the target CPU.
 */
static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
{
	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
	cpu->pstate.current_pstate = pstate;
	/*
	 * Generally, there is no guarantee that this code will always run on
	 * the CPU being updated, so force the register update to run on the
	 * right CPU.
	 */
	wrmsrq_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
		      pstate_funcs.get_val(cpu, pstate));
}
if (hwp_active && !hwp_mode_bdw) {
__intel_pstate_get_hwp_cap(cpu);
if (pstate_funcs.get_cpu_scaling) {
cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu); if (cpu->pstate.scaling != perf_ctl_scaling) {
intel_pstate_hybrid_hwp_adjust(cpu);
hwp_is_hybrid = true;
}
} else {
cpu->pstate.scaling = perf_ctl_scaling;
} /* * If the CPU is going online for the first time and it was * offline initially, asym capacity scaling needs to be updated.
*/
hybrid_update_capacity(cpu);
} else {
cpu->pstate.scaling = perf_ctl_scaling;
cpu->pstate.max_pstate = pstate_funcs.get_max(cpu->cpu);
cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(cpu->cpu);
}
if (pstate_funcs.get_aperf_mperf_shift)
cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
if (pstate_funcs.get_vid)
pstate_funcs.get_vid(cpu);
intel_pstate_set_min_pstate(cpu);
}
/*
 * Long hold time will keep high perf limits for long time,
 * which negatively impacts perf/watt for some workloads,
 * like specpower. 3ms is based on experiments on some
 * workloads.
 */
static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
/* * Cases to consider (User changes via sysfs or boot time): * If, P0 (Turbo max) = P1 (Guaranteed max) = min: * No boost, return. * If, P0 (Turbo max) > P1 (Guaranteed max) = min: * Should result in one level boost only for P0. * If, P0 (Turbo max) = P1 (Guaranteed max) > min: * Should result in two level boost: * (min + p1)/2 and P1. * If, P0 (Turbo max) > P1 (Guaranteed max) > min: * Should result in three level boost: * (min + p1)/2, P1 and P0.
*/
/* If max and min are equal or already at max, nothing to boost */ if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit) return;
if (!cpu->hwp_boost_min)
cpu->hwp_boost_min = min_limit;
/* level at half way mark between min and guranteed */
boost_level1 = (HWP_GUARANTEED_PERF(hwp_cap) + min_limit) >> 1;
/*
 * Drop an active HWP min-performance boost once the CPU has been idle for at
 * least hwp_boost_hold_time_ns, restoring the cached HWP request value.
 */
static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
{
	if (cpu->hwp_boost_min) {
		bool expired;

		/* Check if we are idle for hold time to boost down */
		expired = time_after64(cpu->sample.time, cpu->last_update +
				       hwp_boost_hold_time_ns);
		if (expired) {
			wrmsrq(MSR_HWP_REQUEST, cpu->hwp_req_cached);
			cpu->hwp_boost_min = 0;
		}
	}
	cpu->last_update = cpu->sample.time;
}
if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) { bool do_io = false;
cpu->sched_flags = 0; /* * Set iowait_boost flag and update time. Since IO WAIT flag * is set all the time, we can't just conclude that there is * some IO bound activity is scheduled on this CPU with just * one occurrence. If we receive at least two in two * consecutive ticks, then we treat as boost candidate.
*/ if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
do_io = true;
/*
 * NOTE(review): extraction residue, not part of the driver — translated from
 * German: "The information on this web page was compiled carefully to the
 * best of our knowledge. However, neither completeness, correctness, nor
 * quality of the provided information is guaranteed. Note: the colored
 * syntax highlighting and the measurement are still experimental."
 * This boilerplate should be removed from the source file.
 */