// SPDX-License-Identifier: GPL-2.0-only
/*
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
 *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
 * (since the logging to syslog is rate limited).
 *
 * Author: Dmitriy Zavin (dmitriyz@google.com)
 *
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
*/ #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/percpu.h> #include <linux/export.h> #include <linux/types.h> #include <linux/init.h> #include <linux/smp.h> #include <linux/cpu.h>
/**
 * struct _thermal_state - Represent the current thermal event state
 * @next_check:			Stores the next timestamp, when it is allowed
 *				to log the next warning message.
 * @last_interrupt_time:	Stores the timestamp for the last threshold
 *				high event.
 * @therm_work:			Delayed workqueue structure
 * @count:			Stores the current running count for thermal
 *				or power threshold interrupts.
 * @last_count:			Stores the previous running count for thermal
 *				or power threshold interrupts.
 * @max_time_ms:		This shows the maximum amount of time CPU was
 *				in throttled state for a single thermal
 *				threshold high to low state.
 * @total_time_ms:		This is a cumulative time during which CPU was
 *				in the throttled state.
 * @rate_control_active:	Set when a throttling message is logged.
 *				This is used for the purpose of rate-control.
 * @new_event:			Stores the last high/low status of the
 *				THERM_STATUS_PROCHOT or
 *				THERM_STATUS_POWER_LIMIT.
 * @level:			Stores whether this _thermal_state instance is
 *				for a CORE level or for PACKAGE level.
 * @sample_index:		Index for storing the next sample in the buffer
 *				temp_samples[].
 * @sample_count:		Total number of samples collected in the buffer
 *				temp_samples[].
 * @average:			The last moving average of temperature samples
 * @baseline_temp:		Temperature at which thermal threshold high
 *				interrupt was generated.
 * @temp_samples:		Storage for temperature samples to calculate
 *				moving average.
 *
 * This structure is used to represent data related to thermal state for a CPU.
 * There is a separate storage for core and package level for each CPU.
 */
struct _thermal_state {
	u64			next_check;
	u64			last_interrupt_time;
	struct delayed_work	therm_work;
	unsigned long		count;
	unsigned long		last_count;
	unsigned long		max_time_ms;
	unsigned long		total_time_ms;
	bool			rate_control_active;
	bool			new_event;
	u8			level;
	u8			sample_index;
	u8			sample_count;
	u8			average;
	u8			baseline_temp;
	u8			temp_samples[3];
};
/*
 * Optional callback to handle core threshold interrupts.
 * Receives a 64-bit MSR value as @msr_val.
 * NOTE(review): no call site is visible in this part of the file; presumably
 * left NULL until a platform driver installs a handler - confirm.
 */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);
/*
 * Optional callback to handle package threshold interrupts.
 * Receives a 64-bit MSR value as @msr_val.
 */
int (*platform_thermal_package_notify)(__u64 msr_val);
EXPORT_SYMBOL_GPL(platform_thermal_package_notify);
/*
 * Optional callback reporting whether the platform callback implements its
 * own rate control; returns true if it does.
 */
bool (*platform_thermal_package_rate_control)(void);
EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);
/* * Bit 1, 3, 5: CPUID.01H:EDX[22] = 1. This driver will not * enable interrupts, when 0 as it checks for X86_FEATURE_ACPI.
*/
therm_intr_core_clear_mask = (BIT(1) | BIT(3) | BIT(5));
/* * Bit 7 and 9: Thermal Threshold #1 and #2 log * If CPUID.01H:ECX[8] = 1
*/ if (boot_cpu_has(X86_FEATURE_TM2))
therm_intr_core_clear_mask |= (BIT(7) | BIT(9));
/* Bit 11: Power Limitation log (R/WC0) If CPUID.06H:EAX[4] = 1 */ if (boot_cpu_has(X86_FEATURE_PLN))
therm_intr_core_clear_mask |= BIT(11);
/* * Bit 13: Current Limit log (R/WC0) If CPUID.06H:EAX[7] = 1 * Bit 15: Cross Domain Limit log (R/WC0) If CPUID.06H:EAX[7] = 1
*/ if (boot_cpu_has(X86_FEATURE_HWP))
therm_intr_core_clear_mask |= (BIT(13) | BIT(15));
}
/*
 * Initialise therm_intr_pkg_clear_mask from the boot CPU's feature flags.
 *
 * A non-zero mask is treated as "already initialised" and left untouched.
 * If neither feature below is present the mask stays zero, so the feature
 * checks are simply repeated on the next call.
 */
static void thermal_intr_init_pkg_clear_mask(void)
{
	/* Mask was already built by an earlier call. */
	if (therm_intr_pkg_clear_mask)
		return;

	/* All bits except BIT 26 depend on CPUID.06H: EAX[6] = 1 */
	if (boot_cpu_has(X86_FEATURE_PTS))
		therm_intr_pkg_clear_mask = (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11));

	/*
	 * Intel SDM Volume 2A: Thermal and Power Management Leaf
	 * Bit 26: CPUID.06H: EAX[19] = 1
	 */
	if (boot_cpu_has(X86_FEATURE_HFI))
		therm_intr_pkg_clear_mask |= BIT(26);
}
/* * Clear the bits in package thermal status register for bit = 1 * in bitmask
*/ void thermal_clear_package_intr_status(int level, u64 bit_mask)
{
u64 msr_val; int msr;
get_therm_status(state->level, &hot, &temp); /* temperature value is offset from the max so lesser means hotter */ if (!hot && temp > state->baseline_temp) { if (state->rate_control_active)
pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n",
this_cpu,
state->level == CORE_LEVEL ? "Core" : "Package",
state->count);
state->rate_control_active = false; return;
}
if (time_before64(now, state->next_check) &&
state->rate_control_active) goto re_arm;
state->next_check = now + CHECK_INTERVAL;
if (state->count != state->last_count) { /* There was one new thermal interrupt */
state->last_count = state->count;
state->average = 0;
state->sample_count = 0;
state->sample_index = 0;
}
/**
 * therm_throt_process - Process thermal throttling event from interrupt
 * @new_event: Whether the condition is current or not (boolean), since the
 *             thermal interrupt normally gets called both when the thermal
 *             event begins and once the event has ended.
 * @event:     Kind of event being reported: THERMAL_THROTTLING_EVENT or
 *             POWER_LIMIT_EVENT. Any other value is ignored.
 * @level:     Scope of the event: CORE_LEVEL or PACKAGE_LEVEL. Any other
 *             value is ignored.
 *
 * This function is called by the thermal interrupt after the
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
*/ staticvoid therm_throt_process(bool new_event, int event, int level)
{ struct _thermal_state *state; unsignedint this_cpu = smp_processor_id(); bool old_event;
u64 now; struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
now = get_jiffies_64(); if (level == CORE_LEVEL) { if (event == THERMAL_THROTTLING_EVENT)
state = &pstate->core_throttle; elseif (event == POWER_LIMIT_EVENT)
state = &pstate->core_power_limit; else return;
} elseif (level == PACKAGE_LEVEL) { if (event == THERMAL_THROTTLING_EVENT)
state = &pstate->package_throttle; elseif (event == POWER_LIMIT_EVENT)
state = &pstate->package_power_limit; else return;
} else return;
if (new_event && !state->last_interrupt_time) { bool hot;
u8 temp;
get_therm_status(state->level, &hot, &temp); /* * Ignore short temperature spike as the system is not close * to PROCHOT. 10C offset is large enough to ignore. It is * already dropped from the high threshold temperature.
*/ if (temp > 10) return;
/* Get notified when a cpu comes on/off. Be hotplug friendly. */ staticint thermal_throttle_online(unsignedint cpu)
{ struct thermal_state *state = &per_cpu(thermal_state, cpu); struct device *dev = get_cpu_device(cpu);
u32 l;
/* * The first CPU coming online will enable the HFI. Usually this causes * hardware to issue an HFI thermal interrupt. Such interrupt will reach * the CPU once we enable the thermal vector in the local APIC.
*/
intel_hfi_online(cpu);
/* Unmask the thermal vector after the above workqueues are initialized. */
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
if (platform_thermal_package_rate_control &&
platform_thermal_package_rate_control()) { /* Rate control is implemented in callback */
platform_thermal_package_notify(msr_val); return;
}
/*
 * Snapshot the thermal LVT entry of the boot CPU.
 *
 * This function is only called on the boot CPU. The value read here is kept
 * in lvtthmr_init so that the entry programmed by the BIOS can later be
 * restored on the APs.
 */
void __init therm_lvt_init(void)
{
	/* Nothing to record when thermal monitoring is not supported. */
	if (!intel_thermal_supported(&boot_cpu_data))
		return;

	lvtthmr_init = apic_read(APIC_LVTTHMR);
}
void intel_init_thermal(struct cpuinfo_x86 *c)
{ unsignedint cpu = smp_processor_id(); int tm2 = 0;
u32 l, h;
if (!intel_thermal_supported(c)) return;
/* * First check if its enabled already, in which case there might * be some SMM goo which handles it, so we can't even put a handler * since it might be delivered via SMI already:
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
h = lvtthmr_init; /* * The initial value of thermal LVT entries on all APs always reads * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI * sequence to them and LVT registers are reset to 0s except for * the mask bits which are set to 1s when APs receive INIT IPI. * If BIOS takes over the thermal interrupt and sets its interrupt * delivery mode to SMI (not fixed), it restores the value that the * BIOS has programmed on AP based on BSP's info we saved since BIOS * is always setting the same value for all threads/cores.
*/ if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
apic_write(APIC_LVTTHMR, lvtthmr_init);
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { if (system_state == SYSTEM_BOOTING)
pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu); return;
}
/* early Pentium M models use different method for enabling TM2 */ if (cpu_has(c, X86_FEATURE_TM2)) { if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
rdmsr(MSR_THERM2_CTL, l, h); if (l & MSR_THERM2_CTL_TM_SELECT)
tm2 = 1;
} elseif (l & MSR_IA32_MISC_ENABLE_TM2)
tm2 = 1;
}
/* We'll mask the thermal vector in the lapic till we're ready: */
h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
apic_write(APIC_LVTTHMR, h);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.