int smpcfd_dying_cpu(unsigned int cpu)
{
	/*
	 * The IPIs for the smp-call-function callbacks queued by other
	 * CPUs might arrive late, either due to hardware latencies or
	 * because this CPU disabled interrupts (inside stop-machine)
	 * before the IPIs were sent. So flush out any pending callbacks
	 * explicitly (without waiting for the IPIs to arrive), to ensure
	 * that the outgoing CPU doesn't go offline with work still
	 * pending.
	 *
	 * This runs with interrupts disabled inside the stopper task
	 * invoked by stop_machine(), ensuring mutually exclusive CPU
	 * offlining and IPI flush.
	 */
	__flush_smp_call_function_queue(false);
	irq_work_run();
	return 0;
}
/*
 * Parse the csdlock_debug= kernel boot parameter.
 *
 * If you need to restore the old "ext" value that once provided
 * additional debugging information, reapply the following commits:
 *
 * de7b09ef658d ("locking/csd_lock: Prepare more CSD lock debugging")
 * a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging")
 */
static int __init csdlock_debug(char *str)
{
	int ret;
	unsigned int val = 0;

	ret = get_option(&str, &val);
	if (ret) {
		if (val)
			static_branch_enable(&csdlock_debug_enabled);
		else
			static_branch_disable(&csdlock_debug_enabled);
	}

	return 1;
}
__setup("csdlock_debug=", csdlock_debug);
static ulong csd_lock_timeout = 5000;	/* CSD lock timeout in milliseconds. */
module_param(csd_lock_timeout, ulong, 0644);
static int panic_on_ipistall;	/* CSD panic timeout in milliseconds, 300000 for five minutes. */
module_param(panic_on_ipistall, int, 0644);
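/*
 * Illustrative note (the sysfs path is an assumption based on this file's
 * KBUILD_MODNAME): since both parameters above use mode 0644, they can
 * typically be tuned at runtime, or set at boot with the "smp." prefix:
 *
 *	echo 10000 > /sys/module/smp/parameters/csd_lock_timeout
 *	smp.panic_on_ipistall=300000	(on the kernel command line)
 */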
static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
static DEFINE_PER_CPU(void *, cur_csd_info);
static DEFINE_PER_CPU(atomic_t, trigger_backtrace) = ATOMIC_INIT(1);
static atomic_t csd_bug_count = ATOMIC_INIT(0);
/* Record current CSD work for current CPU, NULL to erase. */
static void __csd_lock_record(call_single_data_t *csd)
{
	if (!csd) {
		smp_mb(); /* NULL cur_csd after unlock. */
		__this_cpu_write(cur_csd, NULL);
		return;
	}
	__this_cpu_write(cur_csd_func, csd->func);
	__this_cpu_write(cur_csd_info, csd->info);
	smp_wmb(); /* func and info before csd. */
	__this_cpu_write(cur_csd, csd);
	smp_mb(); /* Update cur_csd before function call. */
		  /* Or before unlock, as the case may be. */
}
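/*
 * Illustrative sketch (hypothetical helper, not part of the original file):
 * the reader side that pairs with the barriers above must load cur_csd with
 * acquire semantics before examining cur_csd_func or cur_csd_info, exactly
 * as csd_lock_wait_toolong() below does via smp_load_acquire().
 */
static smp_call_func_t example_peek_cur_csd_func(int cpu)
{
	/* Pairs with the smp_wmb()/smp_mb() in __csd_lock_record(). */
	if (!smp_load_acquire(&per_cpu(cur_csd, cpu)))
		return NULL;
	return READ_ONCE(per_cpu(cur_csd_func, cpu));
}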
static __always_inline void csd_lock_record(call_single_data_t *csd)
{
	if (static_branch_unlikely(&csdlock_debug_enabled))
		__csd_lock_record(csd);
}
static int csd_lock_wait_getcpu(call_single_data_t *csd)
{
	unsigned int csd_type;

	csd_type = CSD_TYPE(csd);
	if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
		return csd->node.dst; /* Other CSD_TYPE_ values might not have ->dst. */
	return -1;
}
static atomic_t n_csd_lock_stuck;
/**
 * csd_lock_is_stuck - Has a CSD-lock acquisition been stuck too long?
 *
 * Returns @true if a CSD-lock acquisition is stuck and has been stuck
 * long enough for a "non-responsive CSD lock" message to be printed.
 */
bool csd_lock_is_stuck(void)
{
	return !!atomic_read(&n_csd_lock_stuck);
}
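/*
 * Illustrative sketch (hypothetical helper, not part of the original file):
 * other stall diagnostics can consult csd_lock_is_stuck() to avoid piling
 * their own warnings on top of an in-progress CSD-lock report.
 */
static bool example_should_emit_other_warning(void)
{
	/* Stay quiet while a CSD-lock stall is already being reported. */
	return !csd_lock_is_stuck();
}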
/*
 * Complain if too much time spent waiting. Note that only
 * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
 * so waiting on other types gets much less information.
 */
static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1,
				  int *bug_id, unsigned long *nmessages)
{
	int cpu = -1;
	int cpux;
	bool firsttime;
	u64 ts2, ts_delta;
	call_single_data_t *cpu_cur_csd;
	unsigned int flags = READ_ONCE(csd->node.u_flags);
	unsigned long long csd_lock_timeout_ns = csd_lock_timeout * NSEC_PER_MSEC;

	if (!(flags & CSD_FLAG_LOCK)) {
		if (!unlikely(*bug_id))
			return true;
		cpu = csd_lock_wait_getcpu(csd);
		pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
			 *bug_id, raw_smp_processor_id(), cpu);
		atomic_dec(&n_csd_lock_stuck);
		return true;
	}

	ts2 = ktime_get_mono_fast_ns();
	/* How long since we last checked for a stuck CSD lock. */
	ts_delta = ts2 - *ts1;
	if (likely(ts_delta <= csd_lock_timeout_ns * (*nmessages + 1) *
			       (!*nmessages ? 1 : (ilog2(num_online_cpus()) / 2 + 1)) ||
		   csd_lock_timeout_ns == 0))
		return false;

	if (ts0 > ts2) {
		/* Our own sched_clock went backward; don't blame another CPU. */
		ts_delta = ts0 - ts2;
		pr_alert("sched_clock on CPU %d went backward by %llu ns\n",
			 raw_smp_processor_id(), ts_delta);
		*ts1 = ts2;
		return false;
	}

	firsttime = !*bug_id;
	if (firsttime)
		*bug_id = atomic_inc_return(&csd_bug_count);
	cpu = csd_lock_wait_getcpu(csd);
	if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
		cpux = 0;
	else
		cpux = cpu;
	cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
	/* How long since this CSD lock was stuck. */
	ts_delta = ts2 - ts0;
	pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %lld ns for CPU#%02d %pS(%ps).\n",
		 firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), (s64)ts_delta,
		 cpu, csd->func, csd->info);
	(*nmessages)++;
	if (firsttime)
		atomic_inc(&n_csd_lock_stuck);
	/*
	 * If the CSD lock is still stuck after 5 minutes, it is unlikely
	 * to become unstuck. Use a signed comparison to avoid triggering
	 * on underflows when the TSC is out of sync between sockets.
	 */
	BUG_ON(panic_on_ipistall > 0 && (s64)ts_delta > ((s64)panic_on_ipistall * NSEC_PER_MSEC));
	if (cpu_cur_csd && csd != cpu_cur_csd) {
		pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
			 *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
			 READ_ONCE(per_cpu(cur_csd_info, cpux)));
	} else {
		pr_alert("\tcsd: CSD lock (#%d) %s.\n",
			 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
	}
	if (cpu >= 0) {
		if (atomic_cmpxchg_acquire(&per_cpu(trigger_backtrace, cpu), 1, 0))
			dump_cpu_task(cpu);
		if (!cpu_cur_csd) {
			pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n",
				 *bug_id, raw_smp_processor_id(), cpu);
			arch_send_call_function_single_ipi(cpu);
		}
	}
	if (firsttime)
		dump_stack();
	*ts1 = ts2;

	return false;
}
/*
 * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
 *
 * For non-synchronous ipi calls the csd can still be in use by the
 * previous function call. For multi-cpu calls it's even more interesting
 * as we'll have to ensure no other cpu is observing our csd.
 */
static void __csd_lock_wait(call_single_data_t *csd)
{
	unsigned long nmessages = 0;
	int bug_id = 0;
	u64 ts0, ts1;

	ts1 = ts0 = ktime_get_mono_fast_ns();
	for (;;) {
		if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id, &nmessages))
			break;
		cpu_relax();
	}
	smp_acquire__after_ctrl_dep();
}
static __always_inline void csd_lock_wait(call_single_data_t *csd)
{
	if (static_branch_unlikely(&csdlock_debug_enabled)) {
		__csd_lock_wait(csd);
		return;
	}

	smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
}

static __always_inline void csd_lock(call_single_data_t *csd)
{
	csd_lock_wait(csd);
	csd->node.u_flags |= CSD_FLAG_LOCK;

	/*
	 * prevent CPU from reordering the above assignment
	 * to ->flags with any subsequent assignments to other
	 * fields of the specified call_single_data_t structure:
	 */
	smp_wmb();
}
void __smp_call_single_queue(int cpu, struct llist_node *node)
{
	/*
	 * We have to check the type of the CSD before queueing it, because
	 * once queued it can have its flags cleared by
	 *   flush_smp_call_function_queue()
	 * even if we haven't sent the smp_call IPI yet (e.g. the stopper
	 * executes migration_cpu_stop() on the remote CPU).
	 */
	if (trace_csd_queue_cpu_enabled()) {
		call_single_data_t *csd;
		smp_call_func_t func;

		csd = container_of(node, call_single_data_t, node.llist);
		func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
			sched_ttwu_pending : csd->func;

		trace_csd_queue_cpu(cpu, _RET_IP_, func, csd);
	}

	/*
	 * The list addition should be visible to the target CPU when it pops
	 * the head of the list to pull the entry off it in the IPI handler
	 * because of normal cache coherency rules implied by the underlying
	 * llist ops.
	 *
	 * If IPIs can go out of order to the cache coherency protocol
	 * in an architecture, sufficient synchronisation should be added
	 * to arch code to make it appear to obey cache coherency WRT
	 * locking and barrier primitives. Generic code isn't really
	 * equipped to do the right thing...
	 */
	if (llist_add(node, &per_cpu(call_single_queue, cpu)))
		send_call_function_single_ipi(cpu);
}
/*
 * Insert a previously allocated call_single_data_t element
 * for execution on the given CPU. data must already have
 * ->func, ->info, and ->flags set.
 */
static int generic_exec_single(int cpu, call_single_data_t *csd)
{
	/*
	 * Preemption already disabled here so stopper cannot run on this CPU,
	 * ensuring mutually exclusive CPU offlining and last IPI flush.
	 */
	if (cpu == smp_processor_id()) {
		smp_call_func_t func = csd->func;
		void *info = csd->info;
		unsigned long flags;

		/*
		 * We can unlock early even for the synchronous on-stack case,
		 * since we're doing this from the same CPU..
		 */
		csd_lock_record(csd);
		csd_unlock(csd);
		local_irq_save(flags);
		csd_do_func(func, info, NULL);
		csd_lock_record(NULL);
		local_irq_restore(flags);
		return 0;
	}

	if ((unsigned int)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
		csd_unlock(csd);
		return -ENXIO;
	}

	__smp_call_single_queue(cpu, &csd->node.llist);

	return 0;
}
/**
 * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
 *
 * Invoked by arch to handle an IPI for call function single.
 * Must be called with interrupts disabled.
 */
void generic_smp_call_function_single_interrupt(void)
{
	__flush_smp_call_function_queue(true);
}
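/*
 * Illustrative sketch (hypothetical handler name, not part of the original
 * file): an architecture's call-function-single IPI vector typically just
 * forwards to the generic handler above, with hardware interrupts still
 * disabled; the exact entry/exit bookkeeping is arch-specific.
 */
static void example_arch_call_single_ipi_handler(void)
{
	irq_enter();
	generic_smp_call_function_single_interrupt();
	irq_exit();
}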
/**
 * __flush_smp_call_function_queue - Flush pending smp-call-function callbacks
 *
 * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
 *		      offline CPU. Skip this check if set to 'false'.
 *
 * Flush any pending smp-call-function callbacks queued on this CPU. This is
 * invoked by the generic IPI handler, as well as by a CPU about to go offline,
 * to ensure that all pending IPI callbacks are run before it goes completely
 * offline.
 *
 * Loop through the call_single_queue and run all the queued callbacks.
 * Must be called with interrupts disabled.
 */
static void __flush_smp_call_function_queue(bool warn_cpu_offline)
{
	call_single_data_t *csd, *csd_next;
	struct llist_node *entry, *prev;
	struct llist_head *head;
	static bool warned;
	atomic_t *tbt;

	lockdep_assert_irqs_disabled();

	/* Allow waiters to send backtrace NMI from here onwards */
	tbt = this_cpu_ptr(&trigger_backtrace);
	atomic_set_release(tbt, 1);

	head = this_cpu_ptr(&call_single_queue);
	entry = llist_del_all(head);
	entry = llist_reverse_order(entry);

	/* There shouldn't be any pending callbacks on an offline CPU. */
	if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
		     !warned && entry != NULL)) {
		warned = true;
		WARN(1, "IPI on offline CPU %d\n", smp_processor_id());

		/*
		 * We don't have to use the _safe() variant here
		 * because we are not invoking the IPI handlers yet.
		 */
		llist_for_each_entry(csd, entry, node.llist) {
			switch (CSD_TYPE(csd)) {
			case CSD_TYPE_ASYNC:
			case CSD_TYPE_SYNC:
			case CSD_TYPE_IRQ_WORK:
				pr_warn("IPI callback %pS sent to offline CPU\n",
					csd->func);
				break;

			case CSD_TYPE_TTWU:
				pr_warn("IPI task-wakeup sent to offline CPU\n");
				break;

			default:
				pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
					CSD_TYPE(csd));
				break;
			}
		}
	}
	/*
	 * First; run all SYNC callbacks, people are waiting for us.
	 */
	prev = NULL;
	llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
		/* Do we wait until *after* callback? */
		if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
			smp_call_func_t func = csd->func;
			void *info = csd->info;

			if (prev) {
				prev->next = &csd_next->node.llist;
			} else {
				entry = &csd_next->node.llist;
			}

			csd_lock_record(csd);
			csd_do_func(func, info, csd);
			csd_unlock(csd);
			csd_lock_record(NULL);
		} else {
			prev = &csd->node.llist;
		}
	}

	if (!entry)
		return;

	/*
	 * Second; run all !SYNC callbacks.
	 */
	prev = NULL;
	llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
		int type = CSD_TYPE(csd);

		if (type != CSD_TYPE_TTWU) {
			if (prev) {
				prev->next = &csd_next->node.llist;
			} else {
				entry = &csd_next->node.llist;
			}

			if (type == CSD_TYPE_ASYNC) {
				smp_call_func_t func = csd->func;
				void *info = csd->info;

				csd_lock_record(csd);
				csd_unlock(csd);
				csd_do_func(func, info, csd);
				csd_lock_record(NULL);
			} else if (type == CSD_TYPE_IRQ_WORK) {
				irq_work_single(csd);
			}
		} else {
			prev = &csd->node.llist;
		}
	}

	/*
	 * Third; only CSD_TYPE_TTWU is left, issue those.
	 */
	if (entry) {
		csd = llist_entry(entry, typeof(*csd), node.llist);
		csd_do_func(sched_ttwu_pending, entry, csd);
	}
}
/**
 * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
 *				   from task context (idle, migration thread)
 *
 * When TIF_POLLING_NRFLAG is supported and a CPU is in idle and has it
 * set, then remote CPUs can avoid sending IPIs and wake the idle CPU by
 * setting TIF_NEED_RESCHED. The idle task on the woken up CPU has to
 * handle queued SMP function calls before scheduling.
 *
 * The migration thread has to ensure that an eventually pending wakeup has
 * been handled before it migrates a task.
 */
void flush_smp_call_function_queue(void)
{
	unsigned int was_pending;
	unsigned long flags;

	if (llist_empty(this_cpu_ptr(&call_single_queue)))
		return;

	local_irq_save(flags);
	/* Get the already pending soft interrupts for RT enabled kernels */
	was_pending = local_softirq_pending();
	__flush_smp_call_function_queue(true);
	if (local_softirq_pending())
		do_softirq_post_smp_call_flush(was_pending);

	local_irq_restore(flags);
}
static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);

/*
 * smp_call_function_single - Run a function on a specific CPU
 * @cpu: The CPU to run @func on.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait until function has completed on other CPUs.
 *
 * Returns 0 on success, else a negative status code.
 */
int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
			     int wait)
{
	call_single_data_t *csd;
	call_single_data_t csd_stack = {
		.node = { .u_flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, },
	};
	int this_cpu;
	int err;

	/*
	 * Prevent preemption and reschedule on another CPU, as well as CPU
	 * removal. This prevents stopper from running on this CPU, thus
	 * providing mutual exclusion of the below cpu_online() check and
	 * IPI sending ensuring IPI are not missed by CPU going offline.
	 */
	this_cpu = get_cpu();

	/*
	 * Can deadlock when called with interrupts disabled.
	 * We allow cpu's that are not yet online though, as no one else can
	 * send smp call function interrupt to this cpu and as such deadlocks
	 * can't happen.
	 */
	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
		     && !oops_in_progress);

	/*
	 * When @wait we can deadlock when we interrupt between llist_add() and
	 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
	 * csd_lock() on because the interrupt context uses the same csd
	 * storage.
	 */
	WARN_ON_ONCE(!in_task());

	csd = &csd_stack;
	if (!wait) {
		csd = this_cpu_ptr(&csd_data);
		csd_lock(csd);
	}

	csd->func = func;
	csd->info = info;

	err = generic_exec_single(cpu, csd);

	if (wait)
		csd_lock_wait(csd);

	put_cpu();

	return err;
}
EXPORT_SYMBOL(smp_call_function_single);
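/*
 * Illustrative usage sketch (hypothetical helpers, not part of the original
 * file): run a fast, non-blocking callback on CPU 1 from task context and
 * wait for it to complete.
 */
static void example_report_cpu(void *info)
{
	pr_info("example: running on CPU %d\n", smp_processor_id());
}

static int example_poke_cpu1(void)
{
	/* Returns 0 on success, or e.g. -ENXIO if CPU 1 is offline. */
	return smp_call_function_single(1, example_report_cpu, NULL, 1);
}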
/**
 * smp_call_function_single_async() - Run an asynchronous function on a
 *				      specific CPU.
 * @cpu: The CPU to run on.
 * @csd: Pre-allocated and setup data structure
 *
 * Like smp_call_function_single(), but the call is asynchronous and
 * can thus be done from contexts with disabled interrupts.
 *
 * The caller passes its own pre-allocated data structure
 * (ie: embedded in an object) and is responsible for synchronizing it
 * such that the IPIs performed on the @csd are strictly serialized.
 *
 * If the function is called with one csd which has not yet been
 * processed by previous call to smp_call_function_single_async(), the
 * function will return immediately with -EBUSY showing that the csd
 * object is still in progress.
 *
 * NOTE: Be careful, there is unfortunately no current debugging facility to
 * validate the correctness of this serialization.
 *
 * Return: %0 on success or negative errno value on error
 */
int smp_call_function_single_async(int cpu, call_single_data_t *csd)
{
	int err = 0;

	preempt_disable();

	if (csd->node.u_flags & CSD_FLAG_LOCK) {
		err = -EBUSY;
		goto out;
	}

	csd->node.u_flags = CSD_FLAG_LOCK;
	smp_wmb();

	err = generic_exec_single(cpu, csd);

out:
	preempt_enable();

	return err;
}
EXPORT_SYMBOL_GPL(smp_call_function_single_async);
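/*
 * Illustrative sketch (hypothetical struct and helpers, not part of the
 * original file): an object embeds its own csd, initializes it once with
 * INIT_CSD(), and then kicks remote CPUs asynchronously. A -EBUSY return
 * means the previous request on this csd has not completed yet.
 */
struct example_counter {
	call_single_data_t	csd;
	atomic_t		hits;
};

static void example_count_cb(void *info)
{
	struct example_counter *ec = info;

	atomic_inc(&ec->hits);
}

static void example_counter_init(struct example_counter *ec)
{
	atomic_set(&ec->hits, 0);
	INIT_CSD(&ec->csd, example_count_cb, ec);	/* once, at setup */
}

static int example_counter_kick(struct example_counter *ec, int cpu)
{
	return smp_call_function_single_async(cpu, &ec->csd);
}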
/*
 * smp_call_function_any - Run a function on any of the given cpus
 * @mask: The mask of cpus it can run on.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait until function has completed.
 *
 * Returns 0 on success, else a negative status code (if no cpus were online).
 *
 * Selection preference:
 *	1) current cpu if in @mask
 *	2) nearest cpu in @mask, based on NUMA topology
 */
int smp_call_function_any(const struct cpumask *mask,
			  smp_call_func_t func, void *info, int wait)
{
	unsigned int cpu;
	int ret;

	/* Try for same CPU (cheapest) */
	cpu = get_cpu();
	if (!cpumask_test_cpu(cpu, mask))
		cpu = sched_numa_find_nth_cpu(mask, 0, cpu_to_node(cpu));

	ret = smp_call_function_single(cpu, func, info, wait);
	put_cpu();
	return ret;
}
EXPORT_SYMBOL_GPL(smp_call_function_any);
/*
 * Flags to be used as scf_flags argument of smp_call_function_many_cond().
 *
 * %SCF_WAIT:		Wait until function execution is completed
 * %SCF_RUN_LOCAL:	Run also locally if local cpu is set in cpumask
 */
#define SCF_WAIT	(1U << 0)
#define SCF_RUN_LOCAL	(1U << 1)

static void smp_call_function_many_cond(const struct cpumask *mask,
					smp_call_func_t func, void *info,
					unsigned int scf_flags,
					smp_cond_func_t cond_func)
{
	int cpu, last_cpu, this_cpu = smp_processor_id();
	struct call_function_data *cfd;
	bool wait = scf_flags & SCF_WAIT;
	bool run_remote = false;
	int nr_cpus = 0;

	lockdep_assert_preemption_disabled();
	/*
	 * Can deadlock when called with interrupts disabled.
	 * We allow cpu's that are not yet online though, as no one else can
	 * send smp call function interrupt to this cpu and as such deadlocks
	 * can't happen.
	 */
	if (cpu_online(this_cpu) && !oops_in_progress &&
	    !early_boot_irqs_disabled)
		lockdep_assert_irqs_enabled();

	/*
	 * When @wait we can deadlock when we interrupt between llist_add() and
	 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
	 * csd_lock() on because the interrupt context uses the same csd
	 * storage.
	 */
	WARN_ON_ONCE(!in_task());
	/* Check if we need remote execution, i.e., any CPU excluding this one. */
	if (cpumask_any_and_but(mask, cpu_online_mask, this_cpu) < nr_cpu_ids) {
		run_remote = true;
		cfd = this_cpu_ptr(&cfd_data);
		cpumask_and(cfd->cpumask, mask, cpu_online_mask);
		__cpumask_clear_cpu(this_cpu, cfd->cpumask);

		cpumask_clear(cfd->cpumask_ipi);
		for_each_cpu(cpu, cfd->cpumask) {
			call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);

			if (cond_func && !cond_func(cpu, info)) {
				__cpumask_clear_cpu(cpu, cfd->cpumask);
				continue;
			}

			csd_lock(csd);
			if (wait)
				csd->node.u_flags |= CSD_TYPE_SYNC;
			csd->func = func;
			csd->info = info;

			/*
			 * Kick the remote CPU if this is the first work
			 * item enqueued.
			 */
			if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
				__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
				nr_cpus++;
				last_cpu = cpu;
			}
		}
		/*
		 * Choose the most efficient way to send an IPI. Note that the
		 * number of CPUs might be zero due to concurrent changes to the
		 * provided mask.
		 */
		if (nr_cpus == 1)
			send_call_function_single_ipi(last_cpu);
		else if (likely(nr_cpus > 1))
			send_call_function_ipi_mask(cfd->cpumask_ipi);
	}
	/* Check if we need local execution. */
	if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask) &&
	    (!cond_func || cond_func(this_cpu, info))) {
		unsigned long flags;

		local_irq_save(flags);
		csd_do_func(func, info, NULL);
		local_irq_restore(flags);
	}

	if (run_remote && wait) {
		for_each_cpu(cpu, cfd->cpumask) {
			call_single_data_t *csd;

			csd = per_cpu_ptr(cfd->csd, cpu);
			csd_lock_wait(csd);
		}
	}
}
/**
 * smp_call_function_many(): Run a function on a set of CPUs.
 * @mask: The set of cpus to run on (only runs on online subset).
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed
 *        on other CPUs.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler. Preemption
 * must be disabled when calling this function.
 *
 * @func is not called on the local CPU even if @mask contains it. Consider
 * using on_each_cpu_cond_mask() instead if this is not desirable.
 */
void smp_call_function_many(const struct cpumask *mask,
			    smp_call_func_t func, void *info, bool wait)
{
	smp_call_function_many_cond(mask, func, info, wait * SCF_WAIT, NULL);
}
EXPORT_SYMBOL(smp_call_function_many);
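/*
 * Illustrative usage sketch (hypothetical helpers, not part of the original
 * file): run a fast, non-blocking callback on every other online CPU and
 * wait for completion, with preemption disabled as required above.
 */
static void example_flush_local_state(void *info)
{
	/* Fast, non-blocking per-CPU work only. */
}

static void example_flush_all_other_cpus(void)
{
	preempt_disable();
	smp_call_function_many(cpu_online_mask, example_flush_local_state,
			       NULL, true);
	preempt_enable();
}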
/**
 * smp_call_function(): Run a function on all other CPUs.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed
 *        on other CPUs.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
void smp_call_function(smp_call_func_t func, void *info, int wait)
{
	preempt_disable();
	smp_call_function_many(cpu_online_mask, func, info, wait);
	preempt_enable();
}
EXPORT_SYMBOL(smp_call_function);
/* Setup configured maximum number of CPUs to activate */
unsigned int setup_max_cpus = NR_CPUS;
EXPORT_SYMBOL(setup_max_cpus);

/*
 * Setup routine for controlling SMP activation
 *
 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
 * activation entirely (the MPS table probe still happens, though).
 *
 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
 * greater than 0, limits the maximum number of CPUs activated in
 * SMP mode to <NUM>.
 */
#if (NR_CPUS > 1) && !defined(CONFIG_FORCE_NR_CPUS)
/* Setup number of possible processor ids */
unsigned int nr_cpu_ids __read_mostly = NR_CPUS;
EXPORT_SYMBOL(nr_cpu_ids);
#endif

/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
void __init setup_nr_cpu_ids(void)
{
	set_nr_cpu_ids(find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + 1);
}
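/*
 * Worked example for setup_nr_cpu_ids() above (illustrative): if
 * cpu_possible_mask is 0b1111, i.e. CPUs 0-3 are possible, then
 * find_last_bit() returns 3 and nr_cpu_ids becomes 4.
 */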
/* Called by boot processor to activate the rest. */
void __init smp_init(void)
{
	int num_nodes, num_cpus;

	idle_threads_init();
	cpuhp_threads_init();

	pr_info("Bringing up secondary CPUs ...\n");

	bringup_nonboot_cpus(setup_max_cpus);

	num_nodes = num_online_nodes();
	num_cpus  = num_online_cpus();
	pr_info("Brought up %d node%s, %d CPU%s\n",
		num_nodes, (num_nodes > 1 ? "s" : ""),
		num_cpus,  (num_cpus  > 1 ? "s" : ""));

	/* Any cleanup work */
	smp_cpus_done(setup_max_cpus);
}
/*
 * on_each_cpu_cond(): Call a function on each processor for which
 * the supplied function cond_func returns true, optionally waiting
 * for all the required CPUs to finish. This may include the local
 * processor.
 * @cond_func:	A callback function that is passed a cpu id and
 *		the info parameter. The function is called
 *		with preemption disabled. The function should
 *		return a boolean value indicating whether to IPI
 *		the specified CPU.
 * @func:	The function to run on all applicable CPUs.
 *		This must be fast and non-blocking.
 * @info:	An arbitrary pointer to pass to both functions.
 * @wait:	If true, wait (atomically) until function has
 *		completed on other CPUs.
 *
 * Preemption is disabled to protect against CPUs going offline but not online.
 * CPUs going online during the call will not be seen or sent an IPI.
 *
 * You must not call this function with disabled interrupts or
 * from a hardware interrupt handler or from a bottom half handler.
 */
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
			   void *info, bool wait, const struct cpumask *mask)
{
	unsigned int scf_flags = SCF_RUN_LOCAL;

	if (wait)
		scf_flags |= SCF_WAIT;

	preempt_disable();
	smp_call_function_many_cond(mask, func, info, scf_flags, cond_func);
	preempt_enable();
}
EXPORT_SYMBOL(on_each_cpu_cond_mask);
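/*
 * Illustrative sketch (hypothetical helpers, not part of the original file):
 * IPI only the online CPUs whose id is even, run the callback locally too
 * if this CPU qualifies, and wait for completion. Reuses the hypothetical
 * example_report_cpu() callback defined earlier in this rewrite.
 */
static bool example_cpu_is_even(int cpu, void *info)
{
	return (cpu & 1) == 0;
}

static void example_poke_even_cpus(void)
{
	on_each_cpu_cond_mask(example_cpu_is_even, example_report_cpu, NULL,
			      true, cpu_online_mask);
}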
static void do_nothing(void *unused)
{
}

/**
 * kick_all_cpus_sync - Force all cpus out of idle
 *
 * Used to synchronize the update of pm_idle function pointer. It's
 * called after the pointer is updated and returns after the dummy
 * callback function has been executed on all cpus. The execution of
 * the function can only happen on the remote cpus after they have
 * left the idle function which had been called via pm_idle function
 * pointer. So it's guaranteed that nothing uses the previous pointer
 * anymore.
 */
void kick_all_cpus_sync(void)
{
	/* Make sure the change is visible before we kick the cpus */
	smp_mb();
	smp_call_function(do_nothing, NULL, 1);
}
EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
/**
 * wake_up_all_idle_cpus - break all cpus out of idle
 *
 * wake_up_all_idle_cpus tries to break all cpus which are in idle state,
 * including idle-polling cpus; for non-idle cpus, it does nothing.
 */
void wake_up_all_idle_cpus(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		preempt_disable();
		if (cpu != smp_processor_id() && cpu_online(cpu))
			wake_up_if_idle(cpu);
		preempt_enable();
	}
}
EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);
/**
 * struct smp_call_on_cpu_struct - Call a function on a specific CPU
 * @work: &work_struct
 * @done: &completion to signal
 * @func: function to call
 * @data: function's data argument
 * @ret: return value from @func
 * @cpu: target CPU (%-1 for any CPU)
 *
 * Used to call a function on a specific cpu and wait for it to return.
 * Optionally make sure the call is done on a specified physical cpu via vcpu
 * pinning in order to support virtualized environments.
 */
struct smp_call_on_cpu_struct {
	struct work_struct	work;
	struct completion	done;
	int			(*func)(void *);
	void			*data;
	int			ret;
	int			cpu;
};
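/*
 * Illustrative sketch (hypothetical helpers, not part of the original file):
 * the structure above backs smp_call_on_cpu(), which runs a sleepable
 * function on a given CPU via a workqueue, optionally pinned to the
 * physical CPU, and waits for its return value.
 */
static int example_read_sensor(void *data)
{
	int *out = data;

	*out = 42;	/* placeholder for a per-CPU hardware read */
	return 0;
}

static int example_read_on_cpu2(void)
{
	int val = 0;
	int ret;

	ret = smp_call_on_cpu(2, example_read_sensor, &val, false);
	return ret ? ret : val;
}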