/*
 * If there is no vector associated or if the associated vector is
 * the shutdown vector, which is associated to make PCI/MSI
 * shutdown mode work, then there is nothing to release. Clear out
 * prev_vector for this and the offlined target case.
 */
apicd->prev_vector = 0;
if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR)
        goto setnew;

/*
 * If the target CPU of the previous vector is online, then mark
 * the vector as move in progress and store it for cleanup when the
 * first interrupt on the new vector arrives. If the target CPU is
 * offline then the regular release mechanism via the cleanup
 * vector is not possible and the vector can be immediately freed
 * in the underlying matrix allocator.
 */
if (cpu_online(apicd->cpu)) {
        apicd->move_in_progress = true;
        apicd->prev_vector = apicd->vector;
        apicd->prev_cpu = apicd->cpu;
        WARN_ON_ONCE(apicd->cpu == newcpu);
} else {
        irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
                        managed);
}
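
/*
 * The prev_vector/prev_cpu bookkeeping set up above is consumed by
 * free_moved_vector() and the cleanup paths further down in this file
 * (__vector_cleanup(), apic_force_complete_move()), which release the
 * old vector in the matrix allocator once the first interrupt has
 * arrived on the new vector.
 */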
/* * If the current target CPU is online and in the new requested * affinity mask, there is no point in moving the interrupt from * one CPU to another.
*/ if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest)) return 0;
/* * Careful here. @apicd might either have move_in_progress set or * be enqueued for cleanup. Assigning a new vector would either * leave a stale vector on some CPU around or in case of a pending * cleanup corrupt the hlist.
*/ if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist)) return -EBUSY;
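
/*
 * Illustration: hlist_unhashed() is only true while @clist is not
 * queued anywhere, e.g.:
 *
 *      INIT_HLIST_NODE(&n);            hlist_unhashed(&n) == true
 *      hlist_add_head(&n, &head);      hlist_unhashed(&n) == false
 *      hlist_del_init(&n);             hlist_unhashed(&n) == true
 *
 * free_moved_vector() removes the entry with hlist_del_init(), so a
 * completed cleanup makes the check above pass again.
 */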
static int assign_irq_vector_any_locked(struct irq_data *irqd)
{
        /* Get the affinity mask - either irq_default_affinity or (user) set */
        const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
        int node = irq_data_get_node(irqd);

        if (node != NUMA_NO_NODE) {
                /* Try the intersection of @affmsk and node mask */
                cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk);
                if (!assign_vector_locked(irqd, vector_searchmask))
                        return 0;
        }

        /* Try the full affinity mask */
        cpumask_and(vector_searchmask, affmsk, cpu_online_mask);
        if (!assign_vector_locked(irqd, vector_searchmask))
                return 0;

        if (node != NUMA_NO_NODE) {
                /* Try the node mask */
                if (!assign_vector_locked(irqd, cpumask_of_node(node)))
                        return 0;
        }

        /* Try the full online mask */
        return assign_vector_locked(irqd, cpu_online_mask);
}
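
/*
 * The search above widens in four steps: (1) the affinity mask
 * restricted to the home node, (2) the full affinity mask clamped to
 * online CPUs, (3) any CPU on the home node, (4) any online CPU. The
 * first assign_vector_locked() that succeeds wins, so vectors stay as
 * local as the current vector pressure allows.
 */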
static int
assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info)
{
        if (irqd_affinity_is_managed(irqd))
                return reserve_managed_vector(irqd);
        if (info->mask)
                return assign_irq_vector(irqd, info->mask);
        /*
         * Make only a global reservation with no guarantee. A real vector
         * is associated at activation time.
         */
        return reserve_irq_vector(irqd);
}
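
/*
 * The reservation-only path defers the real allocation: activation
 * (see the activate_reserved() fragment below) calls
 * assign_irq_vector_any_locked() and only then consumes a vector from
 * the matrix. That keeps sparsely used interrupts, e.g. MSI-X entries
 * which may never be unmasked, from exhausting the per-CPU vector
 * space up front.
 */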
/* Regular fixed assigned interrupt */
if (!apicd->is_managed && !apicd->can_reserve)
        return;
/* If the interrupt has a global reservation, nothing to do */
if (apicd->has_reserved)
        return;

ret = assign_irq_vector_any_locked(irqd);
if (!ret) {
        apicd->has_reserved = false;
        /*
         * Core might have disabled reservation mode after
         * allocating the irq descriptor. Ideally this should
         * happen before allocation time, but that would require
         * completely convoluted ways of transporting that
         * information.
         */
        if (!irqd_can_reserve(irqd))
                apicd->can_reserve = false;
}

/*
 * Check to ensure that the effective affinity mask is a subset of
 * the user supplied affinity mask, and warn the user if it is not.
 */
if (!cpumask_subset(irq_data_get_effective_affinity_mask(irqd),
                    irq_data_get_affinity_mask(irqd))) {
        pr_warn("irq %u: Affinity broken due to vector space exhaustion.\n",
                irqd->irq);
}
cpumask_and(vector_searchmask, dest, cpu_online_mask);
if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) {
        /* Something in the core code broke! Survive gracefully */
        pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq);
        return -EINVAL;
}

ret = assign_managed_vector(irqd, vector_searchmask);
/*
 * This should not happen. The vector reservation got buggered. Handle
 * it gracefully.
 */
if (WARN_ON_ONCE(ret < 0)) {
        pr_err("Managed startup irq %u, no vector available\n",
               irqd->irq);
}
return ret;
}
raw_spin_lock_irqsave(&vector_lock, flags);
/*
 * If the interrupt is activated, then it must stay at this vector
 * position. That's usually the timer interrupt (0).
 */
if (irqd_is_activated(irqd)) {
        trace_vector_setup(virq, true, 0);
        apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
} else {
        /* Release the vector */
        apicd->can_reserve = true;
        irqd_set_can_reserve(irqd);
        clear_irq_vector(irqd);
        realloc = true;
}
raw_spin_unlock_irqrestore(&vector_lock, flags);
return realloc;
}
/*
 * Catch any attempt to touch the cascade interrupt on a PIC
 * equipped system.
 */
if (WARN_ON_ONCE(info->flags & X86_IRQ_ALLOC_LEGACY &&
                 virq == PIC_CASCADE_IR))
        return -EINVAL;
for (i = 0; i < nr_irqs; i++) {
        irqd = irq_domain_get_irq_data(domain, virq + i);
        BUG_ON(!irqd);
        node = irq_data_get_node(irqd);
        WARN_ON_ONCE(irqd->chip_data);
        apicd = alloc_apic_chip_data(node);
        if (!apicd) {
                err = -ENOMEM;
                goto error;
        }

        apicd->irq = virq + i;
        irqd->chip = &lapic_controller;
        irqd->chip_data = apicd;
        irqd->hwirq = virq + i;
        irqd_set_single_target(irqd);
        /*
         * Prevent any of these interrupts from being invoked in
         * non-interrupt context via e.g. generic_handle_irq()
         * as that can corrupt the affinity move state.
         */
        irqd_set_handle_enforce_irqctx(irqd);

        /* Don't invoke affinity setter on deactivated interrupts */
        irqd_set_affinity_on_activate(irqd);

        /*
         * Legacy vectors are already assigned when the IOAPIC
         * takes them over. They stay on the same vector. This is
         * required for check_timer() to work correctly as it might
         * switch back to legacy mode. Only update the hardware
         * config.
         */
        if (info->flags & X86_IRQ_ALLOC_LEGACY) {
                if (!vector_configure_legacy(virq + i, irqd, apicd))
                        continue;
        }

        err = assign_irq_vector_policy(irqd, info);
        trace_vector_setup(virq + i, false, err);
        if (err) {
                irqd->chip_data = NULL;
                free_apic_chip_data(apicd);
                goto error;
        }
}
static int x86_vector_select(struct irq_domain *d, struct irq_fwspec *fwspec,
                             enum irq_domain_bus_token bus_token)
{
        /*
         * HPET and I/OAPIC cannot be parented in the vector domain
         * if IRQ remapping is enabled. APIC IDs above 15 bits (i.e.
         * 32768 and up) are only permitted if IRQ remapping is
         * enabled, so check that.
         */
        if (apic_id_valid(32768))
                return 0;
if (irq_get_nr_irqs() > NR_VECTORS * nr_cpu_ids)
        irq_set_nr_irqs(NR_VECTORS * nr_cpu_ids);

nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
#if defined(CONFIG_PCI_MSI)
/*
 * for MSI and HT dyn irq
 */
if (gsi_top <= NR_IRQS_LEGACY)
        nr += 8 * nr_cpu_ids;
else
        nr += gsi_top * 16;
#endif
if (nr < irq_get_nr_irqs())
        irq_set_nr_irqs(nr);

/*
 * We don't know if PIC is present at this point so we need to do
 * probe() to get the right number of legacy IRQs.
 */
return legacy_pic->probe();
}
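
/*
 * Worked example (illustrative numbers): with gsi_top = 40, 16 legacy
 * IRQs and nr_cpu_ids = 8, the base count is (40 + 16) + 8 * 8 = 120.
 * With CONFIG_PCI_MSI enabled and gsi_top > NR_IRQS_LEGACY, the MSI
 * headroom adds 40 * 16 = 640, giving nr = 760. Note that nr_irqs is
 * only ever lowered to that value here, never raised by it.
 */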
void lapic_assign_legacy_vector(unsigned int irq, bool replace)
{
        /*
         * Use assign system here so it won't get accounted as allocated
         * and movable in the cpu hotplug check and it prevents managed
         * irq reservation from touching it.
         */
        irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
}
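
/*
 * For reference: with the usual definitions in irq_vectors.h
 * (FIRST_EXTERNAL_VECTOR 0x20 and ISA_IRQ_VECTOR() rounding up to the
 * next 16-vector boundary), ISA_IRQ_VECTOR(irq) evaluates to
 * 0x30 + irq, so legacy IRQ 0 is pinned to vector 0x30 and IRQ 15 to
 * vector 0x3f.
 */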
if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0)
        return;

/*
 * If the IO/APIC is disabled via config, kernel command line or
 * lack of enumeration then all legacy interrupts are routed
 * through the PIC. Make sure that they are marked as legacy
 * vectors. PIC_CASCADE_IR has already been marked in
 * lapic_assign_system_vectors().
 */
for (i = 0; i < nr_legacy_irqs(); i++) {
        if (i != PIC_CASCADE_IR)
                lapic_assign_legacy_vector(i, true);
}
}
void __init lapic_assign_system_vectors(void)
{
        unsigned int i, vector;

        if (nr_legacy_irqs() > 1)
                lapic_assign_legacy_vector(PIC_CASCADE_IR, false);

        /* System vectors are reserved, bring the matrix online */
        irq_matrix_online(vector_matrix);

        /* Mark the preallocated legacy interrupts */
        for (i = 0; i < nr_legacy_irqs(); i++) {
                /*
                 * Don't touch the cascade interrupt. It's unusable
                 * on PIC equipped machines. See the large comment
                 * in the IO/APIC code.
                 */
                if (i != PIC_CASCADE_IR)
                        irq_matrix_assign(vector_matrix, ISA_IRQ_VECTOR(i));
        }
}
int __init arch_early_irq_init(void)
{
        struct fwnode_handle *fn;

        /* Check whether the irq is in the legacy space */
        if (isairq < 0 || isairq >= nr_legacy_irqs())
                return VECTOR_UNUSED;
        /* Check whether the irq is handled by the IOAPIC */
        if (test_bit(isairq, &io_apic_irqs))
                return VECTOR_UNUSED;
        return irq_to_desc(isairq);
}
/* Online the local APIC infrastructure and initialize the vectors */
void lapic_online(void)
{
        unsigned int vector;

        lockdep_assert_held(&vector_lock);

        /* Online the vector matrix array for this CPU */
        irq_matrix_online(vector_matrix);

        /*
         * The interrupt affinity logic never targets interrupts to offline
         * CPUs. The exceptions are the legacy PIC interrupts. In general
         * they are only targeted to CPU0, but depending on the platform
         * they can be distributed to any online CPU in hardware. The
         * kernel has no influence on that. So all active legacy vectors
         * must be installed on all CPUs. All non-legacy interrupts can be
         * cleared.
         */
        for (vector = 0; vector < NR_VECTORS; vector++)
                this_cpu_write(vector_irq[vector], __setup_vector_irq(vector));
}
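
/*
 * After this loop, vector_irq on the onlining CPU holds the irq_desc
 * of the matching legacy PIC interrupt for the ISA vector range (see
 * __setup_vector_irq() above) and VECTOR_UNUSED for everything else;
 * regular interrupts are installed on demand when they are assigned
 * to this CPU.
 */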
/*
 * Managed interrupts are usually not migrated away
 * from an online CPU, but CPU isolation 'managed_irq'
 * can make that happen.
 * 1) Activation does not take the isolation into account
 *    to keep the code simple
 * 2) Migration away from an isolated CPU can happen when
 *    a non-isolated CPU which is in the calculated
 *    affinity mask comes online.
 */
trace_vector_free_moved(apicd->irq, cpu, vector, managed);
irq_matrix_free(vector_matrix, cpu, vector, managed);
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
hlist_del_init(&apicd->clist);
apicd->prev_vector = 0;
apicd->move_in_progress = 0;
}
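
/*
 * Both cleanup paths funnel into the sequence above: the timer driven
 * __vector_cleanup() once the new vector has fired, and
 * apic_force_complete_move() when a CPU is unplugged before the
 * cleanup had a chance to run. hlist_del_init() doubles as the
 * "no longer queued" marker tested via hlist_unhashed() in
 * assign_vector_locked().
 */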
/*
 * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
 */
static void apic_force_complete_move(struct irq_data *irqd)
{
        unsigned int cpu = smp_processor_id();
        struct apic_chip_data *apicd;
        unsigned int vector;

        guard(raw_spinlock)(&vector_lock);
        apicd = apic_chip_data(irqd);
        if (!apicd)
                return;

        /*
         * If prev_vector is empty or the descriptor is neither currently
         * nor previously on the outgoing CPU, no action is required.
         */
        vector = apicd->prev_vector;
        if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
                return;

        /*
         * This is tricky. If the cleanup of the old vector has not been
         * done yet, then the following setaffinity call will fail with
         * -EBUSY. This can leave the interrupt in a stale state.
         *
         * All CPUs are stuck in stop machine with interrupts disabled so
         * calling __irq_complete_move() would be completely pointless.
         *
         * 1) The interrupt is in move_in_progress state. That means that we
         *    have not seen an interrupt since the io_apic was reprogrammed to
         *    the new vector.
         *
         * 2) The interrupt has fired on the new vector, but the cleanup IPIs
         *    have not been processed yet.
         */
        if (apicd->move_in_progress) {
                /*
                 * In theory there is a race:
                 *
                 * set_ioapic(new_vector) <-- Interrupt is raised before update
                 *                            is effective, i.e. it's raised on
                 *                            the old vector.
                 *
                 * So if the target cpu cannot handle that interrupt before
                 * the old vector is cleaned up, we get a spurious interrupt
                 * and in the worst case the ioapic irq line becomes stale.
                 *
                 * But in case of cpu hotplug this should be a non-issue
                 * because if the affinity update happens right before all
                 * cpus rendezvous in stop machine, there is no way that the
                 * interrupt can be blocked on the target cpu because all cpus
                 * loop first with interrupts enabled in stop machine, so the
                 * old vector is not yet cleaned up when the interrupt fires.
                 *
                 * So the only way to run into this issue is if the delivery
                 * of the interrupt on the apic/system bus would be delayed
                 * beyond the point where the target cpu disables interrupts
                 * in stop machine. I doubt that it can happen, but at least
                 * there is a theoretical chance. Virtualization might be
                 * able to expose this, but AFAICT the IOAPIC emulation is not
                 * as stupid as the real hardware.
                 *
                 * Anyway, there is nothing we can do about that at this point
                 * w/o refactoring the whole fixup_irq() business completely.
                 * We print at least the irq number and the old vector number,
                 * so we have the necessary information when a problem in that
                 * area arises.
                 */
                pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
                        irqd->irq, vector);
        }
        free_moved_vector(apicd);
}
/*
 * Paranoia: Check if the vector that needs to be cleaned
 * up is registered at the APIC's IRR. That's clearly a
 * hardware issue if the vector arrived on the old target
 * _after_ interrupts were disabled above. Keep @apicd
 * on the list and schedule the timer again to give the CPU
 * a chance to handle the pending interrupt.
 *
 * Do not check IRR when called from lapic_offline(), because
 * fixup_irqs() was just called to scan IRR for set bits and
 * forward them to new destination CPUs via IPIs.
 */
if (check_irr && is_vector_pending(vector)) {
        pr_warn_once("Moved interrupt pending in old target APIC %u\n", apicd->irq);
        rearm = true;
        continue;
}
free_moved_vector(apicd);
}

/*
 * Must happen under vector_lock to make the timer_pending() check
 * in __vector_schedule_cleanup() race free against the rearm here.
 */
if (rearm)
        mod_timer(&cl->timer, jiffies + 1);
}
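
#if 0
/*
 * Sketch (not compiled here) of how a pending bit for @vector is
 * typically looked up in the local APIC IRR: the 256-bit IRR is
 * exposed as eight 32-bit registers spaced 0x10 apart starting at
 * APIC_IRR. The helper name is illustrative.
 */
static bool vector_set_in_irr(unsigned int vector)
{
        /* Select the 32-bit IRR word holding this vector's bit */
        u32 irr = apic_read(APIC_IRR + (vector / 32) * 0x10);

        /* Test the bit position within that word */
        return !!(irr & (1U << (vector % 32)));
}
#endif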
/*
 * The lockless timer_pending() check is safe here. If it
 * returns true, then the callback will observe this new
 * apic data in the hlist as everything is serialized by
 * vector lock.
 *
 * If it returns false then the timer is either not armed
 * or the other CPU executes the callback, which again
 * would be blocked on vector lock. Rearming it in the
 * latter case makes it fire for nothing.
 *
 * This is also safe against the callback rearming the timer
 * because that's serialized via vector lock too.
 */
if (!timer_pending(&cl->timer)) {
        cl->timer.expires = jiffies + 1;
        add_timer_on(&cl->timer, cpu);
}
} else {
        pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu);
        free_moved_vector(apicd);
}
raw_spin_unlock(&vector_lock);
}
apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
if (likely(!apicd->move_in_progress))
        return;

/*
 * If the interrupt arrived on the new target CPU, cleanup the
 * vector on the old target CPU. A vector check is not required
 * because an interrupt can never move from one vector to another
 * on the same CPU.
 */
if (apicd->cpu == smp_processor_id())
        __vector_schedule_cleanup(apicd);
}
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Note, this is not accurate accounting, but at least good enough to
 * prevent the actual interrupt move from running out of vectors.
 */
int lapic_can_unplug_cpu(void)
{
        unsigned int rsvd, avl, tomove, cpu = smp_processor_id();
        int ret = 0;
pr_debug("printing local APIC contents on CPU#%d/%d:\n",
smp_processor_id(), read_apic_id());
v = apic_read(APIC_ID);
pr_info("... APIC ID: %08x (%01x)\n", v, read_apic_id());
v = apic_read(APIC_LVR);
pr_info("... APIC VERSION: %08x\n", v);
ver = GET_APIC_VERSION(v);
maxlvt = lapic_get_maxlvt();
v = apic_read(APIC_TASKPRI);
pr_debug("... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
/* !82489DX */ if (APIC_INTEGRATED(ver)) { if (!APIC_XAPIC(ver)) {
v = apic_read(APIC_ARBPRI);
pr_debug("... APIC ARBPRI: %08x (%02x)\n",
v, v & APIC_ARBPRI_MASK);
}
v = apic_read(APIC_PROCPRI);
pr_debug("... APIC PROCPRI: %08x\n", v);
}
/* * Remote read supported only in the 82489DX and local APIC for * Pentium processors.
*/ if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
v = apic_read(APIC_RRR);
pr_debug("... APIC RRR: %08x\n", v);
}
v = apic_read(APIC_LDR);
pr_debug("... APIC LDR: %08x\n", v); if (!x2apic_enabled()) {
v = apic_read(APIC_DFR);
pr_debug("... APIC DFR: %08x\n", v);
}
v = apic_read(APIC_SPIV);
pr_debug("... APIC SPIV: %08x\n", v);