// SPDX-License-Identifier: GPL-2.0 /* * Intel IO-APIC support for multi-Pentium hosts. * * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * * Many thanks to Stig Venaas for trying out countless experimental * patches and reporting/debugging problems patiently! * * (c) 1999, Multiple IO-APIC support, developed by * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>, * further tested and cleaned up by Zach Brown <zab@redhat.com> * and Ingo Molnar <mingo@redhat.com> * * Fixes * Maciej W. Rozycki : Bits for genuine 82489DX APICs; * thanks to Eric Gilmore * and Rolf G. Tews * for testing these extensively * Paul Diefenbaugh : Added full ACPI support * * Historical information which is worth to be preserved: * * - SiS APIC rmw bug: * * We used to have a workaround for a bug in SiS chips which * required to rewrite the index register for a read-modify-write * operation as the chip lost the index information which was * setup for the read already. We cache the data now, so that * workaround has been removed.
*/
/*
 * Program one IO-APIC routing entry.
 *
 * When we write a new IO APIC routing entry, we need to write the high
 * word (w2) first! If the mask bit in the low word (w1) is clear, writing
 * it enables the interrupt, so the entry must be fully populated before
 * that happens.
 *
 * @apic: index of the IO-APIC
 * @pin:  pin whose routing entry is written
 * @e:    routing entry to program
 */
static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
	io_apic_write(apic, 0x11 + 2*pin, e.w2);
	io_apic_write(apic, 0x10 + 2*pin, e.w1);
}
/* * When we mask an IO APIC routing entry, we need to write the low * word first, in order to set the mask bit before we change the * high bits!
*/ staticvoid ioapic_mask_entry(int apic, int pin)
{ struct IO_APIC_route_entry e = { .masked = true };
/* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs.
*/ staticbool add_pin_to_irq_node(struct mp_chip_data *data, int node, int apic, int pin)
{ struct irq_pin_list *entry;
/* * Synchronize the IO-APIC and the CPU by doing a dummy read from the * IO-APIC
*/ staticvoid io_apic_sync(struct irq_pin_list *entry)
{ struct io_apic __iomem *io_apic;
/* * IO-APIC versions below 0x20 don't support EOI register. * For the record, here is the information about various versions: * 0Xh 82489DX * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant * 2Xh I/O(x)APIC which is PCI 2.2 Compliant * 30h-FFh Reserved * * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic * version as 0x2. This is an error with documentation and these ICH chips * use io-apic's of version 0x20. * * For IO-APIC's with EOI register, we use that to do an explicit EOI. * Otherwise, we simulate the EOI message manually by changing the trigger * mode to edge and then back to level, with RTE being masked during this.
*/ staticvoid __eoi_ioapic_pin(int apic, int pin, int vector)
{ if (mpc_ioapic_ver(apic) >= 0x20) {
io_apic_eoi(apic, vector);
} else { struct IO_APIC_route_entry entry, entry1;
entry = entry1 = __ioapic_read_entry(apic, pin);
/* Mask the entry and change the trigger mode to edge. */
entry1.masked = true;
entry1.is_level = false;
/* Check delivery_mode to be sure we're not clearing an SMI pin */
entry = ioapic_read_entry(apic, pin); if (entry.delivery_mode == APIC_DELIVERY_MODE_SMI) return;
/* * Make sure the entry is masked and re-read the contents to check * if it is a level triggered pin and if the remote-IRR is set.
*/ if (!entry.masked) {
entry.masked = true;
ioapic_write_entry(apic, pin, entry);
entry = ioapic_read_entry(apic, pin);
}
if (entry.irr) { /* * Make sure the trigger mode is set to level. Explicit EOI * doesn't clear the remote-IRR if the trigger mode is not * set to level.
*/ if (!entry.is_level) {
entry.is_level = true;
ioapic_write_entry(apic, pin, entry);
}
guard(raw_spinlock_irqsave)(&ioapic_lock);
__eoi_ioapic_pin(apic, pin, entry.vector);
}
/* * Clear the rest of the bits in the IO-APIC RTE except for the mask * bit.
*/
ioapic_mask_entry(apic, pin);
entry = ioapic_read_entry(apic, pin); if (entry.irr)
pr_err("Unable to reset IRR for apic: %d, pin :%d\n",
mpc_ioapic_id(apic), pin);
}
for_each_ioapic(ioapic_idx) { if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic) return ioapic_idx;
}
}
return -1;
}
/*
 * Determine the line polarity of MP interrupt source @idx.
 *
 * Returns true when the line is active low, false when it is active high.
 */
static bool irq_active_low(int idx)
{
	int bus = mp_irqs[idx].srcbus;

	/*
	 * Determine IRQ line polarity (high active or low active):
	 */
	switch (mp_irqs[idx].irqflag & MP_IRQPOL_MASK) {
	case MP_IRQPOL_DEFAULT:
		/*
		 * Conforms to spec, i.e. bus-type dependent polarity. PCI
		 * defaults to low active. [E]ISA defaults to high active.
		 */
		return !test_bit(bus, mp_bus_not_pci);
	case MP_IRQPOL_ACTIVE_HIGH:
		return false;
	case MP_IRQPOL_RESERVED:
		pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n");
		fallthrough;
	case MP_IRQPOL_ACTIVE_LOW:
	default: /* Pointless default required due to gcc stupidity */
		return true;
	}
}
/* * EISA interrupts are always active high and can be edge or level * triggered depending on the ELCR value. If an interrupt is listed as * EISA conforming in the MP table, that means its trigger type must be * read in from the ELCR.
*/ staticbool eisa_irq_is_level(int idx, int bus, bool level)
{ switch (mp_bus_id_to_type[bus]) { case MP_BUS_PCI: case MP_BUS_ISA: return level; case MP_BUS_EISA: return EISA_ELCR(mp_irqs[idx].srcbusirq);
}
pr_warn("IOAPIC: Invalid srcbus: %d defaulting to level\n", bus); returntrue;
} #else staticinlineint eisa_irq_is_level(int idx, int bus, bool level)
{ return level;
} #endif
/*
 * Determine the trigger mode of MP interrupt source @idx.
 *
 * Returns true when the interrupt is level triggered, false when it is
 * edge triggered.
 */
static bool irq_is_level(int idx)
{
	int bus = mp_irqs[idx].srcbus;
	bool level;

	/*
	 * Determine IRQ trigger mode (edge or level sensitive):
	 */
	switch (mp_irqs[idx].irqflag & MP_IRQTRIG_MASK) {
	case MP_IRQTRIG_DEFAULT:
		/*
		 * Conforms to spec, i.e. bus-type dependent trigger
		 * mode. PCI defaults to level, ISA to edge.
		 */
		level = !test_bit(bus, mp_bus_not_pci);
		/* Take EISA into account */
		return eisa_irq_is_level(idx, bus, level);
	case MP_IRQTRIG_EDGE:
		return false;
	case MP_IRQTRIG_RESERVED:
		pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n");
		fallthrough;
	case MP_IRQTRIG_LEVEL:
	default: /* Pointless default required due to gcc stupidity */
		return true;
	}
}
/* * setup_IO_APIC_irqs() programs all legacy IRQs with default trigger * and polarity attributes. So allow the first user to reprogram the * pin with real trigger and polarity attributes.
*/ if (irq < nr_legacy_irqs() && data->count == 1) { if (info->ioapic.is_level != data->is_level)
mp_register_handler(irq, info->ioapic.is_level);
data->entry.is_level = data->is_level = info->ioapic.is_level;
data->entry.active_low = data->active_low = info->ioapic.active_low;
}
staticint alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, struct irq_alloc_info *info)
{ int type = ioapics[ioapic].irqdomain_cfg.type; bool legacy = false; int irq = -1;
switch (type) { case IOAPIC_DOMAIN_LEGACY: /* * Dynamically allocate IRQ number for non-ISA IRQs in the first * 16 GSIs on some weird platforms.
*/ if (!ioapic_initialized || gsi >= nr_legacy_irqs())
irq = gsi;
legacy = mp_is_legacy_irq(irq); break; case IOAPIC_DOMAIN_STRICT:
irq = gsi; break; case IOAPIC_DOMAIN_DYNAMIC: break; default:
WARN(1, "ioapic: unknown irqdomain type %d\n", type); return -1;
}
/*
 * Need special handling for ISA IRQs because there may be multiple IOAPIC pins
 * sharing the same ISA IRQ number and irqdomain only supports 1:1 mapping
 * between IOAPIC pin and IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are
 * used for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H).
 * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are available, and
 * some BIOSes may use MP Interrupt Source records to override IRQ numbers for
 * PIRQs instead of reprogramming the interrupt routing logic. Thus there may be
 * multiple pins sharing the same legacy IRQ number when ACPI is disabled.
 *
 * Returns the IRQ number on success. Returns -EBUSY when mp_check_pin_attr()
 * rejects the request for an already allocated IRQ, -ENOMEM when
 * add_pin_to_irq_node() fails, or the negative result of
 * __irq_domain_alloc_irqs().
 */
static int alloc_isa_irq_from_domain(struct irq_domain *domain, int irq, int ioapic, int pin,
				     struct irq_alloc_info *info)
{
	struct irq_data *irq_data = irq_get_irq_data(irq);
	int node = ioapic_alloc_attr_node(info);
	struct mp_chip_data *data;

	/*
	 * Legacy ISA IRQ has already been allocated, just add pin to
	 * the pin list associated with this IRQ and program the IOAPIC
	 * entry.
	 */
	if (irq_data && irq_data->parent_data) {
		if (!mp_check_pin_attr(irq, info))
			return -EBUSY;
		if (!add_pin_to_irq_node(irq_data->chip_data, node, ioapic, info->ioapic.pin))
			return -ENOMEM;
	} else {
		info->flags |= X86_IRQ_ALLOC_LEGACY;
		irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, NULL);
		if (irq >= 0) {
			/* Mark the freshly allocated interrupt as an ISA IRQ */
			irq_data = irq_domain_get_irq_data(domain, irq);
			data = irq_data->chip_data;
			data->isa_irq = true;
		}
	}

	return irq;
}
staticint mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, unsignedint flags, struct irq_alloc_info *info)
{ struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); struct irq_alloc_info tmp; struct mp_chip_data *data; bool legacy = false; int irq;
if (!domain) return -ENOSYS;
if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) {
irq = mp_irqs[idx].srcbusirq;
legacy = mp_is_legacy_irq(irq); /* * IRQ2 is unusable for historical reasons on systems which * have a legacy PIC. See the comment vs. IRQ2 further down. * * If this gets removed at some point then the related code * in lapic_assign_system_vectors() needs to be adjusted as * well.
*/ if (legacy && irq == PIC_CASCADE_IR) return -EINVAL;
}
staticint pin_2_irq(int idx, int ioapic, int pin, unsignedint flags)
{
u32 gsi = mp_pin_to_gsi(ioapic, pin);
/* Debugging check, we are in big trouble if this message pops up! */ if (mp_irqs[idx].dstirq != pin)
pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
#ifdef CONFIG_X86_32 /* PCI IRQ command line redirection. Yes, limits are hardcoded. */ if ((pin >= 16) && (pin <= 23)) { if (pirq_entries[pin - 16] != -1) { if (!pirq_entries[pin - 16]) {
apic_pr_verbose("Disabling PIRQ%d\n", pin - 16);
} else { int irq = pirq_entries[pin-16];
data = irq_data->chip_data; if (!data || data->isa_irq) return;
guard(mutex)(&ioapic_mutex); if (--data->count == 0)
irq_domain_free_irqs(irq, 1);
}
/* * Find a specific PCI IRQ entry. * Not an __init, possibly needed by modules
*/ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
{ int irq, i, best_ioapic = -1, best_idx = -1;
/* * Use the first all-but-pin matching entry as a * best-guess fuzzy result for broken mptables.
*/ if (best_idx < 0) {
best_idx = i;
best_ioapic = ioapic_idx;
}
} if (best_idx < 0) return -1;
staticvoid __init print_IO_APIC(int ioapic_idx)
{ union IO_APIC_reg_00 reg_00; union IO_APIC_reg_01 reg_01; union IO_APIC_reg_02 reg_02; union IO_APIC_reg_03 reg_03;
/* * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, * but the value of reg_02 is read as the previous read register * value, so ignore it if reg_02 == reg_01.
*/ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
apic_dbg(".... register #02: %08X\n", reg_02.raw);
apic_dbg("....... : arbitration: %02X\n", reg_02.bits.arbitration);
}
/* * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 * or reg_03, but the value of reg_0[23] is read as the previous read * register value, so ignore it if reg_03 == reg_0[12].
*/ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
reg_03.raw != reg_01.raw) {
apic_dbg(".... register #03: %08X\n", reg_03.raw);
apic_dbg("....... : Boot DT : %X\n", reg_03.bits.boot_DT);
}
void __init print_IO_APICs(void)
{ int ioapic_idx; unsignedint irq;
apic_dbg("number of MP IRQ sources: %d.\n", mp_irq_entries);
for_each_ioapic(ioapic_idx) {
apic_dbg("number of IO-APIC #%d registers: %d.\n",
mpc_ioapic_id(ioapic_idx), ioapics[ioapic_idx].nr_registers);
}
/* * We are a bit conservative about what we expect. We have to * know about every hardware change ASAP.
*/
printk(KERN_INFO "testing the IO APIC.......................\n");
/* Where, if anywhere, the i8259 is connected in external interrupt mode; both fields start at -1 */
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
/*
 * Record in ioapic_i8259 the IO-APIC pin on which the i8259 is connected in
 * ExtINT mode, cross-check it against the MP table and then clear the
 * IO-APIC.
 */
void __init enable_IO_APIC(void)
{
	int mp_apic, mp_pin, apic, pin;

	if (ioapic_is_disabled)
		nr_ioapics = 0;

	/* Both legacy IRQs and at least one IO-APIC are required */
	if (!(nr_legacy_irqs() && nr_ioapics))
		return;

	/* See if any of the pins is in ExtINT mode */
	for_each_ioapic_pin(apic, pin) {
		struct IO_APIC_route_entry rte = ioapic_read_entry(apic, pin);

		if (rte.masked || rte.delivery_mode != APIC_DELIVERY_MODE_EXTINT)
			continue;

		/*
		 * The interrupt line is enabled and in ExtINT mode: this is
		 * the pin where the i8259 is connected.
		 */
		ioapic_i8259.apic = apic;
		ioapic_i8259.pin = pin;
		break;
	}

	/*
	 * Look to see if the MP table has reported the ExtINT.
	 *
	 * If we could not find the appropriate pin by looking at the ioapic,
	 * the i8259 probably is not connected to the ioapic, but give the
	 * mptable a chance anyway.
	 */
	mp_pin = find_isa_irq_pin(0, mp_ExtINT);
	mp_apic = find_isa_irq_apic(0, mp_ExtINT);

	if (mp_pin >= 0) {
		/* Trust the MP table if nothing is setup in the hardware */
		if (ioapic_i8259.pin == -1) {
			pr_warn("ExtINT not setup in hardware but reported by MP table\n");
			ioapic_i8259.pin = mp_pin;
			ioapic_i8259.apic = mp_apic;
		}

		/* Complain if the MP table and the hardware disagree */
		if (ioapic_i8259.pin >= 0 &&
		    (ioapic_i8259.apic != mp_apic || ioapic_i8259.pin != mp_pin))
			pr_warn("ExtINT in hardware and MP table differ\n");
	}

	/* Do not trust the IO-APIC being empty at bootup */
	clear_IO_APIC();
}
/*
 * Write a routing entry for the recorded i8259 pin (if any) and then call
 * disconnect_bsp_APIC() when a local APIC is reported.
 */
void native_restore_boot_irq_mode(void)
{ /*
   * If the i8259 is routed through an IOAPIC, put that IOAPIC in
   * virtual wire mode so legacy interrupts can be delivered.
   */ if (ioapic_i8259.pin != -1) { struct IO_APIC_route_entry entry;
u32 apic_id = read_apic_id();
/*
 * NOTE(review): nothing visible here initializes 'entry' or uses 'apic_id'
 * before the write below - confirm that the route-entry setup was not lost.
 */
/* Add it to the IO-APIC irq-routing table */
ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
}
/* The argument tells disconnect_bsp_APIC() whether an i8259 pin was recorded */
if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
}
/*
 * Invoke the x86_apic_ops.restore() callback, but only when legacy IRQs
 * exist.
 */
void restore_boot_irq_mode(void)
{
	if (nr_legacy_irqs())
		x86_apic_ops.restore();
}
#ifdef CONFIG_X86_32 /* * function to set the IO-APIC physical IDs based on the * values stored in the MPC table. * * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
*/ staticvoid __init setup_ioapic_ids_from_mpc_nocheck(void)
{
DECLARE_BITMAP(phys_id_present_map, MAX_LOCAL_APIC); const u32 broadcast_id = 0xF; union IO_APIC_reg_00 reg_00; unsignedchar old_id; int ioapic_idx, i;
/* * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless.
*/
copy_phys_cpu_present_map(phys_id_present_map);
/* * Set the IOAPIC ID to the value stored in the MPC table.
*/
for_each_ioapic(ioapic_idx) { /* Read the register 0 value */
scoped_guard (raw_spinlock_irqsave, &ioapic_lock)
reg_00.raw = io_apic_read(ioapic_idx, 0);
old_id = mpc_ioapic_id(ioapic_idx);
if (mpc_ioapic_id(ioapic_idx) >= broadcast_id) {
pr_err(FW_BUG "IO-APIC#%d ID is %d in the MPC table!...\n",
ioapic_idx, mpc_ioapic_id(ioapic_idx));
pr_err("... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID);
ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID;
}
/* * Sanity check, is the ID really free? Every APIC in a * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages.
*/ if (test_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map)) {
pr_err(FW_BUG "IO-APIC#%d ID %d is already used!...\n",
ioapic_idx, mpc_ioapic_id(ioapic_idx)); for (i = 0; i < broadcast_id; i++) if (!test_bit(i, phys_id_present_map)) break; if (i >= broadcast_id)
panic("Max APIC ID exceeded!\n");
pr_err("... fixing up to %d. (tell your hw vendor)\n", i);
set_bit(i, phys_id_present_map);
ioapics[ioapic_idx].mp_config.apicid = i;
} else {
apic_pr_verbose("Setting %d in the phys_id_present_map\n",
mpc_ioapic_id(ioapic_idx));
set_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map);
}
/* * We need to adjust the IRQ routing table if the ID * changed.
*/ if (old_id != mpc_ioapic_id(ioapic_idx)) { for (i = 0; i < mp_irq_entries; i++) { if (mp_irqs[i].dstapic == old_id)
mp_irqs[i].dstapic = mpc_ioapic_id(ioapic_idx);
}
}
/* * Update the ID register according to the right value from * the MPC table if they are different.
*/ if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID) continue;
apic_pr_verbose("...changing IO-APIC physical APIC ID to %d ...",
mpc_ioapic_id(ioapic_idx));
if (acpi_ioapic) return; /* * Don't check I/O APIC IDs for xAPIC systems. They have * no meaning without the serial APIC bus.
*/ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
|| APIC_XAPIC(boot_cpu_apic_version)) return;
setup_ioapic_ids_from_mpc_nocheck();
} #endif
/* * We don't know the TSC frequency yet, but waiting for * 40000000000/HZ TSC cycles is safe: * 4 GHz == 10 jiffies * 1 GHz == 40 jiffies
*/ do {
native_pause();
now = rdtsc();
} while ((now - start) < 40000000000ULL / HZ && time_before_eq(jiffies, end));
}
/*
 * Delay in steps of doubling length via __delay(), stopping after the step
 * for band 11 or as soon as jiffies has advanced beyond its starting value
 * plus 4.
 */
static void __init delay_without_tsc(void)
{
	unsigned long end = jiffies + 4;
	int band = 1;

	/*
	 * We don't know any frequency yet, but waiting for
	 * 40940000000/HZ cycles is safe:
	 * 4 GHz == 10 jiffies
	 * 1 GHz == 40 jiffies
	 * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094
	 */
	do {
		__delay(((1U << band++) * 10000000UL) / HZ);
	} while (band < 12 && time_before_eq(jiffies, end));
}
/* * There is a nasty bug in some older SMP boards, their mptable lies * about the timer IRQ. We do the following to work around the situation: * * - timer IRQ defaults to IO-APIC IRQ * - if this function detects that timer IRQs are defunct, then we fall * back to ISA timer IRQs
*/ staticint __init timer_irq_works(void)
{ unsignedlong t1 = jiffies;
if (no_timer_check) return 1;
local_irq_enable(); if (boot_cpu_has(X86_FEATURE_TSC))
delay_with_tsc(); else
delay_without_tsc();
/* * Expect a few ticks at least, to be sure some possible * glue logic does not lock up after one or two first * ticks in a non-ExtINT mode. Also the local APIC * might have cached one ExtINT interrupt. Finally, at * least one tick may be lost due to delays.
*/
/*
 * In the SMP+IOAPIC case it might happen that there are an unspecified
 * number of pending IRQ events unhandled. These cases are very rare,
 * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
 * better to do it this way as thus we do not have to be aware of
 * 'pending' interrupts in the IRQ path, except at this point.
 *
 * Edge triggered needs to resend any interrupt that was delayed but this
 * is now handled in the device independent code.
 *
 * Starting up an edge-triggered IO-APIC interrupt is nasty - we need to
 * make sure that we get the edge. If it is already asserted for some
 * reason, we need to return 1 to indicate that it was pending.
 *
 * This is not complete - we should be able to fake an edge even if it
 * isn't on the 8259A...
 */
static unsigned int startup_ioapic_irq(struct irq_data *data)
{
	int was_pending = 0, irq = data->irq;

	guard(raw_spinlock_irqsave)(&ioapic_lock);
	if (irq < nr_legacy_irqs()) {
		/* Mask the line in the legacy PIC and note if it was pending there */
		legacy_pic->mask(irq);
		if (legacy_pic->irq_pending(irq))
			was_pending = 1;
	}
	__unmask_ioapic(data->chip_data);
	return was_pending;
}
guard(raw_spinlock_irqsave)(&ioapic_lock);
for_each_irq_pin(entry, data->irq_2_pin) { struct IO_APIC_route_entry e; int pin;
pin = entry->pin;
e.w1 = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ if (e.irr) returntrue;
} returnfalse;
}
/*
 * Prepare for a pending affinity change of @data.
 *
 * Returns true when a set-affinity operation is pending (the interrupt is
 * masked here if it was not already), false otherwise.
 */
static inline bool ioapic_prepare_move(struct irq_data *data)
{
	/* If we are moving the IRQ we need to mask it */
	if (unlikely(irqd_is_setaffinity_pending(data))) {
		if (!irqd_irq_masked(data))
			mask_ioapic_irq(data);
		return true;
	}
	return false;
}
staticinlinevoid ioapic_finish_move(struct irq_data *data, bool moveit)
{ if (unlikely(moveit)) { /* * Only migrate the irq if the ack has been received. * * On rare occasions the broadcast level triggered ack gets * delayed going to ioapics, and if we reprogram the * vector while Remote IRR is still set the irq will never * fire again. * * To prevent this scenario we read the Remote IRR bit * of the ioapic. This has two effects. * - On any sane system the read of the ioapic will * flush writes (and acks) going to the ioapic from * this cpu. * - We get to see if the ACK has actually been delivered. * * Based on failed experiments of reprogramming the * ioapic entry from outside of irq context starting * with masking the ioapic entry and then polling until * Remote IRR was clear before reprogramming the * ioapic I don't trust the Remote IRR bit to be * completely accurate. * * However there appears to be no other way to plug * this race, so if the Remote IRR bit is not * accurate and is causing problems then it is a hardware bug * and you can go talk to the chipset vendor about it.
*/ if (!io_apic_level_ack_pending(data->chip_data))
irq_move_masked_irq(data); /* If the IRQ is masked in the core, leave it: */ if (!irqd_irq_masked(data))
unmask_ioapic_irq(data);
}
} #else staticinlinebool ioapic_prepare_move(struct irq_data *data)
{ returnfalse;
} staticinlinevoid ioapic_finish_move(struct irq_data *data, bool moveit)
{
} #endif
/* * It appears there is an erratum which affects at least version 0x11 * of I/O APIC (that's the 82093AA and cores integrated into various * chipsets). Under certain conditions a level-triggered interrupt is * erroneously delivered as edge-triggered one but the respective IRR * bit gets set nevertheless. As a result the I/O unit expects an EOI * message but it will never arrive and further interrupts are blocked * from the source. The exact reason is so far unknown, but the * phenomenon was observed when two consecutive interrupt requests * from a given source get delivered to the same CPU and the source is * temporarily disabled in between. * * A workaround is to simulate an EOI message manually. We achieve it * by setting the trigger mode to edge and then to level when the edge * trigger mode gets detected in the TMR of a local APIC for a * level-triggered interrupt. We mask the source for the time of the * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro * * Also in the case when cpu goes offline, fixup_irqs() will forward * any unhandled interrupt on the offlined cpu to the new cpu * destination that is handling the corresponding interrupt. This * interrupt forwarding is done via IPI's. Hence, in this case also * level-triggered io-apic interrupt will be seen as an edge * interrupt in the IRR. And we can't rely on the cpu's EOI * to be broadcasted to the IO-APIC's which will clear the remoteIRR * corresponding to the level-triggered interrupt. Hence on IO-APIC's * supporting EOI register, we do an explicit EOI to clear the * remote IRR and on IO-APIC's which don't have an EOI register, * we use the above logic (mask+edge followed by unmask+level) from * Manfred Spraul to clear the remote IRR.
*/
i = cfg->vector;
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
/* * We must acknowledge the irq before we move it or the acknowledge will * not propagate properly.
*/
apic_eoi();
/* * Tail end of clearing remote IRR bit (either by delivering the EOI * message via io-apic EOI register write or simulating it using * mask+edge followed by unmask+level logic) manually when the * level triggered interrupt is seen as the edge triggered interrupt * at the cpu.
*/ if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
eoi_ioapic_pin(cfg->vector, irq_data->chip_data);
}
/* * Intr-remapping uses pin number as the virtual vector * in the RTE. Actual vector is programmed in * intr-remapping table entry. Hence for the io-apic * EOI we use the pin number.
*/
apic_ack_irq(irq_data);
eoi_ioapic_pin(data->entry.vector, data);
}
/*
 * The I/OAPIC is just a device for generating MSI messages from legacy
 * interrupt pins. Various fields of the RTE translate into bits of the
 * resulting MSI which had a historical meaning.
 *
 * With interrupt remapping, many of those bits have different meanings
 * in the underlying MSI, but the way that the I/OAPIC transforms them
 * from its RTE to the MSI message is the same. This function allows
 * the parent IRQ domain to compose the MSI message, then takes the
 * relevant bits to put them in the appropriate places in the RTE in
 * order to generate that message when the IRQ happens.
 *
 * The setup here relies on a preconfigured route entry (is_level,
 * active_low, masked) because the parent domain is merely composing the
 * generic message routing information which is used for the MSI.
 *
 * @irq_data: interrupt whose message is composed
 * @entry:    route entry receiving the vector, delivery mode, destination
 *            mode, IR format and IR index fields
 */
static void ioapic_setup_msg_from_msi(struct irq_data *irq_data,
				      struct IO_APIC_route_entry *entry)
{
	struct msi_msg msg;

	/* Let the parent domain compose the MSI message */
	irq_chip_compose_msi_msg(irq_data, &msg);

	/*
	 * - Real vector
	 * - DMAR/IR: 8bit subhandle (ioapic.pin)
	 * - AMD/IR:  8bit IRTE index
	 */
	entry->vector = msg.arch_data.vector;
	/* Delivery mode (for DMAR/IR all 0) */
	entry->delivery_mode = msg.arch_data.delivery_mode;
	/* Destination mode or DMAR/IR index bit 15 */
	entry->dest_mode_logical = msg.arch_addr_lo.dest_mode_logical;
	/* DMAR/IR: 1, 0 for all other modes */
	entry->ir_format = msg.arch_addr_lo.dmar_format;
	/*
	 * - DMAR/IR: index bit 0-14.
	 *
	 * - Virt: If the host supports x2apic without a virtualized IR
	 *	   unit then bit 0-6 of dmar_index_0_14 are providing bit
	 *	   8-14 of the destination id.
	 *
	 * All other modes have bit 0-6 of dmar_index_0_14 cleared and the
	 * topmost 8 bits are destination id bit 0-7 (entry::destid_0_7).
	 */
	entry->ir_index_0_14 = msg.arch_addr_lo.dmar_index_0_14;
}
ret = parent->chip->irq_set_affinity(parent, mask, force);
guard(raw_spinlock_irqsave)(&ioapic_lock); if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE)
ioapic_configure_entry(irq_data);
return ret;
}
/*
 * Interrupt shutdown masks the ioapic pin, but the interrupt might already
 * be in flight, but not yet serviced by the target CPU. That means
 * __synchronize_hardirq() would return and claim that everything is calmed
 * down. So free_irq() would proceed and deactivate the interrupt and free
 * resources.
 *
 * Once the target CPU comes around to service it, it will find a cleared
 * vector and complain. While the spurious interrupt is harmless, the full
 * release of resources might prevent the interrupt from being acknowledged
 * which keeps the hardware in a weird state.
 *
 * Verify that the corresponding Remote-IRR bits are clear.
 *
 * @irqd:  interrupt to query
 * @which: state to query; only IRQCHIP_STATE_ACTIVE is supported
 * @state: set to true if any pin of the interrupt is level triggered with
 *         Remote-IRR set, false otherwise
 *
 * Returns 0 on success, -EINVAL for any other @which.
 */
static int ioapic_irq_get_chip_state(struct irq_data *irqd, enum irqchip_irq_state which,
				     bool *state)
{
	struct mp_chip_data *mcd = irqd->chip_data;
	struct IO_APIC_route_entry rentry;
	struct irq_pin_list *p;

	if (which != IRQCHIP_STATE_ACTIVE)
		return -EINVAL;

	*state = false;

	guard(raw_spinlock)(&ioapic_lock);
	for_each_irq_pin(p, mcd->irq_2_pin) {
		rentry = __ioapic_read_entry(p->apic, p->pin);
		/*
		 * The remote IRR is only valid in level trigger mode. Its
		 * meaning is undefined for edge triggered interrupts and
		 * irrelevant because the IO-APIC treats them as fire and
		 * forget.
		 */
		if (rentry.irr && rentry.is_level) {
			*state = true;
			break;
		}
	}
	return 0;
}
for_each_active_irq(irq) {
cfg = irq_cfg(irq); if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { /* * Hmm.. We don't have an entry for this, so * default to an old-fashioned 8259 interrupt if we * can. Otherwise set the dummy interrupt chip.
*/ if (irq < nr_legacy_irqs())
legacy_pic->make_irq(irq); else
irq_set_chip(irq, &no_irq_chip);
}
}
}
/*
 * The local APIC irq-chip implementation:
 */

/* Set APIC_LVT_MASKED in the LVT0 register; @data is not used. */
static void mask_lapic_irq(struct irq_data *data)
{
	unsigned long v = apic_read(APIC_LVT0);

	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
}
staticvoid unmask_lapic_irq(struct irq_data *data)
{ unsignedlong v = apic_read(APIC_LVT0);
/* * This looks a bit hackish but it's about the only one way of sending * a few INTA cycles to 8259As and any associated glue logic. ICR does * not support the ExtINT mode, unfortunately. We need to send these * cycles as some i82489DX-based boards have glue logic that keeps the * 8259A interrupt line asserted until INTA. --macro
*/ staticinlinevoid __init unlock_ExtINT_logic(void)
{ unsignedchar save_control, save_freq_select; struct IO_APIC_route_entry entry0, entry1; int apic, pin, i;
u32 apic_id;
/* Set to 1 by the "disable_timer_pin_1" early parameter */
static int disable_timer_pin_1 __initdata;

/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
static int __init disable_timer_pin_setup(char *arg)
{
	disable_timer_pin_1 = 1;
	return 0;
}
early_param("disable_timer_pin_1", disable_timer_pin_setup);
staticint __init mp_alloc_timer_irq(int ioapic, int pin)
{ struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); int irq = -1;
/*
 * Add @newapic/@newpin to the pin list of @data.
 *
 * NOTE(review): no search for @oldapic/@oldpin is visible in this body and
 * 'entry' is never used, so the new pin is always added - confirm that a
 * replacement step was not lost.
 */
static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node,
					   int oldapic, int oldpin,
					   int newapic, int newpin)
{
	struct irq_pin_list *entry;

	/* Old apic/pin didn't exist, so just add a new one */
	add_pin_to_irq_node(data, node, newapic, newpin);
}
/*
 * This code may look a bit paranoid, but it's supposed to cooperate with
 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
 * fanatically on his truly buggy board.
 *
 * Tries the timer through the IO-APIC pin, then through the 8259A, then as
 * a Virtual Wire IRQ, and panics if timer_irq_works() fails every time.
 */
static inline void __init check_timer(void)
{
	struct irq_data *irq_data = irq_get_irq_data(0);
	struct mp_chip_data *data = irq_data->chip_data;
	struct irq_cfg *cfg = irqd_cfg(irq_data);
	int node = cpu_to_node(0);
	int apic1, pin1, apic2, pin2;
	int no_pin1 = 0;

	if (!global_clock_event)
		return;

	local_irq_disable();

	/*
	 * get/set the timer IRQ vector:
	 */
	legacy_pic->mask(0);

	/*
	 * As IRQ0 is to be enabled in the 8259A, the virtual
	 * wire has to be disabled in the local APIC. Also
	 * timer interrupts need to be acknowledged manually in
	 * the 8259A for the i82489DX when using the NMI
	 * watchdog as that APIC treats NMIs as level-triggered.
	 * The AEOI mode will finish them in the 8259A
	 * automatically.
	 */
	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
	legacy_pic->init(1);

	/*
	 * NOTE(review): pin1/apic1/pin2/apic2 are read below without any
	 * visible assignment, and 'cfg' is never used - confirm that the pin
	 * lookup was not lost from this function.
	 */

	/*
	 * Some BIOS writers are clueless and report the ExtINTA
	 * I/O APIC input from the cascaded 8259A as the timer
	 * interrupt input. So just in case, if only one pin
	 * was found above, try it both directly and through the
	 * 8259A.
	 */
	if (pin1 == -1) {
		panic_if_irq_remap(FW_BUG "Timer not connected to IO-APIC");
		pin1 = pin2;
		apic1 = apic2;
		no_pin1 = 1;
	} else if (pin2 == -1) {
		pin2 = pin1;
		apic2 = apic1;
	}

	if (pin1 != -1) {
		/* Ok, does IRQ0 through the IOAPIC work? */
		if (no_pin1) {
			mp_alloc_timer_irq(apic1, pin1);
		} else {
			/*
			 * for edge trigger, it's already unmasked,
			 * so only need to unmask if it is level-trigger
			 * do we really have level trigger timer?
			 */
			int idx = find_irq_entry(apic1, pin1, mp_INT);

			if (idx != -1 && irq_is_level(idx))
				unmask_ioapic_irq(irq_get_irq_data(0));
		}
		irq_domain_deactivate_irq(irq_data);
		irq_domain_activate_irq(irq_data, false);
		if (timer_irq_works()) {
			if (disable_timer_pin_1 > 0)
				clear_IO_APIC_pin(0, pin1);
			goto out;
		}
		panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
		clear_IO_APIC_pin(apic1, pin1);
		if (!no_pin1)
			pr_err("..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");

		pr_info("...trying to set up timer (IRQ0) through the 8259A ...\n");
		pr_info("..... (found apic %d pin %d) ...\n", apic2, pin2);
		/*
		 * legacy devices should be connected to IO APIC #0
		 */
		replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2);
		irq_domain_deactivate_irq(irq_data);
		irq_domain_activate_irq(irq_data, false);
		legacy_pic->unmask(0);
		if (timer_irq_works()) {
			pr_info("....... works.\n");
			goto out;
		}
		/*
		 * Cleanup, just in case ...
		 */
		legacy_pic->mask(0);
		clear_IO_APIC_pin(apic2, pin2);
		pr_info("....... failed.\n");
	}

	pr_info("...trying to set up timer as Virtual Wire IRQ...\n");

	/*
	 * NOTE(review): nothing visible reconfigures the timer between the
	 * message above and the check below - confirm that no setup step was
	 * lost.
	 */
	if (timer_irq_works()) {
		pr_info("..... works.\n");
		goto out;
	}

	pr_info("..... failed :\n");
	if (apic_is_x2apic_enabled()) {
		pr_info("Perhaps problem with the pre-enabled x2apic mode\n"
			"Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
	}

	panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
	      "report. Then try booting with the 'noapic' option.\n");
out:
	local_irq_enable();
}
/* * Traditionally ISA IRQ2 is the cascade IRQ, and is not available * to devices. However there may be an I/O APIC pin available for * this interrupt regardless. The pin may be left unconnected, but * typically it will be reused as an ExtINT cascade interrupt for * the master 8259A. In the MPS case such a pin will normally be * reported as an ExtINT interrupt in the MP table. With ACPI * there is no provision for ExtINT interrupts, and in the absence * of an override it would be treated as an ordinary ISA I/O APIC * interrupt, that is edge-triggered and unmasked by default. We * used to do this, but it caused problems on some systems because * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using * the same ExtINT cascade interrupt to drive the local APIC of the * bootstrap processor. Therefore we refrain from routing IRQ2 to * the I/O APIC in all cases now. No actual device should request * it anyway. --macro
*/ #define PIC_IRQS (1UL << PIC_CASCADE_IR)
/* * The register returns the maximum index redir index supported, * which is one less than the total number of redir entries.
*/ return reg_01.bits.entries + 1;
}
/* * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use * gsi_top if ioapic_dynirq_base hasn't been initialized yet.
*/
ret = ioapic_dynirq_base ? : gsi_top;
/* * For DT enabled machines ioapic_dynirq_base is irrelevant and * always 0. gsi_top can be 0 if there is no IO/APIC registered. * 0 is an invalid interrupt number for dynamic allocations. Return * @from instead.
*/ return ret ? : from;
}
#ifdef CONFIG_X86_32 staticint io_apic_get_unique_id(int ioapic, int apic_id)
{ static DECLARE_BITMAP(apic_id_map, MAX_LOCAL_APIC); const u32 broadcast_id = 0xF; union IO_APIC_reg_00 reg_00; int i = 0;
/* Initialize the ID map */ if (bitmap_empty(apic_id_map, MAX_LOCAL_APIC))
copy_phys_cpu_present_map(apic_id_map);
/* Every APIC in a system must have a unique ID */ if (test_bit(apic_id, apic_id_map)) { for (i = 0; i < broadcast_id; i++) { if (!test_bit(i, apic_id_map)) break;
}
if (i == broadcast_id)
panic("Max apic_id exceeded!\n");
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.