/*
 * Lock ordering:
 * ->dmar_global_lock
 *	->irq_2_ir_lock
 *		->qi->q_lock
 *			->iommu->register_lock
 * Note:
 * intel_irq_remap_ops.{supported,prepare,enable,disable,reenable} are called
 * in a single-threaded environment with interrupts disabled, so there is no
 * need to take the dmar_global_lock.
 */
DEFINE_RAW_SPINLOCK(irq_2_ir_lock); staticconststruct irq_domain_ops intel_ir_domain_ops;
index = irq_iommu->irte_index + irq_iommu->sub_handle;
irte = &iommu->ir_table->base[index];
if ((irte->pst == 1) || (irte_modified->pst == 1)) { /* * We use cmpxchg16 to atomically update the 128-bit IRTE, * and it cannot be updated by the hardware or other processors * behind us, so the return value of cmpxchg16 should be the * same as the old value.
*/
u128 old = irte->irte;
WARN_ON(!try_cmpxchg128(&irte->irte, &old, irte_modified->irte));
} else {
WRITE_ONCE(irte->low, irte_modified->low);
WRITE_ONCE(irte->high, irte_modified->high);
}
__iommu_flush_cache(iommu, irte, sizeof(*irte));
/*
 * Source validation types (SVT): how the hardware verifies the
 * requester-id of an interrupt request.
 */
#define SVT_NO_VERIFY		0x0	/* no verification is required */
#define SVT_VERIFY_SID_SQ	0x1	/* verify using SID and SQ fields */
#define SVT_VERIFY_BUS		0x2	/* verify bus of request-id */

/*
 * Source-id qualifiers (SQ): which bits of the request-id to compare.
 */
#define SQ_ALL_16	0x0	/* verify all 16 bits of request-id */
#define SQ_13_IGNORE_1	0x1	/* verify most significant 13 bits, ignore
				 * the third least significant bit
				 */
#define SQ_13_IGNORE_2	0x2	/* verify most significant 13 bits, ignore
				 * the second and third least significant bits
				 */
#define SQ_13_IGNORE_3	0x3	/* verify most significant 13 bits, ignore
				 * the least three significant bits
				 */
/* * set SVT, SQ and SID fields of irte to verify * source ids of interrupt requests
*/ staticvoid set_irte_sid(struct irte *irte, unsignedint svt, unsignedint sq, unsignedint sid)
{ if (disable_sourceid_checking)
svt = SVT_NO_VERIFY;
irte->svt = svt;
irte->sq = sq;
irte->sid = sid;
}
/* * Set an IRTE to match only the bus number. Interrupt requests that reference * this IRTE must have a requester-id whose bus number is between or equal * to the start_bus and end_bus arguments.
*/ staticvoid set_irte_verify_bus(struct irte *irte, unsignedint start_bus, unsignedint end_bus)
{
set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
(start_bus << 8) | end_bus);
}
staticint set_ioapic_sid(struct irte *irte, int apic)
{ int i;
u16 sid = 0;
if (!irte) return -1;
for (i = 0; i < MAX_IO_APICS; i++) { if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) {
sid = PCI_DEVID(ir_ioapic[i].bus, ir_ioapic[i].devfn); break;
}
}
if (sid == 0) {
pr_warn("Failed to set source-id of IOAPIC (%d)\n", apic); return -1;
}
for (i = 0; i < MAX_HPET_TBS; i++) { if (ir_hpet[i].iommu && ir_hpet[i].id == id) {
sid = PCI_DEVID(ir_hpet[i].bus, ir_hpet[i].devfn); break;
}
}
if (sid == 0) {
pr_warn("Failed to set source-id of HPET block (%d)\n", id); return -1;
}
/* * Should really use SQ_ALL_16. Some platforms are broken. * While we figure out the right quirks for these broken platforms, use * SQ_13_IGNORE_3 for now.
*/
set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, sid);
return 0;
}
/* State accumulated while walking a PCI device's DMA aliases. */
struct set_msi_sid_data {
	struct pci_dev *pdev;	/* alias device */
	u16 alias;		/* alias requester-id */
	int count;		/* number of aliases visited */
	int busmatch_count;	/* aliases on the same bus as their device */
};
/* * DMA alias provides us with a PCI device and alias. The only case * where the it will return an alias on a different bus than the * device is the case of a PCIe-to-PCI bridge, where the alias is for * the subordinate bus. In this case we can only verify the bus. * * If there are multiple aliases, all with the same bus number, * then all we can do is verify the bus. This is typical in NTB * hardware which use proxy IDs where the device will generate traffic * from multiple devfn numbers on the same bus. * * If the alias device is on a different bus than our source device * then we have a topology based alias, use it. * * Otherwise, the alias is for a device DMA quirk and we cannot * assume that MSI uses the same requester ID. Therefore use the * original device.
*/ if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number)
set_irte_verify_bus(irte, PCI_BUS_NUM(data.alias),
dev->bus->number); elseif (data.count >= 2 && data.busmatch_count == data.count)
set_irte_verify_bus(irte, dev->bus->number, dev->bus->number); elseif (data.pdev->bus->number != dev->bus->number)
set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias); else
set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
pci_dev_id(dev));
/* Check whether the old ir-table has the same size as ours */
irta = dmar_readq(iommu->reg + DMAR_IRTA_REG); if ((irta & INTR_REMAP_TABLE_REG_SIZE_MASK)
!= INTR_REMAP_TABLE_REG_SIZE) return -EINVAL;
/* * Now check the table for used entries and mark those as * allocated in the bitmap
*/ for (i = 0; i < INTR_REMAP_TABLE_ENTRIES; i++) { if (iommu->ir_table->base[i].present)
bitmap_set(iommu->ir_table->bitmap, i, 1);
}
/* * Global invalidation of interrupt entry cache to make sure the * hardware uses the new irq remapping table.
*/ if (!cap_esirtps(iommu->cap))
qi_global_iec(iommu);
}
/* * With CFI clear in the Global Command register, we should be * protected from dangerous (i.e. compatibility) interrupts * regardless of x2apic status. Check just to be sure.
*/ if (sts & DMA_GSTS_CFIS)
WARN(1, KERN_WARNING "Compatibility-format IRQs enabled despite intr remapping;\n" "you are vulnerable to IRQ injection.\n");
/* * If the queued invalidation is already initialized, * shouldn't disable it.
*/ if (!iommu->qi) { /* * Clear previous faults.
*/
dmar_fault(-1, iommu);
dmar_disable_qi(iommu);
if (dmar_enable_qi(iommu)) {
pr_err("Failed to enable queued invalidation\n"); goto out_free_ir_domain;
}
}
init_ir_status(iommu);
if (ir_pre_enabled(iommu)) { if (!is_kdump_kernel()) {
pr_info_once("IRQ remapping was enabled on %s but we are not in kdump mode\n",
iommu->name);
clear_ir_pre_enabled(iommu);
iommu_disable_irq_remapping(iommu);
} elseif (iommu_load_old_irte(iommu))
pr_err("Failed to copy IR table for %s from previous kernel\n",
iommu->name); else
pr_info("Copied IR table for %s from previous kernel\n",
iommu->name);
}
if (irq_remap_broken) {
pr_warn("This system BIOS has enabled interrupt remapping\n" "on a chipset that contains an erratum making that\n" "feature unstable. To maintain system stability\n" "interrupt remapping is being disabled. Please\n" "contact your BIOS vendor for an update\n");
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); return -ENODEV;
}
if (dmar_table_init() < 0) return -ENODEV;
if (!dmar_ir_support()) return -ENODEV;
if (parse_ioapics_under_ir()) {
pr_info("Not enabling interrupt remapping\n"); goto error;
}
/* First make sure all IOMMUs support IRQ remapping */
for_each_iommu(iommu, drhd) if (!ecap_ir_support(iommu->ecap)) goto error;
/* Detect remapping mode: lapic or x2apic */ if (x2apic_supported()) {
eim = !dmar_x2apic_optout(); if (!eim) {
pr_info("x2apic is disabled because BIOS sets x2apic opt out bit.");
pr_info("Use 'intremap=no_x2apic_optout' to override the BIOS setting.\n");
}
}
for_each_iommu(iommu, drhd) { if (eim && !ecap_eim_support(iommu->ecap)) {
pr_info("%s does not support EIM\n", iommu->name);
eim = 0;
}
}
eim_mode = eim; if (eim)
pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
/* Do the initializations early */
for_each_iommu(iommu, drhd) { if (intel_setup_irq_remapping(iommu)) {
pr_err("Failed to setup irq remapping for %s\n",
iommu->name); goto error;
}
}
if (!disable_irq_post) { /* * If IRTE is in posted format, the 'pda' field goes across the * 64-bit boundary, we need use cmpxchg16b to atomically update * it. We only expose posted-interrupt when X86_FEATURE_CX16 * is supported. Actually, hardware platforms supporting PI * should have X86_FEATURE_CX16 support, this has been confirmed * with Intel hardware guys.
*/ if (boot_cpu_has(X86_FEATURE_CX16))
intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
/* * Setup Interrupt-remapping for all the DRHD's now.
*/
for_each_iommu(iommu, drhd) { if (!ir_pre_enabled(iommu))
iommu_enable_irq_remapping(iommu);
setup = true;
}
while (start < end && ret == 0) {
scope = start; if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC)
ret = ir_parse_one_ioapic_scope(scope, iommu, drhd); elseif (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET)
ret = ir_parse_one_hpet_scope(scope, iommu, drhd);
start += scope->length;
}
return ret;
}
/* Release this IOMMU's claim on any IOAPIC and HPET scope entries. */
static void ir_remove_ioapic_hpet_scope(struct intel_iommu *iommu)
{
	int idx;

	for (idx = 0; idx < MAX_IO_APICS; idx++)
		if (ir_ioapic[idx].iommu == iommu)
			ir_ioapic[idx].iommu = NULL;

	for (idx = 0; idx < MAX_HPET_TBS; idx++)
		if (ir_hpet[idx].iommu == iommu)
			ir_hpet[idx].iommu = NULL;
}
/*
 * Find the association between IOAPICs and their interrupt-remapping
 * hardware units, and verify that every IOAPIC is covered by an IOMMU.
 */
static int __init parse_ioapics_under_ir(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool ir_supported = false;
	int ioapic_idx;

	for_each_iommu(iommu, drhd) {
		int ret;

		if (!ecap_ir_support(iommu->ecap))
			continue;

		ret = ir_parse_ioapic_hpet_scope(drhd->hdr, iommu);
		if (ret)
			return ret;

		ir_supported = true;
	}

	if (!ir_supported)
		return -ENODEV;

	/* Every IOAPIC must be reachable through some remapping unit. */
	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
		int ioapic_id = mpc_ioapic_id(ioapic_idx);

		if (!map_ioapic_to_iommu(ioapic_id)) {
			pr_err(FW_BUG "ioapic %d has no mapping iommu, "
			       "interrupt remapping will be disabled\n",
			       ioapic_id);
			return -1;
		}
	}

	return 0;
}
/*
 * Late initcall: (re)scan device scopes once driver infrastructure is up.
 *
 * NOTE(review): the original chunk was truncated here — the trailing
 * "return ret;" and closing brace were missing and have been restored.
 */
static int __init ir_dev_scope_init(void)
{
	int ret;

	if (!irq_remapping_enabled)
		return 0;

	down_write(&dmar_global_lock);
	ret = dmar_dev_scope_init();
	up_write(&dmar_global_lock);

	return ret;
}
/*
 * Store the MSI remapping domain pointer in the device if enabled.
 *
 * This is called from dmar_pci_bus_add_dev() so it works even when DMA
 * remapping is disabled. Only update the pointer if the device is not
 * already handled by a non default PCI/MSI interrupt domain. This protects
 * e.g. VMD devices.
 */
void intel_irq_remap_add_device(struct dmar_pci_notify_info *info)
{
	if (!irq_remapping_enabled || !pci_dev_has_default_msi_parent_domain(info->dev))
		return;

	/*
	 * NOTE(review): the original chunk was truncated after the guard;
	 * restored per the comment above — attach the device to its
	 * IOMMU's IR MSI domain.
	 */
	dev_set_msi_domain(&info->dev->dev, map_dev_to_ir(info->dev));
}
irte->present = 1;
irte->dst_mode = apic->dest_mode_logical; /* * Trigger mode in the IRTE will always be edge, and for IO-APIC, the * actual level or edge trigger will be setup in the IO-APIC * RTE. This will help simplify level triggered irq migration. * For more details, see the comments (in io_apic.c) explainig IO-APIC * irq migration in the presence of interrupt-remapping.
*/
irte->trigger_mode = 0;
irte->dlvry_mode = APIC_DELIVERY_MODE_FIXED;
irte->vector = vector;
irte->dest_id = IRTE_DEST(dest);
irte->redir_hint = 1;
}
/* * Don't modify IRTEs for IRQs that are being posted to vCPUs if the * host CPU affinity changes.
*/ if (ir_data->irq_2_iommu.posted_vcpu && !force_host) return;
ir_data->irq_2_iommu.posted_vcpu = false;
if (ir_data->irq_2_iommu.posted_msi)
intel_ir_reconfigure_irte_posted(irqd); else
modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry);
}
/* * Atomically updates the IRTE with the new destination, vector * and flushes the interrupt entry cache.
*/
irte->vector = cfg->vector;
irte->dest_id = IRTE_DEST(cfg->dest_apicid);
__intel_ir_reconfigure_irte(irqd, force_host);
}
/*
 * Migrate the IO-APIC irq in the presence of intr-remapping.
 *
 * For both level and edge triggered, irq migration is a simple atomic
 * update(of vector and cpu destination) of IRTE and flush the hardware cache.
 *
 * For level triggered, we eliminate the io-apic RTE modification (with the
 * updated vector information), by using a virtual vector (io-apic pin number).
 * Real vector that is used for interrupting cpu will be coming from
 * the interrupt-remapping table entry.
 *
 * As the migration is a simple atomic update of IRTE, the same mechanism
 * is used to migrate MSI irq's in the presence of interrupt-remapping.
 */
static int
intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask,
		      bool force)
{
	struct irq_data *parent = data->parent_data;
	struct irq_cfg *cfg = irqd_cfg(data);
	int ret;

	/* Let the parent (vector) domain pick the new CPU and vector first. */
	ret = parent->chip->irq_set_affinity(parent, mask, force);
	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
		return ret;

	intel_ir_reconfigure_irte(data, false);
	/*
	 * After this point, all the interrupts will start arriving
	 * at the new destination. So, time to cleanup the previous
	 * vector allocation.
	 */
	vector_schedule_cleanup(cfg);

	/*
	 * NOTE(review): the original chunk was truncated here; the return
	 * of IRQ_SET_MASK_OK_DONE (affinity fully handled here) and the
	 * closing brace have been restored.
	 */
	return IRQ_SET_MASK_OK_DONE;
}
/* stop posting interrupts, back to the default mode */ if (!pi_data) {
__intel_ir_reconfigure_irte(data, true);
} else { struct irte irte_pi;
/* * We are not caching the posted interrupt entry. We * copy the data from the remapped entry and modify * the fields which are relevant for posted mode. The * cached remapped entry is used for switching back to * remapped mode.
*/
memset(&irte_pi, 0, sizeof(irte_pi));
dmar_copy_shared_irte(&irte_pi, &ir_data->irte_entry);
/* * With posted MSIs, the MSI vectors are multiplexed into a single notification * vector, and only the notification vector is sent to the APIC IRR. Device * MSIs are then dispatched in a demux loop that harvests the MSIs from the * CPU's Posted Interrupt Request bitmap. I.e. Posted MSIs never get sent to * the APIC IRR, and thus do not need an EOI. The notification handler instead * performs a single EOI after processing the PIR. * * Note! Pending SMP/CPU affinity changes, which are per MSI, must still be * honored, only the APIC EOI is omitted. * * For the example below, 3 MSIs are coalesced into one CPU notification. Only * one apic_eoi() is needed, but each MSI needs to process pending changes to * its CPU affinity. * * __sysvec_posted_msi_notification() * irq_enter(); * handle_edge_irq() * irq_chip_ack_parent() * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * apic_eoi() * irq_exit() *
*/ staticstruct irq_chip intel_ir_chip_post_msi = {
.name = "INTEL-IR-POST",
.irq_ack = irq_move_irq,
.irq_set_affinity = intel_ir_set_affinity,
.irq_compose_msi_msg = intel_ir_compose_msi_msg,
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
};
/*
 * Support of Interrupt Remapping Unit Hotplug
 */
static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu)
{
	int eim = x2apic_enabled();
	int ret;

	/* In x2APIC mode a hot-added unit must support EIM. */
	if (eim && !ecap_eim_support(iommu->ecap)) {
		pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n",
			iommu->reg_phys, iommu->ecap);
		return -ENODEV;
	}

	if (ir_parse_ioapic_hpet_scope(dmaru->hdr, iommu)) {
		pr_warn("DRHD %Lx: failed to parse managed IOAPIC/HPET\n",
			iommu->reg_phys);
		return -ENODEV;
	}

	/* TODO: check all IOAPICs are covered by IOMMU */

	/* Setup Interrupt-remapping now. */
	ret = intel_setup_irq_remapping(iommu);
	if (ret) {
		pr_err("Failed to setup irq remapping for %s\n",
		       iommu->name);
		intel_teardown_irq_remapping(iommu);
		ir_remove_ioapic_hpet_scope(iommu);
		return ret;
	}

	iommu_enable_irq_remapping(iommu);
	return 0;
}
/*
 * Handle hot-add/hot-remove of a DMAR unit with respect to interrupt
 * remapping. Returns 0 on success, -EINVAL for a bad unit, -EBUSY when
 * the unit cannot be removed or lacks posted-interrupt support that the
 * system already advertises.
 *
 * NOTE(review): the original chunk's tail ("return ret;" and the closing
 * brace) was lost to trailing junk text and has been restored.
 */
int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
{
	int ret = 0;
	struct intel_iommu *iommu = dmaru->iommu;

	if (!irq_remapping_enabled)
		return 0;
	if (iommu == NULL)
		return -EINVAL;
	if (!ecap_ir_support(iommu->ecap))
		return 0;
	/* Posted interrupts are already in use; the new unit must support them. */
	if (irq_remapping_cap(IRQ_POSTING_CAP) &&
	    !cap_pi_support(iommu->cap))
		return -EBUSY;

	if (insert) {
		if (!iommu->ir_table)
			ret = dmar_ir_add(dmaru, iommu);
	} else {
		if (iommu->ir_table) {
			/* Refuse removal while any IRTE is still allocated. */
			if (!bitmap_empty(iommu->ir_table->bitmap,
					  INTR_REMAP_TABLE_ENTRIES)) {
				ret = -EBUSY;
			} else {
				iommu_disable_irq_remapping(iommu);
				intel_teardown_irq_remapping(iommu);
				ir_remove_ioapic_hpet_scope(iommu);
			}
		}
	}

	return ret;
}
/*
 * NOTE(review): stray non-source text (a German website disclaimer,
 * "Die Informationen auf dieser Webseite ...") was appended here by the
 * extraction tooling. It is not part of this source file and has been
 * removed so the file remains valid C.
 */