/*
 * Per-device feature flags for VMD controllers, carried in the PCI device
 * table's driver_data and tested throughout the driver.
 */
enum vmd_features {
	/*
	 * Device may contain registers which hint the physical location of the
	 * membars, in order to allow proper address translation during
	 * resource assignment to enable guest virtualization
	 */
	VMD_FEAT_HAS_MEMBAR_SHADOW		= (1 << 0),

	/*
	 * Device may provide root port configuration information which limits
	 * bus numbering
	 */
	VMD_FEAT_HAS_BUS_RESTRICTIONS		= (1 << 1),

	/*
	 * Device contains physical location shadow registers in
	 * vendor-specific capability space
	 */
	VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP	= (1 << 2),

	/*
	 * Device may use MSI-X vector 0 for software triggering and will not
	 * be used for MSI remapping
	 */
	VMD_FEAT_OFFSET_FIRST_VECTOR		= (1 << 3),

	/*
	 * Device can bypass remapping MSI-X transactions into its MSI-X table,
	 * avoiding the requirement of a VMD MSI domain for child device
	 * interrupt handling.
	 */
	VMD_FEAT_CAN_BYPASS_MSI_REMAP		= (1 << 4),

	/*
	 * Enable ASPM on the PCIE root ports and set the default LTR of the
	 * storage devices on platforms where these values are not configured by
	 * BIOS. This is needed for laptops, which require these settings for
	 * proper power management of the SoC.
	 */
	VMD_FEAT_BIOS_PM_QUIRK			= (1 << 5),
};
/**
 * struct vmd_irq - private data to map driver IRQ to the VMD shared vector
 * @node:	list item for parent traversal.
 * @irq:	back pointer to parent.
 * @enabled:	true if driver enabled IRQ
 * @virq:	the virtual IRQ value provided to the requesting driver.
 *
 * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
 * a VMD IRQ using this structure.
 */
struct vmd_irq {
	struct list_head	node;
	struct vmd_irq_list	*irq;
	bool			enabled;
	unsigned int		virq;	/* was mangled "unsignedint" — restored */
};
/**
 * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector
 * @irq_list:	the list of irq's the VMD one demuxes to.
 * @srcu:	SRCU struct for local synchronization.
 * @count:	number of child IRQs assigned to this vector; used to track
 *		sharing.
 * @virq:	The underlying VMD Linux interrupt number
 */
struct vmd_irq_list {
	struct list_head	irq_list;
	struct srcu_struct	srcu;
	unsigned int		count;	/* was mangled "unsignedint" — restored */
	unsigned int		virq;
};
/* * Drivers managing a device in a VMD domain allocate their own IRQs as before, * but the MSI entry for the hardware it's driving will be programmed with a * destination ID for the VMD MSI-X table. The VMD muxes interrupts in its * domain into one of its own, and the VMD driver de-muxes these for the * handlers sharing that VMD IRQ. The vmd irq_domain provides the operations * and irq_chip to set this up.
*/ staticvoid vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{ struct vmd_irq *vmdirq = data->chip_data; struct vmd_irq_list *irq = vmdirq->irq; struct vmd_dev *vmd = irq_data_get_irq_handler_data(data);
/*
 * NOTE(review): vmd_compose_msi_msg() is truncated in this chunk — only the
 * local declarations are visible. The code that actually fills in *msg from
 * the VMD vector is missing; do not restyle this fragment until the full
 * function body is recovered. Also note "staticvoid" is extraction damage
 * for "static void" (left byte-identical here).
 */
/* * XXX: We can be even smarter selecting the best IRQ once we solve the * affinity problem.
*/ staticstruct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc)
{ int i, best;
if (vmd->msix_count == 1 + vmd->first_vec) return &vmd->irqs[vmd->first_vec];
/* * White list for fast-interrupt handlers. All others will share the * "slow" interrupt vector.
*/ switch (msi_desc_to_pci_dev(desc)->class) { case PCI_CLASS_STORAGE_EXPRESS: break; default: return &vmd->irqs[vmd->first_vec];
}
/* Pick the least-shared fast vector under the list lock. */
scoped_guard(raw_spinlock_irq, &list_lock) {
best = vmd->first_vec + 1; for (i = best; i < vmd->msix_count; i++) if (vmd->irqs[i].count < vmd->irqs[best].count)
best = i;
vmd->irqs[best].count++;
}
/*
 * NOTE(review): truncated — the trailing "return &vmd->irqs[best];" and the
 * function's closing brace are missing from this chunk. "staticstruct" is
 * extraction damage for "static struct" (left byte-identical).
 */
staticvoid vmd_remove_irq_domain(struct vmd_dev *vmd)
{ /* * Some production BIOS won't enable remapping between soft reboots. * Ensure remapping is restored before unloading the driver.
*/ if (!vmd->msix_count)
vmd_set_msi_remapping(vmd, true);
if (vmd->irq_domain) { struct fwnode_handle *fn = vmd->irq_domain->fwnode;
/*
 * NOTE(review): vmd_remove_irq_domain() is cut off above; the two lines
 * below belong to a *different* function — they look like the tail of
 * vmd_cfg_addr(), which bounds-checks a config-space offset against the
 * CFGBAR resource and returns the mapped address. The fragments were fused
 * by extraction; recover the full file before editing.
 */
if (offset + len >= resource_size(&vmd->dev->resource[VMD_CFGBAR])) return NULL;
return vmd->cfgbar + offset;
}
/*
 * CPU may deadlock if config space is not serialized on some versions of this
 * hardware, so all config space access is done under a spinlock.
 *
 * Returns 0 on success, -EFAULT for an unmapped address, -EINVAL for an
 * unsupported access width. ("staticint"/"unsignedint" in the mangled
 * original restored to "static int"/"unsigned int".)
 */
static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg,
			int len, u32 *value)
{
	struct vmd_dev *vmd = vmd_from_bus(bus);
	void __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);

	if (!addr)
		return -EFAULT;

	/* Serialize config-space access (see deadlock note above). */
	guard(raw_spinlock_irqsave)(&vmd->cfg_lock);
	switch (len) {
	case 1:
		*value = readb(addr);
		return 0;
	case 2:
		*value = readw(addr);
		return 0;
	case 4:
		*value = readl(addr);
		return 0;
	default:
		return -EINVAL;
	}
}
/*
 * VMD h/w converts non-posted config writes to posted memory writes. The
 * read-back in this function forces the completion so it returns only after
 * the config space was written, as expected.
 *
 * Returns 0 on success, -EFAULT for an unmapped address, -EINVAL for an
 * unsupported access width. ("staticint"/"unsignedint" in the mangled
 * original restored to "static int"/"unsigned int".)
 */
static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
			 int len, u32 value)
{
	struct vmd_dev *vmd = vmd_from_bus(bus);
	void __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);

	if (!addr)
		return -EFAULT;

	/* Serialize config-space access, same as vmd_pci_read(). */
	guard(raw_spinlock_irqsave)(&vmd->cfg_lock);
	switch (len) {
	case 1:
		writeb(value, addr);
		readb(addr);	/* force completion of the posted write */
		return 0;
	case 2:
		writew(value, addr);
		readw(addr);
		return 0;
	case 4:
		writel(value, addr);
		readl(addr);
		return 0;
	default:
		return -EINVAL;
	}
}
/* * VMD domains start at 0x10000 to not clash with ACPI _SEG domains. * Per ACPI r6.0, sec 6.5.6, _SEG returns an integer, of which the lower * 16 bits are the PCI Segment Group (domain) number. Other bits are * currently reserved.
*/ staticint vmd_find_free_domain(void)
{ int domain = 0xffff; struct pci_bus *bus = NULL;
/*
 * NOTE(review): body truncated — presumably a loop scanning existing PCI
 * buses for the highest domain number follows here, but it is missing from
 * this chunk. "staticint" is extraction damage for "static int".
 */
/*
 * Since VMD is an aperture to regular PCIe root ports, only allow it to
 * control features that the OS is allowed to control on the physical PCI bus.
 * ("staticvoid" in the mangled original restored to "static void".)
 */
static void vmd_copy_host_bridge_flags(struct pci_host_bridge *root_bridge,
				       struct pci_host_bridge *vmd_bridge)
{
	/* Mirror each OS-native feature-control flag from the real host bridge. */
	vmd_bridge->native_pcie_hotplug = root_bridge->native_pcie_hotplug;
	vmd_bridge->native_shpc_hotplug = root_bridge->native_shpc_hotplug;
	vmd_bridge->native_aer = root_bridge->native_aer;
	vmd_bridge->native_pme = root_bridge->native_pme;
	vmd_bridge->native_ltr = root_bridge->native_ltr;
	vmd_bridge->native_dpc = root_bridge->native_dpc;
}
/*
 * Enable ASPM and LTR settings on devices that aren't configured by BIOS.
 *
 * Called per-device; always returns 0 so a walk over the bus continues.
 * Fixes from the mangled original: "staticint"/"unsignedlong" restored, and
 * the HTML-damaged "<r_reg" argument restored to "&ltr_reg".
 */
static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
{
	unsigned long features = *(unsigned long *)userdata;
	u16 ltr = VMD_BIOS_PM_QUIRK_LTR;
	u32 ltr_reg;
	int pos;

	if (!(features & VMD_FEAT_BIOS_PM_QUIRK))
		return 0;

	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_LTR);
	if (!pos)
		goto out_state_change;

	/*
	 * Skip if the max snoop LTR is non-zero, indicating BIOS has set it
	 * so the LTR quirk is not needed.
	 */
	pci_read_config_dword(pdev, pos + PCI_LTR_MAX_SNOOP_LAT, &ltr_reg);
	if (!!(ltr_reg & (PCI_LTR_VALUE_MASK | PCI_LTR_SCALE_MASK)))
		goto out_state_change;

	/*
	 * Set the default values to the maximum required by the platform to
	 * allow the deepest power management savings. Write as a DWORD where
	 * the lower word is the max snoop latency and the upper word is the
	 * max non-snoop latency.
	 */
	ltr_reg = (ltr << 16) | ltr;
	pci_write_config_dword(pdev, pos + PCI_LTR_MAX_SNOOP_LAT, ltr_reg);
	pci_info(pdev, "VMD: Default LTR value set by driver\n");

out_state_change:
	/*
	 * Ensure devices are in D0 before enabling PCI-PM L1 PM Substates, per
	 * PCIe r6.0, sec 5.5.4.
	 */
	pci_set_power_state_locked(pdev, PCI_D0);
	pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
	return 0;
}
/*
 * NOTE(review): the lines below are the *interior* of a function whose header
 * is not visible in this chunk (membar shadow / bus-restriction handling,
 * MEMBAR resource setup, IRQ-domain selection, and the child-bus scan of what
 * appears to be the VMD domain-enable path). Several closing braces were lost
 * to extraction (e.g. after the pci_reset_bus() loop), and "elseif" below is
 * damage for "else if". All code tokens are left byte-identical pending
 * recovery of the full function.
 */
/* * Shadow registers may exist in certain VMD device ids which allow * guests to correctly assign host physical addresses to the root ports * and child devices. These registers will either return the host value * or 0, depending on an enable bit in the VMD device.
*/ if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
ret = vmd_get_phys_offsets(vmd, true, &offset[0], &offset[1]); if (ret) return ret;
} elseif (features & VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP) {
ret = vmd_get_phys_offsets(vmd, false, &offset[0], &offset[1]); if (ret) return ret;
}
/* * Certain VMD devices may have a root port configuration option which * limits the bus range to between 0-127, 128-255, or 224-255
*/ if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
ret = vmd_get_bus_number_start(vmd); if (ret) return ret;
}
/* * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can * put 32-bit resources in the window. * * There's no hardware reason why a 64-bit window *couldn't* * contain a 32-bit resource, but pbus_size_mem() computes the * bridge window size assuming a 64-bit window will contain no * 32-bit resources. __pci_assign_resource() enforces that * artificial restriction to make sure everything will fit. * * The only way we could use a 64-bit non-prefetchable MEMBAR is * if its address is <4GB so that we can convert it to a 32-bit * resource. To be visible to the host OS, all VMD endpoints must * be initially configured by platform BIOS, which includes setting * up these resources. We can assume the device is configured * according to the platform needs.
*/
res = &vmd->dev->resource[VMD_MEMBAR1];
upper_bits = upper_32_bits(res->end);
flags = res->flags & ~IORESOURCE_SIZEALIGN; if (!upper_bits)
flags &= ~IORESOURCE_MEM_64;
vmd->resources[1] = (struct resource) {
.name = "VMD MEMBAR1",
.start = res->start,
.end = res->end,
.flags = flags,
.parent = res,
};
/* * Currently MSI remapping must be enabled in guest passthrough mode * due to some missing interrupt remapping plumbing. This is probably * acceptable because the guest is usually CPU-limited and MSI * remapping doesn't become a performance bottleneck.
*/ if (!(features & VMD_FEAT_CAN_BYPASS_MSI_REMAP) ||
offset[0] || offset[1]) {
ret = vmd_alloc_irqs(vmd); if (ret) return ret;
vmd_set_msi_remapping(vmd, true);
ret = vmd_create_irq_domain(vmd); if (ret) return ret;
} else {
vmd_set_msi_remapping(vmd, false);
}
/* When Intel VMD is enabled, the OS does not discover the Root Ports * owned by Intel VMD within the MMCFG space. pci_reset_bus() applies * a reset to the parent of the PCI device supplied as argument. This * is why we pass a child device, so the reset can be triggered at * the Intel bridge level and propagated to all the children in the * hierarchy.
*/
list_for_each_entry(child, &vmd->bus->children, node) { if (!list_empty(&child->devices)) {
dev = list_first_entry(&child->devices, struct pci_dev, bus_list);
ret = pci_reset_bus(dev); if (ret)
pci_warn(dev, "can't reset device: %d\n", ret);
/*
 * NOTE(review): the braces closing the if/loop above (and a likely "break;")
 * are missing from this chunk at this point.
 */
/* * VMD root buses are virtual and don't return true on pci_is_pcie() * and will fail pcie_bus_configure_settings() early. It can instead be * run on each of the real root ports.
*/
list_for_each_entry(child, &vmd->bus->children, node)
pcie_bus_configure_settings(child);
pci_bus_add_devices(vmd->bus);
vmd_acpi_end(); return 0;
}
staticint vmd_probe(struct pci_dev *dev, conststruct pci_device_id *id)
{ unsignedlong features = (unsignedlong) id->driver_data; struct vmd_dev *vmd; int err;
if (xen_domain()) { /* * Xen doesn't have knowledge about devices in the VMD bus * because the config space of devices behind the VMD bridge is * not known to Xen, and hence Xen cannot discover or configure * them in any way. * * Bypass of MSI remapping won't work in that case as direct * write by Linux to the MSI entries won't result in functional * interrupts, as Xen is the entity that manages the host * interrupt controller and must configure interrupts. However * multiplexing of interrupts by the VMD bridge will work under * Xen, so force the usage of that mode which must always be * supported by VMD bridges.
*/
features &= ~VMD_FEAT_CAN_BYPASS_MSI_REMAP;
}
/* Require at least a 1 MiB CFGBAR before accepting the device. */
if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20)) return -ENOMEM;
vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL); if (!vmd) return -ENOMEM;
/*
 * NOTE(review): vmd_probe() is truncated here — the remainder of the probe
 * sequence (spinlock init, BAR mapping, domain enable, etc.) is missing
 * from this chunk. "staticint"/"conststruct"/"unsignedlong" are extraction
 * damage for "static int"/"const struct"/"unsigned long" (left
 * byte-identical here).
 */
/*
 * (Extraction residue — a German website disclaimer, not part of the driver
 * source; translated:) "The information on this website has been carefully
 * compiled to the best of our knowledge. However, neither completeness nor
 * correctness nor quality of the information provided is guaranteed.
 * Note: the colored syntax highlighting and the measurement are still
 * experimental."
 */