/*
 * ACPI table definitions
 *
 * These data structures are laid over the table to parse the important values
 * out of it.
 */
/*
 * Structure describing one IOMMU in the ACPI table. Typically followed by one
 * or more ivhd_entry records.
 *
 * The structure is packed because it is laid directly over the table bytes.
 */ struct ivhd_header {
u8 type; /* IVHD block type; 0x10, 0x11 and 0x40 are the values handled in this file */
u8 flags; /* recommended feature enable bits from ACPI (saved into iommu->acpi_flags) */
u16 length; /* byte length of this IVHD block, including the device entries that follow */
u16 devid;
u16 cap_ptr;
u64 mmio_phys;
u16 pci_seg; /* PCI segment this block belongs to */
u16 info;
u32 efr_attr; /* attribute bits; the GASUP and HATDIS bits are tested in this file */
/* Following only valid on IVHD type 11h and 40h */
u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
u64 efr_reg2;
} __attribute__((packed));
/*
 * A device entry describing which devices a specific IOMMU translates and
 * which requestor ids they use.
 *
 * Packed: the entry is read in place from the ACPI table, and
 * ivhd_entry_length() relies on the byte offsets of the fields below.
 */ struct ivhd_entry {
u8 type; /* entry type; also determines the entry length */
u16 devid; /* device id this entry refers to */
u8 flags; /* device specific flags read from the ACPI table */
struct_group(ext_hid,
u32 ext;
u32 hidh;
);
u64 cid;
u8 uidf;
u8 uidl; /* byte offset 21: UID length, read by ivhd_entry_length() for ACPI_HID entries */
u8 uid; /* byte offset 22; presumably the first byte of the variable-length UID -- TODO confirm */
} __attribute__((packed));
/*
 * An AMD IOMMU memory definition structure. It defines things like exclusion
 * ranges for devices and regions that should be unity mapped.
 */ struct ivmd_header {
u8 type; /* IVMD type: single device, all devices, or a device range */
u8 flags; /* IVMD_FLAG_* bits; shifted right by one to form the mapping protection */
u16 length;
u16 devid; /* device id (first id when the entry describes a range) */
u16 aux; /* last device id for range-type entries */
u16 pci_seg; /* PCI segment the entry applies to */
u8 resv[6];
u64 range_start; /* start address of the memory range */
u64 range_length; /* length of the memory range */
} __attribute__((packed));
/* Global EFR and EFR2 registers */
u64 amd_iommu_efr;
u64 amd_iommu_efr2;

/* Host (v1) page table is not supported */
bool amd_iommu_hatdis;

/* SNP is enabled on the system? */
bool amd_iommu_snp_en;
EXPORT_SYMBOL(amd_iommu_snp_en);

LIST_HEAD(amd_iommu_pci_seg_list);	/* list of all PCI segments */
LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the system */
LIST_HEAD(amd_ivhd_dev_flags_list);	/* list of all IVHD device entry settings */

/* Number of IOMMUs present in the system */
static int amd_iommus_present;

/* IOMMUs have a non-present cache? */
bool amd_iommu_np_cache __read_mostly;
bool amd_iommu_iotlb_sup __read_mostly = true;
/*
 * Iterate through all the IOMMUs to get common EFR
 * masks among all IOMMUs and warn if found inconsistency.
 *
 * NOTE(review): the body below only logs the global amd_iommu_efr and
 * amd_iommu_efr2 values; 'iommu' is declared but not used here. Confirm
 * where the per-IOMMU iteration described above actually takes place.
 */ static __init void get_global_efr(void)
{ struct amd_iommu *iommu;
pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
}
/*
 * For IVHD type 0x11/0x40, EFR is also available via IVHD.
 * Default to IVHD EFR since it is available sooner
 * (i.e. before PCI init).
 *
 * @iommu: IOMMU whose features/features2 fields are seeded from the IVHD copy
 * @h:     IVHD header; efr_reg/efr_reg2 are only valid for types 0x11 and 0x40
 *
 * The EFR copy is taken only when the IVRS info field advertises EFRSUP.
 * Independently of that, DMA_REMAP in the info field turns on
 * amdr_ivrs_remap_support.
 */
static void __init early_iommu_features_init(struct amd_iommu *iommu,
					     struct ivhd_header *h)
{
	if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
		iommu->features = h->efr_reg;
		iommu->features2 = h->efr_reg2;
	}

	if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
		amdr_ivrs_remap_support = true;
}
/****************************************************************************
 *
 * AMD IOMMU MMIO register space handling functions
 *
 * These functions are used to program the IOMMU device registers in
 * MMIO space required for that driver.
 *
 ****************************************************************************/
/*
 * This function sets the exclusion range in the IOMMU. DMA accesses to the
 * exclusion range are passed through untranslated.
 *
 * @iommu: IOMMU whose exclusion_start/exclusion_length describe the range
 *
 * NOTE(review): only the limit register is written here; the page-aligned
 * start address is used solely to derive the limit. Confirm whether the
 * exclusion base register is expected to be programmed by this function too.
 */
static void iommu_set_exclusion_range(struct amd_iommu *iommu)
{
	u64 start = iommu->exclusion_start & PAGE_MASK;
	u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
	u64 entry;

	/*
	 * Note:
	 * Default to 4 Kbytes, which can be specified by setting base
	 * address equal to the limit address.
	 */

	/* Program the computed limit rather than an uninitialised stack value. */
	entry = limit;
	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
		    &entry, sizeof(entry));
}
/* Programs the physical address of the device table into the IOMMU hardware */ staticvoid iommu_set_device_table(struct amd_iommu *iommu)
{
u64 entry;
u32 dev_table_size = iommu->pci_seg->dev_table_size; void *dev_table = (void *)get_dev_table(iommu);
/* Generic functions to enable/disable certain features of the IOMMU. */

/*
 * Enable control feature @bit on @iommu by handing iommu_feature_set() a
 * value of 1 and a mask of 1 for that bit position.
 */
void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
{
	const u64 one = 1ULL;

	iommu_feature_set(iommu, one, one, bit);
}
/* Clear IRTE cache disabling bit */
iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
}
/* * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in * the system has one.
*/ static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
{ if (!request_mem_region(address, end, "amd_iommu")) {
pr_err("Can not reserve memory region %llx-%llx for mmio\n",
address, end);
pr_err("This is a BIOS bug. Please contact your hardware vendor\n"); return NULL;
}
switch (h->type) { case 0x10:
size = 24; break; case 0x11: case 0x40:
size = 40; break;
} return size;
}
/****************************************************************************
 *
 * The functions below belong to the first pass of AMD IOMMU ACPI table
 * parsing. In this pass we try to find out the highest device id this
 * code has to handle. Based on this information the size of the shared data
 * structures is determined later.
 *
 ****************************************************************************/
/*
 * This function calculates the length of a given IVHD entry.
 *
 * @ivhd: pointer to the first byte of the entry (its type byte)
 *
 * Returns the entry length in bytes:
 *  - types below 0x80 are fixed size, 4 << (type >> 6), i.e. 4 bytes for
 *    types 0x00-0x3f and 8 bytes for types 0x40-0x7f;
 *  - IVHD_DEV_ACPI_HID entries are variable size: 22 bytes plus the UID
 *    length stored at byte offset 21;
 *  - any other type yields 0, so a caller that advances by the returned
 *    length makes no progress on such an entry.
 */
static inline int ivhd_entry_length(u8 *ivhd)
{
	u32 type = ((struct ivhd_entry *)ivhd)->type;

	if (type < 0x80) {
		return 0x04 << (*ivhd >> 6);
	} else if (type == IVHD_DEV_ACPI_HID) {
		/* For ACPI_HID, offset 21 is uid len */
		return *((u8 *)ivhd + 21) + 22;
	}

	return 0;
}
/* * After reading the highest device id from the IOMMU PCI capability header * this function looks if there is a higher device id defined in the ACPI table
*/ staticint __init find_last_devid_from_ivhd(struct ivhd_header *h)
{
u8 *p = (void *)h, *end = (void *)h; struct ivhd_entry *dev; int last_devid = -EINVAL;
u32 ivhd_size = get_ivhd_header_size(h);
if (!ivhd_size) {
pr_err("Unsupported IVHD type %#x\n", h->type); return -EINVAL;
}
p += ivhd_size;
end += h->length;
while (p < end) {
dev = (struct ivhd_entry *)p; switch (dev->type) { case IVHD_DEV_ALL: /* Use maximum BDF value for DEV_ALL */ return 0xffff; case IVHD_DEV_SELECT: case IVHD_DEV_RANGE_END: case IVHD_DEV_ALIAS: case IVHD_DEV_EXT_SELECT: /* all the above subfield types refer to device ids */ if (dev->devid > last_devid)
last_devid = dev->devid; break; default: break;
}
p += ivhd_entry_length(p);
}
for (i = 0; i < table->length; ++i)
checksum += p[i]; if (checksum != 0) { /* ACPI table corrupt */
pr_err(FW_BUG "IVRS invalid checksum\n"); return -ENODEV;
}
return 0;
}
/*
 * Iterate over all IVHD entries in the ACPI table and find the highest device
 * id which we need to handle. This is the first of three functions which parse
 * the ACPI table. So we check the checksum here.
 *
 * @table:   IVRS table header; blocks start IVRS_HEADER_LENGTH bytes in
 * @pci_seg: only IVHD blocks of this PCI segment (and of the selected
 *           amd_iommu_target_ivhd_type) are considered
 *
 * Returns the highest device id found (0 if no block matched), or -EINVAL
 * if a matching block could not be parsed or a block reports a zero length.
 */
static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg)
{
	u8 *p = (u8 *)table, *end = (u8 *)table;
	struct ivhd_header *h;
	int last_devid, last_bdf = 0;

	p += IVRS_HEADER_LENGTH;
	end += table->length;

	while (p < end) {
		h = (struct ivhd_header *)p;

		/* A zero-length block would never advance p and spin forever. */
		if (!h->length) {
			pr_err(FW_BUG "IVRS block with zero length\n");
			return -EINVAL;
		}

		if (h->pci_seg == pci_seg &&
		    h->type == amd_iommu_target_ivhd_type) {
			last_devid = find_last_devid_from_ivhd(h);

			if (last_devid < 0)
				return -EINVAL;
			if (last_devid > last_bdf)
				last_bdf = last_devid;
		}
		p += h->length;
	}
	/* The block lengths should add up exactly to the table length. */
	WARN_ON(p != end);

	return last_bdf;
}
/****************************************************************************
 *
 * The following functions belong to the code path which parses the ACPI table
 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
 * data structures, initialize the per PCI segment device/alias/rlookup table
 * and also basically initialize the hardware.
 *
 ****************************************************************************/
/* * Obtain true physical address in kdump kernel when SME is enabled. * Currently, previous kernel with SME enabled and kdump kernel * with SME support disabled is not supported.
*/
phys = __sme_clr(paddr);
/*
 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
 * write commands to that buffer later and the IOMMU will execute them
 * asynchronously.
 *
 * Returns 0 on success, -ENOMEM if the CMD_BUFFER_SIZE allocation fails.
 */
static int __init alloc_command_buffer(struct amd_iommu *iommu)
{
	iommu->cmd_buf = iommu_alloc_pages_sz(GFP_KERNEL, CMD_BUFFER_SIZE);

	return iommu->cmd_buf ? 0 : -ENOMEM;
}
/* * Interrupt handler has processed all pending events and adjusted head * and tail pointer. Reset overflow mask and restart logging again.
*/ void amd_iommu_restart_log(struct amd_iommu *iommu, constchar *evt_type,
u8 cntrl_intr, u8 cntrl_log,
u32 status_run_mask, u32 status_overflow_mask)
{
u32 status;
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (status & status_run_mask) return;
/*
 * Restart event logging after the IOMMU experienced an event log buffer
 * overflow. Thin wrapper that hands the event-log control bits and status
 * masks to amd_iommu_restart_log().
 */
void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
{
	amd_iommu_restart_log(iommu, "Event",
			      CONTROL_EVT_INT_EN,
			      CONTROL_EVT_LOG_EN,
			      MMIO_STATUS_EVT_RUN_MASK,
			      MMIO_STATUS_EVT_OVERFLOW_MASK);
}
/*
 * Restart GA logging after the IOMMU experienced a GA log overflow.
 * Thin wrapper that hands the GA-log control bits and status masks to
 * amd_iommu_restart_log().
 */
void amd_iommu_restart_ga_log(struct amd_iommu *iommu)
{
	amd_iommu_restart_log(iommu, "GA",
			      CONTROL_GAINT_EN,
			      CONTROL_GALOG_EN,
			      MMIO_STATUS_GALOG_RUN_MASK,
			      MMIO_STATUS_GALOG_OVERFLOW_MASK);
}
/* * This function resets the command buffer if the IOMMU stopped fetching * commands from it.
*/ staticvoid amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
{
iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
/*
 * This function writes the command buffer address to the hardware and
 * enables it.
 *
 * The command buffer must already exist (BUG otherwise). The address
 * register is only programmed outside of a kdump kernel; in both cases the
 * buffer is then reset through amd_iommu_reset_cmd_buffer().
 */
static void iommu_enable_command_buffer(struct amd_iommu *iommu)
{
	u64 entry;

	BUG_ON(iommu->cmd_buf == NULL);

	if (!is_kdump_kernel()) {
		/*
		 * Command buffer is re-used for kdump kernel and setting
		 * of MMIO register is not required.
		 */
		entry = iommu_virt_to_phys(iommu->cmd_buf);
		entry |= MMIO_CMD_SIZE_512;
		memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
			    &entry, sizeof(entry));
	}

	amd_iommu_reset_cmd_buffer(iommu);
}
/*
 * This function disables the command buffer by clearing CONTROL_CMDBUF_EN.
 */
static void iommu_disable_command_buffer(struct amd_iommu *iommu)
{
	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
}
/* allocates the memory where the IOMMU will log its events to */ staticint __init alloc_event_buffer(struct amd_iommu *iommu)
{
iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL,
EVT_BUFFER_SIZE);
if (!is_kdump_kernel()) { /* * Event buffer is re-used for kdump kernel and setting * of MMIO register is not required.
*/
entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
&entry, sizeof(entry));
}
/* set head and tail to zero manually */
writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (status & (MMIO_STATUS_GALOG_RUN_MASK)) break;
udelay(10);
}
if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) return -EINVAL;
return 0;
}
staticint iommu_init_ga_log(struct amd_iommu *iommu)
{ if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) return 0;
iommu->ga_log = iommu_alloc_pages_sz(GFP_KERNEL, GA_LOG_SIZE); if (!iommu->ga_log) goto err_out;
iommu->ga_log_tail = iommu_alloc_pages_sz(GFP_KERNEL, 8); if (!iommu->ga_log_tail) goto err_out;
if (check_feature(FEATURE_SNP)) { /* * When SNP is enabled, the exclusion base register is used for the * completion wait buffer (CWB) address. Read and re-use it.
*/
pr_info_once("Re-using CWB buffers from the previous kernel\n");
paddr = readq(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET) & PM_ADDR_MASK;
iommu->cmd_sem = iommu_memremap(paddr, PAGE_SIZE); if (!iommu->cmd_sem) return -ENOMEM;
iommu->cmd_sem_paddr = paddr;
} else { return alloc_cwwb_sem(iommu);
}
return 0;
}
/*
 * Set up the completion-wait semaphore, command buffer and event buffer of
 * one IOMMU.
 *
 * In a kdump kernel the previous kernel's buffers are reused/remapped;
 * otherwise fresh buffers are allocated. The three steps run in a fixed
 * order and the first failure is returned unchanged.
 *
 * Returns 0 on success or the error code of the failing step.
 */
static int __init alloc_iommu_buffers(struct amd_iommu *iommu)
{
	int ret;

	/*
	 * Reuse/Remap the previous kernel's allocated completion wait
	 * command and event buffers for kdump boot.
	 */
	if (is_kdump_kernel()) {
		ret = remap_or_alloc_cwwb_sem(iommu);
		if (ret)
			return ret;

		ret = remap_command_buffer(iommu);
		if (ret)
			return ret;

		ret = remap_event_buffer(iommu);
		if (ret)
			return ret;
	} else {
		ret = alloc_cwwb_sem(iommu);
		if (ret)
			return ret;

		ret = alloc_command_buffer(iommu);
		if (ret)
			return ret;

		ret = alloc_event_buffer(iommu);
		if (ret)
			return ret;
	}

	return 0;
}
/* Free the completion-wait semaphore page of @iommu, if one is set. */
static void __init free_cwwb_sem(struct amd_iommu *iommu)
{
	if (iommu->cmd_sem)
		iommu_free_pages((void *)iommu->cmd_sem);
}

/*
 * Release the completion-wait semaphore of @iommu, if one is set.
 *
 * With FEATURE_SNP the semaphore is undone with memunmap(); without it the
 * pages are freed with iommu_free_pages().
 */
static void __init unmap_cwwb_sem(struct amd_iommu *iommu)
{
	if (iommu->cmd_sem) {
		if (check_feature(FEATURE_SNP))
			memunmap((void *)iommu->cmd_sem);
		else
			iommu_free_pages((void *)iommu->cmd_sem);
	}
}
/*
 * Enable XT mode on @iommu when interrupt remapping is built in, guest IR
 * is in a GA mode and x2APIC remapping mode was selected. Does nothing
 * without CONFIG_IRQ_REMAP.
 */
static void iommu_enable_xt(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
	/*
	 * XT mode (32-bit APIC destination ID) requires
	 * GA mode (128-bit IRTE support) as a prerequisite.
	 */
	if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
	    amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
		iommu_feature_enable(iommu, CONTROL_XT_EN);
#endif /* CONFIG_IRQ_REMAP */
}
/* Set CONTROL_GT_EN on @iommu, but only when FEATURE_GT is available. */
static void iommu_enable_gt(struct amd_iommu *iommu)
{
	if (!check_feature(FEATURE_GT))
		return;

	iommu_feature_enable(iommu, CONTROL_GT_EN);
}
/* sets a specific bit in the device table entry. */ staticvoid set_dte_bit(struct dev_table_entry *dte, u8 bit)
{ int i = (bit >> 6) & 0x03; int _bit = bit & 0x3f;
/* Each IOMMU use separate device table with the same size */
lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
entry = (((u64) hi) << 32) + lo;
old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; if (old_devtb_size != pci_seg->dev_table_size) {
pr_err("The device table size of IOMMU:%d is not expected!\n",
iommu->index); returnfalse;
}
/* * When SME is enabled in the first kernel, the entry includes the * memory encryption mask(sme_me_mask), we must remove the memory * encryption mask to obtain the true physical address in kdump kernel.
*/
old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
if (old_devtb_phys >= 0x100000000ULL) {
pr_err("The address of old device table is above 4G, not trustworthy!\n"); returnfalse;
}
/* * Re-use the previous kernel's device table for kdump.
*/
pci_seg->old_dev_tbl_cpy = iommu_memremap(old_devtb_phys, pci_seg->dev_table_size); if (pci_seg->old_dev_tbl_cpy == NULL) {
pr_err("Failed to remap memory for reusing old device table!\n"); returnfalse;
}
pr_warn("Translation is already enabled - trying to reuse translation structures\n");
/* * All IOMMUs within PCI segment shares common device table. * Hence reuse device table only once per PCI segment.
*/
for_each_pci_segment(pci_seg) {
for_each_iommu(iommu) { if (pci_seg->id != iommu->pci_seg->id) continue; if (!__reuse_device_table(iommu)) returnfalse; break;
}
}
for_each_ivhd_dte_flags(e) { /* * Need to go through the whole list to find the smallest range, * which contains the devid.
*/ if ((e->segid == segid) &&
(e->devid_first <= devid) && (devid <= e->devid_last)) { unsignedint len = e->devid_last - e->devid_first;
/* * This function takes the device specific flags read from the ACPI * table and sets up the device table entry with that information
*/ staticvoid __init
set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last,
u32 flags, u32 ext_flags)
{ int i; struct dev_table_entry dte = {};
/* Parse IVHD DTE setting flags and store information */ if (flags) { struct ivhd_dte_flags *d;
if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last)) return;
d = kzalloc(sizeof(struct ivhd_dte_flags), GFP_KERNEL); if (!d) return;
pr_debug("%s: devid range %#x:%#x\n", __func__, first, last);
if (flags & ACPI_DEVFLAG_INITPASS)
set_dte_bit(&dte, DEV_ENTRY_INIT_PASS); if (flags & ACPI_DEVFLAG_EXTINT)
set_dte_bit(&dte, DEV_ENTRY_EINT_PASS); if (flags & ACPI_DEVFLAG_NMI)
set_dte_bit(&dte, DEV_ENTRY_NMI_PASS); if (flags & ACPI_DEVFLAG_SYSMGT1)
set_dte_bit(&dte, DEV_ENTRY_SYSMGT1); if (flags & ACPI_DEVFLAG_SYSMGT2)
set_dte_bit(&dte, DEV_ENTRY_SYSMGT2); if (flags & ACPI_DEVFLAG_LINT0)
set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS); if (flags & ACPI_DEVFLAG_LINT1)
set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS);
/* Apply erratum 63, which needs info in initial_dte */ if (FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1)
dte.data[0] |= DTE_FLAG_IW;
/*
 * Register every entry of the early IOAPIC, HPET and ACPI-HID maps.
 *
 * IOAPIC and HPET entries go through add_special_device(), ACPI-HID entries
 * through add_acpi_hid_device(); each entry's cmd_line flag is forwarded.
 * Processing stops at the first failure.
 *
 * Returns 0 on success or the first non-zero return code encountered.
 */
static int __init add_early_maps(void)
{
	int i, ret;

	for (i = 0; i < early_ioapic_map_size; ++i) {
		ret = add_special_device(IVHD_SPECIAL_IOAPIC,
					 early_ioapic_map[i].id,
					 &early_ioapic_map[i].devid,
					 early_ioapic_map[i].cmd_line);
		if (ret)
			return ret;
	}

	for (i = 0; i < early_hpet_map_size; ++i) {
		ret = add_special_device(IVHD_SPECIAL_HPET,
					 early_hpet_map[i].id,
					 &early_hpet_map[i].devid,
					 early_hpet_map[i].cmd_line);
		if (ret)
			return ret;
	}

	for (i = 0; i < early_acpihid_map_size; ++i) {
		ret = add_acpi_hid_device(early_acpihid_map[i].hid,
					  early_acpihid_map[i].uid,
					  &early_acpihid_map[i].devid,
					  early_acpihid_map[i].cmd_line);
		if (ret)
			return ret;
	}

	return 0;
}
/* * Takes a pointer to an AMD IOMMU entry in the ACPI table and * initializes the hardware and our data structures with it.
*/ staticint __init init_iommu_from_acpi(struct amd_iommu *iommu, struct ivhd_header *h)
{
u8 *p = (u8 *)h;
u8 *end = p, flags = 0;
u16 devid = 0, devid_start = 0, devid_to = 0, seg_id;
u32 dev_i, ext_flags = 0; bool alias = false; struct ivhd_entry *e; struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
u32 ivhd_size; int ret;
ret = add_early_maps(); if (ret) return ret;
amd_iommu_apply_ivrs_quirks();
/* * First save the recommended feature enable bits from ACPI
*/
iommu->acpi_flags = h->flags;
/* * Done. Now parse the device entries
*/
ivhd_size = get_ivhd_header_size(h); if (!ivhd_size) {
pr_err("Unsupported IVHD type %#x\n", h->type); return -EINVAL;
}
p += ivhd_size;
end += h->length;
while (p < end) {
e = (struct ivhd_entry *)p;
seg_id = pci_seg->id;
ret = add_special_device(type, handle, &devid, false); if (ret) return ret;
/* * add_special_device might update the devid in case a * command-line override is present. So call * set_dev_entry_from_acpi after add_special_device.
*/
set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
break;
} case IVHD_DEV_ACPI_HID: {
u32 devid;
u8 hid[ACPIHID_HID_LEN];
u8 uid[ACPIHID_UID_LEN]; int ret;
if (h->type != 0x40) {
pr_err(FW_BUG "Invalid IVHD device type %#x\n",
e->type); break;
}
ret = add_acpi_hid_device(hid, uid, &devid, false); if (ret) return ret;
/* * add_special_device might update the devid in case a * command-line override is present. So call * set_dev_entry_from_acpi after add_special_device.
*/
set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
/* * First parse ACPI tables to find the largest Bus/Dev/Func we need to * handle in this PCI segment. Upon this information the shared data * structures for the PCI segments in the system will be allocated.
*/
last_bdf = find_last_devid_acpi(ivrs_base, id); if (last_bdf < 0) return NULL;
/* * This function glues the initialization function for one IOMMU * together and also allocates the command buffer and programs the * hardware. It does NOT enable the IOMMU. This is done afterwards.
*/ staticint __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, struct acpi_table_header *ivrs_base)
{ struct amd_iommu_pci_seg *pci_seg;
/* GAM requires GA mode. */ if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; case 0x11: case 0x40: if (h->efr_reg & (1 << 9))
iommu->mmio_phys_end = MMIO_REG_END_OFFSET; else
iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
/* XT and GAM require GA mode. */ if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) {
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break;
}
if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) {
pr_warn_once("Host Address Translation is not supported.\n");
amd_iommu_hatdis = true;
}
early_iommu_features_init(iommu, h);
break; default: return -EINVAL;
}
iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
iommu->mmio_phys_end); if (!iommu->mmio_base) return -ENOMEM;
return init_iommu_from_acpi(iommu, h);
}
/*
 * Late per-IOMMU initialization: set up the IOMMU buffers, sort out a
 * translation state left enabled by a previous kernel, and create the IRQ
 * domain when interrupt remapping is in use.
 *
 * Returns 0 on success, or the error from buffer setup or IRQ domain
 * creation.
 */
static int __init init_iommu_one_late(struct amd_iommu *iommu)
{
	int ret;

	ret = alloc_iommu_buffers(iommu);
	if (ret)
		return ret;

	iommu->int_enabled = false;

	init_translation_status(iommu);
	/* Pre-enabled translation is only kept when running as kdump kernel. */
	if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
		iommu_disable(iommu);
		clear_translation_pre_enabled(iommu);
		pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
			iommu->index);
	}

	/* The global flag stays set only while every IOMMU so far was pre-enabled. */
	if (amd_iommu_pre_enabled)
		amd_iommu_pre_enabled = translation_pre_enabled(iommu);

	if (amd_iommu_irq_remap) {
		ret = amd_iommu_create_irq_domain(iommu);
		if (ret)
			return ret;
	}

	/*
	 * Make sure IOMMU is not considered to translate itself. The IVRS
	 * table tells us so, but this is a lie!
	 */
	iommu->pci_seg->rlookup_table[iommu->devid] = NULL;

	return 0;
}
/** * get_highest_supported_ivhd_type - Look up the appropriate IVHD type * @ivrs: Pointer to the IVRS header * * This function search through all IVDB of the maximum supported IVHD
*/ static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
{
u8 *base = (u8 *)ivrs; struct ivhd_header *ivhd = (struct ivhd_header *)
(base + IVRS_HEADER_LENGTH);
u8 last_type = ivhd->type;
u16 devid = ivhd->devid;
while (((u8 *)ivhd - base < ivrs->length) &&
(ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
u8 *p = (u8 *) ivhd;
/* * Iterates over all IOMMU entries in the ACPI table, allocates the * IOMMU structure and initializes it with init_iommu_one()
*/ staticint __init init_iommu_all(struct acpi_table_header *table)
{
u8 *p = (u8 *)table, *end = (u8 *)table; struct ivhd_header *h; struct amd_iommu *iommu; int ret;
end += table->length;
p += IVRS_HEADER_LENGTH;
/* Phase 1: Process all IVHD blocks */ while (p < end) {
h = (struct ivhd_header *)p; if (*p == amd_iommu_target_ivhd_type) {
/* * Note: IVHD 0x11 and 0x40 also contains exact copy * of the IOMMU Extended Feature Register [MMIO Offset 0030h]. * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
*/ staticvoid __init late_iommu_features_init(struct amd_iommu *iommu)
{
u64 features, features2;
/* * Some rd890 systems may not be fully reconfigured by the * BIOS, so it's necessary for us to store this information so * it can be reprogrammed on resume
*/
pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
&iommu->stored_addr_lo);
pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
&iommu->stored_addr_hi);
/* Low bit locks writes to configuration space */
iommu->stored_addr_lo &= ~1;
for (i = 0; i < 6; i++) for (j = 0; j < 0x12; j++)
iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
for (i = 0; i < 0x83; i++)
iommu->stored_l2[i] = iommu_read_l2(iommu, i);
}
ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
amd_iommu_groups, "ivhd%d", iommu->index); if (ret) return ret;
/* * Allocate per IOMMU IOPF queue here so that in attach device path, * PRI capable device can be added to IOPF queue
*/ if (amd_iommu_gt_ppr_supported()) {
ret = amd_iommu_iopf_init(iommu); if (ret) return ret;
}
ret = iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); if (ret || amd_iommu_pgtable == PD_MODE_NONE) { /* * Remove sysfs if DMA translation is not supported by the * IOMMU. Do not return an error to enable IRQ remapping * in state_next(), DTE[V, TV] must eventually be set to 0.
*/
iommu_device_sysfs_remove(&iommu->iommu);
}
/* Init global identity domain before registering IOMMU */
amd_iommu_init_identity_domain();
for_each_iommu(iommu) {
ret = iommu_init_pci(iommu); if (ret) {
pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n",
iommu->index, ret); goto out;
} /* Need to setup range after PCI init */
iommu_set_cwwb_range(iommu);
}
/* * Order is important here to make sure any unity map requirements are * fulfilled. The unity mappings are created and written to the device * table during the iommu_init_pci() call. * * After that we call init_device_table_dma() to make sure any * uninitialized DTE will block DMA, and in the end we flush the caches * of all IOMMUs to make sure the changes to the device table are * active.
*/
for_each_pci_segment(pci_seg)
init_device_table_dma(pci_seg);
/****************************************************************************
 *
 * The following functions initialize the MSI interrupts for all IOMMUs
 * in the system. It's a bit challenging because there could be multiple
 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
 * pci_dev.
 *
 ****************************************************************************/
staticint iommu_setup_msi(struct amd_iommu *iommu)
{ int r;
r = pci_enable_msi(iommu->dev); if (r) return r;
r = request_threaded_irq(iommu->dev->irq,
amd_iommu_int_handler,
amd_iommu_int_thread,
0, "AMD-Vi",
iommu);
/*
 * Set up the interrupt of one IOMMU and enable event interrupts.
 *
 * If the interrupt has not been set up yet (int_enabled is false), it is
 * configured through INTCAPXT in x2APIC remapping mode, or through MSI when
 * the device has an MSI capability; with neither available -ENODEV is
 * returned. When int_enabled is already set only the control bits are
 * re-enabled.
 *
 * Returns 0 on success or a negative error code from the setup step.
 */
static int iommu_init_irq(struct amd_iommu *iommu)
{
	int ret;

	if (iommu->int_enabled)
		goto enable_faults;

	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
		ret = iommu_setup_intcapxt(iommu);
	else if (iommu->dev->msi_cap)
		ret = iommu_setup_msi(iommu);
	else
		ret = -ENODEV;

	if (ret)
		return ret;

	iommu->int_enabled = true;

enable_faults:
	if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
		iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);

	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);

	return 0;
}
/****************************************************************************
 *
 * The next functions belong to the third pass of parsing the ACPI
 * table. In this last pass the memory mapping requirements are
 * gathered (like exclusion and unity mapping ranges).
 *
 ****************************************************************************/
pci_seg = get_pci_segment(m->pci_seg, ivrs_base); if (pci_seg == NULL) return -ENOMEM;
e = kzalloc(sizeof(*e), GFP_KERNEL); if (e == NULL) return -ENOMEM;
switch (m->type) { default:
kfree(e); return 0; case ACPI_IVMD_TYPE:
s = "IVMD_TYPEi\t\t\t";
e->devid_start = e->devid_end = m->devid; break; case ACPI_IVMD_TYPE_ALL:
s = "IVMD_TYPE_ALL\t\t";
e->devid_start = 0;
e->devid_end = pci_seg->last_bdf; break; case ACPI_IVMD_TYPE_RANGE:
s = "IVMD_TYPE_RANGE\t\t";
e->devid_start = m->devid;
e->devid_end = m->aux; break;
}
e->address_start = PAGE_ALIGN(m->range_start);
e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
e->prot = m->flags >> 1;
/* * Treat per-device exclusion ranges as r/w unity-mapped regions * since some buggy BIOSes might lead to the overwritten exclusion * range (exclusion_start and exclusion_length members). This * happens when there are multiple exclusion ranges (IVMD entries) * defined in ACPI table.
*/ if (m->flags & IVMD_FLAG_EXCL_RANGE)
e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
/* Enable the iommu */ if (!(ioc_feature_control & 0x1))
pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
/* Restore the iommu BAR */
pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
iommu->stored_addr_lo);
pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
iommu->stored_addr_hi);
/* Restore the l1 indirect regs for each of the 6 l1s */ for (i = 0; i < 6; i++) for (j = 0; j < 0x12; j++)
iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
/* Restore the l2 indirect regs */ for (i = 0; i < 0x83; i++)
iommu_write_l2(iommu, i, iommu->stored_l2[i]);
/* * Note: * The support for IRTCacheDis feature is dertermined by * checking if the bit is writable.
*/
iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS);
ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
ctrl &= (1ULL << CONTROL_IRTCACHEDIS); if (ctrl)
iommu->irtcachedis_enabled = true;
pr_info("iommu%d (%#06x) : IRT cache is %s\n",
iommu->index, iommu->devid,
iommu->irtcachedis_enabled ? "disabled" : "enabled");
}
staticvoid iommu_enable_2k_int(struct amd_iommu *iommu)
{ if (!FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) return;
/* * This function finally enables all IOMMUs found in the system after * they have been initialized. * * Or if in kdump kernel and IOMMUs are all pre-enabled, try to reuse * the old content of device table entries. Not this case or reuse failed, * just continue as normal kernel does.
*/ staticvoid early_enable_iommus(void)
{ struct amd_iommu *iommu; struct amd_iommu_pci_seg *pci_seg;
if (!reuse_device_table()) { /* * If come here because of failure in reusing device table from old * kernel with all IOMMUs enabled, print error message and try to * free allocated old_dev_tbl_cpy.
*/ if (amd_iommu_pre_enabled) {
pr_err("Failed to reuse DEV table from previous kernel.\n"); /* * Bail out early if unable to remap/reuse DEV table from * previous kernel if SNP enabled as IOMMU commands will * time out without DEV table and cause kdump boot panic.
*/
BUG_ON(check_feature(FEATURE_SNP));
}
for_each_iommu(iommu) { /* * Disable GALog if already running. It could have been enabled * in the previous boot before kdump.
*/
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) continue;
/* * Need to set and poll check the GALOGRun bit to zero before * we can set/ modify GA Log registers safely.
*/ for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) break;
udelay(10);
}
if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) return;
}
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
!check_feature(FEATURE_GAM_VAPIC)) {
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; return;
}
if (amd_iommu_snp_en &&
!FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) {
pr_warn("Force to disable Virtual APIC due to SNP\n");
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; return;
}
/* Enabling GAM and SNPAVIC support */
for_each_iommu(iommu) { if (iommu_init_ga_log(iommu) ||
iommu_ga_log_enable(iommu)) return;
iommu_feature_enable(iommu, CONTROL_GAM_EN); if (amd_iommu_snp_en)
iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN);
}
/* * If we have map overrides on the kernel command line the * messages in this function might not describe firmware bugs * anymore - so be careful
*/ if (cmdline_maps)
fw_bug = "";
for (idx = 0; idx < nr_ioapics; idx++) { int devid, id = mpc_ioapic_id(idx);
devid = get_ioapic_devid(id); if (devid < 0) {
pr_err("%s: IOAPIC[%d] not in IVRS table\n",
fw_bug, id);
ret = false;
} elseif (devid == IOAPIC_SB_DEVID) {
has_sb_ioapic = true;
ret = true;
}
}
if (!has_sb_ioapic) { /* * We expect the SB IOAPIC to be listed in the IVRS * table. The system timer is connected to the SB IOAPIC * and if we don't have it in the list the system will * panic at boot time. This situation usually happens * when the BIOS is buggy and provides us the wrong * device id for the IOAPIC in the system.
*/
pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
}
if (!ret)
pr_err("Disabling interrupt remapping\n");
/*
 * This is the hardware init function for AMD IOMMU in the system.
 * This function is called either from amd_iommu_init or from the interrupt
 * remapping setup code.
 *
 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
 * four times:
 *
 *	1 pass) Discover the most comprehensive IVHD type to use.
 *
 *	2 pass) Find the highest PCI device id the driver has to handle.
 *		Based on this information the size of the data structures
 *		that need to be allocated is determined.
 *
 *	3 pass) Initialize the data structures just allocated with the
 *		information in the ACPI table about available AMD IOMMUs
 *		in the system. It also maps the PCI devices in the
 *		system to specific IOMMUs
 *
 *	4 pass) After the basic data structures are allocated and
 *		initialized we update them with information about memory
 *		remapping requirements parsed out of the ACPI table in
 *		this last pass.
 *
 * After everything is set up the IOMMUs are enabled and the necessary
 * hotplug and suspend notifiers are registered.
 *
 * Returns 0 on success or a negative error code. Every exit path goes
 * through the 'out' label, which drops the ACPI table reference.
 *
 * NOTE(review): nothing in this body assigns ivrs_base (or uses 'status')
 * before ivrs_base is passed to check_ivrs_checksum() and acpi_put_table().
 * Confirm that the table lookup precedes the CX16 check.
 */
static int __init early_amd_iommu_init(void)
{
	struct acpi_table_header *ivrs_base;
	int ret;
	acpi_status status;
	u8 efr_hats;

	if (!boot_cpu_has(X86_FEATURE_CX16)) {
		pr_err("Failed to initialize. The CMPXCHG16B feature is required.\n");
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Validate checksum here so we don't need to do it when
	 * we actually parse the table
	 */
	ret = check_ivrs_checksum(ivrs_base);
	if (ret)
		goto out;

	ivinfo_init(ivrs_base);

	amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
	DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);

	/*
	 * now the data structures are allocated and basically initialized
	 * start the real acpi table scan
	 */
	ret = init_iommu_all(ivrs_base);
	if (ret)
		goto out;

	efr_hats = FIELD_GET(FEATURE_HATS, amd_iommu_efr);
	if (efr_hats != 0x3) {
		/*
		 * efr[HATS] bits specify the maximum host translation level
		 * supported, with LEVEL 4 being initial max level.
		 */
		amd_iommu_hpt_level = efr_hats + PAGE_MODE_4_LEVEL;
	} else {
		pr_warn_once(FW_BUG "Disable host address translation due to invalid translation level (%#x).\n",
			     efr_hats);
		amd_iommu_hatdis = true;
	}

	if (amd_iommu_pgtable == PD_MODE_V2) {
		if (!amd_iommu_v2_pgtbl_supported()) {
			pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
			amd_iommu_pgtable = PD_MODE_V1;
		}
	}

	if (amd_iommu_hatdis) {
		/*
		 * Host (v1) page table is not available. Attempt to use
		 * Guest (v2) page table.
		 */
		if (amd_iommu_v2_pgtbl_supported())
			amd_iommu_pgtable = PD_MODE_V2;
		else
			amd_iommu_pgtable = PD_MODE_NONE;
	}

	/* Disable any previously enabled IOMMUs */
	if (!is_kdump_kernel() || amd_iommu_disabled)
		disable_iommus();

	if (amd_iommu_irq_remap)
		amd_iommu_irq_remap = check_ioapic_information();

	if (amd_iommu_irq_remap) {
		struct amd_iommu_pci_seg *pci_seg;

		/* Preset the error so a failed allocation can jump straight out. */
		ret = -ENOMEM;
		for_each_pci_segment(pci_seg) {
			if (alloc_irq_lookup_table(pci_seg))
				goto out;
		}
	}

	ret = init_memory_definitions(ivrs_base);
	if (ret)
		goto out;

	/* init the device table */
	init_device_table();

out:
	/* Don't leak any ACPI memory */
	acpi_put_table(ivrs_base);

	return ret;
}
staticint amd_iommu_enable_interrupts(void)
{ struct amd_iommu *iommu; int ret = 0;
for_each_iommu(iommu) {
ret = iommu_init_irq(iommu); if (ret) goto out;
}
/* * Interrupt handler is ready to process interrupts. Enable * PPR and GA log interrupt for all IOMMUs.
*/
enable_iommus_vapic();
enable_iommus_ppr();
/* Don't use IOMMU if there is Stoney Ridge graphics */ for (i = 0; i < 32; i++) {
u32 pci_id;
pci_id = read_pci_config(0, i, 0, 0); if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
pr_info("Disable IOMMU on Stoney Ridge\n"); returnfalse;
}
}
out: /* Make sure ACS will be enabled during PCI probe */
pci_request_acs();
returntrue;
}
static __init void iommu_snp_enable(void)
{ #ifdef CONFIG_KVM_AMD_SEV if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return; /* * The SNP support requires that IOMMU must be enabled, and is * configured with V1 page table (DTE[Mode] = 0 is not supported).
*/ if (no_iommu || iommu_default_passthrough()) {
pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n"); goto disable_snp;
}
if (amd_iommu_pgtable != PD_MODE_V1) {
pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n"); goto disable_snp;
}
amd_iommu_snp_en = check_feature(FEATURE_SNP); if (!amd_iommu_snp_en) {
pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n"); goto disable_snp;
}
/* * Enable host SNP support once SNP support is checked on IOMMU.
*/ if (snp_rmptable_init()) {
pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n"); goto disable_snp;
}
/****************************************************************************
 *
 * AMD IOMMU Initialization State Machine
 *
 ****************************************************************************/
staticint __init state_next(void)
{ int ret = 0;
switch (init_state) { case IOMMU_START_STATE: if (!detect_ivrs()) {
init_state = IOMMU_NOT_FOUND;
ret = -ENODEV;
} else {
init_state = IOMMU_IVRS_DETECTED;
} break; case IOMMU_IVRS_DETECTED: if (amd_iommu_disabled) {
init_state = IOMMU_CMDLINE_DISABLED;
ret = -EINVAL;
} else {
ret = early_amd_iommu_init();
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
} break; case IOMMU_ACPI_FINISHED:
early_enable_iommus();
x86_platform.iommu_shutdown = disable_iommus;
init_state = IOMMU_ENABLED; break; case IOMMU_ENABLED:
register_syscore_ops(&amd_iommu_syscore_ops);
iommu_snp_enable();
ret = amd_iommu_init_pci();
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; break; case IOMMU_PCI_INIT:
ret = amd_iommu_enable_interrupts();
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN; break; case IOMMU_INTERRUPTS_EN:
init_state = IOMMU_INITIALIZED; break; case IOMMU_INITIALIZED: /* Nothing to do */ break; case IOMMU_NOT_FOUND: case IOMMU_INIT_ERROR: case IOMMU_CMDLINE_DISABLED: /* Error states => do nothing */
ret = -EINVAL; break; default: /* Unknown state */
BUG();
}
if (ret) {
free_dma_resources(); if (!irq_remapping_enabled) {
disable_iommus();
free_iommu_resources();
} else { struct amd_iommu *iommu; struct amd_iommu_pci_seg *pci_seg;
/*
 * Advance the init state machine until init_state equals @state.
 *
 * state_next() is called repeatedly; the loop gives up as soon as
 * init_state is one of the terminal error states (IOMMU_NOT_FOUND,
 * IOMMU_INIT_ERROR, IOMMU_CMDLINE_DISABLED).
 *
 * Returns the result of the last state_next() call, or -EINVAL when no
 * transition was attempted at all (this includes the case where
 * init_state already equals @state on entry).
 */
static int __init iommu_go_to_state(enum iommu_init_state state)
{
	int ret = -EINVAL;

	while (init_state != state) {
		if (init_state == IOMMU_NOT_FOUND ||
		    init_state == IOMMU_INIT_ERROR ||
		    init_state == IOMMU_CMDLINE_DISABLED)
			break;

		ret = state_next();
	}

	/*
	 * SNP platform initialization requires IOMMUs to be fully configured.
	 * If the SNP support on IOMMUs has NOT been checked, simply mark SNP
	 * as unsupported. If the SNP support on IOMMUs has been checked and
	 * host SNP support enabled but RMP enforcement has not been enabled
	 * in IOMMUs, then the system is in a half-baked state, but can limp
	 * along as all memory should be Hypervisor-Owned in the RMP. WARN,
	 * but leave SNP as "supported" to avoid confusing the kernel.
	 */
	if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
	    !WARN_ON_ONCE(amd_iommu_snp_en))
		cc_platform_clear(CC_ATTR_HOST_SEV_SNP);

	return ret;
}
#ifdef CONFIG_IRQ_REMAP
/*
 * Request interrupt remapping and run the init state machine up to
 * IOMMU_ACPI_FINISHED.
 *
 * Returns the state-machine error if that fails (the remapping request is
 * withdrawn first), 0 if amd_iommu_irq_remap is still set afterwards, and
 * -ENODEV if it is no longer set once the state machine has run.
 */
int __init amd_iommu_prepare(void)
{
	int ret;

	/* Ask for interrupt remapping before the state machine runs. */
	amd_iommu_irq_remap = true;

	ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
	if (ret) {
		amd_iommu_irq_remap = false;
		return ret;
	}

	return amd_iommu_irq_remap ? 0 : -ENODEV;
}
int __init amd_iommu_enable(void)
{ int ret;
ret = iommu_go_to_state(IOMMU_ENABLED); if (ret) return ret;
/*
 * Re-enable the IOMMUs by running amd_iommu_resume().
 *
 * @mode is accepted for interface compatibility; this body does not read it.
 * Always returns 0.
 */
int amd_iommu_reenable(int mode)
{
	amd_iommu_resume();

	return 0;
}
/*
 * Fault-reporting enable hook.
 *
 * Nothing is done here; @cpu is not used. Always returns 0.
 */
int amd_iommu_enable_faulting(unsigned int cpu)
{
	/* We enable MSI later when PCI is initialized */
	return 0;
}
#endif
/*
 * This is the core init function for AMD IOMMU hardware in the system.
 * This function is called from the generic x86 DMA layer initialization
 * code.
 *
 * Runs the init state machine up to IOMMU_INITIALIZED. On success the
 * debugfs entries are set up. On failure, when CONFIG_GART_IOMMU is set
 * and no IOMMU was added to amd_iommu_list, GART is tried as a fallback.
 *
 * Returns the result of iommu_go_to_state().
 */
static int __init amd_iommu_init(void)
{
	int ret;

	ret = iommu_go_to_state(IOMMU_INITIALIZED);
#ifdef CONFIG_GART_IOMMU
	if (ret && list_empty(&amd_iommu_list)) {
		/*
		 * We failed to initialize the AMD IOMMU - try fallback
		 * to GART if possible.
		 */
		gart_iommu_init();
	}
#endif

	if (!ret)
		amd_iommu_debugfs_setup();

	return ret;
}
/*
 * Decide whether the IOMMU may be used together with host memory encryption.
 *
 * Returns true when host memory encryption is not active, when the boot CPU
 * is not family 0x17, or when the family 0x17 microcode revision is
 * >= 0x08001205 or within [0x08001126, 0x080011ff]. Otherwise a notice is
 * logged and false is returned.
 */
static bool amd_iommu_sme_check(void)
{
	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) ||
	    (boot_cpu_data.x86 != 0x17))
		return true;

	/* For Fam17h, a specific level of support is required */
	if (boot_cpu_data.microcode >= 0x08001205)
		return true;

	if ((boot_cpu_data.microcode >= 0x08001126) &&
	    (boot_cpu_data.microcode <= 0x080011ff))
		return true;

	pr_notice("IOMMU not currently supported when SME is active\n");

	return false;
}
/****************************************************************************
 *
 * Early detect code. This code runs at IOMMU detection time in the DMA
 * layer. It just looks if there is an IVRS ACPI table to detect AMD
 * IOMMUs
 *
 ****************************************************************************/

/*
 * Run the init state machine up to IOMMU_IVRS_DETECTED.
 *
 * Detection is skipped when no_iommu is set, when another IOMMU was already
 * detected and there is no GART aperture, or when amd_iommu_sme_check()
 * rejects the configuration. On any of these, or when the state machine
 * fails, host SEV-SNP support is cleared if it was advertised.
 */
void __init amd_iommu_detect(void)
{
	int ret;

	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
		goto disable_snp;

	if (!amd_iommu_sme_check())
		goto disable_snp;

	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
	if (ret)
		goto disable_snp;

	/*
	 * Success: do not fall through into the failure label, otherwise SNP
	 * would be cleared on every path and the gotos above would be moot.
	 * NOTE(review): further success-path bookkeeping may belong here;
	 * confirm against the authoritative copy of this file.
	 */
	return;

disable_snp:
	if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
		cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
}
/****************************************************************************
 *
 * Parsing functions for the AMD IOMMU specific kernel command line
 * options.
 *
 ****************************************************************************/
if (!hid || !(*hid) || !uid) {
pr_err("Invalid command line: hid or uid\n"); return 1;
}
/* * Ignore leading zeroes after ':', so e.g., AMDI0095:00 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match
*/ while (*uid == '0' && *(uid + 1))
uid++;
if (strlen(hid) >= ACPIHID_HID_LEN) {
pr_err("Invalid command line: hid is too long\n"); return 1;
} elseif (strlen(uid) >= ACPIHID_UID_LEN) {
pr_err("Invalid command line: uid is too long\n"); return 1;
}
/*
 * Report whether PASID can be supported.
 *
 * Returns false when the CPU has LA57 enabled but the IOMMU guest page
 * table level is not PAGE_MODE_5_LEVEL. Otherwise returns true only if
 * amd_iommu_gt_ppr_supported() holds and SNP is not enabled.
 */
bool amd_iommu_pasid_supported(void)
{
	/* CPU page table size should match IOMMU guest page table size */
	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    amd_iommu_gpt_level != PAGE_MODE_5_LEVEL)
		return false;

	/*
	 * Since DTE[Mode]=0 is prohibited on SNP-enabled system
	 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
	 * setting up IOMMUv1 page table.
	 */
	return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en;
}
for_each_iommu(iommu) if (i++ == idx) return iommu; return NULL;
}
/****************************************************************************
 *
 * IOMMU EFR Performance Counter support functionality. This code allows
 * access to the IOMMU PC functionality.
 *
 ****************************************************************************/
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.