// SPDX-License-Identifier: GPL-2.0-only
/*
 * Dynamic DMA mapping support for AMD Hammer.
 *
 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows to use PCI devices that only support 32bit addresses on systems
 * with more than 4GB.
 *
 * See Documentation/core-api/dma-api-howto.rst for the interface specification.
 *
 * Copyright 2002 Andi Kleen, SuSE Labs.
 */
/* GART remapping area (physical address of the aperture). */
static unsigned long iommu_bus_base;
/* Size of the remapping area, in bytes. */
static unsigned long iommu_size;
/* ... and in pages. */
static unsigned long iommu_pages;

/*
 * If this is disabled the IOMMU will use an optimized flushing strategy
 * of only flushing when a mapping is reused. With it true the GART is
 * flushed for every mapping. Problem is that doing the lazy flush seems
 * to trigger bugs with some popular PCI cards, in particular 3ware (but
 * has been also seen with Qlogic at least).
 */
static int iommu_fullflush = 1;

/* Allocation bitmap for the remapping area: */
static DEFINE_SPINLOCK(iommu_bitmap_lock);
/* Guarded by iommu_bitmap_lock: */
static unsigned long *iommu_gart_bitmap;
/*
 * iommu_full - report an IOMMU-space exhaustion for a mapping request.
 * @dev:  device the failed mapping was requested for
 * @size: size of the failed request, in bytes
 * @dir:  DMA direction of the request (currently unused here)
 *
 * Ran out of IOMMU space for this operation. This is very bad.
 * Unfortunately the drivers cannot handle this operation properly.
 * Return some non mapped prereserved space in the aperture and
 * let the Northbridge deal with it. This will result in garbage
 * in the IO operation. When the size exceeds the prereserved space
 * memory corruption will occur or random memory will be DMAed
 * out. Hopefully no network devices use single mappings that big.
 */
static void iommu_full(struct device *dev, size_t size, int dir)
{
	dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size);

#ifdef CONFIG_IOMMU_LEAK
	/* Dump the leak-tracking state to help find the space hog. */
	dump_leak();
#endif
}
/*
 * Map a single continuous physical area into the IOMMU.
 * Caller needs to check if the iommu is needed and flush.
 *
 * Returns the bus address of the new mapping, the untranslated physical
 * address when the device can reach it directly (non-forced fallback),
 * or DMA_MAPPING_ERROR on failure.
 */
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
			       size_t size, int dir, unsigned long align_mask)
{
	unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE);
	unsigned long iommu_page;
	int i;

	/* The GART cannot remap physical addresses beyond its limit. */
	if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR))
		return DMA_MAPPING_ERROR;

	iommu_page = alloc_iommu(dev, npages, align_mask);
	if (iommu_page == -1) {
		/* Device can address the memory directly: skip the GART. */
		if (!nonforced_iommu(dev, phys_mem, size))
			return phys_mem;
		if (panic_on_overflow)
			panic("dma_map_area overflow %lu bytes\n", size);
		iommu_full(dev, size, dir);
		return DMA_MAPPING_ERROR;
	}

	/* Fill in the GATT: one page-table entry per mapped page. */
	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
		phys_mem += PAGE_SIZE;
	}
	/*
	 * phys_mem was advanced in whole pages, so its low bits still hold
	 * the original intra-page offset to add back to the bus address.
	 */
	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
}
/*
 * Map a single area into the IOMMU.
 *
 * Returns the physical address directly when the device does not need
 * IOMMU translation for this range; otherwise maps it through the GART
 * and flushes the GART TLB before handing the bus address back.
 */
static dma_addr_t gart_map_page(struct device *dev, struct page *page,
				unsigned long offset, size_t size,
				enum dma_data_direction dir,
				unsigned long attrs)
{
	unsigned long bus;
	phys_addr_t paddr = page_to_phys(page) + offset;

	if (!need_iommu(dev, paddr, size))
		return paddr;

	bus = dma_map_area(dev, paddr, size, dir, 0);
	flush_gart();

	return bus;
}
/*
 * Free a DMA mapping created by gart_map_page().
 */
static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
			    size_t size, enum dma_data_direction dir,
			    unsigned long attrs)
{
	unsigned long iommu_page;
	int npages;
	int i;

	if (WARN_ON_ONCE(dma_addr == DMA_MAPPING_ERROR))
		return;

	/*
	 * This driver will not always use a GART mapping, but might have
	 * created a direct mapping instead. If that is the case there is
	 * nothing to unmap here.
	 */
	if (dma_addr < iommu_bus_base ||
	    dma_addr >= iommu_bus_base + iommu_size)
		return;

	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
	/* Repoint the freed GATT entries at the scratch-page entry. */
	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
	}
	free_iommu(iommu_page, npages);
}
/*
 * Wrapper for pci_unmap_single working with scatterlists.
 *
 * NOTE(review): this span of the file is corrupted. The body of
 * gart_unmap_sg is missing, the text then jumps into a truncated header
 * of dma_map_sg_nonforce and into an interior fragment of the
 * scatterlist-merging map path (with its out-of-place error/flush tail).
 * The fused tokens "staticvoid"/"staticint"/"unsignedlong" are part of
 * the corruption. Code is kept byte-identical here; recover the complete
 * functions from the upstream kernel source before building.
 */
staticvoid gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsignedlong attrs)
{ struct scatterlist *s; int i;
/* Fallback for dma_map_sg in case of overflow */ staticint dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, int nents, int dir)
{ struct scatterlist *s; int i;
/*
 * Fragment of the sg-merging map loop: handle the previous not yet
 * processed entries. Can only merge when the last chunk ends on a
 * page boundary and the new one doesn't have an offset.
 */
if (i > start) {
 if (!iommu_merge || !nextneed || !need || s->offset ||
(s->length + seg_size > max_seg_size) ||
(ps->offset + ps->length) % PAGE_SIZE) {
ret = dma_map_cont(dev, start_sg, i - start,
sgmap, pages, need); if (ret < 0) goto error;
out++;
/* When it was forced or merged try again in a dumb way */ if (force_iommu || iommu_merge) {
out = dma_map_sg_nonforce(dev, sg, nents, dir); if (out > 0) return out;
} if (panic_on_overflow)
panic("dma_map_sg: overflow on %lu pages\n", pages);
/* Flush the GART-TLB to remove stale entries */
amd_flush_garts();
}
/* * If fix_up_north_bridges is set, the north bridges have to be fixed up on * resume in the same way as they are handled in gart_iommu_hole_init().
*/ staticbool fix_up_north_bridges; static u32 aperture_order; static u32 aperture_alloc;
/*
 * NOTE(review): fragment — this loop is the interior of the GART resume
 * fix-up path; its enclosing function header is missing from this
 * corrupted chunk. Code kept byte-identical.
 *
 * Iterates all AMD northbridges, restoring the pre-suspend aperture
 * size and base via PCI config space.
 */
for (i = 0; i < amd_nb_num(); i++) { struct pci_dev *dev = node_to_amd_nb(i)->misc;
/*
 * Don't enable translations just yet. That is the next
 * step. Restore the pre-suspend aperture settings.
 */
gart_set_size_and_enable(dev, aperture_order);
pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25);
}
}
/*
 * Private Northbridge GATT initialization in case we cannot use the
 * AGP driver for some reason.
 *
 * NOTE(review): truncated — the body below breaks off right after the
 * per-node aperture consistency checks; the GATT allocation and
 * registration tail is missing from this corrupted chunk. Code kept
 * byte-identical; recover the full function from the upstream source.
 */
static __init int init_amd_gatt(struct agp_kern_info *info)
{ unsigned aper_size, gatt_size, new_aper_size; unsigned aper_base, new_aper_base; struct pci_dev *dev; void *gatt; int i;
pr_info("PCI-DMA: Disabling AGP.\n");
aper_size = aper_base = info->aper_size = 0;
/* Read every northbridge's aperture; all nodes must agree. */
dev = NULL; for (i = 0; i < amd_nb_num(); i++) {
dev = node_to_amd_nb(i)->misc;
new_aper_base = read_aperture(dev, &new_aper_size);
if (!new_aper_base) goto nommu;
/* First node seen establishes the expected aperture. */
if (!aper_base) {
aper_size = new_aper_size;
aper_base = new_aper_base;
/* Any mismatch between nodes means no usable shared aperture. */
} if (aper_size != new_aper_size || aper_base != new_aper_base) goto nommu;
} if (!aper_base) goto nommu;
/*
 * NOTE(review): fragment — this is the interior of the GART IOMMU init
 * function; its header and surrounding declarations (info, start_pfn,
 * end_pfn, scratch, ...) are missing from this corrupted chunk, and the
 * tail is cut off after the scratch-page setup. The fused token
 * "(unsignedlong)" below is part of the corruption. Code kept
 * byte-identical.
 */
/* Bail out when the GART IOMMU is unavailable or not wanted. */
if (no_iommu ||
(!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
!gart_iommu_aperture ||
(no_agp && init_amd_gatt(&info) < 0)) { if (max_pfn > MAX_DMA32_PFN) {
pr_warn("More than 4GB of memory but GART IOMMU not available.\n");
pr_warn("falling back to iommu=soft.\n");
} return 0;
}
/* need to map that range */
aper_size = info.aper_size << 20;
aper_base = info.aper_base;
end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
start_pfn = PFN_DOWN(aper_base); if (!pfn_range_is_mapped(start_pfn, end_pfn))
init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT,
PAGE_KERNEL);
/*
 * Unmap the IOMMU part of the GART. The alias of the page is
 * always mapped with cache enabled and there is no full cache
 * coherency across the GART remapping. The unmapping avoids
 * automatic prefetches from the CPU allocating cache lines in
 * there. All CPU accesses are done via the direct mapping to
 * the backing memory. The GART address is only used by PCI
 * devices.
 */
set_memory_np((unsignedlong)__va(iommu_bus_base),
iommu_size >> PAGE_SHIFT); /*
 * Tricky. The GART table remaps the physical memory range,
 * so the CPU won't notice potential aliases and if the memory
 * is remapped to UC later on, we might surprise the PCI devices
 * with a stray writeout of a cacheline. So play it sure and
 * do an explicit, full-scale wbinvd() _after_ having marked all
 * the pages as Not-Present:
 */
wbinvd();
/*
 * Now all caches are flushed and we can safely enable
 * GART hardware. Doing it early leaves the possibility
 * of stale cache entries that can lead to GART PTE
 * errors.
 */
enable_gart_translations();
/*
 * Try to workaround a bug (thanks to BenH):
 * Set unmapped entries to a scratch page instead of 0.
 * Any prefetches that hit unmapped entries won't get a bus abort
 * then. (P2P bridge may be prefetching on DMA reads).
 */
scratch = get_zeroed_page(GFP_KERNEL); if (!scratch)
panic("Cannot allocate iommu scratch page");
gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
/*
 * NOTE(review): the following German website disclaimer text was found
 * appended to the extracted source; it is not code and is preserved here
 * inside a comment so it cannot be parsed as C. Translation: "The
 * information on this website was carefully compiled to the best of our
 * knowledge. However, neither completeness, correctness, nor quality of
 * the provided information is guaranteed. Note: the colored syntax
 * display and the measurement are still experimental."
 *
 * Original text:
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder
 * Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten
 * Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */