/*
 * NOTE(review): only the signature and local declarations of first_pte_l7()
 * survive in this text — the statements that compute the first replicated
 * PTE of a large mapping (and the function's closing brace) are missing,
 * presumably lost in extraction. Left byte-identical rather than guessing
 * at the lost body; recover it from the original source.
 *
 * Contract implied by the callers visible below: given a pointer into a
 * series of replicated large-mapping PTEs, return a pointer to the first
 * PTE of the series; optionally report the mapping's page size via
 * @page_size and the number of replicated entries via @count (both may be
 * NULL) — TODO confirm against the original implementation.
 */
/* * Helper function to get the first pte of a large mapping
 */ static u64 *first_pte_l7(u64 *pte, unsignedlong *page_size, unsignedlong *count)
{ unsignedlong pte_mask, pg_size, cnt;
u64 *fpte;
/*
 * Recursively queue the page-table pages of one table level for freeing.
 *
 * @pt:       the 512-entry page table to scan (the table itself is also
 *            added to @freelist at the end)
 * @freelist: list that collects pages to be freed by the caller
 * @lvl:      level of @pt; tables above level 2 are recursed into, the
 *            tables they point to at level 2 are leaf tables and are
 *            added to the freelist directly
 *
 * Fixed here: the original text had the fused token "staticvoid" (a
 * compile error) and several statements collapsed onto single lines;
 * the logic is unchanged.
 */
static void free_pt_lvl(u64 *pt, struct iommu_pages_list *freelist, int lvl)
{
	u64 *p;
	int i;

	for (i = 0; i < 512; ++i) {
		/* PTE present? */
		if (!IOMMU_PTE_PRESENT(pt[i]))
			continue;

		/*
		 * Large PTE? Level 0 and level 7 entries are leaf
		 * mappings, not pointers to lower tables — skip them.
		 */
		if (PM_PTE_LEVEL(pt[i]) == 0 ||
		    PM_PTE_LEVEL(pt[i]) == 7)
			continue;

		/*
		 * Free the next level. No need to look at l1 tables here
		 * since they can only contain leaf PTEs; just free them
		 * directly.
		 */
		p = IOMMU_PTE_PAGE(pt[i]);
		if (lvl > 2)
			free_pt_lvl(p, freelist, lvl - 1);
		else
			iommu_pages_list_add(freelist, p);
	}

	/* Finally queue this table's own page. */
	iommu_pages_list_add(freelist, pt);
}
/*
 * Queue all page-table pages below @root for freeing, dispatching on the
 * page-table @mode (number of translation levels).
 *
 * PAGE_MODE_NONE and PAGE_MODE_7_LEVEL have no sub-tables to free; a
 * 1-level table is a single page; 2..6-level tables are walked by
 * free_pt_lvl(). Any other mode is a programming error (BUG()).
 *
 * Fixed here: the original text had the fused token "staticvoid" (a
 * compile error) and the whole switch jammed onto two lines; the logic
 * is unchanged.
 */
static void free_sub_pt(u64 *root, int mode, struct iommu_pages_list *freelist)
{
	switch (mode) {
	case PAGE_MODE_NONE:
	case PAGE_MODE_7_LEVEL:
		break;
	case PAGE_MODE_1_LEVEL:
		iommu_pages_list_add(freelist, root);
		break;
	case PAGE_MODE_2_LEVEL:
	case PAGE_MODE_3_LEVEL:
	case PAGE_MODE_4_LEVEL:
	case PAGE_MODE_5_LEVEL:
	case PAGE_MODE_6_LEVEL:
		free_pt_lvl(root, freelist, mode);
		break;
	default:
		/* Unknown page-table mode — cannot safely continue. */
		BUG();
	}
}
/*
 * NOTE(review): this span appears to be a garbled splice of TWO different
 * functions: the header comment, signature and allocation prologue of
 * increase_address_space(), followed (from the "while (last_addr ..."
 * line onward) by the body of a separate page-table-walk/allocation
 * function. Evidence: last_addr, page_size, level, end_lvl, pte_page,
 * page, updated, seqcount and __npte are all used without any visible
 * declaration, increase_address_space() is declared bool but the tail
 * returns a pointer, and the locals cfg/domain/flags/ret are never used.
 * Left byte-identical rather than guessing at the missing text; recover
 * both functions from the original source.
 */
/* * This function is used to add another level to an IO page table. Adding * another level increases the size of the address space by 9 bits to a size up * to 64 bits.
 */ staticbool increase_address_space(struct amd_io_pgtable *pgtable, unsignedlong address, unsignedint page_size_level,
gfp_t gfp)
{ struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg; struct protection_domain *domain =
container_of(pgtable, struct protection_domain, iop); unsignedlong flags; bool ret = true;
u64 *pte;
/* Allocate a page for the new root-level table before anything else. */
pte = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp, SZ_4K); if (!pte) returnfalse;
/* NOTE(review): from here on the text no longer matches this function. */
while (last_addr > PM_LEVEL_SIZE(pgtable->mode) ||
pgtable->mode - 1 < PAGE_SIZE_LEVEL(page_size)) { /* * Return an error if there is no memory to update the * page-table.
 */ if (!increase_address_space(pgtable, last_addr,
PAGE_SIZE_LEVEL(page_size), gfp)) return NULL;
}
do {
seqcount = read_seqcount_begin(&pgtable->seqcount);
/* Walk down the table, allocating/replacing levels as needed. */
while (level > end_lvl) {
u64 __pte, __npte; int pte_level;
__pte = *pte;
pte_level = PM_PTE_LEVEL(__pte);
/* * If we replace a series of large PTEs, we need * to tear down all of them.
 */ if (IOMMU_PTE_PRESENT(__pte) &&
pte_level == PAGE_MODE_7_LEVEL) { unsignedlong count, i;
u64 *lpte;
lpte = first_pte_l7(pte, NULL, &count);
/* * Unmap the replicated PTEs that still match the * original large mapping
 */ for (i = 0; i < count; ++i)
cmpxchg64(&lpte[i], __pte, 0ULL);
/* pte could have been changed somewhere. */ if (!try_cmpxchg64(pte, &__pte, __npte))
iommu_free_pages(page); elseif (IOMMU_PTE_PRESENT(__pte))
*updated = true;
continue;
}
/* No level skipping support yet */ if (pte_level != level) return NULL;
level -= 1;
/* Descend into the next-level table for this address. */
pte = IOMMU_PTE_PAGE(__pte);
if (pte_page && level == end_lvl)
*pte_page = pte;
pte = &pte[PM_LEVEL_INDEX(level, address)];
}
return pte;
}
/* * This function checks if there is a PTE for a given dma address. If * there is one, it returns the pointer to it.
*/ static u64 *fetch_pte(struct amd_io_pgtable *pgtable, unsignedlong address, unsignedlong *page_size)
{ int level; unsignedint seqcount;
u64 *pte;
*page_size = 0;
if (address > PM_LEVEL_SIZE(pgtable->mode)) return NULL;
do {
seqcount = read_seqcount_begin(&pgtable->seqcount);
level = pgtable->mode - 1;
pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
} while (read_seqcount_retry(&pgtable->seqcount, seqcount));
*page_size = PTE_LEVEL_PAGE_SIZE(level);
while (level > 0) {
/* Not Present */ if (!IOMMU_PTE_PRESENT(*pte)) return NULL;
/* Large PTE */ if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL ||
PM_PTE_LEVEL(*pte) == PAGE_MODE_NONE) break;
/* No level skipping support yet */ if (PM_PTE_LEVEL(*pte) != level) return NULL;
level -= 1;
/* Walk to the next level */
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[PM_LEVEL_INDEX(level, address)];
*page_size = PTE_LEVEL_PAGE_SIZE(level);
}
/* * If we have a series of large PTEs, make * sure to return a pointer to the first one.
*/ if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
pte = first_pte_l7(pte, page_size, NULL);
/*
 * NOTE(review): this span is another garbled splice of TWO functions: the
 * header comment, signature and locals of iommu_v1_map_pages(), followed
 * by the dirty-bit test/clear loop body of a separate read-and-clear-dirty
 * routine. Evidence: test_only, ptep, dirty and flags are used without
 * any visible declaration, the declared locals (pgtable, freelist,
 * updated, __pte, pte, ret, o_iova) are never used, and the span ends
 * with a dangling "} while (iova < end);" whose matching "do" is missing.
 * Left byte-identical rather than guessing at the missing text; recover
 * both functions from the original source.
 */
/* * Generic mapping functions. It maps a physical address into a DMA * address space. It allocates the page table pages if necessary. * In the future it can be extended to a generic mapping function * supporting all features of AMD IOMMU page tables like level skipping * and full 64 bit address spaces.
 */ staticint iommu_v1_map_pages(struct io_pgtable_ops *ops, unsignedlong iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped)
{ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist); bool updated = false;
u64 __pte, *pte; int ret, i, count;
size_t size = pgcount << __ffs(pgsize); unsignedlong o_iova = iova;
/* NOTE(review): from here on the text no longer matches this function. */
/* * 2.2.3.2 Host Dirty Support * When a non-default page size is used , software must OR the * Dirty bits in all of the replicated host PTEs used to map * the page. The IOMMU does not guarantee the Dirty bits are * set in all of the replicated PTEs. Any portion of the page * may have been written even if the Dirty bit is set in only * one of the replicated PTEs.
 */
count = PAGE_SIZE_PTE_COUNT(size); for (i = 0; i < count && test_only; i++) { if (test_bit(IOMMU_PTE_HD_BIT, (unsignedlong *)&ptep[i])) {
dirty = true; break;
}
}
/* Non-test path: clear the dirty bit in every replicated PTE. */
for (i = 0; i < count && !test_only; i++) { if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
(unsignedlong *)&ptep[i])) {
dirty = true;
}
}
/* * Mark the whole IOVA range as dirty even if only one of * the replicated PTEs were marked dirty.
 */ if (pte_test_and_clear_dirty(ptep, pgsize, flags))
iommu_dirty_bitmap_record(dirty, iova, pgsize);
iova += pgsize;
} while (iova < end);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.