// SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. * * The io_pagetable is the top of datastructure that maps IOVA's to PFNs. The * PFNs can be placed into an iommu_domain, or returned to the caller as a page * list for access by an in-kernel user. * * The datastructure uses the iopt_pages to optimize the storage of the PFNs * between the domains and xarray.
*/ #include <linux/err.h> #include <linux/errno.h> #include <linux/iommu.h> #include <linux/iommufd.h> #include <linux/lockdep.h> #include <linux/sched/mm.h> #include <linux/slab.h> #include <uapi/linux/iommufd.h>
/* * Automatically find a block of IOVA that is not being used and not reserved. * Does not return a 0 IOVA even if it is valid.
*/ staticint iopt_alloc_iova(struct io_pagetable *iopt, unsignedlong *iova, unsignedlong addr, unsignedlong length)
/*
 * NOTE(review): this block appears to be a corrupted extraction. The first
 * half matches the alignment-selection logic of iopt_alloc_iova(), but from
 * the "iova_alignment < iopt->iova_alignment" check onward the code matches
 * iopt_check_iova(): it uses `iova` as a scalar even though it is declared
 * here as an output pointer, and it reads a variable `last` that is never
 * declared. The allocation search loop is missing, which is why both
 * `used_span` and `allowed_span` are declared but never used. Restore from
 * the original source rather than patching in place — TODO confirm.
 */
{ unsignedlong page_offset = addr % PAGE_SIZE; struct interval_tree_double_span_iter used_span; struct interval_tree_span_iter allowed_span; unsignedlong max_alignment = PAGE_SIZE; unsignedlong iova_alignment;
/* Caller must already hold iova_rwsem */
lockdep_assert_held(&iopt->iova_rwsem);
/* Protect roundup_pow-of_two() from overflow */ if (length == 0 || length >= ULONG_MAX / 2) return -EOVERFLOW;
/* * Keep alignment present in addr when building the IOVA, which * increases the chance we can map a THP.
*/ if (!addr)
iova_alignment = roundup_pow_of_two(length); else
/* keep the low bits of addr: largest power of two dividing addr */
iova_alignment = min_t(unsignedlong,
roundup_pow_of_two(length),
1UL << __ffs64(addr));
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Aligning beyond the huge page size gains nothing */
max_alignment = HPAGE_SIZE; #endif /* Protect against ALIGN() overflow */ if (iova_alignment >= max_alignment)
iova_alignment = max_alignment;
/* NOTE(review): code below here belongs to iopt_check_iova(), not alloc */
if (iova_alignment < iopt->iova_alignment) return -EINVAL;
/* NOTE(review): `iova` is a pointer here — the check expects a scalar iova */
if ((iova & (iopt->iova_alignment - 1))) return -EINVAL;
/* NOTE(review): `last` is not declared in this function — will not compile */
if (check_add_overflow(iova, length - 1, &last)) return -EOVERFLOW;
/* No reserved IOVA intersects the range */ if (iopt_reserved_iter_first(iopt, iova, last)) return -EINVAL;
/* Check that there is not already a mapping in the range */ if (iopt_area_iter_first(iopt, iova, last)) return -EEXIST; return 0;
}
/* * The area takes a slice of the pages from start_bytes to start_byte + length
*/ staticint iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area, struct iopt_pages *pages, unsignedlong iova, unsignedlong start_byte, unsignedlong length, int iommu_prot)
{
lockdep_assert_held_write(&iopt->iova_rwsem);
if ((iommu_prot & IOMMU_WRITE) && !pages->writable) return -EPERM;
/* * The area is inserted with a NULL pages indicating it is not fully * initialized yet.
*/
area->iopt = iopt;
interval_tree_insert(&area->node, &iopt->area_itree); return 0;
}
if (flags & IOPT_ALLOC_IOVA) { /* Use the first entry to guess the ideal IOVA alignment */
elm = list_first_entry(pages_list, struct iopt_pages_list,
next); switch (elm->pages->type) { case IOPT_ADDRESS_USER:
start = elm->start_byte + (uintptr_t)elm->pages->uptr; break; case IOPT_ADDRESS_FILE:
start = elm->start_byte + elm->pages->start; break;
}
rc = iopt_alloc_iova(iopt, dst_iova, start, length); if (rc) goto out_unlock; if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) {
rc = -EINVAL; goto out_unlock;
}
} else {
rc = iopt_check_iova(iopt, *dst_iova, length); if (rc) goto out_unlock;
}
/* * Areas are created with a NULL pages so that the IOVA space is * reserved and we can unlock the iova_rwsem.
*/
iova = *dst_iova;
list_for_each_entry(elm, pages_list, next) {
rc = iopt_insert_area(iopt, elm->area, elm->pages, iova,
elm->start_byte, elm->length, iommu_prot); if (rc) goto out_unlock;
iova += elm->length;
}
down_read(&iopt->domains_rwsem);
rc = iopt_fill_domains_pages(pages_list); if (rc) goto out_unlock_domains;
down_write(&iopt->iova_rwsem);
list_for_each_entry(elm, pages_list, next) { /* * area->pages must be set inside the domains_rwsem to ensure * any newly added domains will get filled. Moves the reference * in from the list.
*/
elm->area->pages = elm->pages;
elm->pages = NULL;
elm->area = NULL;
}
up_write(&iopt->iova_rwsem);
out_unlock_domains:
up_read(&iopt->domains_rwsem); return rc;
}
staticint iopt_map_common(struct iommufd_ctx *ictx, struct io_pagetable *iopt, struct iopt_pages *pages, unsignedlong *iova, unsignedlong length, unsignedlong start_byte, int iommu_prot, unsignedint flags)
{ struct iopt_pages_list elm = {};
LIST_HEAD(pages_list); int rc;
rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags); if (rc) { if (elm.area)
iopt_abort_area(elm.area); if (elm.pages)
iopt_put_pages(elm.pages); return rc;
} return 0;
}
/** * iopt_map_user_pages() - Map a user VA to an iova in the io page table * @ictx: iommufd_ctx the iopt is part of * @iopt: io_pagetable to act on * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains * the chosen iova on output. Otherwise is the iova to map to on input * @uptr: User VA to map * @length: Number of bytes to map * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping * @flags: IOPT_ALLOC_IOVA or zero * * iova, uptr, and length must be aligned to iova_alignment. For domain backed * page tables this will pin the pages and load them into the domain at iova. * For non-domain page tables this will only setup a lazy reference and the * caller must use iopt_access_pages() to touch them. * * iopt_unmap_iova() must be called to undo this before the io_pagetable can be * destroyed.
*/ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, unsignedlong *iova, void __user *uptr, unsignedlong length, int iommu_prot, unsignedint flags)
{ struct iopt_pages *pages;
/** * iopt_map_file_pages() - Like iopt_map_user_pages, but map a file. * @ictx: iommufd_ctx the iopt is part of * @iopt: io_pagetable to act on * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains * the chosen iova on output. Otherwise is the iova to map to on input * @file: file to map * @start: map file starting at this byte offset * @length: Number of bytes to map * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping * @flags: IOPT_ALLOC_IOVA or zero
*/ int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, unsignedlong *iova, struct file *file, unsignedlong start, unsignedlong length, int iommu_prot, unsignedint flags)
{ struct iopt_pages *pages;
staticint iopt_unmap_iova_range(struct io_pagetable *iopt, unsignedlong start, unsignedlong last, unsignedlong *unmapped)
{ struct iopt_area *area; unsignedlong unmapped_bytes = 0; unsignedint tries = 0; /* If there are no mapped entries then success */ int rc = 0;
/* * The domains_rwsem must be held in read mode any time any area->pages * is NULL. This prevents domain attach/detatch from running * concurrently with cleaning up the area.
*/
again:
down_read(&iopt->domains_rwsem);
down_write(&iopt->iova_rwsem); while ((area = iopt_area_iter_first(iopt, start, last))) { unsignedlong area_last = iopt_area_last_iova(area); unsignedlong area_first = iopt_area_iova(area); struct iopt_pages *pages;
/* Userspace should not race map/unmap's of the same area */ if (!area->pages) {
rc = -EBUSY; goto out_unlock_iova;
}
/* The area is locked by an object that has not been destroyed */ if (area->num_locks) {
rc = -EBUSY; goto out_unlock_iova;
}
/* * num_accesses writers must hold the iova_rwsem too, so we can * safely read it under the write side of the iovam_rwsem * without the pages->mutex.
*/ if (area->num_accesses) {
size_t length = iopt_area_length(area);
/** * iopt_unmap_iova() - Remove a range of iova * @iopt: io_pagetable to act on * @iova: Starting iova to unmap * @length: Number of bytes to unmap * @unmapped: Return number of bytes unmapped * * The requested range must be a superset of existing ranges. * Splitting/truncating IOVA mappings is not allowed.
*/ int iopt_unmap_iova(struct io_pagetable *iopt, unsignedlong iova, unsignedlong length, unsignedlong *unmapped)
{ unsignedlong iova_last;
if (!length) return -EINVAL;
if (check_add_overflow(iova, length - 1, &iova_last)) return -EOVERFLOW;
int iopt_unmap_all(struct io_pagetable *iopt, unsignedlong *unmapped)
{ /* If the IOVAs are empty then unmap all succeeds */ return iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped);
}
/* The caller must always free all the nodes in the allowed_iova rb_root. */ int iopt_set_allow_iova(struct io_pagetable *iopt, struct rb_root_cached *allowed_iova)
/*
 * NOTE(review): this body is truncated. Upstream iopt_set_allow_iova() swaps
 * allowed_iova into iopt->allowed_itree under iova_rwsem and rejects allowed
 * ranges that intersect reserved IOVA; none of that is present here, and
 * `allowed` is declared but never used. The comment and assignment below
 * appear to belong to iopt_init_table(), and this int function falls off the
 * end without returning a value. Restore from the original source — TODO
 * confirm.
 */
{ struct iopt_allowed *allowed;
/* * iopt's start as SW tables that can use the entire size_t IOVA space * due to the use of size_t in the APIs. They have no alignment * restriction.
*/
iopt->iova_alignment = 1;
}
/** * iopt_unfill_domain() - Unfill a domain with PFNs * @iopt: io_pagetable to act on * @domain: domain to unfill * * This is used when removing a domain from the iopt. Every area in the iopt * will be unmapped from the domain. The domain must already be removed from the * domains xarray.
*/ staticvoid iopt_unfill_domain(struct io_pagetable *iopt, struct iommu_domain *domain)
{ struct iopt_area *area;
/* * Some other domain is holding all the pfns still, rapidly unmap this * domain.
*/ if (iopt->next_domain_id != 0) { /* Pick an arbitrary remaining domain to act as storage */ struct iommu_domain *storage_domain =
xa_load(&iopt->domains, 0);
for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
area = iopt_area_iter_next(area, 0, ULONG_MAX)) { struct iopt_pages *pages = area->pages;
if (!pages) continue;
mutex_lock(&pages->mutex); if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
WARN_ON(!area->storage_domain); if (area->storage_domain == domain)
area->storage_domain = storage_domain;
mutex_unlock(&pages->mutex);
iopt_area_unmap_domain(area, domain);
} return;
}
for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
area = iopt_area_iter_next(area, 0, ULONG_MAX)) { struct iopt_pages *pages = area->pages;
/** * iopt_fill_domain() - Fill a domain with PFNs * @iopt: io_pagetable to act on * @domain: domain to fill * * Fill the domain with PFNs from every area in the iopt. On failure the domain * is left unchanged.
*/ staticint iopt_fill_domain(struct io_pagetable *iopt, struct iommu_domain *domain)
{ struct iopt_area *end_area; struct iopt_area *area; int rc;
/* * The io page size drives the iova_alignment. Internally the iopt_pages * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE * objects into the iommu_domain. * * A iommu_domain must always be able to accept PAGE_SIZE to be * compatible as we can't guarantee higher contiguity.
*/
new_iova_alignment = max_t(unsignedlong,
1UL << __ffs(domain->pgsize_bitmap),
iopt->iova_alignment); if (new_iova_alignment > PAGE_SIZE) {
rc = -EINVAL; goto out_unlock;
} if (new_iova_alignment != iopt->iova_alignment) {
rc = iopt_check_iova_alignment(iopt, new_iova_alignment); if (rc) goto out_unlock;
}
/* No area exists that is outside the allowed domain aperture */ if (geometry->aperture_start != 0) {
rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1,
domain); if (rc) goto out_reserved;
} if (geometry->aperture_end != ULONG_MAX) {
rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1,
ULONG_MAX, domain); if (rc) goto out_reserved;
}
rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL); if (rc) goto out_reserved;
rc = iopt_fill_domain(iopt, domain); if (rc) goto out_release;
xa_for_each(&iopt->domains, index, iter_domain) if (iter_domain == domain) break; if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id) goto out_unlock;
/* * Compress the xarray to keep it linear by swapping the entry to erase * with the tail entry and shrinking the tail.
*/
iopt->next_domain_id--;
iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id); if (index != iopt->next_domain_id)
xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL);
/** * iopt_area_split - Split an area into two parts at iova * @area: The area to split * @iova: Becomes the last of a new area * * This splits an area into two. It is part of the VFIO compatibility to allow * poking a hole in the mapping. The two areas continue to point at the same * iopt_pages, just with different starting bytes.
*/ staticint iopt_area_split(struct iopt_area *area, unsignedlong iova)
{ unsignedlong alignment = area->iopt->iova_alignment; unsignedlong last_iova = iopt_area_last_iova(area); unsignedlong start_iova = iopt_area_iova(area); unsignedlong new_start = iova + 1; struct io_pagetable *iopt = area->iopt; struct iopt_pages *pages = area->pages; struct iopt_area *lhs; struct iopt_area *rhs; int rc;
lockdep_assert_held_write(&iopt->iova_rwsem);
if (iova == start_iova || iova == last_iova) return 0;
if (!pages || area->prevent_access) return -EBUSY;
mutex_lock(&pages->mutex); /* * Splitting is not permitted if an access exists, we don't track enough * information to split existing accesses.
*/ if (area->num_accesses) {
rc = -EINVAL; goto err_unlock;
}
/* * Splitting is not permitted if a domain could have been mapped with * huge pages.
*/ if (area->storage_domain && !iopt->disable_large_pages) {
rc = -EINVAL; goto err_unlock;
}
/* * If the original area has filled a domain, domains_itree has to be * updated.
*/ if (area->storage_domain) {
interval_tree_remove(&area->pages_node, &pages->domains_itree);
interval_tree_insert(&lhs->pages_node, &pages->domains_itree);
interval_tree_insert(&rhs->pages_node, &pages->domains_itree);
}
int iopt_disable_large_pages(struct io_pagetable *iopt)
{ int rc = 0;
down_write(&iopt->domains_rwsem);
down_write(&iopt->iova_rwsem); if (iopt->disable_large_pages) goto out_unlock;
/* Won't do it if domains already have pages mapped in them */ if (!xa_empty(&iopt->domains) &&
!RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) {
rc = -EINVAL; goto out_unlock;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.