// SPDX-License-Identifier: GPL-2.0 /* * This is a module to test the HMM (Heterogeneous Memory Management) * mirror and zone device private memory migration APIs of the kernel. * Userspace programs can register with the driver to mirror their own address * space and can use the device to read/write any valid virtual address.
*/ #include <linux/init.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/cdev.h> #include <linux/device.h> #include <linux/memremap.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/highmem.h> #include <linux/delay.h> #include <linux/pagemap.h> #include <linux/hmm.h> #include <linux/vmalloc.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/sched/mm.h> #include <linux/platform_device.h> #include <linux/rmap.h> #include <linux/mmu_notifier.h> #include <linux/migrate.h>
/*
 * For device_private pages, dpage is just a dummy struct page
 * representing a piece of device memory. dmirror_devmem_alloc_page
 * allocates a real system memory page as backing storage to fake a
 * real device. zone_device_data points to that backing page. But
 * for device_coherent memory, the struct page represents real
 * physical CPU-accessible memory that we can use directly.
 *
 * BACKING_PAGE(page) therefore resolves to the page that actually
 * holds the data: the backing system page for device-private memory,
 * or the page itself for device-coherent memory.
 */
#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
			   (page)->zone_device_data : (page))
/*
 * Optional start addresses of special purpose memory (SPM) for the two
 * simulated devices. When both are set, the devices expose coherent
 * (CPU-accessible) device memory instead of fake device-private memory.
 */
static unsigned long spm_addr_dev0;
module_param(spm_addr_dev0, long, 0644);
MODULE_PARM_DESC(spm_addr_dev0, "Specify start address for SPM (special purpose memory) used for device 0. By setting this Coherent device type will be used. Make sure spm_addr_dev1 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");

static unsigned long spm_addr_dev1;
module_param(spm_addr_dev1, long, 0644);
MODULE_PARM_DESC(spm_addr_dev1, "Specify start address for SPM (special purpose memory) used for device 1. By setting this Coherent device type will be used. Make sure spm_addr_dev0 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");
/*
 * Data structure to track address ranges and register for mmu interval
 * notifier updates.
 */
struct dmirror_interval {
	struct mmu_interval_notifier	notifier;
	struct dmirror			*dmirror;
};
/*
 * Data attached to the open device file.
 * Note that it might be shared after a fork().
 */
struct dmirror {
	struct dmirror_device		*mdevice;
	struct xarray			pt;
	struct mmu_interval_notifier	notifier;
	struct mutex			mutex;
};
/* * Since we asked for hmm_range_fault() to populate pages, * it shouldn't return an error entry on success.
*/
WARN_ON(*pfns & HMM_PFN_ERROR);
WARN_ON(!(*pfns & HMM_PFN_VALID));
/* * The XArray doesn't hold references to pages since it relies on * the mmu notifier to clear page pointers when they become stale. * Therefore, it is OK to just clear the entry.
*/
xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT,
end >> PAGE_SHIFT)
xa_erase(&dmirror->pt, pfn);
}
/* * Ignore invalidation callbacks for device private pages since * the invalidation is handled as part of the migration process.
*/ if (range->event == MMU_NOTIFY_MIGRATE &&
range->owner == dmirror->mdevice) returntrue;
if (mmu_notifier_range_blockable(range))
mutex_lock(&dmirror->mutex); elseif (!mutex_trylock(&dmirror->mutex)) returnfalse;
start = cmd->addr;
end = start + size; if (end < start) return -EINVAL;
ret = dmirror_bounce_init(&bounce, start, size); if (ret) return ret; if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr),
bounce.size)) {
ret = -EFAULT; goto fini;
}
while (1) {
mutex_lock(&dmirror->mutex);
ret = dmirror_do_write(dmirror, start, end, &bounce);
mutex_unlock(&dmirror->mutex); if (ret != -ENOENT) break;
start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
ret = dmirror_fault(dmirror, start, end, true); if (ret) break;
cmd->faults++;
}
/* * For ZONE_DEVICE private type, this is a fake device so we allocate * real system memory to store our device memory. * For ZONE_DEVICE coherent type we use the actual dpage to store the * data and ignore rpage.
*/ if (dmirror_is_private_zone(mdevice)) {
rpage = alloc_page(GFP_HIGHUSER); if (!rpage) return NULL;
}
spin_lock(&mdevice->lock);
/* * Note that spage might be NULL which is OK since it is an * unallocated pte_none() or read-only zero page.
*/
spage = migrate_pfn_to_page(*src); if (WARN(spage && is_zone_device_page(spage), "page already in device spage pfn: 0x%lx\n",
page_to_pfn(spage))) continue;
dpage = dmirror_devmem_alloc_page(mdevice); if (!dpage) continue;
rpage = BACKING_PAGE(dpage); if (spage)
copy_highpage(rpage, spage); else
clear_highpage(rpage);
/* * Normally, a device would use the page->zone_device_data to * point to the mirror but here we use it to hold the page for * the simulated device memory and that page holds the pointer * to the mirror.
*/
rpage->zone_device_data = dmirror;
pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
page_to_pfn(spage), page_to_pfn(dpage));
*dst = migrate_pfn(page_to_pfn(dpage)); if ((*src & MIGRATE_PFN_WRITE) ||
(!spage && args->vma->vm_flags & VM_WRITE))
*dst |= MIGRATE_PFN_WRITE;
}
}
page = make_device_exclusive(mm, addr, NULL, &folio); if (IS_ERR(page)) {
ret = PTR_ERR(page); break;
}
ret = dmirror_atomic_map(addr, page, dmirror);
folio_unlock(folio);
folio_put(folio);
}
mmap_read_unlock(mm);
mmput(mm);
if (ret) return ret;
/* Return the migrated data for verification. */
ret = dmirror_bounce_init(&bounce, start, size); if (ret) return ret;
mutex_lock(&dmirror->mutex);
ret = dmirror_do_read(dmirror, start, end, &bounce);
mutex_unlock(&dmirror->mutex); if (ret == 0) { if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
bounce.size))
ret = -EFAULT;
}
pr_debug("Migrating from sys mem to device mem\n");
dmirror_migrate_alloc_and_copy(&args, dmirror);
migrate_vma_pages(&args);
dmirror_migrate_finalize_and_map(&args, dmirror);
migrate_vma_finalize(&args);
}
mmap_read_unlock(mm);
mmput(mm);
/* * Return the migrated data for verification. * Only for pages in device zone
*/
ret = dmirror_bounce_init(&bounce, start, size); if (ret) return ret;
mutex_lock(&dmirror->mutex);
ret = dmirror_do_read(dmirror, start, end, &bounce);
mutex_unlock(&dmirror->mutex); if (ret == 0) { if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
bounce.size))
ret = -EFAULT;
}
cmd->cpages = bounce.cpages;
dmirror_bounce_fini(&bounce); return ret;
if (mmu_notifier_range_blockable(range))
mutex_lock(&dmirror->mutex); elseif (!mutex_trylock(&dmirror->mutex)) returnfalse;
/* * Snapshots only need to set the sequence number since any * invalidation in the interval invalidates the whole snapshot.
*/
mmu_interval_set_seq(mni, cur_seq);
start = cmd->addr;
end = start + size; if (end < start) return -EINVAL;
/* Since the mm is for the mirrored process, get a reference first. */ if (!mmget_not_zero(mm)) return -EINVAL;
/* * Register a temporary notifier to detect invalidations even if it * overlaps with other mmu_interval_notifiers.
*/
uptr = u64_to_user_ptr(cmd->ptr); for (addr = start; addr < end; addr = next) { unsignedlong n;
/*
 * Removes this chunk's free pages from the device's free list so they
 * can't be re-allocated while the chunk is being torn down.
 *
 * The free list is a singly linked list threaded through
 * page->zone_device_data.
 *
 * NOTE(review): when a matching page is found, the list head is simply
 * advanced past it; any non-matching pages seen earlier in the walk are
 * dropped from the list as well rather than relinked — TODO confirm
 * this is intended (it appears to assume the chunk's pages sit at the
 * front of the free list, or that dropping others is acceptable for a
 * test module).
 */
static void dmirror_remove_free_pages(struct dmirror_chunk *devmem)
{
	struct dmirror_device *mdevice = devmem->mdevice;
	struct page *page;

	for (page = mdevice->free_pages; page; page = page->zone_device_data)
		if (dmirror_page_to_chunk(page) == devmem)
			mdevice->free_pages = page->zone_device_data;
}
mutex_lock(&mdevice->devmem_lock); if (mdevice->devmem_chunks) { for (i = 0; i < mdevice->devmem_count; i++) { struct dmirror_chunk *devmem =
mdevice->devmem_chunks[i];
/* Return page to our allocator if not freeing the chunk */ if (!dmirror_page_to_chunk(page)->remove) {
mdevice->cfree++;
page->zone_device_data = mdevice->free_pages;
mdevice->free_pages = page;
}
spin_unlock(&mdevice->lock);
}
/* * Normally, a device would use the page->zone_device_data to point to * the mirror but here we use it to hold the page for the simulated * device memory and that page holds the pointer to the mirror.
*/
rpage = vmf->page->zone_device_data;
dmirror = rpage->zone_device_data;
/* FIXME demonstrate how we can adjust migrate range */
args.vma = vmf->vma;
args.start = vmf->address;
args.end = args.start + PAGE_SIZE;
args.src = &src_pfns;
args.dst = &dst_pfns;
args.pgmap_owner = dmirror->mdevice;
args.flags = dmirror_select_device(dmirror);
args.fault_page = vmf->page;
if (migrate_vma_setup(&args)) return VM_FAULT_SIGBUS;
ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror); if (ret) return ret;
migrate_vma_pages(&args); /* * No device finalize step is needed since * dmirror_devmem_fault_alloc_and_copy() will have already * invalidated the device page table.
*/
migrate_vma_finalize(&args); return 0;
}
/*
 * Module exit: tear down every device that was actually set up (a
 * non-zero zone_device_type marks an initialized device) and release
 * the character device region.
 */
static void __exit hmm_dmirror_exit(void)
{
	int id;

	for (id = 0; id < DMIRROR_NDEVICES; id++)
		if (dmirror_devices[id].zone_device_type)
			dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
}
/* Module entry/exit points and metadata. */
module_init(hmm_dmirror_init);
module_exit(hmm_dmirror_exit);
MODULE_DESCRIPTION("HMM (Heterogeneous Memory Management) test module");
MODULE_LICENSE("GPL");
/*
 * NOTE(review): the following trailer is website-extraction residue
 * (German boilerplate from a syntax-highlighting/measurement tool),
 * not part of the source file. It is preserved here inside a comment
 * so the file compiles; it should simply be deleted.
 *
 * Messung V0.5
 * Dauer der Verarbeitung: 0.15 Sekunden (vorverarbeitet)
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen
 * sorgfaeltig zusammengestellt. Es wird jedoch weder Vollstaendigkeit,
 * noch Richtigkeit, noch Qualitaet der bereit gestellten Informationen
 * zugesichert.
 * Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind noch
 * experimentell.
 */