ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); /* * Hugepages under user process are always in RAM and never * swapped out, but theoretically it needs to be checked.
*/
present = pte && !huge_pte_none_mostly(huge_ptep_get(walk->mm, addr, pte)); for (; addr != end; vec++, addr += PAGE_SIZE)
*vec = present;
walk->private = vec;
spin_unlock(ptl); #else
BUG(); #endif return 0;
}
/* * Later we can get more picky about what "in core" means precisely. * For now, simply check to see if the page is in the page cache, * and is up to date; i.e. that no page-in operation would be required * at this time if an application were to map and access this page.
*/ staticunsignedchar mincore_page(struct address_space *mapping, pgoff_t index)
{ unsignedchar present = 0; struct folio *folio;
/* * When tmpfs swaps out a page from a file, any process mapping that * file will not get a swp_entry_t in its pte, but rather it is like * any other file mapping (ie. marked !present and faulted in with * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
*/
folio = filemap_get_incore_folio(mapping, index); if (!IS_ERR(folio)) {
present = folio_test_uptodate(folio);
folio_put(folio);
}
step = 1; /* We need to do cache lookup too for pte markers */ if (pte_none_mostly(pte))
__mincore_unmapped_range(addr, addr + PAGE_SIZE,
vma, vec); elseif (pte_present(pte)) { unsignedint batch = pte_batch_hint(ptep, pte);
/*
 * Decide whether residency information for @vma may be reported.
 *
 * Returns true for anonymous mappings. Returns false when the mapping is
 * not anonymous and has no backing file. For file-backed mappings, returns
 * true only if inode_owner_or_capable() accepts the file's inode or
 * file_permission() grants MAY_WRITE (i.e. returns 0).
 */
static inline bool can_do_mincore(struct vm_area_struct *vma)
{
	if (vma_is_anonymous(vma))
		return true;
	if (!vma->vm_file)
		return false;
	/*
	 * Reveal pagecache information only for non-anonymous mappings that
	 * correspond to the files the calling process could (if tried) open
	 * for writing; otherwise we'd be including shared non-exclusive
	 * mappings, which opens a side channel.
	 */
	return inode_owner_or_capable(&nop_mnt_idmap,
				      file_inode(vma->vm_file)) ||
	       file_permission(vma->vm_file, MAY_WRITE) == 0;
}
/* * Do a chunk of "sys_mincore()". We've already checked * all the arguments, we hold the mmap semaphore: we should * just return the amount of info we're asked for.
*/ staticlong do_mincore(unsignedlong addr, unsignedlong pages, unsignedchar *vec)
{ struct vm_area_struct *vma; unsignedlong end; int err;
/* * The mincore(2) system call. * * mincore() returns the memory residency status of the pages in the * current process's address space specified by [addr, addr + len). * The status is returned in a vector of bytes. The least significant * bit of each byte is 1 if the referenced page is in memory, otherwise * it is zero. * * Because the status of a page can change after mincore() checks it * but before it returns to the application, the returned vector may * contain stale information. Only locked pages are guaranteed to * remain in memory. * * return values: * zero - success * -EFAULT - vec points to an illegal address * -EINVAL - addr is not a multiple of PAGE_SIZE * -ENOMEM - Addresses in the range [addr, addr + len] are * invalid for the address space of this process, or * specify one or more pages which are not currently * mapped * -EAGAIN - A kernel resource was temporarily unavailable.
*/
SYSCALL_DEFINE3(mincore, unsignedlong, start, size_t, len, unsignedchar __user *, vec)
{ long retval; unsignedlong pages; unsignedchar *tmp;
start = untagged_addr(start);
/* Check the start address: needs to be page-aligned.. */ if (unlikely(start & ~PAGE_MASK)) return -EINVAL;
/* ..and we need to be passed a valid user-space range */ if (!access_ok((void __user *) start, len)) return -ENOMEM;
/* This also avoids any overflows on PAGE_ALIGN */
pages = len >> PAGE_SHIFT;
pages += (offset_in_page(len)) != 0;
if (!access_ok(vec, pages)) return -EFAULT;
tmp = (void *) __get_free_page(GFP_USER); if (!tmp) return -EAGAIN;
retval = 0; while (pages) { /* * Do at most PAGE_SIZE entries per iteration, due to * the temporary buffer size.
*/
mmap_read_lock(current->mm);
retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
mmap_read_unlock(current->mm);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.