/*
 * can_do_mlock - may the current task lock memory at all?
 *
 * Returns true when the RLIMIT_MEMLOCK rlimit is non-zero (some amount of
 * locked memory is permitted) or the task has CAP_IPC_LOCK (may lock
 * without limit); false otherwise.
 */
bool can_do_mlock(void)
{
	if (rlimit(RLIMIT_MEMLOCK) != 0)
		return true;
	if (capable(CAP_IPC_LOCK))
		return true;
	return false;
}
EXPORT_SYMBOL(can_do_mlock);
/*
 * Mlocked folios are marked with the PG_mlocked flag for efficient testing
 * in vmscan and, possibly, the fault path; and to support semi-accurate
 * statistics.
 *
 * An mlocked folio [folio_test_mlocked(folio)] is unevictable.  As such, it
 * will be ostensibly placed on the LRU "unevictable" list (actually no such
 * list exists), rather than the [in]active lists.  PG_unevictable is set to
 * indicate the unevictable state.
 */
/*
 * __mlock_folio - fix up LRU state for a folio that has just been mlocked.
 *
 * NOTE(review): this span appears to be an incomplete extraction.  Midway
 * through, the code starts manipulating mlock_count, an unbound "munlock:"
 * label, and the variables nr_pages and isolated, none of which are
 * declared here — those lines presumably belong to a separate
 * __munlock_folio() helper whose head was dropped, and the braces do not
 * balance.  Treat as a fragment; do not compile as-is.
 */
staticstruct lruvec *__mlock_folio(struct folio *folio, struct lruvec *lruvec)
{ /* There is nothing more we can do while it's off LRU */ if (!folio_test_clear_lru(folio)) return lruvec;
/* Relock (or take) the lruvec lock for this folio, IRQ-disabled. */
lruvec = folio_lruvec_relock_irq(folio, lruvec);
if (unlikely(folio_evictable(folio))) { /* * This is a little surprising, but quite possible: PG_mlocked * must have got cleared already by another CPU. Could this * folio be unevictable? I'm not sure, but move it now if so.
*/ if (folio_test_unevictable(folio)) {
lruvec_del_folio(lruvec, folio);
folio_clear_unevictable(folio);
lruvec_add_folio(lruvec, folio);
/*
 * NOTE(review): content is likely missing above this line — from here on
 * the code reads like the body of __munlock_folio().
 */
if (folio_test_unevictable(folio)) { /* Then mlock_count is maintained, but might undercount */ if (folio->mlock_count)
folio->mlock_count--; if (folio->mlock_count) goto out;
} /* else assume that was the last mlock: reclaim will fix it if not */
munlock: if (folio_test_clear_mlocked(folio)) {
/* nr_pages and isolated are undeclared here — from the missing part. */
__zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages); if (isolated || !folio_test_unevictable(folio))
__count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages); else
__count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
}
/* folio_evictable() has to be checked *after* clearing Mlocked */ if (isolated && folio_test_unevictable(folio) && folio_evictable(folio)) {
lruvec_del_folio(lruvec, folio);
folio_clear_unevictable(folio);
lruvec_add_folio(lruvec, folio);
__count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
}
out: if (isolated)
folio_set_lru(folio); return lruvec;
}
/*
 * Flags held in the low bits of a struct folio pointer on the mlock_fbatch.
 */
#define LRU_FOLIO 0x1
#define NEW_FOLIO 0x2

/*
 * mlock_lru - tag @folio as "already on an LRU list" by setting the
 * LRU_FOLIO bit in its (suitably aligned) pointer before it is stashed
 * on the mlock folio batch.
 */
static inline struct folio *mlock_lru(struct folio *folio)
{
	return (struct folio *)((unsigned long)folio + LRU_FOLIO);
}
/*
 * NOTE(review): the function below is truncated — the for-loop body stops
 * after stripping the flag bits from each stashed folio pointer; the
 * dispatch on those flags (LRU_FOLIO / NEW_FOLIO) and the end of the
 * function are missing from this extraction.
 */
/* * mlock_folio_batch() is derived from folio_batch_move_lru(): perhaps that can * make use of such folio pointer flags in future, but for now just keep it for * mlock. We could use three separate folio batches instead, but one feels * better (munlocking a full folio batch does not need to drain mlocking folio * batches first).
*/ staticvoid mlock_folio_batch(struct folio_batch *fbatch)
{ struct lruvec *lruvec = NULL; unsignedlong mlock; struct folio *folio; int i;
for (i = 0; i < folio_batch_count(fbatch); i++) {
folio = fbatch->folios[i];
/* Low pointer bits carry the LRU_FOLIO/NEW_FOLIO tag; strip them. */
mlock = (unsignedlong)folio & (LRU_FOLIO | NEW_FOLIO);
folio = (struct folio *)((unsignedlong)folio - mlock);
fbatch->folios[i] = folio;
/*
 * NOTE(review): truncated after the local declarations — the NR_MLOCK
 * accounting and the batching of the folio are missing from this
 * extraction.
 */
/** * mlock_new_folio - mlock a newly allocated folio not yet on LRU * @folio: folio to be mlocked, either normal or a THP head.
*/ void mlock_new_folio(struct folio *folio)
{ struct folio_batch *fbatch; int nr_pages = folio_nr_pages(folio);
/**
 * munlock_folio - munlock a folio
 * @folio: folio to be munlocked, either normal or a THP head.
 */
void munlock_folio(struct folio *folio)
{
	struct folio_batch *batch;

	local_lock(&mlock_fbatch.lock);
	batch = this_cpu_ptr(&mlock_fbatch.fbatch);
	/*
	 * Leave folio_test_clear_mlocked(folio) to __munlock_folio(),
	 * which will check whether the folio is multiply mlocked.
	 */
	folio_get(folio);
	if (!folio_batch_add(batch, folio) ||
	    !folio_may_be_lru_cached(folio) ||
	    lru_cache_disabled())
		mlock_folio_batch(batch);
	local_unlock(&mlock_fbatch.lock);
}
/*
 * allow_mlock_munlock - during a VMA walk, should this folio be
 * mlocked/munlocked here?  Returns false to skip large folios that are
 * out of range or not fully mapped.
 *
 * NOTE(review): extraction artifacts — "staticinlinebool",
 * "unsignedlong", "returntrue"/"returnfalse" are glued tokens, and the
 * function's final "return true;" plus closing brace appear to be
 * missing.  Do not compile as-is.
 */
staticinlinebool allow_mlock_munlock(struct folio *folio, struct vm_area_struct *vma, unsignedlong start, unsignedlong end, unsignedint step)
{ /* * For unlock, allow munlock large folio which is partially * mapped to VMA. As it's possible that large folio is * mlocked and VMA is split later. * * During memory pressure, such kind of large folio can * be split. And the pages are not in VM_LOCKed VMA * can be reclaimed.
*/ if (!(vma->vm_flags & VM_LOCKED)) returntrue;
/* folio_within_range() cannot take KSM, but any small folio is OK */ if (!folio_test_large(folio)) returntrue;
/* folio not in range [start, end), skip mlock */ if (!folio_within_range(folio, vma, start, end)) returnfalse;
/* folio is not fully mapped, skip mlock */ if (step != folio_nr_pages(folio)) returnfalse;
/*
 * NOTE(review): the function below is truncated — mlock_walk_ops is
 * declared but never used in the visible code, so the page-range walk
 * and the clearing of the temporary VM_IO bit that presumably follow
 * are missing from this extraction.
 */
/* * mlock_vma_pages_range() - mlock any pages already in the range, * or munlock all pages in the range. * @vma - vma containing range to be mlock()ed or munlock()ed * @start - start address in @vma of the range * @end - end of range in @vma * @newflags - the new set of flags for @vma. * * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED; * called for munlock() and munlockall(), to clear VM_LOCKED from @vma.
*/ staticvoid mlock_vma_pages_range(struct vm_area_struct *vma, unsignedlong start, unsignedlong end, vm_flags_t newflags)
{ staticconststruct mm_walk_ops mlock_walk_ops = {
.pmd_entry = mlock_pte_range,
.walk_lock = PGWALK_WRLOCK_VERIFY,
};
/* * There is a slight chance that concurrent page migration, * or page reclaim finding a page of this now-VM_LOCKED vma, * will call mlock_vma_folio() and raise page's mlock_count: * double counting, leaving the page unevictable indefinitely. * Communicate this danger to mlock_vma_folio() with VM_IO, * which is a VM_SPECIAL flag not allowed on VM_LOCKED vmas. * mmap_lock is held in write mode here, so this weird * combination should not be visible to other mmap_lock users; * but WRITE_ONCE so rmap walkers must see VM_IO if VM_LOCKED.
*/ if (newflags & VM_LOCKED)
newflags |= VM_IO;
vma_start_write(vma);
vm_flags_reset_once(vma, newflags);
/* NOTE(review): truncated here — the walk and VM_IO cleanup are absent. */
/*
 * NOTE(review): braces balance in the function below, but nr_pages is
 * declared and never used — the locked_vm accounting that presumably sat
 * between the vma_modify_flags() call and the VM_LOCKED check appears to
 * have been dropped by this extraction.  Verify against the full source
 * before relying on it.
 */
/* * mlock_fixup - handle mlock[all]/munlock[all] requests. * * Filters out "special" vmas -- VM_LOCKED never gets set for these, and * munlock is a no-op. However, for some special vmas, we go ahead and * populate the ptes. * * For vmas that pass the filters, merge/split as appropriate.
*/ staticint mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct **prev, unsignedlong start, unsignedlong end, vm_flags_t newflags)
{ struct mm_struct *mm = vma->vm_mm; int nr_pages; int ret = 0;
vm_flags_t oldflags = vma->vm_flags;
/* Special mappings never get VM_LOCKED; leave them untouched. */
if (newflags == oldflags || (oldflags & VM_SPECIAL) ||
is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
vma_is_dax(vma) || vma_is_secretmem(vma) || (oldflags & VM_DROPPABLE)) /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ goto out;
/* Merge/split the VMA as needed so [start, end) has its own VMA. */
vma = vma_modify_flags(vmi, *prev, vma, start, end, newflags); if (IS_ERR(vma)) {
ret = PTR_ERR(vma); goto out;
}
/* * vm_flags is protected by the mmap_lock held in write mode. * It's okay if try_to_unmap_one unmaps a page just after we * set VM_LOCKED, populate_vma_page_range will bring it back.
*/ if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) { /* No work to do, and mlocking twice would be wrong */
vma_start_write(vma);
vm_flags_reset(vma, newflags);
} else {
mlock_vma_pages_range(vma, start, end, newflags);
}
out:
*prev = vma; return ret;
}
/*
 * NOTE(review): headerless fragment.  Lines up to the mmap_write_unlock()
 * read like the tail of do_mlock() (RLIMIT_MEMLOCK accounting, adjusted
 * for already-mlocked overlap), while the MLOCK_ONFAULT handling and the
 * call to do_mlock() at the end belong to a separate mlock2-style entry
 * point.  The variables locked, lock_limit, error, flags, vm_flags,
 * start and len are all undeclared in the visible text.
 */
if (mmap_write_lock_killable(current->mm)) return -EINTR;
locked += current->mm->locked_vm; if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) { /* * It is possible that the regions requested intersect with * previously mlocked areas, that part area in "mm->locked_vm" * should not be counted to new mlock increment count. So check * and adjust locked count if necessary.
*/
locked -= count_mm_mlocked_page_nr(current->mm,
start, len);
}
/* check against resource limits */ if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
error = apply_vma_lock_flags(start, len, flags);
mmap_write_unlock(current->mm); if (error) return error;
/* NOTE(review): from here on, this reads as the tail of mlock2(). */
if (flags & MLOCK_ONFAULT)
vm_flags |= VM_LOCKONFAULT;
return do_mlock(start, len, vm_flags);
}
SYSCALL_DEFINE2(munlock, unsignedlong, start, size_t, len)
{ int ret;
start = untagged_addr(start);
len = PAGE_ALIGN(len + (offset_in_page(start)));
start &= PAGE_MASK;
if (mmap_write_lock_killable(current->mm)) return -EINTR;
ret = apply_vma_lock_flags(start, len, 0);
mmap_write_unlock(current->mm);
return ret;
}
/*
 * NOTE(review): fused fragment.  Only the header and local declarations
 * of apply_mlockall_flags() are visible; the lines that follow (using
 * undeclared ret/lock_limit, calling apply_mlockall_flags() and
 * mm_populate()) read like the tail of the mlockall syscall, whose own
 * head was dropped by this extraction.
 */
/* * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall) * and translate into the appropriate modifications to mm->def_flags and/or the * flags for all current VMAs. * * There are a couple of subtleties with this. If mlockall() is called multiple * times with different flags, the values do not necessarily stack. If mlockall * is called once including the MCL_FUTURE flag and then a second time without * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags.
*/ staticint apply_mlockall_flags(int flags)
{
VMA_ITERATOR(vmi, current->mm, 0); struct vm_area_struct *vma, *prev = NULL;
vm_flags_t to_add = 0;
/* NOTE(review): apply_mlockall_flags() body missing; syscall tail follows. */
if (mmap_write_lock_killable(current->mm)) return -EINTR;
ret = -ENOMEM; if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
capable(CAP_IPC_LOCK))
ret = apply_mlockall_flags(flags);
mmap_write_unlock(current->mm); if (!ret && (flags & MCL_CURRENT))
mm_populate(0, TASK_SIZE);
return ret;
}
SYSCALL_DEFINE0(munlockall)
{ int ret;
if (mmap_write_lock_killable(current->mm)) return -EINTR;
ret = apply_mlockall_flags(0);
mmap_write_unlock(current->mm); return ret;
}
/* * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB * shm segments) get accounted against the user_struct instead.
*/ static DEFINE_SPINLOCK(shmlock_user_lock);
/*
 * NOTE(review): user_shm_lock() is truncated after its declarations —
 * the limit check and ucounts accounting are missing from this
 * extraction.  The German text that follows is website boilerplate,
 * not part of the source.
 */
int user_shm_lock(size_t size, struct ucounts *ucounts)
{ unsignedlong lock_limit, locked; long memlock; int allowed = 0;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.