/* SPDX-License-Identifier: GPL-2.0-or-later */
/* internal.h: mm/ internal definitions
 *
 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
#ifndef __MM_INTERNAL_H
#define __MM_INTERNAL_H
/*
 * Maintains state across a page table move. The operation assumes both source
 * and destination VMAs already exist and are specified by the user.
 *
 * Partial moves are permitted, but the old and new ranges must both reside
 * within a VMA.
 *
 * mmap lock must be held in write and VMA write locks must be held on any VMA
 * that is visible.
 *
 * Use the PAGETABLE_MOVE() macro to initialise this struct.
 *
 * The old_addr and new_addr fields are updated as the page table move is
 * executed.
 *
 * NOTE: The page table move is affected by reading from [old_addr, old_end),
 * and old_addr may be updated for better page table alignment, so len_in
 * represents the length of the range being copied as specified by the user.
 */
struct pagetable_move_control {
	struct vm_area_struct *old;	/* Source VMA. */
	struct vm_area_struct *new;	/* Destination VMA. */
	unsigned long old_addr;		/* Address from which the move begins. */
	unsigned long old_end;		/* Exclusive address at which old range ends. */
	unsigned long new_addr;		/* Address to move page tables to. */
	unsigned long len_in;		/* Bytes to remap specified by user. */

	bool need_rmap_locks;		/* Do rmap locks need to be taken? */
	bool for_stack;			/* Is this an early temp stack being moved? */
};
/*
 * The set of flags that only affect watermark checking and reclaim
 * behaviour. This is used by the MM to obey the caller constraints
 * about IO, FS and watermark checking while ignoring placement
 * hints such as HIGHMEM usage.
 */
#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
			__GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\
			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
			__GFP_NOLOCKDEP)

/* The GFP flags allowed during early boot */
#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))

/* Control allocation cpuset and node placement constraints */
#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)

/* Do not use these with a slab allocator */
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
/*
 * Different from WARN_ON_ONCE(), no warning will be issued
 * when we specify __GFP_NOWARN.
 */
#define WARN_ON_ONCE_GFP(cond, gfp)	({				\
	static bool __section(".data..once") __warned;			\
	int __ret_warn_once = !!(cond);					\
									\
	if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
		__warned = true;					\
		WARN_ON(1);						\
	}								\
	unlikely(__ret_warn_once);					\
})
void page_writeback_init(void);

/*
 * If a 16GB hugetlb folio were mapped by PTEs of all of its 4kB pages,
 * its nr_pages_mapped would be 0x400000: choose the ENTIRELY_MAPPED bit
 * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE). Hugetlb currently
 * leaves nr_pages_mapped at 0, but avoid surprise if it participates later.
 */
#define ENTIRELY_MAPPED		0x800000
#define FOLIO_PAGES_MAPPED	(ENTIRELY_MAPPED - 1)

/*
 * Flags passed to __show_mem() and show_free_areas() to suppress output in
 * various contexts.
 */
#define SHOW_MEM_FILTER_NODES		(0x0001u)	/* disallowed nodes */
/* * How many individual pages have an elevated _mapcount. Excludes * the folio's entire_mapcount. * * Don't use this function outside of debugging code.
*/ staticinlineint folio_nr_pages_mapped(conststruct folio *folio)
{ if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) return -1; return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED;
}
/* * Retrieve the first entry of a folio based on a provided entry within the * folio. We cannot rely on folio->swap as there is no guarantee that it has * been initialized. Used for calling arch_swap_restore()
*/ staticinline swp_entry_t folio_swap(swp_entry_t entry, conststruct folio *folio)
{
swp_entry_t swap = {
.val = ALIGN_DOWN(entry.val, folio_nr_pages(folio)),
};
/*
 * This is a file-backed mapping, and is about to be memory mapped - invoke its
 * mmap hook and safely handle error conditions. On error, VMA hooks will be
 * mutated.
 *
 * @file: File which backs the mapping.
 * @vma:  VMA which we are mapping.
 *
 * Returns: 0 if success, error otherwise.
 */
static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
{
	int err = vfs_mmap(file, vma);

	if (likely(!err))
		return 0;

	/*
	 * OK, we tried to call the file hook for mmap(), but an error
	 * arose. The mapping is in an inconsistent state and we must not invoke
	 * any further hooks on it.
	 */
	vma->vm_ops = &vma_dummy_vm_ops;

	return err;
}
/*
 * If the VMA has a close hook then close it, and since closing it might leave
 * it in an inconsistent state which makes the use of any hooks suspect, clear
 * them down by installing dummy empty hooks.
 */
static inline void vma_close(struct vm_area_struct *vma)
{
	if (vma->vm_ops && vma->vm_ops->close) {
		vma->vm_ops->close(vma);

		/*
		 * The mapping is in an inconsistent state, and no further hooks
		 * may be invoked upon it.
		 */
		vma->vm_ops = &vma_dummy_vm_ops;
	}
}
#ifdef CONFIG_MMU

/* Flags for folio_pte_batch(). */
typedef int __bitwise fpb_t;

/*
 * Merge PTE write bits: if any PTE in the batch is writable, modify the
 * PTE at @ptentp to be writable.
 */
#define FPB_MERGE_WRITE			((__force fpb_t)BIT(3))

/*
 * Merge PTE young and dirty bits: if any PTE in the batch is young or dirty,
 * modify the PTE at @ptentp to be young or dirty, respectively.
 */
#define FPB_MERGE_YOUNG_DIRTY		((__force fpb_t)BIT(4))
/** * folio_pte_batch_flags - detect a PTE batch for a large folio * @folio: The large folio to detect a PTE batch for. * @vma: The VMA. Only relevant with FPB_MERGE_WRITE, otherwise can be NULL. * @ptep: Page table pointer for the first entry. * @ptentp: Pointer to a COPY of the first page table entry whose flags this * function updates based on @flags if appropriate. * @max_nr: The maximum number of table entries to consider. * @flags: Flags to modify the PTE batch semantics. * * Detect a PTE batch: consecutive (present) PTEs that map consecutive * pages of the same large folio in a single VMA and a single page table. * * All PTEs inside a PTE batch have the same PTE bits set, excluding the PFN, * the accessed bit, writable bit, dirty bit (unless FPB_RESPECT_DIRTY is set) * and soft-dirty bit (unless FPB_RESPECT_SOFT_DIRTY is set). * * @ptep must map any page of the folio. max_nr must be at least one and * must be limited by the caller so scanning cannot exceed a single VMA and * a single page table. * * Depending on the FPB_MERGE_* flags, the pte stored at @ptentp will * be updated: it's crucial that a pointer to a COPY of the first * page table entry, obtained through ptep_get(), is provided as @ptentp. * * This function will be inlined to optimize based on the input parameters; * consider using folio_pte_batch() instead if applicable. * * Return: the number of table entries in the batch.
*/ staticinlineunsignedint folio_pte_batch_flags(struct folio *folio, struct vm_area_struct *vma, pte_t *ptep, pte_t *ptentp, unsignedint max_nr, fpb_t flags)
{ bool any_writable = false, any_young = false, any_dirty = false;
pte_t expected_pte, pte = *ptentp; unsignedint nr, cur_nr;
VM_WARN_ON_FOLIO(!pte_present(pte), folio);
VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio);
VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio); /* * Ensure this is a pointer to a copy not a pointer into a page table. * If this is a stack value, it won't be a valid virtual address, but * that's fine because it also cannot be pointing into the page table.
*/
VM_WARN_ON(virt_addr_valid(ptentp) && PageTable(virt_to_page(ptentp)));
/* Limit max_nr to the actual remaining PFNs in the folio we could batch. */
max_nr = min_t(unsignedlong, max_nr,
folio_pfn(folio) + folio_nr_pages(folio) - pte_pfn(pte));
/** * pte_move_swp_offset - Move the swap entry offset field of a swap pte * forward or backward by delta * @pte: The initial pte state; is_swap_pte(pte) must be true and * non_swap_entry() must be false. * @delta: The direction and the offset we are moving; forward if delta * is positive; backward if delta is negative * * Moves the swap offset, while maintaining all other fields, including * swap type, and any swp pte bits. The resulting pte is returned.
*/ staticinline pte_t pte_move_swp_offset(pte_t pte, long delta)
{
swp_entry_t entry = pte_to_swp_entry(pte);
pte_t new = __swp_entry_to_pte(__swp_entry(swp_type(entry),
(swp_offset(entry) + delta)));
if (pte_swp_soft_dirty(pte)) new = pte_swp_mksoft_dirty(new); if (pte_swp_exclusive(pte)) new = pte_swp_mkexclusive(new); if (pte_swp_uffd_wp(pte)) new = pte_swp_mkuffd_wp(new);
returnnew;
}
/** * pte_next_swp_offset - Increment the swap entry offset field of a swap pte. * @pte: The initial pte state; is_swap_pte(pte) must be true and * non_swap_entry() must be false. * * Increments the swap offset, while maintaining all other fields, including * swap type, and any swp pte bits. The resulting pte is returned.
*/ staticinline pte_t pte_next_swp_offset(pte_t pte)
{ return pte_move_swp_offset(pte, 1);
}
/** * swap_pte_batch - detect a PTE batch for a set of contiguous swap entries * @start_ptep: Page table pointer for the first entry. * @max_nr: The maximum number of table entries to consider. * @pte: Page table entry for the first entry. * * Detect a batch of contiguous swap entries: consecutive (non-present) PTEs * containing swap entries all with consecutive offsets and targeting the same * swap type, all with matching swp pte bits. * * max_nr must be at least one and must be limited by the caller so scanning * cannot exceed a single page table. * * Return: the number of table entries in the batch.
*/ staticinlineint swap_pte_batch(pte_t *start_ptep, int max_nr, pte_t pte)
{
pte_t expected_pte = pte_next_swp_offset(pte); const pte_t *end_ptep = start_ptep + max_nr;
swp_entry_t entry = pte_to_swp_entry(pte);
pte_t *ptep = start_ptep + 1; unsignedshort cgroup_id;
/**
 * folio_evictable - Test whether a folio is evictable.
 * @folio: The folio to test.
 *
 * Test whether @folio is evictable -- i.e., should be placed on
 * active/inactive lists vs unevictable list.
 *
 * Reasons folio might not be evictable:
 * 1. folio's mapping marked unevictable
 * 2. One of the pages in the folio is part of an mlocked VMA
 */
static inline bool folio_evictable(struct folio *folio)
{
	bool ret;

	/* Prevent address_space of inode and swap cache from being freed */
	rcu_read_lock();
	ret = !mapping_unevictable(folio_mapping(folio)) &&
			!folio_test_mlocked(folio);
	rcu_read_unlock();

	return ret;
}
/*
 * Turn a non-refcounted page (->_refcount == 0) into refcounted with
 * a count of one.
 */
static inline void set_page_refcounted(struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(page_ref_count(page), page);
	set_page_count(page, 1);
}
/* * Return true if a folio needs ->release_folio() calling upon it.
*/ staticinlinebool folio_needs_release(struct folio *folio)
{ struct address_space *mapping = folio_mapping(folio);
/*
 * in mm/page_alloc.c
 */
#define K(x) ((x) << (PAGE_SHIFT-10))

extern char * const zone_names[MAX_NR_ZONES];

/* perform sanity checks on struct pages being allocated or freed */
DECLARE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled);

extern int min_free_kbytes;
extern int defrag_mode;

void setup_per_zone_wmarks(void);
void calculate_min_free_kbytes(void);
int __meminit init_per_zone_wmark_min(void);
void page_alloc_sysctl_init(void);
/*
 * Structure for holding the mostly immutable allocation parameters passed
 * between functions involved in allocations, including the alloc_pages*
 * family of functions.
 *
 * nodemask, migratetype and highest_zoneidx are initialized only once in
 * __alloc_pages() and then never change.
 *
 * zonelist, preferred_zone and highest_zoneidx are set first in
 * __alloc_pages() for the fast path, and might be later changed
 * in __alloc_pages_slowpath(). All other functions pass the whole structure
 * by a const pointer.
 */
struct alloc_context {
	struct zonelist *zonelist;
	nodemask_t *nodemask;
	struct zoneref *preferred_zoneref;
	int migratetype;

	/*
	 * highest_zoneidx represents highest usable zone index of
	 * the allocation request. Due to the nature of the zone,
	 * memory on lower zone than the highest_zoneidx will be
	 * protected by lowmem_reserve[highest_zoneidx].
	 *
	 * highest_zoneidx is also used by reclaim/compaction to limit
	 * the target zone since higher zone than this index cannot be
	 * usable for this allocation request.
	 */
	enum zone_type highest_zoneidx;
	bool spread_dirty_pages;
};
/*
 * This function returns the order of a free page in the buddy system. In
 * general, page_zone(page)->lock must be held by the caller to prevent the
 * page from being allocated in parallel and returning garbage as the order.
 * If a caller does not hold page_zone(page)->lock, it must guarantee that the
 * page cannot be allocated or merged in parallel. Alternatively, it must
 * handle invalid values gracefully, and use buddy_order_unsafe() below.
 */
static inline unsigned int buddy_order(struct page *page)
{
	/* PageBuddy() must be checked by the caller */
	return page_private(page);
}
/*
 * Like buddy_order(), but for callers who cannot afford to hold the zone lock.
 * PageBuddy() should be checked first by the caller to minimize race window,
 * and invalid values must be handled gracefully.
 *
 * READ_ONCE is used so that if the caller assigns the result into a local
 * variable and e.g. tests it for valid range before using, the compiler cannot
 * decide to remove the variable and inline the page_private(page) multiple
 * times, potentially observing different values in the tests and the actual
 * use of the result.
 */
#define buddy_order_unsafe(page)	READ_ONCE(page_private(page))
/* * This function checks whether a page is free && is the buddy * we can coalesce a page and its buddy if * (a) the buddy is not in a hole (check before calling!) && * (b) the buddy is in the buddy system && * (c) a page and its buddy have the same order && * (d) a page and its buddy are in the same zone. * * For recording whether a page is in the buddy system, we set PageBuddy. * Setting, clearing, and testing PageBuddy is serialized by zone->lock. * * For recording page's order, we use page_private(page).
*/ staticinlinebool page_is_buddy(struct page *page, struct page *buddy, unsignedint order)
{ if (!page_is_guard(buddy) && !PageBuddy(buddy)) returnfalse;
if (buddy_order(buddy) != order) returnfalse;
/* * zone check is done late to avoid uselessly calculating * zone/node ids for pages that could never merge.
*/ if (page_zone_id(page) != page_zone_id(buddy)) returnfalse;
VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
returntrue;
}
/*
 * Locate the struct page for both the matching buddy in our
 * pair (buddy1) and the combined O(n+1) page they form (page).
 *
 * 1) Any buddy B1 will have an order O twin B2 which satisfies
 * the following equation:
 *     B2 = B1 ^ (1 << O)
 * For example, if the starting buddy (buddy2) is #8 its order
 * 1 buddy is #10:
 *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
 *
 * 2) Any buddy B will have an order O+1 parent P which
 * satisfies the following equation:
 *     P = B & ~(1 << O)
 *
 * Assumption: *_mem_map is contiguous at least up to MAX_PAGE_ORDER
 */
static inline unsigned long
__find_buddy_pfn(unsigned long page_pfn, unsigned int order)
{
	/* XOR flips the order-O bit: the buddy is the sibling block. */
	return page_pfn ^ (1 << order);
}
/* * Find the buddy of @page and validate it. * @page: The input page * @pfn: The pfn of the page, it saves a call to page_to_pfn() when the * function is used in the performance-critical __free_one_page(). * @order: The order of the page * @buddy_pfn: The output pointer to the buddy pfn, it also saves a call to * page_to_pfn(). * * The found buddy can be a non PageBuddy, out of @page's zone, or its order is * not the same as @page. The validation is necessary before use it. * * Return: the found buddy page or NULL if not found.
*/ staticinlinestruct page *find_buddy_page_pfn(struct page *page, unsignedlong pfn, unsignedint order, unsignedlong *buddy_pfn)
{ unsignedlong __buddy_pfn = __find_buddy_pfn(pfn, order); struct page *buddy;
/* * This will have no effect, other than possibly generating a warning, if the * caller passes in a non-large folio.
*/ staticinlinevoid folio_set_order(struct folio *folio, unsignedint order)
{ if (WARN_ON_ONCE(!order || !folio_test_large(folio))) return;
/* * At this point, there is no one trying to add the folio to * deferred_list. If folio is not in deferred_list, it's safe * to check without acquiring the split_queue_lock.
*/ if (data_race(list_empty(&folio->_deferred_list))) returnfalse;
externvoid zone_pcp_reset(struct zone *zone); externvoid zone_pcp_disable(struct zone *zone); externvoid zone_pcp_enable(struct zone *zone); externvoid zone_pcp_init(struct zone *zone);
#ifdefined CONFIG_COMPACTION || defined CONFIG_CMA
/* * in mm/compaction.c
*/ /* * compact_control is used to track pages being migrated and the free pages * they are being migrated to during memory compaction. The free_pfn starts * at the end of a zone and migrate_pfn begins at the start. Movable pages * are moved to the end of a zone during a compaction run and the run * completes when free_pfn <= migrate_pfn
*/ struct compact_control { struct list_head freepages[NR_PAGE_ORDERS]; /* List of free pages to migrate to */ struct list_head migratepages; /* List of pages being migrated */ unsignedint nr_freepages; /* Number of isolated free pages */ unsignedint nr_migratepages; /* Number of pages to migrate */ unsignedlong free_pfn; /* isolate_freepages search base */ /* * Acts as an in/out parameter to page isolation for migration. * isolate_migratepages uses it as a search base. * isolate_migratepages_block will update the value to the next pfn * after the last isolated one.
*/ unsignedlong migrate_pfn; unsignedlong fast_start_pfn; /* a pfn to start linear scan from */ struct zone *zone; unsignedlong total_migrate_scanned; unsignedlong total_free_scanned; unsignedshort fast_search_fail;/* failures to use free list searches */ short search_order; /* order to start a fast search at */ const gfp_t gfp_mask; /* gfp mask of a direct compactor */ int order; /* order a direct compactor needs */ int migratetype; /* migratetype of direct compactor */ constunsignedint alloc_flags; /* alloc flags of a direct compactor */ constint highest_zoneidx; /* zone index of a direct compactor */ enum migrate_mode mode; /* Async or sync migration mode */ bool ignore_skip_hint; /* Scan blocks even if marked skip */ bool no_set_skip_hint; /* Don't mark blocks for skipping */ bool ignore_block_suitable; /* Scan blocks considered unsuitable */ bool direct_compaction; /* False from kcompactd or /proc/... */ bool proactive_compaction; /* kcompactd proactive compaction */ bool whole_zone; /* Whole zone should/has been scanned */ bool contended; /* Signal lock contention */ bool finish_pageblock; /* Scan the remainder of a pageblock. Used * when there are potentially transient * isolation or migration failures to * ensure forward progress.
*/ bool alloc_contig; /* alloc_contig_range allocation */
};
/*
 * Used in direct compaction when a page should be taken from the freelists
 * immediately when one is created during the free path.
 */
struct capture_control {
	struct compact_control *cc;
	struct page *page;
};
/* * NOTE: This function can't tell whether the folio is "fully mapped" in the * range. * "fully mapped" means all the pages of folio is associated with the page * table of range while this function just check whether the folio range is * within the range [start, end). Function caller needs to do page table * check if it cares about the page table association. * * Typical usage (like mlock or madvise) is: * Caller knows at least 1 page of folio is associated with page table of VMA * and the range [start, end) is intersect with the VMA range. Caller wants * to know whether the folio is fully associated with the range. It calls * this function to check whether the folio is in the range first. Then checks * the page table to know whether the folio is fully mapped to the range.
*/ staticinlinebool
folio_within_range(struct folio *folio, struct vm_area_struct *vma, unsignedlong start, unsignedlong end)
{
pgoff_t pgoff, addr; unsignedlong vma_pglen = vma_pages(vma);
VM_WARN_ON_FOLIO(folio_test_ksm(folio), folio); if (start > end) returnfalse;
if (start < vma->vm_start)
start = vma->vm_start;
if (end > vma->vm_end)
end = vma->vm_end;
pgoff = folio_pgoff(folio);
/* if folio start address is not in vma range */ if (!in_range(pgoff, vma->vm_pgoff, vma_pglen)) returnfalse;
/*
 * mlock_vma_folio() and munlock_vma_folio():
 * should be called with vma's mmap_lock held for read or write,
 * under page table lock for the pte/pmd being added or removed.
 *
 * mlock is usually called at the end of folio_add_*_rmap_*(), munlock at
 * the end of folio_remove_rmap_*(); but new anon folios are managed by
 * folio_add_lru_vma() calling mlock_new_folio().
 */
void mlock_folio(struct folio *folio);
static inline void mlock_vma_folio(struct folio *folio,
				   struct vm_area_struct *vma)
{
	/*
	 * The VM_SPECIAL check here serves two purposes.
	 * 1) VM_IO check prevents migration from double-counting during mlock.
	 * 2) Although mmap_region() and mlock_fixup() take care that VM_LOCKED
	 *    is never left set on a VM_SPECIAL vma, there is an interval while
	 *    file->f_op->mmap() is using vm_insert_page(s), when VM_LOCKED may
	 *    still be set while VM_SPECIAL bits are added: so ignore it then.
	 */
	if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED))
		mlock_folio(folio);
}
void munlock_folio(struct folio *folio);
static inline void munlock_vma_folio(struct folio *folio,
				     struct vm_area_struct *vma)
{
	/*
	 * munlock if the function is called. Ideally, we should only
	 * do munlock if any page of folio is unmapped from VMA and
	 * cause folio not fully mapped to VMA.
	 *
	 * But it's not easy to confirm that's the situation. So we
	 * always munlock the folio and page reclaim will correct it
	 * if it's wrong.
	 */
	if (unlikely(vma->vm_flags & VM_LOCKED))
		munlock_folio(folio);
}
/**
 * vma_address - Find the virtual address a page range is mapped at
 * @vma: The vma which maps this object.
 * @pgoff: The page offset within its object.
 * @nr_pages: The number of pages to consider.
 *
 * If any page in this range is mapped by this VMA, return the first address
 * where any of these pages appear. Otherwise, return -EFAULT.
 */
static inline unsigned long vma_address(const struct vm_area_struct *vma,
		pgoff_t pgoff, unsigned long nr_pages)
{
	unsigned long address;

	if (pgoff >= vma->vm_pgoff) {
		address = vma->vm_start +
			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
		/* Check for address beyond vma (or wrapped through 0?) */
		if (address < vma->vm_start || address >= vma->vm_end)
			address = -EFAULT;
	} else if (pgoff + nr_pages - 1 >= vma->vm_pgoff) {
		/* Test above avoids possibility of wrap to 0 on 32-bit */
		address = vma->vm_start;
	} else {
		address = -EFAULT;
	}
	return address;
}
/* * Then at what user virtual address will none of the range be found in vma? * Assumes that vma_address() already returned a good starting address.
*/ staticinlineunsignedlong vma_address_end(struct page_vma_mapped_walk *pvmw)
{ struct vm_area_struct *vma = pvmw->vma;
pgoff_t pgoff; unsignedlong address;
/* Common case, plus ->pgoff is invalid for KSM */ if (pvmw->nr_pages == 1) return pvmw->address + PAGE_SIZE;
/* * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or * anything, so we only pin the file and drop the mmap_lock if only * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt.
*/ if (fault_flag_allow_retry_first(flags) &&
!(flags & FAULT_FLAG_RETRY_NOWAIT)) {
fpin = get_file(vmf->vma->vm_file);
release_fault_lock(vmf);
} return fpin;
} #else/* !CONFIG_MMU */ staticinlinevoid unmap_mapping_folio(struct folio *folio) { } staticinlinevoid mlock_new_folio(struct folio *folio) { } staticinlinebool need_mlock_drain(int cpu) { returnfalse; } staticinlinevoid mlock_drain_local(void) { } staticinlinevoid mlock_drain_remote(int cpu) { } staticinlinevoid vunmap_range_noflush(unsignedlong start, unsignedlong end)
{
} #endif/* !CONFIG_MMU */
/* Memory initialisation debug and verification */
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
DECLARE_STATIC_KEY_TRUE(deferred_pages);

bool __init deferred_grow_zone(struct zone *zone, unsigned int order);
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */

void init_deferred_page(unsigned long pfn, int nid);
staticinlinebool node_reclaim_enabled(void)
{ /* Is any node_reclaim_mode bit set? */ return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP);
}
extern void set_pageblock_order(void);
unsigned long reclaim_pages(struct list_head *folio_list);
unsigned int reclaim_clean_pages_from_list(struct zone *zone,
					   struct list_head *folio_list);

/* The ALLOC_WMARK bits are used as an index to zone->watermark */
#define ALLOC_WMARK_MIN		WMARK_MIN
#define ALLOC_WMARK_LOW		WMARK_LOW
#define ALLOC_WMARK_HIGH	WMARK_HIGH
#define ALLOC_NO_WATERMARKS	0x04 /* don't check watermarks at all */

/* Mask to get the watermark bits */
#define ALLOC_WMARK_MASK	(ALLOC_NO_WATERMARKS-1)

/*
 * Only MMU archs have async oom victim reclaim - aka oom_reaper so we
 * cannot assume a reduced access to memory reserves is sufficient for
 * !MMU
 */
#ifdef CONFIG_MMU
#define ALLOC_OOM		0x08
#else
#define ALLOC_OOM		ALLOC_NO_WATERMARKS
#endif

#define ALLOC_NON_BLOCK		 0x10 /* Caller cannot block. Allow access
				       * to 25% of the min watermark or
				       * 62.5% if __GFP_HIGH is set.
				       */
#define ALLOC_MIN_RESERVE	 0x20 /* __GFP_HIGH set. Allow access to 50%
				       * of the min watermark.
				       */
#define ALLOC_CPUSET		 0x40 /* check for correct cpuset */
#define ALLOC_CMA		 0x80 /* allow allocations from CMA areas */
#ifdef CONFIG_ZONE_DMA32
#define ALLOC_NOFRAGMENT	0x100 /* avoid mixing pageblock types */
#else
#define ALLOC_NOFRAGMENT	  0x0
#endif
#define ALLOC_HIGHATOMIC	0x200 /* Allows access to MIGRATE_HIGHATOMIC */
#define ALLOC_TRYLOCK		0x400 /* Only use spin_trylock in allocation path */
#define ALLOC_KSWAPD		0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */

/* Flags that allow allocations below the min watermark. */
#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)

enum ttu_flags;
struct tlbflush_unmap_batch;

/*
 * only for MM internal work items which do not depend on
 * any allocations or locks which might depend on allocations
 */
extern struct workqueue_struct *mm_percpu_wq;
/* * Parses a string with mem suffixes into its order. Useful to parse kernel * parameters.
*/ staticinlineint get_order_from_str(constchar *size_str, unsignedlong valid_orders)
{ unsignedlong size; char *endptr; int order;
size = memparse(size_str, &endptr);
if (!is_power_of_2(size)) return -EINVAL;
order = get_order(size); if (BIT(order) & ~valid_orders) return -EINVAL;
return order;
}
enum {
	/* mark page accessed */
	FOLL_TOUCH = 1 << 16,
	/* a retry, previous pass started an IO */
	FOLL_TRIED = 1 << 17,
	/* we are working on non-current tsk/mm */
	FOLL_REMOTE = 1 << 18,
	/* pages must be released via unpin_user_page */
	FOLL_PIN = 1 << 19,
	/* gup_fast: prevent fall-back to slow gup */
	FOLL_FAST_ONLY = 1 << 20,
	/* allow unlocking the mmap lock */
	FOLL_UNLOCKABLE = 1 << 21,
	/* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */
	FOLL_MADV_POPULATE = 1 << 22,
};
/* * Indicates for which pages that are write-protected in the page table, * whether GUP has to trigger unsharing via FAULT_FLAG_UNSHARE such that the * GUP pin will remain consistent with the pages mapped into the page tables * of the MM. * * Temporary unmapping of PageAnonExclusive() pages or clearing of * PageAnonExclusive() has to protect against concurrent GUP: * * Ordinary GUP: Using the PT lock * * GUP-fast and fork(): mm->write_protect_seq * * GUP-fast and KSM or temporary unmapping (swap, migration): see * folio_try_share_anon_rmap_*() * * Must be called with the (sub)page that's actually referenced via the * page table entry, which might not necessarily be the head page for a * PTE-mapped THP. * * If the vma is NULL, we're coming from the GUP-fast path and might have * to fallback to the slow path just to lookup the vma.
*/ staticinlinebool gup_must_unshare(struct vm_area_struct *vma, unsignedint flags, struct page *page)
{ /* * FOLL_WRITE is implicitly handled correctly as the page table entry * has to be writable -- and if it references (part of) an anonymous * folio, that part is required to be marked exclusive.
*/ if ((flags & (FOLL_WRITE | FOLL_PIN)) != FOLL_PIN) returnfalse; /* * Note: PageAnon(page) is stable until the page is actually getting * freed.
*/ if (!PageAnon(page)) { /* * We only care about R/O long-term pining: R/O short-term * pinning does not have the semantics to observe successive * changes through the process page tables.
*/ if (!(flags & FOLL_LONGTERM)) returnfalse;
/* We really need the vma ... */ if (!vma) returntrue;
/* * ... because we only care about writable private ("COW") * mappings where we have to break COW early.
*/ return is_cow_mapping(vma->vm_flags);
}
/* Paired with a memory barrier in folio_try_share_anon_rmap_*(). */ if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
smp_rmb();
/* * Note that KSM pages cannot be exclusive, and consequently, * cannot get pinned.
*/ return !PageAnonExclusive(page);
}
staticinlinebool vma_soft_dirty_enabled(struct vm_area_struct *vma)
{ /* * NOTE: we must check this before VM_SOFTDIRTY on soft-dirty * enablements, because when without soft-dirty being compiled in, * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY) * will be constantly true.
*/ if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) returnfalse;
/* * Soft-dirty is kind of special: its tracking is enabled when the * vma flags not set.
*/ return !(vma->vm_flags & VM_SOFTDIRTY);
}
/*
 * NOTE(review): the following trailer is website boilerplate that was
 * scraped along with this file and is not part of the kernel source.
 * Translated from German and wrapped in a comment so it no longer breaks
 * compilation:
 * "The information on this website has been compiled carefully and to the
 *  best of our knowledge. However, no guarantee is given as to the
 *  completeness, correctness or quality of the information provided.
 *  Note: the syntax colouring and the measurement are still experimental."
 */