/* SPDX-License-Identifier: GPL-2.0+ */ /* * vma_internal.h * * Header providing userland wrappers and shims for the functionality provided * by mm/vma_internal.h. * * We make the header guard the same as mm/vma_internal.h, so if this shim * header is included, it precludes the inclusion of the kernel one.
*/
/*
 * Loop helper: repeatedly assigns vma_next(&__vmi) to @__vma and runs the
 * loop body until that call yields NULL.
 *
 * @__vmi: iterator object (passed by name; its address is taken)
 * @__vma: lvalue that receives each returned entry
 */
#define for_each_vma(__vmi, __vma)					\
	while (((__vma) = vma_next(&(__vmi))) != NULL)
/*
 * The MM code likes to work with exclusive end addresses.
 *
 * Loop helper: repeatedly assigns vma_find(&__vmi, __end) to @__vma and runs
 * the loop body until that call yields NULL.
 *
 * @__vmi: iterator object (passed by name; its address is taken)
 * @__vma: lvalue that receives each returned entry
 * @__end: end address handed unchanged to vma_find()
 */
#define for_each_vma_range(__vmi, __vma, __end)				\
	while (((__vma) = vma_find(&(__vmi), (__end))) != NULL)
/*
 * The shared stubs do not implement pr_warn_once(), so alias it to pr_err();
 * it amounts to an fprintf(stderr, ...) either way :)
 */
#define pr_warn_once pr_err
/*
 * Userland stand-in: simply evaluates to @expr. The expansion is
 * parenthesized so that it behaves as a single operand inside a larger
 * expression.
 */
#define data_race(expr) (expr)
/* Expands to nothing in this shim; the argument @x is discarded unevaluated. */
#define ASSERT_EXCLUSIVE_WRITER(x)
/**
 * swap - swap values of @a and @b
 * @a: first value
 * @b: second value
 *
 * Both arguments must be modifiable lvalues. Each argument is expanded more
 * than once, so avoid passing expressions with side effects.
 *
 * __typeof__ is used rather than typeof so that the macro also compiles in
 * strict ISO C modes, where the plain typeof keyword is not available.
 */
#define swap(a, b) \
	do { __typeof__(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
/* Reference-count holder: wraps a single refcount_t counter. */
struct kref {
	refcount_t refcount;
};
/*
 * The task command name length is declared as an enum constant so that it
 * is visible to BPF programs.
 */
enum { TASK_COMM_LEN = 16 };
/*
 * Bug-emulation flags.
 *
 * These occupy the top three bytes.
 */
enum { READ_IMPLIES_EXEC = 0x0400000 };
/*
 * Userland shim of the address-space descriptor, declaring only the fields
 * listed below.
 */
struct mm_struct {
	struct maple_tree mm_mt;
	int map_count;			/* number of VMAs */
	unsigned long total_vm;		/* Total pages mapped */
	unsigned long locked_vm;	/* Pages that have PG_mlocked set */
	unsigned long data_vm;		/* VM_WRITE & ~VM_SHARED & ~VM_STACK */
	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE & ~VM_STACK */
	unsigned long stack_vm;		/* VM_STACK */

	unsigned long def_flags;

	unsigned long flags;		/* Must use atomic bitops to access */
};
struct vm_area_struct;
/* * Describes a VMA that is about to be mmap()'ed. Drivers may choose to * manipulate mutable fields which will cause those fields to be updated in the * resultant VMA. * * Helper functions are not required for manipulating any field.
*/ struct vm_area_desc { /* Immutable state. */ struct mm_struct *mm; unsignedlong start; unsignedlong end;
struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */
union { struct { /* VMA covers [vm_start; vm_end) addresses within mm */ unsignedlong vm_start; unsignedlong vm_end;
};
freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
};
struct mm_struct *vm_mm; /* The address space we belong to. */
pgprot_t vm_page_prot; /* Access permissions of this VMA. */
/* * Flags, see mm.h. * To modify use vm_flags_{init|reset|set|clear|mod} functions.
*/ union { const vm_flags_t vm_flags;
vm_flags_t __private __vm_flags;
};
#ifdef CONFIG_PER_VMA_LOCK /* * Can only be written (using WRITE_ONCE()) while holding both: * - mmap_lock (in write mode) * - vm_refcnt bit at VMA_LOCK_OFFSET is set * Can be read reliably while holding one of: * - mmap_lock (in read or write mode) * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1 * Can be read unreliably (using READ_ONCE()) for pessimistic bailout * while holding nothing (except RCU to keep the VMA struct allocated). * * This sequence counter is explicitly allowed to overflow; sequence * counter reuse can only lead to occasional unnecessary use of the * slowpath.
*/ unsignedint vm_lock_seq; #endif
/* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma * list, after a COW of one of the file pages. A MAP_SHARED vma * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack * or brk vma (with NULL file) can only be in an anon_vma list.
*/ struct list_head anon_vma_chain; /* Serialized by mmap_lock &
* page_table_lock */ struct anon_vma *anon_vma; /* Serialized by page_table_lock */
/* Function pointers to deal with this struct. */ conststruct vm_operations_struct *vm_ops;
/* Information about our backing store: */ unsignedlong vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
units */ struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */
#ifdef CONFIG_SWAP
atomic_long_t swap_readahead_info; #endif #ifndef CONFIG_MMU struct vm_region *vm_region; /* NOMMU mapping region */ #endif #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ #endif #ifdef CONFIG_NUMA_BALANCING struct vma_numab_state *numab_state; /* NUMA Balancing state */ #endif #ifdef CONFIG_PER_VMA_LOCK /* Unstable RCU readers are allowed to read this. */
refcount_t vm_refcnt; #endif /* * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. *
*/ struct { struct rb_node rb; unsignedlong rb_subtree_last;
} shared; #ifdef CONFIG_ANON_VMA_NAME /* * For private and shared anonymous mappings, a pointer to a null * terminated string containing the name given to the vma, or NULL if * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
*/ struct anon_vma_name *anon_name; #endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
} __randomize_layout;
/* Empty placeholder type: this shim declares no fields for it. */
struct vm_fault {};
struct vm_operations_struct { void (*open)(struct vm_area_struct * area); /** * @close: Called when the VMA is being removed from the MM. * Context: User context. May sleep. Caller holds mmap_lock.
*/ void (*close)(struct vm_area_struct * area); /* Called any time before splitting to check if it's allowed */ int (*may_split)(struct vm_area_struct *area, unsignedlong addr); int (*mremap)(struct vm_area_struct *area); /* * Called by mprotect() to make driver-specific permission * checks before mprotect() is finalised. The VMA must not * be modified. Returns 0 if mprotect() can proceed.
*/ int (*mprotect)(struct vm_area_struct *vma, unsignedlong start, unsignedlong end, unsignedlong newflags);
vm_fault_t (*fault)(struct vm_fault *vmf);
vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsignedint order);
vm_fault_t (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff); unsignedlong (*pagesize)(struct vm_area_struct * area);
/* notification that a previously read-only page is about to become
* writable, if an error is returned it will cause a SIGBUS */
vm_fault_t (*page_mkwrite)(struct vm_fault *vmf);
/* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf);
/* called by access_process_vm when get_user_pages() fails, typically * for use by special VMAs. See also generic_access_phys() for a generic * implementation useful for any iomem mapping.
*/ int (*access)(struct vm_area_struct *vma, unsignedlong addr, void *buf, int len, int write);
/* Called by the /proc/PID/maps code to ask the vma whether it * has a special name. Returning non-NULL will also cause this
* vma to be dumped unconditionally. */ constchar *(*name)(struct vm_area_struct *vma);
#ifdef CONFIG_NUMA /* * set_policy() op must add a reference to any non-NULL @new mempolicy * to hold the policy upon return. Caller should pass NULL @new to * remove a policy and fall back to surrounding context--i.e. do not * install a MPOL_DEFAULT policy, nor the task or system default * mempolicy.
*/ int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
/* * get_policy() op must add reference [mpol_get()] to any policy at * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure * in mm/mempolicy.c will do this automatically. * get_policy() must NOT add a ref if the policy at (vma,addr) is not * marked as MPOL_SHARED. vma policies are protected by the mmap_lock. * If no [shared/vma] mempolicy exists at the addr, get_policy() op * must return NULL--i.e., do not "fallback" to task or system default * policy.
*/ struct mempolicy *(*get_policy)(struct vm_area_struct *vma, unsignedlong addr, pgoff_t *ilx); #endif /* * Called by vm_normal_page() for special PTEs to find the * page for @addr. This is useful if the default behavior * (using pte_page()) would not find the correct page.
*/ struct page *(*find_special_page)(struct vm_area_struct *vma, unsignedlong addr);
};
/* Optional arguments describing how a slab cache should be created. */
struct kmem_cache_args {
	/**
	 * @align: The required alignment for the objects.
	 *
	 * %0 means no specific alignment is requested.
	 */
	unsigned int align;
	/**
	 * @useroffset: Usercopy region offset.
	 *
	 * %0 is a valid offset, when @usersize is non-%0
	 */
	unsigned int useroffset;
	/**
	 * @usersize: Usercopy region size.
	 *
	 * %0 means no usercopy region is specified.
	 */
	unsigned int usersize;
	/**
	 * @freeptr_offset: Custom offset for the free pointer
	 * in &SLAB_TYPESAFE_BY_RCU caches
	 *
	 * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
	 * outside of the object. This might cause the object to grow in size.
	 * Cache creators that have a reason to avoid this can specify a custom
	 * free pointer offset in their struct where the free pointer will be
	 * placed.
	 *
	 * Note that placing the free pointer inside the object requires the
	 * caller to ensure that no fields are invalidated that are required to
	 * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
	 * details).
	 *
	 * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
	 * is specified, %use_freeptr_offset must be set %true.
	 *
	 * Note that @ctor currently isn't supported with custom free pointers
	 * as a @ctor requires an external free pointer.
	 */
	unsigned int freeptr_offset;
	/**
	 * @use_freeptr_offset: Whether a @freeptr_offset is used.
	 */
	bool use_freeptr_offset;
	/**
	 * @ctor: A constructor for the objects.
	 *
	 * The constructor is invoked for each object in a newly allocated slab
	 * page. It is the cache user's responsibility to free object in the
	 * same state as after calling the constructor, or deal appropriately
	 * with any differences between a freshly constructed and a reallocated
	 * object.
	 *
	 * %NULL means no constructor.
	 */
	void (*ctor)(void *);
};
staticinlinestruct vm_area_struct *vma_next(struct vma_iterator *vmi)
{ /* * Uses mas_find() to get the first VMA when the iterator starts. * Calling mas_next() could skip the first entry.
*/ return mas_find(&vmi->mas, ULONG_MAX);
}
/*
 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
 * assertions should be made either under mmap_write_lock or when the object
 * has been isolated under mmap_write_lock, ensuring no competing writers.
 */

/* Warn (once) if @vma's vm_refcnt reads as zero. */
static inline void vma_assert_attached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
}
/*
 * Drop one reference from @vma->vm_refcnt after asserting that @vma is
 * write-locked and attached. Nothing further is done if the count did not
 * reach zero.
 */
static inline void vma_mark_detached(struct vm_area_struct *vma)
{
	vma_assert_write_locked(vma);
	vma_assert_attached(vma);
	/* We are the only writer, so no need to use vma_refcount_put(). */
	if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
		/*
		 * Reader must have temporarily raised vm_refcnt but it will
		 * drop it without using the vma since vma is write-locked.
		 */
	}
}
/*
 * These are defined in vma.h, but sadly vm_stat_account() is referenced by
 * kernel/fork.c, so we have to make these broadly available there, and
 * temporarily define them here to resolve the dependency cycle.
 */
/* Currently stubbed but we may later wish to un-stub. */
static inline void vm_acct_memory(long pages);

/* Reverse an accounting of @pages by accounting the negated amount. */
static inline void vm_unacct_memory(long pages)
{
	vm_acct_memory(-pages);
}
/*
 * Test stand-in: if @src has an anon_vma, share it with @dst and flag it as
 * cloned; otherwise leave @dst untouched.
 *
 * Return: always 0.
 */
static inline int anon_vma_clone(struct vm_area_struct *dst,
				 struct vm_area_struct *src)
{
	/* For testing purposes. We indicate that an anon_vma has been cloned. */
	if (src->anon_vma != NULL) {
		dst->anon_vma = src->anon_vma;
		dst->anon_vma->was_cloned = true;
	}

	return 0;
}
/* Test stand-in: records the start of a write by bumping @vma->vm_lock_seq. */
static inline void vma_start_write(struct vm_area_struct *vma)
{
	/* Used to indicate to tests that a write operation has begun. */
	vma->vm_lock_seq++;
}
/*
 * Test stand-in: flags @vma's anon_vma as unlinked.
 *
 * @vma->anon_vma is dereferenced unconditionally, so it must be non-NULL.
 */
static inline void unlink_anon_vmas(struct vm_area_struct *vma)
{
	/* For testing purposes, indicate that the anon_vma was unlinked. */
	vma->anon_vma->was_unlinked = true;
}
/* * Denies creating a writable executable mapping or gaining executable permissions. * * This denies the following: * * a) mmap(PROT_WRITE | PROT_EXEC) * * b) mmap(PROT_WRITE) * mprotect(PROT_EXEC) * * c) mmap(PROT_WRITE) * mprotect(PROT_READ) * mprotect(PROT_EXEC) * * But allows the following: * * d) mmap(PROT_READ | PROT_EXEC) * mmap(PROT_READ | PROT_EXEC | PROT_BTI) * * This is only applicable if the user has set the Memory-Deny-Write-Execute * (MDWE) protection mask for the current process. * * @old specifies the VMA flags the VMA originally possessed, and @new the ones * we propose to set. * * Return: false if proposed change is OK, true if not ok and should be denied.
*/ staticinlinebool map_deny_write_exec(unsignedlong old, unsignedlongnew)
{ /* If MDWE is disabled, we have nothing to deny. */ if (!test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) returnfalse;
/* If the new VMA is not executable, we have nothing to deny. */ if (!(new & VM_EXEC)) returnfalse;
/* Under MDWE we do not accept newly writably executable VMAs... */ if (new & VM_WRITE) returntrue;
staticinlineint mapping_map_writable(struct address_space *mapping)
{ int c = atomic_read(&mapping->i_mmap_writable);
/* Derived from the raw_atomic_inc_unless_negative() implementation. */ do { if (c < 0) return -EPERM;
} while (!__sync_bool_compare_and_swap(&mapping->i_mmap_writable, c, c+1));
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.