/*
 * MAX_SWAPFILES is the upper bound on the number of swap types, i.e. the
 * distinct things that pages can be swapped to. A swap type, together with
 * the offset into that swap type, is encoded both into ptes and into the
 * pgoff_t values used by the swapcache. Reserving five bits for the type
 * leaves 27 bits for swapcache pages on architectures with a 32-bit
 * pgoff_t -- and that assumes the architecture packs type/offset into the
 * pte as 5/27 as well.
 */
#define MAX_SWAPFILES_SHIFT	5
/*
 * Use some of the swap file numbers for other purposes. This
 * is a convenient way to hook into the VM to trigger special
 * actions on faults.
 */
/*
 * PTE markers are used to persist information onto PTEs that otherwise
 * should be a none pte. As the name "PTE" hints, a marker should only be
 * applied to the leaves of pgtables.
 *
 * The marker takes the swap type number that follows the hwpoison,
 * migration and device ranges.
 *
 * NOTE(review): MAX_SWAPFILES and SWP_HWPOISON_NUM are not defined in the
 * visible part of this file; they must be provided before SWP_PTE_MARKER
 * is expanded.
 */
#define SWP_PTE_MARKER_NUM 1
#define SWP_PTE_MARKER (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
			SWP_MIGRATION_NUM + SWP_DEVICE_NUM)
/*
 * Unaddressable device memory support. See include/linux/hmm.h and
 * Documentation/mm/hmm.rst. Short description is we need struct pages for
 * device memory that is unaddressable (inaccessible) by CPU, so that we can
 * migrate part of a process memory to device memory.
 *
 * When a page is migrated from CPU to device, we set the CPU page table entry
 * to a special SWP_DEVICE_{READ|WRITE} entry.
 *
 * When a page is mapped by the device for exclusive access we set the CPU page
 * table entries to a special SWP_DEVICE_EXCLUSIVE entry.
 *
 * Without CONFIG_DEVICE_PRIVATE no swap type numbers are reserved
 * (SWP_DEVICE_NUM is 0) and the SWP_DEVICE_* types are not defined.
 */
#ifdef CONFIG_DEVICE_PRIVATE
#define SWP_DEVICE_NUM 3
#define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM)
#define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1)
#define SWP_DEVICE_EXCLUSIVE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2)
#else
#define SWP_DEVICE_NUM 0
#endif
/*
 * Page migration support.
 *
 * SWP_MIGRATION_READ_EXCLUSIVE is only applicable to anonymous pages and
 * indicates that the referenced (part of) an anonymous page is exclusive to
 * a single process. For SWP_MIGRATION_WRITE, that information is implicit:
 * (part of) an anonymous page that is mapped writable is exclusive to a
 * single process.
 *
 * Without CONFIG_MIGRATION no swap type numbers are reserved
 * (SWP_MIGRATION_NUM is 0) and the SWP_MIGRATION_* types are not defined.
 */
#ifdef CONFIG_MIGRATION
#define SWP_MIGRATION_NUM 3
#define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM)
#define SWP_MIGRATION_READ_EXCLUSIVE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1)
#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 2)
#else
#define SWP_MIGRATION_NUM 0
#endif
/*
 * Magic header for a swap area. The first part of the union is
 * what the swap magic looks like for the old (limited to 128MB)
 * swap area format, the second part of the union adds - in the
 * old reserved area - some extra information. Note that the first
 * kilobyte is reserved for boot loader or disk label stuff...
 *
 * Having the magic at the end of the PAGE_SIZE makes detecting swap
 * areas somewhat tricky on machines that support multiple page sizes.
 * For 2.5 we'll probably want to move the magic to just beyond the
 * bootbits...
 */
union swap_header {
	/* Old-format view: the magic string occupies the last 10 bytes. */
	struct {
		char reserved[PAGE_SIZE - 10];
		char magic[10];			/* SWAP-SPACE or SWAPSPACE2 */
	} magic;
	/* New-format view: extra fields placed after the boot bits. */
	struct {
		char		bootbits[1024];	/* Space for disklabel etc. */
		__u32		version;
		__u32		last_page;
		__u32		nr_badpages;
		unsigned char	sws_uuid[16];
		unsigned char	sws_volume[16];
		__u32		padding[117];
		__u32		badpages[1];	/* first entry; see MAX_SWAP_BADPAGES */
	} info;
};
/*
 * current->reclaim_state points to one of these when a task is running
 * memory reclaim
 */
struct reclaim_state {
	/* pages reclaimed outside of LRU-based reclaim */
	unsigned long reclaimed;
#ifdef CONFIG_LRU_GEN
	/* per-thread mm walk data */
	struct lru_gen_mm_walk *mm_walk;
#endif
};
/* * mm_account_reclaimed_pages(): account reclaimed pages outside of LRU-based * reclaim * @pages: number of pages reclaimed * * If the current process is undergoing a reclaim operation, increment the * number of reclaimed pages by @pages.
*/ staticinlinevoid mm_account_reclaimed_pages(unsignedlong pages)
{ if (current->reclaim_state)
current->reclaim_state->reclaimed += pages;
}
/*
 * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of
 * disk blocks. An rbtree of swap extents maps the entire swapfile (where the
 * term `swapfile' refers to either a blockdevice or an IS_REG file). Apart
 * from setup, they're handled identically.
 *
 * We always assume that blocks are of size PAGE_SIZE.
 */
struct swap_extent {
	struct rb_node	rb_node;	/* linkage into the rbtree of extents */
	pgoff_t		start_page;
	pgoff_t		nr_pages;
	sector_t	start_block;
};
/*
 * Max bad pages in the new format: the number of int-sized slots between
 * the start of info.badpages and the start of magic.magic in
 * union swap_header.
 */
#define MAX_SWAP_BADPAGES						\
	((offsetof(union swap_header, magic.magic) -			\
	  offsetof(union swap_header, info.badpages)) / sizeof(int))
/* Bit flags, one bit each (see the SWP_USED etc. note on swap_info_struct). */
enum {
	SWP_USED		= 1 << 0,	/* is slot in swap_info[] used? */
	SWP_WRITEOK		= 1 << 1,	/* ok to write to this swap? */
	SWP_DISCARDABLE		= 1 << 2,	/* blkdev support discard */
	SWP_DISCARDING		= 1 << 3,	/* now discarding a free cluster */
	SWP_SOLIDSTATE		= 1 << 4,	/* blkdev seeks are cheap */
	SWP_CONTINUED		= 1 << 5,	/* swap_map has count continuation */
	SWP_BLKDEV		= 1 << 6,	/* it's a block device */
	SWP_ACTIVATED		= 1 << 7,	/* set after swap_activate success */
	SWP_FS_OPS		= 1 << 8,	/* swapfile operations go through fs */
	SWP_AREA_DISCARD	= 1 << 9,	/* single-time swap area discards */
	SWP_PAGE_DISCARD	= 1 << 10,	/* freed swap page-cluster discards */
	SWP_STABLE_WRITES	= 1 << 11,	/* no overwrite PG_writeback pages */
	SWP_SYNCHRONOUS_IO	= 1 << 12,	/* synchronous IO is efficient */
	/* add others here before... */
};
/* Bit flag in swap_map */
#define SWAP_HAS_CACHE	0x40	/* Flag page is cached, in first swap_map */
#define COUNT_CONTINUED	0x80	/* Flag swap_map continuation for full count */

/* Special value in first swap_map */
#define SWAP_MAP_MAX	0x3e	/* Max count */
#define SWAP_MAP_BAD	0x3f	/* Note page is bad */
#define SWAP_MAP_SHMEM	0xbf	/* Owned by shmem/tmpfs */

/* Special value in each swap_map continuation */
#define SWAP_CONT_MAX	0x7f	/* Max count */
/*
 * We use this to track usage of a cluster. A cluster is a block of swap disk
 * space that is SWAPFILE_CLUSTER pages long and naturally aligned on disk.
 * All free clusters are organized into a list. We fetch an entry from the
 * list to get a free cluster.
 *
 * The flags field determines if a cluster is free. This is
 * protected by cluster lock.
 */
struct swap_cluster_info {
	/*
	 * Protect swap_cluster_info fields other than list, and
	 * swap_info_struct->swap_map elements corresponding to the
	 * swap cluster.
	 */
	spinlock_t		lock;
	u16			count;
	u8			flags;
	u8			order;
	struct list_head	list;
};
/* All on-list clusters must have a non-zero flag. */
enum swap_cluster_flags {
	CLUSTER_FLAG_NONE = 0,	/* For temporary off-list cluster */
	CLUSTER_FLAG_FREE,
	CLUSTER_FLAG_NONFULL,
	CLUSTER_FLAG_FRAG,
	/* Clusters with flags above are allocatable */
	CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG,
	CLUSTER_FLAG_FULL,
	CLUSTER_FLAG_DISCARD,
	CLUSTER_FLAG_MAX,
};
/*
 * The first page in the swap file is the swap header, which is always marked
 * bad to prevent it from being allocated as an entry. This also prevents the
 * cluster to which it belongs from being marked free. Therefore 0 is safe to
 * use as a sentinel to indicate that an entry is not valid.
 */
#define SWAP_ENTRY_INVALID	0
/*
 * We keep using the same cluster for rotational devices so IO will be
 * sequential. The purpose is to optimize SWAP throughput on these devices.
 */
struct swap_sequential_cluster {
	unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */
};
/* * The in-memory structure used to track swap areas.
*/ struct swap_info_struct { struct percpu_ref users; /* indicate and keep swap device valid. */ unsignedlong flags; /* SWP_USED etc: see above */ signedshort prio; /* swap priority of this type */ struct plist_node list; /* entry in swap_active_head */ signedchar type; /* strange name for an index */ unsignedint max; /* extent of the swap_map */ unsignedchar *swap_map; /* vmalloc'ed array of usage counts */ unsignedlong *zeromap; /* kvmalloc'ed bitmap to track zero pages */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ struct list_head free_clusters; /* free clusters list */ struct list_head full_clusters; /* full clusters list */ struct list_head nonfull_clusters[SWAP_NR_ORDERS]; /* list of cluster that contains at least one free slot */ struct list_head frag_clusters[SWAP_NR_ORDERS]; /* list of cluster that are fragmented or contented */
atomic_long_t frag_cluster_nr[SWAP_NR_ORDERS]; unsignedint pages; /* total of usable pages of swap */
atomic_long_t inuse_pages; /* number of those currently in use */ struct swap_sequential_cluster *global_cluster; /* Use one global cluster for rotating device */
spinlock_t global_cluster_lock; /* Serialize usage of global cluster */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ struct completion comp; /* seldom referenced */
spinlock_t lock; /* * protect map scan related fields like * swap_map, lowest_bit, highest_bit, * inuse_pages, cluster_next, * cluster_nr, lowest_alloc, * highest_alloc, free/discard cluster * list. other fields are only changed * at swapon/swapoff, so are protected * by swap_lock. changing flags need * hold this lock and swap_lock. If * both locks need hold, hold swap_lock * first.
*/
spinlock_t cont_lock; /* * protect swap count continuation page * list.
*/ struct work_struct discard_work; /* discard worker */ struct work_struct reclaim_work; /* reclaim worker */ struct list_head discard_clusters; /* discard clusters list */ struct plist_node avail_lists[]; /* * entries in swap_avail_heads, one * entry per node. * Must be last as the number of the * array is nr_node_ids, which is not * a fixed value so have to allocate * dynamically. * And it has to be an array so that * plist_for_each_* can work.
*/
};
/*
 * folio_may_be_lru_cached(): may @folio be held in a per-CPU LRU cache?
 * @folio: the folio to check
 *
 * Returns true only when @folio is not a large folio, as reported by
 * folio_test_large().
 */
static inline bool folio_may_be_lru_cached(struct folio *folio)
{
	/*
	 * Holding PMD-sized folios in per-CPU LRU cache unbalances accounting.
	 * Holding small numbers of low-order mTHP folios in per-CPU LRU cache
	 * will be sensible, but nobody has implemented and tested that yet.
	 */
	return !folio_test_large(folio);
}
/*
 * The information on this website was compiled carefully and to the best
 * of our knowledge. However, neither the completeness, nor the correctness,
 * nor the quality of the information provided is guaranteed.
 *
 * Note:
 * The colored syntax highlighting and the measurement are still experimental.
 */