/* * The fourth extended filesystem constants/structures
*/
/* * with AGGRESSIVE_CHECK allocator runs consistency checks over * structures. these checks slow things down a lot
*/ #define AGGRESSIVE_CHECK__
/* * with DOUBLE_CHECK defined mballoc creates persistent in-core * bitmaps, maintains and uses them to check for double allocations
*/ #define DOUBLE_CHECK__
/* * Define EXT4FS_DEBUG to produce debug messages
*/ #undef EXT4FS_DEBUG
/* * For each criteria, mballoc has slightly different way of finding * the required blocks nad usually, higher the criteria the slower the * allocation. We start at lower criterias and keep falling back to * higher ones if we are not able to find any blocks. Lower (earlier) * criteria are faster.
*/ enum criteria { /* * Used when number of blocks needed is a power of 2. This * doesn't trigger any disk IO except prefetch and is the * fastest criteria.
*/
CR_POWER2_ALIGNED,
/* * Tries to lookup in-memory data structures to find the most * suitable group that satisfies goal request. No disk IO * except block prefetch.
*/
CR_GOAL_LEN_FAST,
/* * Same as CR_GOAL_LEN_FAST but is allowed to reduce the goal * length to the best available length for faster allocation.
*/
CR_BEST_AVAIL_LEN,
/* * Reads each block group sequentially, performing disk IO if * necessary, to find suitable block group. Tries to * allocate goal length but might trim the request if nothing * is found after enough tries.
*/
CR_GOAL_LEN_SLOW,
/* * Finds the first free set of blocks and allocates * those. This is only used in rare cases when * CR_GOAL_LEN_SLOW also fails to allocate anything.
*/
CR_ANY_FREE,
/* * Number of criterias defined.
*/
EXT4_MB_NUM_CRS
};
/* * Flags used in mballoc's allocation_context flags field. * * Also used to show what's going on for debugging purposes when the * flag field is exported via the traceport interface
*/
/* prefer goal again. length */ #define EXT4_MB_HINT_MERGE 0x0001 /* first blocks in the file */ #define EXT4_MB_HINT_FIRST 0x0008 /* data is being allocated */ #define EXT4_MB_HINT_DATA 0x0020 /* don't preallocate (for tails) */ #define EXT4_MB_HINT_NOPREALLOC 0x0040 /* allocate for locality group */ #define EXT4_MB_HINT_GROUP_ALLOC 0x0080 /* allocate goal blocks or none */ #define EXT4_MB_HINT_GOAL_ONLY 0x0100 /* goal is meaningful */ #define EXT4_MB_HINT_TRY_GOAL 0x0200 /* blocks already pre-reserved by delayed allocation */ #define EXT4_MB_DELALLOC_RESERVED 0x0400 /* We are doing stream allocation */ #define EXT4_MB_STREAM_ALLOC 0x0800 /* Use reserved root blocks if needed */ #define EXT4_MB_USE_ROOT_BLOCKS 0x1000 /* Use blocks from reserved pool */ #define EXT4_MB_USE_RESERVED 0x2000 /* Do strict check for free blocks while retrying block allocation */ #define EXT4_MB_STRICT_CHECK 0x4000
struct ext4_allocation_request { /* target inode for block we're allocating */ struct inode *inode; /* how many blocks we want to allocate */ unsignedint len; /* logical block in target inode */
ext4_lblk_t logical; /* the closest logical allocated block to the left */
ext4_lblk_t lleft; /* the closest logical allocated block to the right */
ext4_lblk_t lright; /* phys. target (a hint) */
ext4_fsblk_t goal; /* phys. block for the closest logical allocated block to the left */
ext4_fsblk_t pleft; /* phys. block for the closest logical allocated block to the right */
ext4_fsblk_t pright; /* flags. see above EXT4_MB_HINT_* */ unsignedint flags;
};
/* * Logical to physical block mapping, used by ext4_map_blocks() * * This structure is used to pass requests into ext4_map_blocks() as * well as to store the information returned by ext4_map_blocks(). It * takes less room on the stack than a struct buffer_head.
*/ #define EXT4_MAP_NEW BIT(BH_New) #define EXT4_MAP_MAPPED BIT(BH_Mapped) #define EXT4_MAP_UNWRITTEN BIT(BH_Unwritten) #define EXT4_MAP_BOUNDARY BIT(BH_Boundary) #define EXT4_MAP_DELAYED BIT(BH_Delay) /* * This is for use in ext4_map_query_blocks() for a special case where we can * have a physically and logically contiguous blocks split across two leaf * nodes instead of a single extent. This is required in case of atomic writes * to know whether the returned extent is last in leaf. If yes, then lookup for * next in leaf block in ext4_map_query_blocks_next_in_leaf(). * - This is never going to be added to any buffer head state. * - We use the next available bit after BH_BITMAP_UPTODATE.
*/ #define EXT4_MAP_QUERY_LAST_IN_LEAF BIT(BH_BITMAP_UPTODATE + 1) #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
EXT4_MAP_DELAYED | EXT4_MAP_QUERY_LAST_IN_LEAF)
struct ext4_io_end_vec { struct list_head list; /* list of io_end_vec */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
};
/* * For converting unwritten extents on a work queue. 'handle' is used for * buffered writeback.
*/ typedefstruct ext4_io_end { struct list_head list; /* per-file finished IO list */
handle_t *handle; /* handle reserved for extent
* conversion */ struct inode *inode; /* file being written to */ struct bio *bio; /* Linked list of completed
* bios covering the extent */ unsignedint flag; /* unwritten or not */
refcount_t count; /* reference counter */ struct list_head list_vec; /* list of ext4_io_end_vec */
} ext4_io_end_t;
/* Translate a block number to a cluster number */ #define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits) /* Translate a cluster number to a block number */ #define EXT4_C2B(sbi, cluster) ((cluster) << (sbi)->s_cluster_bits) /* Translate # of blks to # of clusters */ #define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
(sbi)->s_cluster_bits) /* Mask out the low bits to get the starting block of the cluster */ #define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \
~((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) #define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \
~((ext4_lblk_t) (s)->s_cluster_ratio - 1)) /* Fill in the low bits to get the last block of the cluster */ #define EXT4_LBLK_CFILL(sbi, lblk) ((lblk) | \
((ext4_lblk_t) (sbi)->s_cluster_ratio - 1)) /* Get the cluster offset */ #define EXT4_PBLK_COFF(s, pblk) ((pblk) & \
((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) #define EXT4_LBLK_COFF(s, lblk) ((lblk) & \
((ext4_lblk_t) (s)->s_cluster_ratio - 1))
#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
/* Flags that should be inherited by new inodes from their parent. */ #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\
EXT4_DAX_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */ #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\
EXT4_PROJINHERIT_FL))
/* Flags that are appropriate for non-directories/regular files. */ #define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
/* The only flags that should be swapped */ #define EXT4_FL_SHOULD_SWAP (EXT4_HUGE_FILE_FL | EXT4_EXTENTS_FL)
/* Flags which are mutually exclusive to DAX */ #define EXT4_DAX_MUT_EXCL (EXT4_VERITY_FL | EXT4_ENCRYPT_FL |\
EXT4_JOURNAL_DATA_FL | EXT4_INLINE_DATA_FL)
/* Mask out flags that are inappropriate for the given type of inode. */ staticinline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
{ if (S_ISDIR(mode)) return flags; elseif (S_ISREG(mode)) return flags & EXT4_REG_FLMASK; else return flags & EXT4_OTHER_FLMASK;
}
/* * Inode flags used for atomic set/get
*/ enum {
EXT4_INODE_SECRM = 0, /* Secure deletion */
EXT4_INODE_UNRM = 1, /* Undelete */
EXT4_INODE_COMPR = 2, /* Compress file */
EXT4_INODE_SYNC = 3, /* Synchronous updates */
EXT4_INODE_IMMUTABLE = 4, /* Immutable file */
EXT4_INODE_APPEND = 5, /* writes to file may only append */
EXT4_INODE_NODUMP = 6, /* do not dump file */
EXT4_INODE_NOATIME = 7, /* do not update atime */ /* Reserved for compression usage... */
EXT4_INODE_DIRTY = 8,
EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */
EXT4_INODE_NOCOMPR = 10, /* Don't compress */
EXT4_INODE_ENCRYPT = 11, /* Encrypted file */ /* End compression flags --- maybe not all used */
EXT4_INODE_INDEX = 12, /* hash-indexed directory */
EXT4_INODE_IMAGIC = 13, /* AFS directory */
EXT4_INODE_JOURNAL_DATA = 14, /* file data should be journaled */
EXT4_INODE_NOTAIL = 15, /* file tail should not be merged */
EXT4_INODE_DIRSYNC = 16, /* dirsync behaviour (directories only) */
EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/
EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */
EXT4_INODE_EXTENTS = 19, /* Inode uses extents */
EXT4_INODE_VERITY = 20, /* Verity protected inode */
EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ /* 22 was formerly EXT4_INODE_EOFBLOCKS */
EXT4_INODE_DAX = 25, /* Inode is DAX */
EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
EXT4_INODE_PROJINHERIT = 29, /* Create with parents projid */
EXT4_INODE_CASEFOLD = 30, /* Casefolded directory */
EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */
};
/* * Since it's pretty easy to mix up bit numbers and hex values, we use a * build-time check to make sure that EXT4_XXX_FL is consistent with respect to * EXT4_INODE_XXX. If all is well, the macros will be dropped, so, it won't cost * any extra space in the compiled kernel image, otherwise, the build will fail. * It's important that these values are the same, since we are using * EXT4_INODE_XXX to test for flag values, but EXT4_XXX_FL must be consistent * with the values of FS_XXX_FL defined in include/linux/fs.h and the on-disk * values found in ext2, ext3 and ext4 filesystems, and of course the values * defined in e2fsprogs. * * It's not paranoia if the Murphy's Law really *is* out to get you. :-)
*/ #define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1U << EXT4_INODE_##FLAG)) #define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG))
/* The struct ext4_new_group_input in kernel space, with free_blocks_count */ struct ext4_new_group_data {
__u32 group;
__u64 block_bitmap;
__u64 inode_bitmap;
__u64 inode_table;
__u32 blocks_count;
__u16 reserved_blocks;
__u16 mdata_blocks;
__u32 free_clusters_count;
};
/* Indexes used to index group tables in ext4_new_group_data */ enum {
BLOCK_BITMAP = 0, /* block bitmap */
INODE_BITMAP, /* inode bitmap */
INODE_TABLE, /* inode tables */
GROUP_TABLE_COUNT,
};
/* * Flags used by ext4_map_blocks()
*/ /* Allocate any needed blocks and/or convert an unwritten
extent to be an initialized ext4 */ #define EXT4_GET_BLOCKS_CREATE 0x0001 /* Request the creation of an unwritten extent */ #define EXT4_GET_BLOCKS_UNWRIT_EXT 0x0002 #define EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT (EXT4_GET_BLOCKS_UNWRIT_EXT|\
EXT4_GET_BLOCKS_CREATE) /* Caller is from the delayed allocation writeout path
* finally doing the actual allocation of delayed blocks */ #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 /* caller is from the direct IO path, request to creation of an unwritten extents if not allocated, split the unwritten
extent if blocks has been preallocated already*/ #define EXT4_GET_BLOCKS_PRE_IO 0x0008 #define EXT4_GET_BLOCKS_CONVERT 0x0010 #define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\
EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT) /* Eventual metadata allocation (due to growing extent tree)
* should not fail, so try to use reserved blocks for that.*/ #define EXT4_GET_BLOCKS_METADATA_NOFAIL 0x0020 /* Don't normalize allocation size (used for fallocate) */ #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 /* Convert written extents to unwritten */ #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100 /* Write zeros to newly created written extents */ #define EXT4_GET_BLOCKS_ZERO 0x0200 #define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\
EXT4_GET_BLOCKS_ZERO) /* Caller is in the context of data submission, such as writeback, * fsync, etc. Especially, in the generic writeback path, caller will * submit data before dropping transaction handle. This allows jbd2
* to avoid submitting data before commit. */ #define EXT4_GET_BLOCKS_IO_SUBMIT 0x0400 /* Convert extent to initialized after IO complete */ #define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT |\
EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |\
EXT4_GET_BLOCKS_IO_SUBMIT) /* Caller is in the atomic contex, find extent if it has been cached */ #define EXT4_GET_BLOCKS_CACHED_NOWAIT 0x0800 /* * Atomic write caller needs this to query in the slow path of mixed mapping * case, when a contiguous extent can be split across two adjacent leaf nodes. * Look EXT4_MAP_QUERY_LAST_IN_LEAF.
*/ #define EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF 0x1000
/* * The bit position of these flags must not overlap with any of the * EXT4_GET_BLOCKS_*. They are used by ext4_find_extent(), * read_extent_tree_block(), ext4_split_extent_at(), * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf(). * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be * caching the extents when reading from the extent tree while a * truncate or punch hole operation is in progress.
*/ #define EXT4_EX_NOCACHE 0x40000000 #define EXT4_EX_FORCE_CACHE 0x20000000 #define EXT4_EX_NOFAIL 0x10000000 /* * ext4_map_query_blocks() uses this filter mask to filter the flags needed to * pass while lookup/querying of on disk extent tree.
*/ #define EXT4_EX_QUERY_FILTER (EXT4_EX_NOCACHE | EXT4_EX_FORCE_CACHE |\
EXT4_EX_NOFAIL |\
EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF)
/* * Extended fields will fit into an inode if the filesystem was formatted * with large inodes (-I 256 or larger) and there are not currently any EAs * consuming all of the available space. For new inodes we always reserve * enough space for the kernel's known extended fields, but for inodes * created with an old kernel this might not have been the case. None of * the extended inode fields is critical for correct filesystem operation. * This macro checks if a certain field fits in the inode. Note that * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
*/ #define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
((offsetof(typeof(*ext4_inode), field) + \ sizeof((ext4_inode)->field)) \
<= (EXT4_GOOD_OLD_INODE_SIZE + \
(einode)->i_extra_isize)) \
/* * We use an encoding that preserves the times for extra epoch "00": * * extra msb of adjust for signed * epoch 32-bit 32-bit tv_sec to * bits time decoded 64-bit tv_sec 64-bit tv_sec valid time range * 0 0 1 -0x80000000..-0x00000001 0x000000000 1901-12-13..1969-12-31 * 0 0 0 0x000000000..0x07fffffff 0x000000000 1970-01-01..2038-01-19 * 0 1 1 0x080000000..0x0ffffffff 0x100000000 2038-01-19..2106-02-07 * 0 1 0 0x100000000..0x17fffffff 0x100000000 2106-02-07..2174-02-25 * 1 0 1 0x180000000..0x1ffffffff 0x200000000 2174-02-25..2242-03-16 * 1 0 0 0x200000000..0x27fffffff 0x200000000 2242-03-16..2310-04-04 * 1 1 1 0x280000000..0x2ffffffff 0x300000000 2310-04-04..2378-04-22 * 1 1 0 0x300000000..0x37fffffff 0x300000000 2378-04-22..2446-05-10 * * Note that previous versions of the kernel on 64-bit systems would * incorrectly use extra epoch bits 1,1 for dates between 1901 and * 1970. e2fsck will correct this, assuming that it is run on the * affected filesystem before 2242.
*/
/* * Lock subclasses for i_data_sem in the ext4_inode_info structure. * * These are needed to avoid lockdep false positives when we need to * allocate blocks to the quota inode during ext4_map_blocks(), while * holding i_data_sem for a normal (non-quota) inode. Since we don't * do quota tracking for the quota inode, this avoids deadlock (as * well as infinite recursion, since it isn't turtles all the way * down...) * * I_DATA_SEM_NORMAL - Used for most inodes * I_DATA_SEM_OTHER - Used by move_inode.c for the second normal inode * where the second inode has larger inode number * than the first * I_DATA_SEM_QUOTA - Used for quota inodes only * I_DATA_SEM_EA - Used for ea_inodes only
*/ enum {
I_DATA_SEM_NORMAL = 0,
I_DATA_SEM_OTHER,
I_DATA_SEM_QUOTA,
I_DATA_SEM_EA
};
/* * fourth extended file system inode data in memory
*/ struct ext4_inode_info {
__le32 i_data[15]; /* unconverted */
__u32 i_dtime;
ext4_fsblk_t i_file_acl;
/* * i_block_group is the number of the block group which contains * this file's inode. Constant across the lifetime of the inode, * it is used for making block allocation decisions - we try to * place a file's data blocks near its inode block, and new inodes * near to their parent directory's inode.
*/
ext4_group_t i_block_group;
ext4_lblk_t i_dir_start_lookup; #if (BITS_PER_LONG < 64) unsignedlong i_state_flags; /* Dynamic state flags */ #endif unsignedlong i_flags;
/* * Extended attributes can be read independently of the main file * data. Taking i_rwsem even when reading would cause contention * between readers of EAs and writers of regular file data, so * instead we synchronize on xattr_sem when reading or changing * EAs.
*/ struct rw_semaphore xattr_sem;
/* * Inodes with EXT4_STATE_ORPHAN_FILE use i_orphan_idx. Otherwise * i_orphan is used.
*/ union { struct list_head i_orphan; /* unlinked but open inodes */ unsignedint i_orphan_idx; /* Index in orphan file */
};
/* Fast commit related info */
/* For tracking dentry create updates */ struct list_head i_fc_dilist; struct list_head i_fc_list; /* * inodes that need fast commit * protected by sbi->s_fc_lock.
*/
/* Start of lblk range that needs to be committed in this fast commit */
ext4_lblk_t i_fc_lblk_start;
/* End of lblk range that needs to be committed in this fast commit */
ext4_lblk_t i_fc_lblk_len;
spinlock_t i_raw_lock; /* protects updates to the raw inode */
/* Fast commit wait queue for this inode */
wait_queue_head_t i_fc_wait;
/* * Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len * and inode's EXT4_FC_STATE_COMMITTING state bit.
*/
spinlock_t i_fc_lock;
/* * i_disksize keeps track of what the inode size is ON DISK, not * in memory. During truncate, i_size is set to the new size by * the VFS prior to calling ext4_truncate(), but the filesystem won't * set i_disksize to 0 until the truncate is actually under way. * * The intent is that i_disksize always represents the blocks which * are used by this file. This allows recovery to restart truncate * on orphans if we crash during truncate. We actually write i_disksize * into the on-disk inode when writing inodes out, instead of i_size. * * The only time when i_disksize and i_size may be different is when * a truncate is in progress. The only things which change i_disksize * are ext4_get_block (growth) and ext4_truncate (shrinkth).
*/
loff_t i_disksize;
/* * i_data_sem is for serialising ext4_truncate() against * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's * data tree are chopped off during truncate. We can't do that in * ext4 because whenever we perform intermediate commits during * truncate, the inode and all the metadata blocks *must* be in a * consistent state which allows truncation of the orphans to restart * during recovery. Hence we must fix the get_block-vs-truncate race * by other means, so we have i_data_sem.
*/ struct rw_semaphore i_data_sem; struct inode vfs_inode; struct jbd2_inode *jinode;
/* * File creation time. Its function is same as that of * struct timespec64 i_{a,c,m}time in the generic inode.
*/ struct timespec64 i_crtime;
/* mballoc */
atomic_t i_prealloc_active;
/* allocation reservation info for delalloc */ /* In case of bigalloc, this refer to clusters rather than blocks */ unsignedint i_reserved_data_blocks; struct rb_root i_prealloc_node;
rwlock_t i_prealloc_lock;
/* extents status tree */ struct ext4_es_tree i_es_tree;
rwlock_t i_es_lock; struct list_head i_es_list; unsignedint i_es_all_nr; /* protected by i_es_lock */ unsignedint i_es_shk_nr; /* protected by i_es_lock */
ext4_lblk_t i_es_shrink_lblk; /* Offset where we start searching for extents to shrink. Protected by
i_es_lock */
/* ialloc */
ext4_group_t i_last_alloc_group;
/* pending cluster reservations for bigalloc file systems */ struct ext4_pending_tree i_pending_tree;
/* * File system states
*/ #define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ #define EXT4_ERROR_FS 0x0002 /* Errors detected */ #define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */ #define EXT4_FC_REPLAY 0x0020 /* Fast commit replay ongoing */
/* * Misc. filesystem flags
*/ #define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ #define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ #define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
/* * Mount flags set via mount options or defaults
*/ #define EXT4_MOUNT_NO_MBCACHE 0x00001 /* Do not use mbcache */ #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ #define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ #define EXT4_MOUNT_ERRORS_MASK 0x00070 #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ #ifdef CONFIG_FS_DAX #define EXT4_MOUNT_DAX_ALWAYS 0x00200 /* Direct Access */ #else #define EXT4_MOUNT_DAX_ALWAYS 0 #endif #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ #define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ #define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ #define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ #define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ #define EXT4_MOUNT_QUOTA 0x40000 /* Some quota option set */ #define EXT4_MOUNT_USRQUOTA 0x80000 /* "old" user quota, * enable enforcement for hidden
* quota files */ #define EXT4_MOUNT_GRPQUOTA 0x100000 /* "old" group quota, enable * enforcement for hidden quota
* files */ #define EXT4_MOUNT_PRJQUOTA 0x200000 /* Enable project quota
* enforcement */ #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */ #define EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS 0x4000000 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ #define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
/* * Mount flags set either automatically (could not be set by mount option) * based on per file system feature or property or in special cases such as * distinguishing between explicit mount option definition and default.
*/ #define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly
specified delalloc */ #define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group size of blocksize * 8
blocks */ #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
file systems */ #define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly
specified journal checksum */
#define EXT4_MOUNT2_JOURNAL_FAST_COMMIT 0x00000010 /* Journal fast commit */ #define EXT4_MOUNT2_DAX_NEVER 0x00000020 /* Do not allow Direct Access */ #define EXT4_MOUNT2_DAX_INODE 0x00000040 /* For printing options only */ #define EXT4_MOUNT2_MB_OPTIMIZE_SCAN 0x00000080 /* Optimize group * scanning in mballoc
*/ #define EXT4_MOUNT2_ABORT 0x00000100 /* Abort filesystem */
/* * Structure of the super block
*/ struct ext4_super_block { /*00*/ __le32 s_inodes_count; /* Inodes count */
__le32 s_blocks_count_lo; /* Blocks count */
__le32 s_r_blocks_count_lo; /* Reserved blocks count */
__le32 s_free_blocks_count_lo; /* Free blocks count */ /*10*/ __le32 s_free_inodes_count; /* Free inodes count */
__le32 s_first_data_block; /* First Data Block */
__le32 s_log_block_size; /* Block size */
__le32 s_log_cluster_size; /* Allocation cluster size */ /*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
__le32 s_clusters_per_group; /* # Clusters per group */
__le32 s_inodes_per_group; /* # Inodes per group */
__le32 s_mtime; /* Mount time */ /*30*/ __le32 s_wtime; /* Write time */
__le16 s_mnt_count; /* Mount count */
__le16 s_max_mnt_count; /* Maximal mount count */
__le16 s_magic; /* Magic signature */
__le16 s_state; /* File system state */
__le16 s_errors; /* Behaviour when detecting errors */
__le16 s_minor_rev_level; /* minor revision level */ /*40*/ __le32 s_lastcheck; /* time of last check */
__le32 s_checkinterval; /* max. time between checks */
__le32 s_creator_os; /* OS */
__le32 s_rev_level; /* Revision level */ /*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
__le16 s_def_resgid; /* Default gid for reserved blocks */ /* * These fields are for EXT4_DYNAMIC_REV superblocks only. * * Note: the difference between the compatible feature set and * the incompatible feature set is that if there is a bit set * in the incompatible feature set that the kernel doesn't * know about, it should refuse to mount the filesystem. * * e2fsck's requirements are more strict; if it doesn't know * about a feature in either the compatible or incompatible * feature set, it must abort and not try to meddle with * things it doesn't understand...
*/
__le32 s_first_ino; /* First non-reserved inode */
__le16 s_inode_size; /* size of inode structure */
__le16 s_block_group_nr; /* block group # of this superblock */
__le32 s_feature_compat; /* compatible feature set */ /*60*/ __le32 s_feature_incompat; /* incompatible feature set */
__le32 s_feature_ro_compat; /* readonly-compatible feature set */ /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ /*78*/ char s_volume_name[EXT4_LABEL_MAX] __nonstring; /* volume name */ /*88*/ char s_last_mounted[64] __nonstring; /* directory where last mounted */ /*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ /* * Performance hints. Directory preallocation should only * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
*/
__u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
__u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
__le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ /* * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
*/ /*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ /*E0*/ __le32 s_journal_inum; /* inode number of journal file */
__le32 s_journal_dev; /* device number of journal file */
__le32 s_last_orphan; /* start of list of inodes to delete */
__le32 s_hash_seed[4]; /* HTREE hash seed */
__u8 s_def_hash_version; /* Default hash version to use */
__u8 s_jnl_backup_type;
__le16 s_desc_size; /* size of group descriptor */ /*100*/ __le32 s_default_mount_opts;
__le32 s_first_meta_bg; /* First metablock block group */
__le32 s_mkfs_time; /* When the filesystem was created */
__le32 s_jnl_blocks[17]; /* Backup of the journal inode */ /* 64bit support valid if EXT4_FEATURE_INCOMPAT_64BIT */ /*150*/ __le32 s_blocks_count_hi; /* Blocks count */
__le32 s_r_blocks_count_hi; /* Reserved blocks count */
__le32 s_free_blocks_count_hi; /* Free blocks count */
__le16 s_min_extra_isize; /* All inodes have at least # bytes */
__le16 s_want_extra_isize; /* New inodes should reserve # bytes */
__le32 s_flags; /* Miscellaneous flags */
__le16 s_raid_stride; /* RAID stride */
__le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */
__le64 s_mmp_block; /* Block for multi-mount protection */
__le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
__u8 s_log_groups_per_flex; /* FLEX_BG group size */
__u8 s_checksum_type; /* metadata checksum algorithm used */
__u8 s_encryption_level; /* versioning level for encryption */
__u8 s_reserved_pad; /* Padding to next 32bits */
__le64 s_kbytes_written; /* nr of lifetime kilobytes written */
__le32 s_snapshot_inum; /* Inode number of active snapshot */
__le32 s_snapshot_id; /* sequential ID of active snapshot */
__le64 s_snapshot_r_blocks_count; /* reserved blocks for active
snapshot's future use */
__le32 s_snapshot_list; /* inode number of the head of the
on-disk snapshot list */ #define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
__le32 s_error_count; /* number of fs errors */
__le32 s_first_error_time; /* first time an error happened */
__le32 s_first_error_ino; /* inode involved in first error */
__le64 s_first_error_block; /* block involved of first error */
__u8 s_first_error_func[32] __nonstring; /* function where the error happened */
__le32 s_first_error_line; /* line number where error happened */
__le32 s_last_error_time; /* most recent time of an error */
__le32 s_last_error_ino; /* inode involved in last error */
__le32 s_last_error_line; /* line number where error happened */
__le64 s_last_error_block; /* block involved of last error */
__u8 s_last_error_func[32] __nonstring; /* function where the error happened */ #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
__u8 s_mount_opts[64];
__le32 s_usr_quota_inum; /* inode for tracking user quota */
__le32 s_grp_quota_inum; /* inode for tracking group quota */
__le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
__le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */
__u8 s_encrypt_algos[4]; /* Encryption algorithms in use */
__u8 s_encrypt_pw_salt[16]; /* Salt used for string2key algorithm */
__le32 s_lpf_ino; /* Location of the lost+found inode */
__le32 s_prj_quota_inum; /* inode for tracking project quota */
__le32 s_checksum_seed; /* crc32c(uuid) if csum_seed set */
__u8 s_wtime_hi;
__u8 s_mtime_hi;
__u8 s_mkfs_time_hi;
__u8 s_lastcheck_hi;
__u8 s_first_error_time_hi;
__u8 s_last_error_time_hi;
__u8 s_first_error_errcode;
__u8 s_last_error_errcode;
__le16 s_encoding; /* Filename charset encoding */
__le16 s_encoding_flags; /* Filename charset encoding flags */
__le32 s_orphan_file_inum; /* Inode for tracking orphan inodes */
__le32 s_reserved[94]; /* Padding to the end of the block */
__le32 s_checksum; /* crc32c(superblock) */
};
/* Number of quota types we support */ #define EXT4_MAXQUOTAS 3
#define EXT4_ENC_UTF8_12_1 1
/* Types of ext4 journal triggers */ enum ext4_journal_trigger_type {
EXT4_JTR_ORPHAN_FILE,
EXT4_JTR_NONE /* This must be the last entry for indexing to work! */
};
struct ext4_orphan_block {
atomic_t ob_free_entries; /* Number of free orphan entries in block */ struct buffer_head *ob_bh; /* Buffer for orphan block */
};
/* * Info about orphan file.
*/ struct ext4_orphan_info { int of_blocks; /* Number of orphan blocks in a file */
__u32 of_csum_seed; /* Checksum seed for orphan file */ struct ext4_orphan_block *of_binfo; /* Array with info about orphan
* file blocks */
};
/* * fourth extended-fs super-block data in memory
*/ struct ext4_sb_info { unsignedlong s_desc_size; /* Size of a group descriptor in bytes */ unsignedlong s_inodes_per_block;/* Number of inodes per block */ unsignedlong s_blocks_per_group;/* Number of blocks in a group */ unsignedlong s_clusters_per_group; /* Number of clusters in a group */ unsignedlong s_inodes_per_group;/* Number of inodes in a group */ unsignedlong s_itb_per_group; /* Number of inode table blocks per group */ unsignedlong s_gdb_count; /* Number of group descriptor blocks */ unsignedlong s_desc_per_block; /* Number of group descriptors per block */
ext4_group_t s_groups_count; /* Number of groups in the fs */
ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ unsignedlong s_overhead; /* # of fs overhead clusters */ unsignedint s_cluster_ratio; /* Number of blocks per cluster */ unsignedint s_cluster_bits; /* log2 of s_cluster_ratio */
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ /* Array of bh's for the block group descriptors */ struct buffer_head * __rcu *s_group_desc; unsignedint s_mount_opt; unsignedint s_mount_opt2; unsignedlong s_mount_flags; unsignedint s_def_mount_opt; unsignedint s_def_mount_opt2;
ext4_fsblk_t s_sb_block;
atomic64_t s_resv_clusters;
kuid_t s_resuid;
kgid_t s_resgid; unsignedshort s_mount_state; unsignedshort s_pad; int s_addr_per_block_bits; int s_desc_per_block_bits; int s_inode_size; int s_first_ino; unsignedint s_inode_readahead_blks; unsignedint s_inode_goal;
u32 s_hash_seed[4]; int s_def_hash_version; int s_hash_unsigned; /* 3 if hash should be unsigned, 0 if not */ struct percpu_counter s_freeclusters_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; struct percpu_counter s_dirtyclusters_counter; struct percpu_counter s_sra_exceeded_retry_limit; struct blockgroup_lock *s_blockgroup_lock; struct proc_dir_entry *s_proc; struct kobject s_kobj; struct completion s_kobj_unregister; struct super_block *s_sb; struct buffer_head *s_mmp_bh;
/* Journaling */ struct journal_s *s_journal; unsignedlong s_ext4_flags; /* Ext4 superblock flags */ struct mutex s_orphan_lock; /* Protects on disk list changes */ struct list_head s_orphan; /* List of orphaned inodes in on disk
list */ struct ext4_orphan_info s_orphan_info; unsignedlong s_commit_interval;
u32 s_max_batch_time;
u32 s_min_batch_time; struct file *s_journal_bdev_file; #ifdef CONFIG_QUOTA /* Names of quota files with journalled quota */ char __rcu *s_qf_names[EXT4_MAXQUOTAS]; int s_jquota_fmt; /* Format of quota to use */ #endif unsignedint s_want_extra_isize; /* New inodes should reserve # bytes */ struct ext4_system_blocks __rcu *s_system_blks;
/* Encryption policy for '-o test_dummy_encryption' */ struct fscrypt_dummy_policy s_dummy_enc_policy;
/* * Barrier between writepages ops and changing any inode's JOURNAL_DATA * or EXTENTS flag or between writepages ops and changing DELALLOC or * DIOREAD_NOLOCK mount options on remount.
*/ struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev;
u64 s_dax_part_off; #ifdef CONFIG_EXT4_DEBUG unsignedlong s_simulate_fail; #endif /* Record the errseq of the backing block device */
errseq_t s_bdev_wb_err;
spinlock_t s_bdev_wb_lock;
/* Information about errors that happened during this mount */
spinlock_t s_error_lock; int s_add_error_count; int s_first_error_code;
__u32 s_first_error_line;
__u32 s_first_error_ino;
__u64 s_first_error_block; constchar *s_first_error_func;
time64_t s_first_error_time; int s_last_error_code;
__u32 s_last_error_line;
__u32 s_last_error_ino;
__u64 s_last_error_block; constchar *s_last_error_func;
time64_t s_last_error_time; /* * If we are in a context where we cannot update the on-disk * superblock, we queue the work here. This is used to update * the error information in the superblock, and for periodic * updates of the superblock called from the commit callback * function.
*/ struct work_struct s_sb_upd_work;
/* Atomic write unit values in bytes */ unsignedint s_awu_min; unsignedint s_awu_max;
/* Ext4 fast commit sub transaction ID */
atomic_t s_fc_subtid;
/* * After commit starts, the main queue gets locked, and the further * updates get added in the staging queue.
*/ #define FC_Q_MAIN 0 #define FC_Q_STAGING 1 struct list_head s_fc_q[2]; /* Inodes staged for fast commit * that have data changes in them.
*/ struct list_head s_fc_dentry_q[2]; /* directory entry updates */ unsignedint s_fc_bytes; /* * Main fast commit lock. This lock protects accesses to the * following fields: * ei->i_fc_list, s_fc_dentry_q, s_fc_q, s_fc_bytes, s_fc_bh.
*/ struct mutex s_fc_lock; struct buffer_head *s_fc_bh; struct ext4_fc_stats s_fc_stats;
tid_t s_fc_ineligible_tid; #ifdef CONFIG_EXT4_DEBUG int s_fc_debug_max_replay; #endif struct ext4_fc_replay_state s_fc_replay_state;
};
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.