/* * The fourth extended filesystem constants/structures
*/
/* * with AGGRESSIVE_CHECK allocator runs consistency checks over * structures. these checks slow things down a lot
*/ #define AGGRESSIVE_CHECK__
/* * with DOUBLE_CHECK defined mballoc creates persistent in-core * bitmaps, maintains and uses them to check for double allocations
*/ #define DOUBLE_CHECK__
/* * Define EXT4FS_DEBUG to produce debug messages
*/ #undef EXT4FS_DEBUG
/* * For each criteria, mballoc has slightly different way of finding * the required blocks nad usually, higher the criteria the slower the * allocation. We start at lower criterias and keep falling back to * higher ones if we are not able to find any blocks. Lower (earlier) * criteria are faster.
*/ enum criteria { /* * Used when number of blocks needed is a power of 2. This * doesn't trigger any disk IO except prefetch and is the * fastest criteria.
*/
CR_POWER2_ALIGNED,
/* * Tries to lookup in-memory data structures to find the most * suitable group that satisfies goal request. No disk IO * except block prefetch.
*/
CR_GOAL_LEN_FAST,
/* * Same as CR_GOAL_LEN_FAST but is allowed to reduce the goal * length to the best available length for faster allocation.
*/
CR_BEST_AVAIL_LEN,
/* * Reads each block group sequentially, performing disk IO if * necessary, to find suitable block group. Tries to * allocate goal length but might trim the request if nothing * is found after enough tries.
*/
CR_GOAL_LEN_SLOW,
/* * Finds the first free set of blocks and allocates * those. This is only used in rare cases when * CR_GOAL_LEN_SLOW also fails to allocate anything.
*/
CR_ANY_FREE,
/* * Number of criterias defined.
*/
EXT4_MB_NUM_CRS
};
/* * Flags used in mballoc's allocation_context flags field. * * Also used to show what's going on for debugging purposes when the * flag field is exported via the traceport interface
*/
/* prefer goal again. length */ #define EXT4_MB_HINT_MERGE 0x0001 /* first blocks in the file */ #define EXT4_MB_HINT_FIRST 0x0008 /* data is being allocated */ #define EXT4_MB_HINT_DATA 0x0020 /* don't preallocate (for tails) */ #define EXT4_MB_HINT_NOPREALLOC 0x0040 /* allocate for locality group */ #define EXT4_MB_HINT_GROUP_ALLOC 0x0080 /* allocate goal blocks or none */ #define EXT4_MB_HINT_GOAL_ONLY 0x0100 /* goal is meaningful */ #define EXT4_MB_HINT_TRY_GOAL 0x0200 /* blocks already pre-reserved by delayed allocation */ #define EXT4_MB_DELALLOC_RESERVED 0x0400 /* We are doing stream allocation */ #define EXT4_MB_STREAM_ALLOC 0x0800 /* Use reserved root blocks if needed */ #define EXT4_MB_USE_ROOT_BLOCKS 0x1000 /* Use blocks from reserved pool */ #define EXT4_MB_USE_RESERVED 0x2000 /* Do strict check for free blocks while retrying block allocation */ #define EXT4_MB_STRICT_CHECK 0x4000
struct ext4_allocation_request { /* target inode for block we're allocating */ struct inode *inode; /* how many blocks we want to allocate */ unsignedint len; /* logical block in target inode */
ext4_lblk_t logical; /* the closest logical allocated block to the left */
ext4_lblk_t lleft; /* the closest logical allocated block to the right */
ext4_lblk_t lright; /* phys. target (a hint) */
ext4_fsblk_t goal; /* phys. block for the closest logical allocated block to the left */
ext4_fsblk_t pleft; /* phys. block for the closest logical allocated block to the right */
ext4_fsblk_t pright; /* flags. see above EXT4_MB_HINT_* */ unsignedint flags;
};
/* * Logical to physical block mapping, used by ext4_map_blocks() * * This structure is used to pass requests into ext4_map_blocks() as * well as to store the information returned by ext4_map_blocks(). It * takes less room on the stack than a struct buffer_head.
*/ #define EXT4_MAP_NEW BIT(BH_New) #define EXT4_MAP_MAPPED BIT(BH_Mapped) #define EXT4_MAP_UNWRITTEN BIT(BH_Unwritten) #define EXT4_MAP_BOUNDARY BIT(BH_Boundary) #define EXT4_MAP_DELAYED BIT(BH_Delay) /* * This is for use in ext4_map_query_blocks() for a special case where we can * have a physically and logically contiguous blocks split across two leaf * nodes instead of a single extent. This is required in case of atomic writes * to know whether the returned extent is last in leaf. If yes, then lookup for * next in leaf block in ext4_map_query_blocks_next_in_leaf(). * - This is never going to be added to any buffer head state. * - We use the next available bit after BH_BITMAP_UPTODATE.
*/ #define EXT4_MAP_QUERY_LAST_IN_LEAF BIT(BH_BITMAP_UPTODATE + 1) #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
EXT4_MAP_DELAYED | EXT4_MAP_QUERY_LAST_IN_LEAF)
struct ext4_io_end_vec { struct list_head list; /* list of io_end_vec */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
};
/* * For converting unwritten extents on a work queue. 'handle' is used for * buffered writeback.
*/ typedefstruct ext4_io_end { struct list_head list; /* per-file finished IO list */
handle_t *handle; /* handle reserved for extent
* conversion */ struct inode *inode; /* file being written to */ struct bio *bio; /* Linked list of completed
* bios covering the extent */ unsignedint flag; /* unwritten or not */
refcount_t count; /* reference counter */ struct list_head list_vec; /* list of ext4_io_end_vec */
} ext4_io_end_t;
/* Translate a block number to a cluster number */ #define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits) /* Translate a cluster number to a block number */ #define EXT4_C2B(sbi, cluster) ((cluster) << (sbi)->s_cluster_bits) /* Translate # of blks to # of clusters */ #define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
(sbi)->s_cluster_bits) /* Mask out the low bits to get the starting block of the cluster */ #define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \
~((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) #define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \
~((ext4_lblk_t) (s)->s_cluster_ratio - 1)) /* Fill in the low bits to get the last block of the cluster */ #define EXT4_LBLK_CFILL(sbi, lblk) ((lblk) | \
((ext4_lblk_t) (sbi)->s_cluster_ratio - 1)) /* Get the cluster offset */ #define EXT4_PBLK_COFF(s, pblk) ((pblk) & \
((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) #define EXT4_LBLK_COFF(s, lblk) ((lblk) & \
((ext4_lblk_t) (s)->s_cluster_ratio - 1))
#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
/* Flags that should be inherited by new inodes from their parent. */ #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\
EXT4_DAX_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */ #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\
EXT4_PROJINHERIT_FL))
/* Flags that are appropriate for non-directories/regular files. */ #define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
/* The only flags that should be swapped */ #define EXT4_FL_SHOULD_SWAP (EXT4_HUGE_FILE_FL | EXT4_EXTENTS_FL)
/* Flags which are mutually exclusive to DAX */ #define EXT4_DAX_MUT_EXCL (EXT4_VERITY_FL | EXT4_ENCRYPT_FL |\
EXT4_JOURNAL_DATA_FL | EXT4_INLINE_DATA_FL)
/* Mask out flags that are inappropriate for the given type of inode. */ staticinline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
{ if (S_ISDIR(mode)) return flags; elseif (S_ISREG(mode)) return flags & EXT4_REG_FLMASK; else return flags & EXT4_OTHER_FLMASK;
}
/* * Inode flags used for atomic set/get
*/ enum {
EXT4_INODE_SECRM = 0, /* Secure deletion */
EXT4_INODE_UNRM = 1, /* Undelete */
EXT4_INODE_COMPR = 2, /* Compress file */
EXT4_INODE_SYNC = 3, /* Synchronous updates */
EXT4_INODE_IMMUTABLE = 4, /* Immutable file */
EXT4_INODE_APPEND = 5, /* writes to file may only append */
EXT4_INODE_NODUMP = 6, /* do not dump file */
EXT4_INODE_NOATIME = 7, /* do not update atime */ /* Reserved for compression usage... */
EXT4_INODE_DIRTY = 8,
EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */
EXT4_INODE_NOCOMPR = 10, /* Don't compress */
EXT4_INODE_ENCRYPT = 11, /* Encrypted file */ /* End compression flags --- maybe not all used */
EXT4_INODE_INDEX = 12, /* hash-indexed directory */
EXT4_INODE_IMAGIC = 13, /* AFS directory */
EXT4_INODE_JOURNAL_DATA = 14, /* file data should be journaled */
EXT4_INODE_NOTAIL = 15, /* file tail should not be merged */
EXT4_INODE_DIRSYNC = 16, /* dirsync behaviour (directories only) */
EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/
EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */
EXT4_INODE_EXTENTS = 19, /* Inode uses extents */
EXT4_INODE_VERITY = 20, /* Verity protected inode */
EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ /* 22 was formerly EXT4_INODE_EOFBLOCKS */
EXT4_INODE_DAX = 25, /* Inode is DAX */
EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
EXT4_INODE_PROJINHERIT = 29, /* Create with parents projid */
EXT4_INODE_CASEFOLD = 30, /* Casefolded directory */
EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */
};
/* * Since it's pretty easy to mix up bit numbers and hex values, we use a * build-time check to make sure that EXT4_XXX_FL is consistent with respect to * EXT4_INODE_XXX. If all is well, the macros will be dropped, so, it won't cost * any extra space in the compiled kernel image, otherwise, the build will fail. * It's important that these values are the same, since we are using * EXT4_INODE_XXX to test for flag values, but EXT4_XXX_FL must be consistent * with the values of FS_XXX_FL defined in include/linux/fs.h and the on-disk * values found in ext2, ext3 and ext4 filesystems, and of course the values * defined in e2fsprogs. * * It's not paranoia if the Murphy's Law really *is* out to get you. :-)
*/ #define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1U << EXT4_INODE_##FLAG)) #define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG))
/* The struct ext4_new_group_input in kernel space, with free_blocks_count */ struct ext4_new_group_data {
__u32 group;
__u64 block_bitmap;
__u64 inode_bitmap;
__u64 inode_table;
__u32 blocks_count;
__u16 reserved_blocks;
__u16 mdata_blocks;
__u32 free_clusters_count;
};
/* Indexes used to index group tables in ext4_new_group_data */ enum {
BLOCK_BITMAP = 0, /* block bitmap */
INODE_BITMAP, /* inode bitmap */
INODE_TABLE, /* inode tables */
GROUP_TABLE_COUNT,
};
/* * Flags used by ext4_map_blocks()
*/ /* Allocate any needed blocks and/or convert an unwritten
extent to be an initialized ext4 */ #define EXT4_GET_BLOCKS_CREATE 0x0001 /* Request the creation of an unwritten extent */ #define EXT4_GET_BLOCKS_UNWRIT_EXT 0x0002 #define EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT (EXT4_GET_BLOCKS_UNWRIT_EXT|\
EXT4_GET_BLOCKS_CREATE) /* Caller is from the delayed allocation writeout path
* finally doing the actual allocation of delayed blocks */ #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 /* caller is from the direct IO path, request to creation of an unwritten extents if not allocated, split the unwritten
extent if blocks has been preallocated already*/ #define EXT4_GET_BLOCKS_PRE_IO 0x0008 #define EXT4_GET_BLOCKS_CONVERT 0x0010 #define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\
EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT) /* Eventual metadata allocation (due to growing extent tree)
* should not fail, so try to use reserved blocks for that.*/ #define EXT4_GET_BLOCKS_METADATA_NOFAIL 0x0020 /* Don't normalize allocation size (used for fallocate) */ #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 /* Convert written extents to unwritten */ #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100 /* Write zeros to newly created written extents */ #define EXT4_GET_BLOCKS_ZERO 0x0200 #define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\
EXT4_GET_BLOCKS_ZERO) /* Caller is in the context of data submission, such as writeback, * fsync, etc. Especially, in the generic writeback path, caller will * submit data before dropping transaction handle. This allows jbd2
* to avoid submitting data before commit. */ #define EXT4_GET_BLOCKS_IO_SUBMIT 0x0400 /* Convert extent to initialized after IO complete */ #define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT |\
EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |\
EXT4_GET_BLOCKS_IO_SUBMIT) /* Caller is in the atomic contex, find extent if it has been cached */ #define EXT4_GET_BLOCKS_CACHED_NOWAIT 0x0800 /* * Atomic write caller needs this to query in the slow path of mixed mapping * case, when a contiguous extent can be split across two adjacent leaf nodes. * Look EXT4_MAP_QUERY_LAST_IN_LEAF.
*/ #define EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF 0x1000
/* * The bit position of these flags must not overlap with any of the * EXT4_GET_BLOCKS_*. They are used by ext4_find_extent(), * read_extent_tree_block(), ext4_split_extent_at(), * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf(). * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be * caching the extents when reading from the extent tree while a * truncate or punch hole operation is in progress.
*/ #define EXT4_EX_NOCACHE 0x40000000 #define EXT4_EX_FORCE_CACHE 0x20000000 #define EXT4_EX_NOFAIL 0x10000000 /* * ext4_map_query_blocks() uses this filter mask to filter the flags needed to * pass while lookup/querying of on disk extent tree.
*/ #define EXT4_EX_QUERY_FILTER (EXT4_EX_NOCACHE | EXT4_EX_FORCE_CACHE |\
EXT4_EX_NOFAIL |\
EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF)
/* * Extended fields will fit into an inode if the filesystem was formatted * with large inodes (-I 256 or larger) and there are not currently any EAs * consuming all of the available space. For new inodes we always reserve * enough space for the kernel's known extended fields, but for inodes * created with an old kernel this might not have been the case. None of * the extended inode fields is critical for correct filesystem operation. * This macro checks if a certain field fits in the inode. Note that * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
*/ #define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
((offsetof(typeof(*ext4_inode), field) + \ sizeof((ext4_inode)->field)) \
<= (EXT4_GOOD_OLD_INODE_SIZE + \
(einode)->i_extra_isize)) \
/* * We use an encoding that preserves the times for extra epoch "00": * * extra msb of adjust for signed * epoch 32-bit 32-bit tv_sec to * bits time decoded 64-bit tv_sec 64-bit tv_sec valid time range * 0 0 1 -0x80000000..-0x00000001 0x000000000 1901-12-13..1969-12-31 * 0 0 0 0x000000000..0x07fffffff 0x000000000 1970-01-01..2038-01-19 * 0 1 1 0x080000000..0x0ffffffff 0x100000000 2038-01-19..2106-02-07 * 0 1 0 0x100000000..0x17fffffff 0x100000000 2106-02-07..2174-02-25 * 1 0 1 0x180000000..0x1ffffffff 0x200000000 2174-02-25..2242-03-16 * 1 0 0 0x200000000..0x27fffffff 0x200000000 2242-03-16..2310-04-04 * 1 1 1 0x280000000..0x2ffffffff 0x300000000 2310-04-04..2378-04-22 * 1 1 0 0x300000000..0x37fffffff 0x300000000 2378-04-22..2446-05-10 * * Note that previous versions of the kernel on 64-bit systems would * incorrectly use extra epoch bits 1,1 for dates between 1901 and * 1970. e2fsck will correct this, assuming that it is run on the * affected filesystem before 2242.
*/
/* * Lock subclasses for i_data_sem in the ext4_inode_info structure. * * These are needed to avoid lockdep false positives when we need to * allocate blocks to the quota inode during ext4_map_blocks(), while * holding i_data_sem for a normal (non-quota) inode. Since we don't * do quota tracking for the quota inode, this avoids deadlock (as * well as infinite recursion, since it isn't turtles all the way * down...) * * I_DATA_SEM_NORMAL - Used for most inodes * I_DATA_SEM_OTHER - Used by move_inode.c for the second normal inode * where the second inode has larger inode number * than the first * I_DATA_SEM_QUOTA - Used for quota inodes only * I_DATA_SEM_EA - Used for ea_inodes only
*/ enum {
I_DATA_SEM_NORMAL = 0,
I_DATA_SEM_OTHER,
I_DATA_SEM_QUOTA,
I_DATA_SEM_EA
};
/* * fourth extended file system inode data in memory
*/ struct ext4_inode_info {
__le32 i_data[15]; /* unconverted */
__u32 i_dtime;
ext4_fsblk_t i_file_acl;
/* * i_block_group is the number of the block group which contains * this file's inode. Constant across the lifetime of the inode, * it is used for making block allocation decisions - we try to * place a file's data blocks near its inode block, and new inodes * near to their parent directory's inode.
*/
ext4_group_t i_block_group;
ext4_lblk_t i_dir_start_lookup; #if (BITS_PER_LONG < 64) unsignedlong i_state_flags; /* Dynamic state flags */ #endif unsignedlong i_flags;
/* * Extended attributes can be read independently of the main file * data. Taking i_rwsem even when reading would cause contention * between readers of EAs and writers of regular file data, so * instead we synchronize on xattr_sem when reading or changing * EAs.
*/ struct rw_semaphore xattr_sem;
/* * Inodes with EXT4_STATE_ORPHAN_FILE use i_orphan_idx. Otherwise * i_orphan is used.
*/ union { struct list_head i_orphan; /* unlinked but open inodes */ unsignedint i_orphan_idx; /* Index in orphan file */
};
/* Fast commit related info */
/* For tracking dentry create updates */ struct list_head i_fc_dilist; struct list_head i_fc_list; /* * inodes that need fast commit * protected by sbi->s_fc_lock.
*/
/* Start of lblk range that needs to be committed in this fast commit */
ext4_lblk_t i_fc_lblk_start;
/* End of lblk range that needs to be committed in this fast commit */
ext4_lblk_t i_fc_lblk_len;
spinlock_t i_raw_lock; /* protects updates to the raw inode */
/* Fast commit wait queue for this inode */
wait_queue_head_t i_fc_wait;
/* * Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len * and inode's EXT4_FC_STATE_COMMITTING state bit.
*/
spinlock_t i_fc_lock;
/* * i_disksize keeps track of what the inode size is ON DISK, not * in memory. During truncate, i_size is set to the new size by * the VFS prior to calling ext4_truncate(), but the filesystem won't * set i_disksize to 0 until the truncate is actually under way. * * The intent is that i_disksize always represents the blocks which * are used by this file. This allows recovery to restart truncate * on orphans if we crash during truncate. We actually write i_disksize * into the on-disk inode when writing inodes out, instead of i_size. * * The only time when i_disksize and i_size may be different is when * a truncate is in progress. The only things which change i_disksize * are ext4_get_block (growth) and ext4_truncate (shrinkth).
*/
loff_t i_disksize;
/* * i_data_sem is for serialising ext4_truncate() against * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's * data tree are chopped off during truncate. We can't do that in * ext4 because whenever we perform intermediate commits during * truncate, the inode and all the metadata blocks *must* be in a * consistent state which allows truncation of the orphans to restart * during recovery. Hence we must fix the get_block-vs-truncate race * by other means, so we have i_data_sem.
*/ struct rw_semaphore i_data_sem; struct inode vfs_inode; struct jbd2_inode *jinode;
/* * File creation time. Its function is same as that of * struct timespec64 i_{a,c,m}time in the generic inode.
*/ struct timespec64 i_crtime;
/* mballoc */
atomic_t i_prealloc_active;
/* allocation reservation info for delalloc */ /* In case of bigalloc, this refer to clusters rather than blocks */ unsignedint i_reserved_data_blocks; struct rb_root i_prealloc_node;
rwlock_t i_prealloc_lock;
/* extents status tree */ struct ext4_es_tree i_es_tree;
rwlock_t i_es_lock; struct list_head i_es_list; unsignedint i_es_all_nr; /* protected by i_es_lock */ unsignedint i_es_shk_nr; /* protected by i_es_lock */
ext4_lblk_t i_es_shrink_lblk; /* Offset where we start searching for extents to shrink. Protected by
i_es_lock */
/* ialloc */
ext4_group_t i_last_alloc_group;
/* pending cluster reservations for bigalloc file systems */ struct ext4_pending_tree i_pending_tree;
/* * File system states
*/ #define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ #define EXT4_ERROR_FS 0x0002 /* Errors detected */ #define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */ #define EXT4_FC_REPLAY 0x0020 /* Fast commit replay ongoing */
/* * Misc. filesystem flags
*/ #define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ #define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ #define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
/* * Mount flags set via mount options or defaults
*/ #define EXT4_MOUNT_NO_MBCACHE 0x00001 /* Do not use mbcache */ #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ #define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ #define EXT4_MOUNT_ERRORS_MASK 0x00070 #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ #ifdef CONFIG_FS_DAX #define EXT4_MOUNT_DAX_ALWAYS 0x00200 /* Direct Access */ #else #define EXT4_MOUNT_DAX_ALWAYS 0 #endif #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ #define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ #define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ #define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ #define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ #define EXT4_MOUNT_QUOTA 0x40000 /* Some quota option set */ #define EXT4_MOUNT_USRQUOTA 0x80000 /* "old" user quota, * enable enforcement for hidden
* quota files */ #define EXT4_MOUNT_GRPQUOTA 0x100000 /* "old" group quota, enable * enforcement for hidden quota
* files */ #define EXT4_MOUNT_PRJQUOTA 0x200000 /* Enable project quota
* enforcement */ #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */ #define EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS 0x4000000 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ #define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
/* * Mount flags set either automatically (could not be set by mount option) * based on per file system feature or property or in special cases such as * distinguishing between explicit mount option definition and default.
*/ #define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly
specified delalloc */ #define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group size of blocksize * 8
blocks */ #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
file systems */ #define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly
specified journal checksum */
#define EXT4_MOUNT2_JOURNAL_FAST_COMMIT 0x00000010 /* Journal fast commit */ #define EXT4_MOUNT2_DAX_NEVER 0x00000020 /* Do not allow Direct Access */ #define EXT4_MOUNT2_DAX_INODE 0x00000040 /* For printing options only */ #define EXT4_MOUNT2_MB_OPTIMIZE_SCAN 0x00000080 /* Optimize group * scanning in mballoc
*/ #define EXT4_MOUNT2_ABORT 0x00000100 /* Abort filesystem */
/* * Structure of the super block
*/ struct ext4_super_block { /*00*/ __le32 s_inodes_count; /* Inodes count */
__le32 s_blocks_count_lo; /* Blocks count */
__le32 s_r_blocks_count_lo; /* Reserved blocks count */
__le32 s_free_blocks_count_lo; /* Free blocks count */ /*10*/ __le32 s_free_inodes_count; /* Free inodes count */
__le32 s_first_data_block; /* First Data Block */
__le32 s_log_block_size; /* Block size */
__le32 s_log_cluster_size; /* Allocation cluster size */ /*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
__le32 s_clusters_per_group; /* # Clusters per group */
__le32 s_inodes_per_group; /* # Inodes per group */
__le32 s_mtime; /* Mount time */ /*30*/ __le32 s_wtime; /* Write time */
__le16 s_mnt_count; /* Mount count */
__le16 s_max_mnt_count; /* Maximal mount count */
__le16 s_magic; /* Magic signature */
__le16 s_state; /* File system state */
__le16 s_errors; /* Behaviour when detecting errors */
__le16 s_minor_rev_level; /* minor revision level */ /*40*/ __le32 s_lastcheck; /* time of last check */
__le32 s_checkinterval; /* max. time between checks */
__le32 s_creator_os; /* OS */
__le32 s_rev_level; /* Revision level */ /*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
__le16 s_def_resgid; /* Default gid for reserved blocks */ /* * These fields are for EXT4_DYNAMIC_REV superblocks only. * * Note: the difference between the compatible feature set and * the incompatible feature set is that if there is a bit set * in the incompatible feature set that the kernel doesn't * know about, it should refuse to mount the filesystem. * * e2fsck's requirements are more strict; if it doesn't know * about a feature in either the compatible or incompatible * feature set, it must abort and not try to meddle with * things it doesn't understand...
*/
__le32 s_first_ino; /* First non-reserved inode */
__le16 s_inode_size; /* size of inode structure */
__le16 s_block_group_nr; /* block group # of this superblock */
__le32 s_feature_compat; /* compatible feature set */ /*60*/ __le32 s_feature_incompat; /* incompatible feature set */
__le32 s_feature_ro_compat; /* readonly-compatible feature set */ /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ /*78*/ char s_volume_name[EXT4_LABEL_MAX] __nonstring; /* volume name */ /*88*/ char s_last_mounted[64] __nonstring; /* directory where last mounted */ /*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ /* * Performance hints. Directory preallocation should only * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
*/
__u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
__u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
__le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ /* * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
*/ /*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ /*E0*/ __le32 s_journal_inum; /* inode number of journal file */
__le32 s_journal_dev; /* device number of journal file */
__le32 s_last_orphan; /* start of list of inodes to delete */
__le32 s_hash_seed[4]; /* HTREE hash seed */
__u8 s_def_hash_version; /* Default hash version to use */
__u8 s_jnl_backup_type;
__le16 s_desc_size; /* size of group descriptor */ /*100*/ __le32 s_default_mount_opts;
__le32 s_first_meta_bg; /* First metablock block group */
__le32 s_mkfs_time; /* When the filesystem was created */
__le32 s_jnl_blocks[17]; /* Backup of the journal inode */ /* 64bit support valid if EXT4_FEATURE_INCOMPAT_64BIT */ /*150*/ __le32 s_blocks_count_hi; /* Blocks count */
__le32 s_r_blocks_count_hi; /* Reserved blocks count */
__le32 s_free_blocks_count_hi; /* Free blocks count */
__le16 s_min_extra_isize; /* All inodes have at least # bytes */
__le16 s_want_extra_isize; /* New inodes should reserve # bytes */
__le32 s_flags; /* Miscellaneous flags */
__le16 s_raid_stride; /* RAID stride */
__le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */
__le64 s_mmp_block; /* Block for multi-mount protection */
__le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
__u8 s_log_groups_per_flex; /* FLEX_BG group size */
__u8 s_checksum_type; /* metadata checksum algorithm used */
__u8 s_encryption_level; /* versioning level for encryption */
__u8 s_reserved_pad; /* Padding to next 32bits */
__le64 s_kbytes_written; /* nr of lifetime kilobytes written */
__le32 s_snapshot_inum; /* Inode number of active snapshot */
__le32 s_snapshot_id; /* sequential ID of active snapshot */
__le64 s_snapshot_r_blocks_count; /* reserved blocks for active
snapshot's future use */
__le32 s_snapshot_list; /* inode number of the head of the
on-disk snapshot list */ #define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
__le32 s_error_count; /* number of fs errors */
__le32 s_first_error_time; /* first time an error happened */
__le32 s_first_error_ino; /* inode involved in first error */
__le64 s_first_error_block; /* block involved of first error */
__u8 s_first_error_func[32] __nonstring; /* function where the error happened */
__le32 s_first_error_line; /* line number where error happened */
__le32 s_last_error_time; /* most recent time of an error */
__le32 s_last_error_ino; /* inode involved in last error */
__le32 s_last_error_line; /* line number where error happened */
__le64 s_last_error_block; /* block involved of last error */
__u8 s_last_error_func[32] __nonstring; /* function where the error happened */ #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
__u8 s_mount_opts[64];
__le32 s_usr_quota_inum; /* inode for tracking user quota */
__le32 s_grp_quota_inum; /* inode for tracking group quota */
__le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
__le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */
__u8 s_encrypt_algos[4]; /* Encryption algorithms in use */
__u8 s_encrypt_pw_salt[16]; /* Salt used for string2key algorithm */
__le32 s_lpf_ino; /* Location of the lost+found inode */
__le32 s_prj_quota_inum; /* inode for tracking project quota */
__le32 s_checksum_seed; /* crc32c(uuid) if csum_seed set */
__u8 s_wtime_hi;
__u8 s_mtime_hi;
__u8 s_mkfs_time_hi;
__u8 s_lastcheck_hi;
__u8 s_first_error_time_hi;
__u8 s_last_error_time_hi;
__u8 s_first_error_errcode;
__u8 s_last_error_errcode;
__le16 s_encoding; /* Filename charset encoding */
__le16 s_encoding_flags; /* Filename charset encoding flags */
__le32 s_orphan_file_inum; /* Inode for tracking orphan inodes */
__le32 s_reserved[94]; /* Padding to the end of the block */
__le32 s_checksum; /* crc32c(superblock) */
};
/* Number of quota types we support */ #define EXT4_MAXQUOTAS 3
#define EXT4_ENC_UTF8_12_1 1
/* Types of ext4 journal triggers */ enum ext4_journal_trigger_type {
EXT4_JTR_ORPHAN_FILE,
EXT4_JTR_NONE /* This must be the last entry for indexing to work! */
};
struct ext4_orphan_block {
atomic_t ob_free_entries; /* Number of free orphan entries in block */ struct buffer_head *ob_bh; /* Buffer for orphan block */
};
/* * Info about orphan file.
*/ struct ext4_orphan_info { int of_blocks; /* Number of orphan blocks in a file */
__u32 of_csum_seed; /* Checksum seed for orphan file */ struct ext4_orphan_block *of_binfo; /* Array with info about orphan
* file blocks */
};
/* * fourth extended-fs super-block data in memory
*/ struct ext4_sb_info { unsignedlong s_desc_size; /* Size of a group descriptor in bytes */ unsignedlong s_inodes_per_block;/* Number of inodes per block */ unsignedlong s_blocks_per_group;/* Number of blocks in a group */ unsignedlong s_clusters_per_group; /* Number of clusters in a group */ unsignedlong s_inodes_per_group;/* Number of inodes in a group */ unsignedlong s_itb_per_group; /* Number of inode table blocks per group */ unsignedlong s_gdb_count; /* Number of group descriptor blocks */ unsignedlong s_desc_per_block; /* Number of group descriptors per block */
ext4_group_t s_groups_count; /* Number of groups in the fs */
ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ unsignedlong s_overhead; /* # of fs overhead clusters */ unsignedint s_cluster_ratio; /* Number of blocks per cluster */ unsignedint s_cluster_bits; /* log2 of s_cluster_ratio */
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ /* Array of bh's for the block group descriptors */ struct buffer_head * __rcu *s_group_desc; unsignedint s_mount_opt; unsignedint s_mount_opt2; unsignedlong s_mount_flags; unsignedint s_def_mount_opt; unsignedint s_def_mount_opt2;
ext4_fsblk_t s_sb_block;
atomic64_t s_resv_clusters;
kuid_t s_resuid;
kgid_t s_resgid; unsignedshort s_mount_state; unsignedshort s_pad; int s_addr_per_block_bits; int s_desc_per_block_bits; int s_inode_size; int s_first_ino; unsignedint s_inode_readahead_blks; unsignedint s_inode_goal;
u32 s_hash_seed[4]; int s_def_hash_version; int s_hash_unsigned; /* 3 if hash should be unsigned, 0 if not */ struct percpu_counter s_freeclusters_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; struct percpu_counter s_dirtyclusters_counter; struct percpu_counter s_sra_exceeded_retry_limit; struct blockgroup_lock *s_blockgroup_lock; struct proc_dir_entry *s_proc; struct kobject s_kobj; struct completion s_kobj_unregister; struct super_block *s_sb; struct buffer_head *s_mmp_bh;
/* Journaling */ struct journal_s *s_journal; unsignedlong s_ext4_flags; /* Ext4 superblock flags */ struct mutex s_orphan_lock; /* Protects on disk list changes */ struct list_head s_orphan; /* List of orphaned inodes in on disk
list */ struct ext4_orphan_info s_orphan_info; unsignedlong s_commit_interval;
u32 s_max_batch_time;
u32 s_min_batch_time; struct file *s_journal_bdev_file; #ifdef CONFIG_QUOTA /* Names of quota files with journalled quota */ char __rcu *s_qf_names[EXT4_MAXQUOTAS]; int s_jquota_fmt; /* Format of quota to use */ #endif unsignedint s_want_extra_isize; /* New inodes should reserve # bytes */ struct ext4_system_blocks __rcu *s_system_blks;
/* Encryption policy for '-o test_dummy_encryption' */ struct fscrypt_dummy_policy s_dummy_enc_policy;
/* * Barrier between writepages ops and changing any inode's JOURNAL_DATA * or EXTENTS flag or between writepages ops and changing DELALLOC or * DIOREAD_NOLOCK mount options on remount.
*/ struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev;
u64 s_dax_part_off; #ifdef CONFIG_EXT4_DEBUG unsignedlong s_simulate_fail; #endif /* Record the errseq of the backing block device */
errseq_t s_bdev_wb_err;
spinlock_t s_bdev_wb_lock;
/* Information about errors that happened during this mount */
spinlock_t s_error_lock; int s_add_error_count; int s_first_error_code;
__u32 s_first_error_line;
__u32 s_first_error_ino;
__u64 s_first_error_block; constchar *s_first_error_func;
time64_t s_first_error_time; int s_last_error_code;
__u32 s_last_error_line;
__u32 s_last_error_ino;
__u64 s_last_error_block; constchar *s_last_error_func;
time64_t s_last_error_time; /* * If we are in a context where we cannot update the on-disk * superblock, we queue the work here. This is used to update * the error information in the superblock, and for periodic * updates of the superblock called from the commit callback * function.
*/ struct work_struct s_sb_upd_work;
/* Atomic write unit values in bytes */ unsignedint s_awu_min; unsignedint s_awu_max;
/* Ext4 fast commit sub transaction ID */
atomic_t s_fc_subtid;
/* * After commit starts, the main queue gets locked, and the further * updates get added in the staging queue.
*/ #define FC_Q_MAIN 0 #define FC_Q_STAGING 1 struct list_head s_fc_q[2]; /* Inodes staged for fast commit * that have data changes in them.
*/ struct list_head s_fc_dentry_q[2]; /* directory entry updates */ unsignedint s_fc_bytes; /* * Main fast commit lock. This lock protects accesses to the * following fields: * ei->i_fc_list, s_fc_dentry_q, s_fc_q, s_fc_bytes, s_fc_bh.
*/ struct mutex s_fc_lock; struct buffer_head *s_fc_bh; struct ext4_fc_stats s_fc_stats;
tid_t s_fc_ineligible_tid; #ifdef CONFIG_EXT4_DEBUG int s_fc_debug_max_replay; #endif struct ext4_fc_replay_state s_fc_replay_state;
};
/* * Returns: sbi->field[index] * Used to access an array element from the following sbi fields which require * rcu protection to avoid dereferencing an invalid pointer due to reassignment * - s_group_desc * - s_group_info * - s_flex_group
*/ #define sbi_array_rcu_deref(sbi, field, index) \
({ \
typeof(*((sbi)->field)) _v; \
rcu_read_lock(); \
_v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \
rcu_read_unlock(); \
_v; \
})
/* * run-time mount flags
*/ enum {
EXT4_MF_MNTDIR_SAMPLED,
EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */
EXT4_MF_JOURNAL_DESTROY /* Journal is in process of destroying */
};
staticinlinevoid ext4_set_mount_flag(struct super_block *sb, int bit)
{
set_bit(bit, &EXT4_SB(sb)->s_mount_flags);
}
staticinlinevoid ext4_clear_mount_flag(struct super_block *sb, int bit)
{
clear_bit(bit, &EXT4_SB(sb)->s_mount_flags);
}
/* * Error number codes for s_{first,last}_error_errno * * Linux errno numbers are architecture specific, so we need to translate * them into something which is architecture independent. We don't define * codes for all errno's; just the ones which are most likely to be the cause * of an ext4_error() call.
*/ #define EXT4_ERR_UNKNOWN 1 #define EXT4_ERR_EIO 2 #define EXT4_ERR_ENOMEM 3 #define EXT4_ERR_EFSBADCRC 4 #define EXT4_ERR_EFSCORRUPTED 5 #define EXT4_ERR_ENOSPC 6 #define EXT4_ERR_ENOKEY 7 #define EXT4_ERR_EROFS 8 #define EXT4_ERR_EFBIG 9 #define EXT4_ERR_EEXIST 10 #define EXT4_ERR_ERANGE 11 #define EXT4_ERR_EOVERFLOW 12 #define EXT4_ERR_EBUSY 13 #define EXT4_ERR_ENOTDIR 14 #define EXT4_ERR_ENOTEMPTY 15 #define EXT4_ERR_ESHUTDOWN 16 #define EXT4_ERR_EFAULT 17
/* * Inode dynamic state flags
*/ enum {
EXT4_STATE_NEW, /* inode is newly created */
EXT4_STATE_XATTR, /* has in-inode xattrs */
EXT4_STATE_NO_EXPAND, /* No space for expansion */
EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */
EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
EXT4_STATE_NEWENTRY, /* File just added to dir */
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */
EXT4_STATE_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
EXT4_STATE_FC_COMMITTING, /* Fast commit ongoing */
EXT4_STATE_FC_FLUSHING_DATA, /* Fast commit flushing data */
EXT4_STATE_ORPHAN_FILE, /* Inode orphaned in orphan file */
};
/* Add these declarations here only so that these functions can be
* found by name. Otherwise, they are very hard to locate. */ staticinlineint ext4_test_inode_flag(struct inode *inode, int bit); staticinlinevoid ext4_set_inode_flag(struct inode *inode, int bit); staticinlinevoid ext4_clear_inode_flag(struct inode *inode, int bit);
EXT4_INODE_BIT_FNS(flag, flags, 0)
/* Add these declarations here only so that these functions can be
* found by name. Otherwise, they are very hard to locate. */ staticinlineint ext4_test_inode_state(struct inode *inode, int bit); staticinlinevoid ext4_set_inode_state(struct inode *inode, int bit); staticinlinevoid ext4_clear_inode_state(struct inode *inode, int bit); #if (BITS_PER_LONG < 64)
EXT4_INODE_BIT_FNS(state, state_flags, 0)
staticinlinevoid ext4_clear_state_flags(struct ext4_inode_info *ei)
{ /* We depend on the fact that callers will set i_flags */
} #endif #else /* Assume that user mode programs are passing in an ext4fs superblock, not * a kernel struct super_block. This will allow us to call the feature-test
* macros from user land. */ #define EXT4_SB(sb) (sb) #endif
#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001 #define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002 #define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004 #define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008 #define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 #define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 #define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200 /* * The reason why "FAST_COMMIT" is a compat feature is that, FS becomes * incompatible only if fast commit blocks are present in the FS. Since we * clear the journal (and thus the fast commit blocks), we don't mark FS as * incompatible. We also have a JBD2 incompat feature, which gets set when * there are fast commit blocks present in the journal.
*/ #define EXT4_FEATURE_COMPAT_FAST_COMMIT 0x0400 #define EXT4_FEATURE_COMPAT_STABLE_INODES 0x0800 #define EXT4_FEATURE_COMPAT_ORPHAN_FILE 0x1000 /* Orphan file exists */
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 #define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008 #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 /* * METADATA_CSUM also enables group descriptor checksums (GDT_CSUM). When * METADATA_CSUM is set, group descriptor checksums use the same algorithm as * all other data structures' checksums. However, the METADATA_CSUM and * GDT_CSUM bits are mutually exclusive.
*/ #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 #define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000 #define EXT4_FEATURE_RO_COMPAT_PROJECT 0x2000 #define EXT4_FEATURE_RO_COMPAT_VERITY 0x8000 #define EXT4_FEATURE_RO_COMPAT_ORPHAN_PRESENT 0x10000 /* Orphan file may be
non-empty */
/* * Minimum number of groups in a flexgroup before we separate out * directories into the first block group of a flexgroup
*/ #define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4
/* * Structure of a directory entry
*/ #define EXT4_NAME_LEN 255 /* * Base length of the ext4 directory entry excluding the name length
*/ #define EXT4_BASE_DIR_LEN (sizeof(struct ext4_dir_entry_2) - EXT4_NAME_LEN)
struct ext4_dir_entry {
__le32 inode; /* Inode number */
__le16 rec_len; /* Directory entry length */
__le16 name_len; /* Name length */ char name[EXT4_NAME_LEN]; /* File name */
};
/* * Encrypted Casefolded entries require saving the hash on disk. This structure * followed ext4_dir_entry_2's name[name_len] at the next 4 byte aligned * boundary.
*/ struct ext4_dir_entry_hash {
__le32 hash;
__le32 minor_hash;
};
/* * The new version of the directory entry. Since EXT4 structures are * stored in intel byte order, and the name_len field could never be * bigger than 255 chars, it's safe to reclaim the extra byte for the * file_type field.
*/ struct ext4_dir_entry_2 {
__le32 inode; /* Inode number */
__le16 rec_len; /* Directory entry length */
__u8 name_len; /* Name length */
__u8 file_type; /* See file type macros EXT4_FT_* below */ char name[EXT4_NAME_LEN]; /* File name */
};
/* * Access the hashes at the end of ext4_dir_entry_2
*/ #define EXT4_DIRENT_HASHES(entry) \
((struct ext4_dir_entry_hash *) \
(((void *)(entry)) + \
((8 + (entry)->name_len + EXT4_DIR_ROUND) & ~EXT4_DIR_ROUND))) #define EXT4_DIRENT_HASH(entry) le32_to_cpu(EXT4_DIRENT_HASHES(entry)->hash) #define EXT4_DIRENT_MINOR_HASH(entry) \
le32_to_cpu(EXT4_DIRENT_HASHES(entry)->minor_hash)
/* * This is a bogus directory entry at the end of each leaf block that * records checksums.
*/ struct ext4_dir_entry_tail {
__le32 det_reserved_zero1; /* Pretend to be unused */
__le16 det_rec_len; /* 12 */
__u8 det_reserved_zero2; /* Zero name length */
__u8 det_reserved_ft; /* 0xDE, fake file type */
__le32 det_checksum; /* crc32c(uuid+inum+dirblock) */
};
/* * Ext4 directory file types. Only the low 3 bits are used. The * other bits are reserved for now.
*/ #define EXT4_FT_UNKNOWN 0 #define EXT4_FT_REG_FILE 1 #define EXT4_FT_DIR 2 #define EXT4_FT_CHRDEV 3 #define EXT4_FT_BLKDEV 4 #define EXT4_FT_FIFO 5 #define EXT4_FT_SOCK 6 #define EXT4_FT_SYMLINK 7
#define EXT4_FT_MAX 8
#define EXT4_FT_DIR_CSUM 0xDE
/* * EXT4_DIR_PAD defines the directory entries boundaries * * NOTE: It must be a multiple of 4
*/ #define EXT4_DIR_PAD 4 #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) #define EXT4_MAX_REC_LEN ((1<<16)-1)
/* * The rec_len is dependent on the type of directory. Directories that are * casefolded and encrypted need to store the hash as well, so we add room for * ext4_extended_dir_entry_2. For all entries related to '.' or '..' you should * pass NULL for dir, as those entries do not use the extra fields.
*/ staticinlineunsignedint ext4_dir_rec_len(__u8 name_len, conststruct inode *dir)
{ int rec_len = (name_len + 8 + EXT4_DIR_ROUND);
/* * If we ever get support for fs block sizes > page_size, we'll need * to remove the #if statements in the next two functions...
*/ staticinlineunsignedint
ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
{ unsigned len = le16_to_cpu(dlen);
/* * Describe an inode's exact location on disk and in memory
*/ struct ext4_iloc
{ struct buffer_head *bh; unsignedlong offset;
ext4_group_t block_group;
};
/* * This structure is stuffed into the struct file's private_data field * for directories. It is where we put information so that we can do * readdir operations in hash tree order.
*/ struct dir_private_info { struct rb_root root; struct rb_node *curr_node; struct fname *extra_fname;
loff_t last_pos;
__u32 curr_hash;
__u32 curr_minor_hash;
__u32 next_hash;
u64 cookie; bool initialized;
};
/* calculate the first block number of the group */ staticinline ext4_fsblk_t
ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
{ return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
}
/* * Special error return code only used by dx_probe() and its callers.
*/ #define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
/* * This structure will be used for multiple mount protection. It will be * written into the block number saved in the s_mmp_block field in the * superblock. Programs that check MMP should assume that if * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe * to use the filesystem, regardless of how old the timestamp is.
*/ #define EXT4_MMP_MAGIC 0x004D4D50U /* ASCII for MMP */ #define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */ #define EXT4_MMP_SEQ_FSCK 0xE24D4D50U /* mmp_seq value when being fscked */ #define EXT4_MMP_SEQ_MAX 0xE24D4D4FU /* maximum valid mmp_seq value */
struct mmp_struct {
__le32 mmp_magic; /* Magic number for MMP */
__le32 mmp_seq; /* Sequence no. updated periodically */
/* * mmp_time, mmp_nodename & mmp_bdevname are only used for information * purposes and do not affect the correctness of the algorithm
*/
__le64 mmp_time; /* Time last updated */ char mmp_nodename[64]; /* Node which last updated MMP block */ char mmp_bdevname[32]; /* Bdev which last updated MMP block */
/* * mmp_check_interval is used to verify if the MMP block has been * updated on the block device. The value is updated based on the * maximum time to write the MMP block during an update cycle.
*/
__le16 mmp_check_interval;
/* arguments passed to the mmp thread */ struct mmpd_data { struct buffer_head *bh; /* bh from initial read_mmp_block() */ struct super_block *sb; /* super block of the fs */
};
/* * Check interval multiplier * The MMP block is written every update interval and initially checked every * update interval x the multiplier (the value is then adapted based on the * write latency). The reason is that writes can be delayed under load and we * don't want readers to incorrectly assume that the filesystem is no longer * in use.
*/ #define EXT4_MMP_CHECK_MULT 2UL
/* * Minimum interval for MMP checking in seconds.
*/ #define EXT4_MMP_MIN_CHECK_INTERVAL 5UL
/* * Maximum interval for MMP checking in seconds.
*/ #define EXT4_MMP_MAX_CHECK_INTERVAL 300UL
/* * Function prototypes
*/
/* * Ok, these declarations are also in <linux/kernel.h> but none of the * ext4 source programs needs to include it so they are duplicated here.
*/ # define NORET_TYPE /**/ # define ATTRIB_NORET __attribute__((noreturn)) # define NORET_AND noreturn,
/* inode.c */ void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, struct ext4_inode_info *ei); int ext4_inode_is_fast_symlink(struct inode *inode); void ext4_check_map_extents_env(struct inode *inode); struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count, bool wait, struct buffer_head **bhs); int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create); int ext4_walk_page_buffers(handle_t *handle, struct inode *inode, struct buffer_head *head, unsigned from, unsigned to, int *partial, int (*fn)(handle_t *handle, struct inode *inode, struct buffer_head *bh)); int do_journal_get_write_access(handle_t *handle, struct inode *inode, struct buffer_head *bh); void ext4_set_inode_mapping_order(struct inode *inode); #define FALL_BACK_TO_NONDELALLOC 1 #define CONVERT_INLINE_DATA 2
typedefenum {
EXT4_IGET_NORMAL = 0,
EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */
EXT4_IGET_HANDLE = 0x0002, /* Inode # is from a handle */
EXT4_IGET_BAD = 0x0004, /* Allow to iget a bad inode */
EXT4_IGET_EA_INODE = 0x0008 /* Inode should contain an EA value */
} ext4_iget_flags;
/* * Reading s_groups_count requires using smp_rmb() afterwards. See * the locking protocol documented in the comments of ext4_group_add() * in resize.c
*/ staticinline ext4_group_t ext4_get_groups_count(struct super_block *sb)
{
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
#define ext4_std_error(sb, errno) \ do { \ if ((errno)) \
__ext4_std_error((sb), __func__, __LINE__, (errno)); \
} while (0)
#ifdef CONFIG_SMP /* Each CPU can accumulate percpu_counter_batch clusters in their local * counters. So we need to make sure we have free clusters more * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
*/ #define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids)) #else #define EXT4_FREECLUSTERS_WATERMARK 0 #endif
/* Update i_disksize. Requires i_rwsem to avoid races with truncate */ staticinlinevoid ext4_update_i_disksize(struct inode *inode, loff_t newsize)
{
WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
!inode_is_locked(inode));
down_write(&EXT4_I(inode)->i_data_sem); if (newsize > EXT4_I(inode)->i_disksize)
WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize);
up_write(&EXT4_I(inode)->i_data_sem);
}
/* Update i_size, i_disksize. Requires i_rwsem to avoid races with truncate */ staticinlineint ext4_update_inode_size(struct inode *inode, loff_t newsize)
{ int changed = 0;
int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
loff_t len);
struct ext4_group_info { unsignedlong bb_state; #ifdef AGGRESSIVE_CHECK unsignedlong bb_check_counter; #endif struct rb_root bb_free_root;
ext4_grpblk_t bb_first_free; /* first free block */
ext4_grpblk_t bb_free; /* total free blocks */
ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ int bb_avg_fragment_size_order; /* order of average
fragment in BG */
ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */
ext4_group_t bb_group; /* Group number */ struct list_head bb_prealloc_list; #ifdef DOUBLE_CHECK void *bb_bitmap; #endif struct rw_semaphore alloc_sem;
ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block * regions, index is order. * bb_counters[3] = 5 means
* 5 free 8-block regions. */
};
/* * Returns true if the filesystem is busy enough that attempts to * access the block group locks has run into contention.
*/ staticinlineint ext4_fs_is_busy(struct ext4_sb_info *sbi)
{ return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD);
}
staticinlinebool ext4_try_lock_group(struct super_block *sb, ext4_group_t group)
{ if (!spin_trylock(ext4_group_lock_ptr(sb, group))) returnfalse; /* * We're able to grab the lock right away, so drop the lock * contention counter.
*/
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0); returntrue;
}
staticinlinevoid ext4_lock_group(struct super_block *sb, ext4_group_t group)
{ if (!ext4_try_lock_group(sb, group)) { /* * The lock is busy, so bump the contention counter, * and then wait on the spin lock.
*/
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
EXT4_MAX_CONTENTION);
spin_lock(ext4_group_lock_ptr(sb, group));
}
}
/* * Add new method to test whether block and inode bitmaps are properly * initialized. With uninit_bg reading the block from disk is not enough * to mark the bitmap uptodate. We need to also zero-out the bitmap
*/ #define BH_BITMAP_UPTODATE BH_JBDPrivateStart
staticinlineint ext4_buffer_uptodate(struct buffer_head *bh)
{ /* * If the buffer has the write error flag, we have failed * to write out data in the block. In this case, we don't * have to read the block because we may read the old data * successfully.
*/ if (buffer_write_io_error(bh))
set_buffer_uptodate(bh); return buffer_uptodate(bh);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.