/*
 * Metadata items needed by an unlink operation, one unit each:
 *
 *   - the possible orphan item
 *   - the dir item
 *   - the dir index
 *   - the inode ref
 *   - the inode
 *   - the parent inode
 */
#define BTRFS_UNLINK_METADATA_UNITS	6
/*
 * The reserved space at the beginning of each device. It covers the primary
 * super block and leaves space for potential use by other tools like
 * bootloaders or to lower potential damage of accidental overwrite.
 */
#define BTRFS_DEVICE_RANGE_RESERVED			(SZ_1M)

/*
 * Runtime (in-memory) states of filesystem
 */
enum {
	/*
	 * Filesystem is being remounted, allow to skip some operations, like
	 * defrag
	 */
	BTRFS_FS_STATE_REMOUNTING,
	/* Filesystem in RO mode */
	BTRFS_FS_STATE_RO,
	/* Track if a transaction abort has been reported on this filesystem */
	BTRFS_FS_STATE_TRANS_ABORTED,
	/*
	 * Bio operations should be blocked on this filesystem because a source
	 * or target device is being destroyed as part of a device replace
	 */
	BTRFS_FS_STATE_DEV_REPLACING,
	/* The btrfs_fs_info created for self-tests */
	BTRFS_FS_STATE_DUMMY_FS_INFO,

	/* Checksum errors are ignored. */
	BTRFS_FS_STATE_NO_DATA_CSUMS,
	BTRFS_FS_STATE_SKIP_META_CSUMS,

	/* Indicates there was an error cleaning up a log tree. */
	BTRFS_FS_STATE_LOG_CLEANUP_ERROR,

	/* No more delayed iput can be queued. */
	BTRFS_FS_STATE_NO_DELAYED_IPUT,

	/* Number of states listed above; not a state itself. */
	BTRFS_FS_STATE_COUNT
};
/*
 * Filesystem flag bits. BTRFS_FS_CLOSING_START and BTRFS_FS_CLOSING_DONE are
 * tested against fs_info->flags in btrfs_fs_closing().
 */
enum {
	BTRFS_FS_CLOSING_START,
	BTRFS_FS_CLOSING_DONE,
	BTRFS_FS_LOG_RECOVERING,
	BTRFS_FS_OPEN,
	BTRFS_FS_QUOTA_ENABLED,
	BTRFS_FS_UPDATE_UUID_TREE_GEN,
	BTRFS_FS_CREATING_FREE_SPACE_TREE,
	BTRFS_FS_BTREE_ERR,
	BTRFS_FS_LOG1_ERR,
	BTRFS_FS_LOG2_ERR,
	BTRFS_FS_QUOTA_OVERRIDE,
	/* Used to record internally whether fs has been frozen */
	BTRFS_FS_FROZEN,
	/*
	 * Indicate that balance has been set up from the ioctl and is in the
	 * main phase. The fs_info::balance_ctl is initialized.
	 */
	BTRFS_FS_BALANCE_RUNNING,

	/*
	 * Indicate that relocation of a chunk has started, it's set per chunk
	 * and is toggled between chunks.
	 */
	BTRFS_FS_RELOC_RUNNING,

	/* Indicate that the cleaner thread is awake and doing something. */
	BTRFS_FS_CLEANER_RUNNING,

	/*
	 * The checksumming has an optimized version and is considered fast,
	 * so we don't need to offload checksums to workqueues.
	 */
	BTRFS_FS_CSUM_IMPL_FAST,

	/* Indicate that the discard workqueue can service discards. */
	BTRFS_FS_DISCARD_RUNNING,

	/* Indicate that we need to cleanup space cache v1 */
	BTRFS_FS_CLEANUP_SPACE_CACHE_V1,

	/* Indicate that we can't trust the free space tree for caching yet */
	BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED,

	/* Indicate whether there are any tree modification log users */
	BTRFS_FS_TREE_MOD_LOG_USERS,

	/* Indicate that we want the transaction kthread to commit right now. */
	BTRFS_FS_COMMIT_TRANS,

	/* Indicate we have half completed snapshot deletions pending. */
	BTRFS_FS_UNFINISHED_DROPS,

	/* Indicate we have to finish a zone to do next allocation. */
	BTRFS_FS_NEED_ZONE_FINISH,

	/* Indicate that we want to commit the transaction. */
	BTRFS_FS_NEED_TRANS_COMMIT,

	/* This is set when active zone tracking is needed. */
	BTRFS_FS_ACTIVE_ZONE_TRACKING,

	/*
	 * Indicate if we have some features changed, this is mostly for
	 * cleaner thread to update the sysfs interface.
	 */
	BTRFS_FS_FEATURE_CHANGED,

	/*
	 * Indicate that we have found a tree block which is only aligned to
	 * sectorsize, but not to nodesize. This should be rare nowadays.
	 */
	BTRFS_FS_UNALIGNED_TREE_BLOCK,

#if BITS_PER_LONG == 32
	/* Indicate if we have error/warn message printed on 32bit systems */
	BTRFS_FS_32BIT_ERROR,
	BTRFS_FS_32BIT_WARN,
#endif
};
/*
 * Compat flags that we support.  If any incompat flags are set other than the
 * ones specified below then we will fail to mount
 */
#define BTRFS_FEATURE_COMPAT_SUPP		0ULL
#define BTRFS_FEATURE_COMPAT_SAFE_SET		0ULL
#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR		0ULL
#ifdef CONFIG_BTRFS_EXPERIMENTAL
/*
 * Features under development, like extent tree v2 support, are enabled
 * only under CONFIG_BTRFS_EXPERIMENTAL.
 *
 * NOTE(review): the #else/#endif that closes this conditional, and the
 * definition of BTRFS_FEATURE_INCOMPAT_SUPP_STABLE, are not visible in this
 * part of the file - confirm they exist.
 */
#define BTRFS_FEATURE_INCOMPAT_SUPP \
	(BTRFS_FEATURE_INCOMPAT_SUPP_STABLE | \
	 BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE | \
	 BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2)
/*
 * A free cluster claims free space in relatively large chunks so that writes
 * are less seeky. Every metadata allocation goes through one; in ssd_spread
 * mode data allocations do as well.
 */
struct btrfs_free_cluster {
	spinlock_t lock;
	spinlock_t refill_lock;
	struct rb_root root;

	/* Size of the largest extent held by this cluster. */
	u64 max_size;

	/* Starting offset of the first extent. */
	u64 window_start;

	/* A full search was done and no cluster could be created. */
	bool fragmented;

	struct btrfs_block_group *block_group;

	/*
	 * Links the cluster onto a list in the block group it was allocated
	 * from, so that the cluster can be freed before the block group is.
	 */
	struct list_head block_group_list;
};
/* Discard control. */

/*
 * Async discard uses multiple lists to differentiate the discard filter
 * parameters.  Index 0 is for completely free block groups where we need to
 * ensure the entire block group is trimmed without being lossy.  Indices
 * afterwards represent monotonically decreasing discard filter sizes to
 * prioritize what should be discarded next.
 */
#define BTRFS_NR_DISCARD_LISTS		3
#define BTRFS_DISCARD_INDEX_UNUSED	0
#define BTRFS_DISCARD_INDEX_START	1
/* Transaction commit statistics, exported via sysfs. */
struct btrfs_commit_stats {
	/* Total number of commits. */
	u64 commit_count;

	/* Longest commit duration seen so far, in ns. */
	u64 max_commit_dur;

	/* Duration of the most recent commit, in ns. */
	u64 last_commit_dur;

	/* Sum of all commit durations, in ns. */
	u64 total_commit_dur;

	/* Start of the last critical section, in ns. */
	u64 critical_section_start_time;
};
/*
 * Block reservation for extent, checksum, root tree and delayed dir
 * index item.
 */
struct btrfs_block_rsv global_block_rsv;
/* Block reservation for metadata operations */
struct btrfs_block_rsv trans_block_rsv;
/* Block reservation for chunk tree */
struct btrfs_block_rsv chunk_block_rsv;
/* Block reservation for delayed operations */
struct btrfs_block_rsv delayed_block_rsv;
/* Block reservation for delayed refs */
struct btrfs_block_rsv delayed_refs_rsv;
/* Block reservation for treelog tree */
struct btrfs_block_rsv treelog_rsv;

struct btrfs_block_rsv empty_block_rsv;
/*
 * Updated while holding the lock 'trans_lock'. Due to the life cycle of
 * a transaction, it can be directly read while holding a transaction
 * handle, everywhere else must be read with btrfs_get_fs_generation().
 * Should always be updated using btrfs_set_fs_generation().
 */
u64 generation;
/*
 * Always use btrfs_get_last_trans_committed() and
 * btrfs_set_last_trans_committed() to read and update this field.
 */
u64 last_trans_committed;
/*
 * Generation of the last transaction used for block group relocation
 * since the filesystem was last mounted (or 0 if none happened yet).
 * Must be written and read while holding btrfs_fs_info::commit_root_sem.
 */
u64 last_reloc_trans;
/*
 * This is updated to the current trans every time a full commit is
 * required instead of the faster short fsync log commits
 */
u64 last_trans_log_full_commit;
/* NOTE(review): presumably the active mount options - confirm. */
unsigned long long mount_opt;

int compress_type;
int compress_level;
u32 commit_interval;
/*
 * It is a suggestive number, the read side is safe even if it gets a
 * wrong number because we will write out the data into a regular
 * extent. The write side (mount/remount) is under ->s_umount lock,
 * so it is also safe.
 */
u64 max_inline;
/*
 * Used to protect the incompat_flags, compat_flags, compat_ro_flags
 * when they are updated.
 *
 * Because we never clear the flags, we needn't use the lock on the
 * read side.
 *
 * We also needn't use the lock when we mount the fs, because
 * there is no other task which will update the flag.
 */
spinlock_t super_lock;
struct btrfs_super_block *super_copy;
struct btrfs_super_block *super_for_commit;
struct super_block *sb;
struct inode *btree_inode;
struct mutex tree_log_mutex;
struct mutex transaction_kthread_mutex;
struct mutex cleaner_mutex;
struct mutex chunk_mutex;

/*
 * This is taken to make sure we don't set block groups ro after the
 * free space cache has been allocated on them.
 */
struct mutex ro_block_group_mutex;

/*
 * This is used during read/modify/write to make sure no two ios are
 * trying to mod the same stripe at the same time.
 */
struct btrfs_stripe_hash_table *stripe_hash_table;

/*
 * This protects the ordered operations list only while we are
 * processing all of the entries on it.  This way we make sure the
 * commit code doesn't find the list temporarily empty because another
 * function happens to be doing non-waiting preflush before jumping
 * into the main commit.
 */
struct mutex ordered_operations_mutex;

struct rw_semaphore commit_root_sem;

struct rw_semaphore cleanup_work_sem;

struct rw_semaphore subvol_sem;

spinlock_t trans_lock;
/*
 * The reloc mutex goes with the trans lock, it is taken during commit
 * to protect us from the relocation code.
 */
struct mutex reloc_mutex;

/* This protects tree_mod_log and tree_mod_seq_list */
rwlock_t tree_mod_log_lock;
struct rb_root tree_mod_log;
struct list_head tree_mod_seq_list;
atomic_t async_delalloc_pages;

/* This is used to protect the following list -- ordered_roots. */
spinlock_t ordered_root_lock;

/*
 * All fs/file tree roots in which there are data=ordered extents
 * pending writeback are added into this list.
 *
 * These can span multiple transactions and basically include every
 * dirty data page that isn't from nodatacow.
 */
struct list_head ordered_roots;

struct mutex delalloc_root_mutex;

spinlock_t delalloc_root_lock;
/* All fs/file tree roots that have delalloc inodes. */
struct list_head delalloc_roots;
/*
 * There is a pool of worker threads for checksumming during writes and
 * a pool for checksumming after reads.  This is because readers can
 * run with FS locks held, and the writers may be waiting for those
 * locks.  We don't want ordering in the pending list to cause
 * deadlocks, and so the two are serviced separately.
 *
 * A third pool does submit_bio to avoid deadlocking with the other two.
 */
struct btrfs_workqueue *workers;
struct btrfs_workqueue *delalloc_workers;
struct btrfs_workqueue *flush_workers;
struct workqueue_struct *endio_workers;
struct workqueue_struct *endio_meta_workers;
struct workqueue_struct *rmw_workers;
struct workqueue_struct *compressed_write_workers;
struct btrfs_workqueue *endio_write_workers;
struct btrfs_workqueue *endio_freespace_worker;
struct btrfs_workqueue *caching_workers;

/*
 * Fixup workers take dirty pages that didn't properly go through the
 * cow mechanism and make them safe to write.  It happens for the
 * sys_munmap function call path.
 */
struct btrfs_workqueue *fixup_workers;
struct btrfs_workqueue *delayed_workers;
/* Track the number of blocks (sectors) read by the filesystem. */
struct percpu_counter stats_read_blocks;

/* Used to keep from writing metadata until there is a nice batch */
struct percpu_counter dirty_metadata_bytes;
struct percpu_counter delalloc_bytes;
struct percpu_counter ordered_bytes;

s32 dirty_metadata_batch;

s32 delalloc_batch;

/* Protected by 'trans_lock'. */
struct list_head dirty_cowonly_roots;

struct btrfs_fs_devices *fs_devices;

/*
 * The space_info list is effectively read only after initial setup.
 * It is populated at mount time and cleaned up after all block groups
 * are removed.  RCU is used to protect it.
 */
struct list_head space_info;

struct btrfs_space_info *data_sinfo;

struct reloc_control *reloc_ctl;

/* data_alloc_cluster is only used in ssd_spread mode */
struct btrfs_free_cluster data_alloc_cluster;

/* All metadata allocations go through this cluster. */
struct btrfs_free_cluster meta_alloc_cluster;

/* Auto defrag inodes go here. */
spinlock_t defrag_inodes_lock;
struct rb_root defrag_inodes;

atomic_t defrag_running;
/* Used to protect avail_{data, metadata, system}_alloc_bits */
seqlock_t profiles_lock;
/*
 * These three are in extended format (availability of single chunks is
 * denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other types are denoted
 * by corresponding BTRFS_BLOCK_GROUP_* bits)
 */
u64 avail_data_alloc_bits;

u64 avail_metadata_alloc_bits;

u64 avail_system_alloc_bits;

/* Cancellation requests for chunk relocation */
atomic_t reloc_cancel_req;

u32 data_chunk_allocations;

u32 metadata_ratio;
/* Private scrub information */
struct mutex scrub_lock;

atomic_t scrubs_running;

atomic_t scrub_pause_req;

atomic_t scrubs_paused;

atomic_t scrub_cancel_req;

wait_queue_head_t scrub_pause_wait;
/*
 * The worker pointers are NULL iff the refcount is 0, i.e. scrub is not
 * running.
 */
refcount_t scrub_workers_refcnt;
struct workqueue_struct *scrub_workers;

struct btrfs_discard_ctl discard_ctl;
/* Is qgroup tracking in a consistent state? */
u64 qgroup_flags;

/* Holds configuration and tracking. Protected by qgroup_lock. */
struct rb_root qgroup_tree;

spinlock_t qgroup_lock;

/*
 * Protect user change for quota operations. If a transaction is needed,
 * it must be started before locking this lock.
 */
struct mutex qgroup_ioctl_lock;

/* List of dirty qgroups to be written at next commit. */
struct list_head dirty_qgroups;

/* Used by qgroup for an efficient tree traversal. */
u64 qgroup_seq;
/*
 * If this is not 0, then it indicates a serious filesystem error has
 * happened and it contains that error (negative errno value).
 */
int fs_error;

/*
 * Filesystem state. btrfs_need_cleaner_sleep() tests the BTRFS_FS_STATE_RO
 * bit in this word with test_bit().
 */
unsigned long fs_state;
struct btrfs_delayed_root *delayed_root;

/* Entries are eb->start >> nodesize_bits */
struct xarray buffer_tree;

/* Next backup root to be overwritten */
int backup_root_index;

/* Device replace state */
struct btrfs_dev_replace dev_replace;

struct semaphore uuid_tree_rescan_sem;

/* Used to reclaim the metadata space in the background. */
struct work_struct async_reclaim_work;
struct work_struct async_data_reclaim_work;
struct work_struct preempt_reclaim_work;

/* Reclaim partially filled block groups in the background */
struct work_struct reclaim_bgs_work;
/* Protected by unused_bgs_lock. */
struct list_head reclaim_bgs;
int bg_reclaim_threshold;

/* Protects the lists unused_bgs and reclaim_bgs. */
spinlock_t unused_bgs_lock;
/* Protected by unused_bgs_lock. */
struct list_head unused_bgs;
struct mutex unused_bg_unpin_mutex;
/* Protect block groups that are going to be deleted */
struct mutex reclaim_bgs_lock;
/* Cached block sizes */
u32 nodesize;

u32 nodesize_bits;

u32 sectorsize;
/* ilog2 of sectorsize, used to avoid 64bit division */
u32 sectorsize_bits;

u32 csum_size;

u32 csums_per_leaf;

u32 stripesize;

/*
 * Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular
 * filesystem, on zoned it depends on the device constraints.
 */
u64 max_extent_size;
/* Block groups and devices containing active swapfiles. */
spinlock_t swapfile_pins_lock;
struct rb_root swapfile_pins;

struct crypto_shash *csum_shash;

/* Type of exclusive operation running, protected by super_lock */
enum btrfs_exclusive_operation exclusive_operation;

/*
 * Zone size > 0 when in ZONED mode, otherwise it's used for a check
 * if the mode is enabled
 */
u64 zone_size;

/* Updates are not protected by any lock */
struct btrfs_commit_stats commit_stats;

/*
 * Last generation where we dropped a non-relocation root.
 * Use btrfs_set_last_root_drop_gen() and btrfs_get_last_root_drop_gen()
 * to change it and to read it, respectively.
 */
u64 last_root_drop_gen;

/*
 * Annotations for transaction events (structures are empty when
 * compiled without lockdep).
 */
struct lockdep_map btrfs_trans_num_writers_map;
struct lockdep_map btrfs_trans_num_extwriters_map;
struct lockdep_map btrfs_state_change_map[4];
struct lockdep_map btrfs_trans_pending_ordered_map;
struct lockdep_map btrfs_ordered_extent_map;
/* * Take the number of bytes to be checksummed and figure out how many leaves * it would require to store the csums for that many bytes.
*/ staticinline u64 btrfs_csum_bytes_to_leaves( conststruct btrfs_fs_info *fs_info, u64 csum_bytes)
{ const u64 num_csums = csum_bytes >> fs_info->sectorsize_bits;
/* * Use this if we would be adding new items, as we could split nodes as we cow * down the tree.
*/ staticinline u64 btrfs_calc_insert_metadata_size(conststruct btrfs_fs_info *fs_info, unsigned num_items)
{ return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
}
/* * Doing a truncate or a modification won't result in new nodes or leaves, just * what we need for COW.
*/ staticinline u64 btrfs_calc_metadata_size(conststruct btrfs_fs_info *fs_info, unsigned num_items)
{ return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
}
staticinlineint btrfs_fs_closing(conststruct btrfs_fs_info *fs_info)
{ /* Do it this way so we only ever do one test_bit in the normal case. */ if (test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)) { if (test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags)) return 2; return 1;
} return 0;
}
/* * If we remount the fs to be R/O or umount the fs, the cleaner needn't do * anything except sleeping. This function is used to check the status of * the fs. * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount, * since setting and checking for SB_RDONLY in the superblock's flags is not * atomic.
*/ staticinlineint btrfs_need_cleaner_sleep(conststruct btrfs_fs_info *fs_info)
{ return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) ||
btrfs_fs_closing(fs_info);
}
/*
 * We use folio flag owner_2 to indicate there is an ordered extent with
 * unfinished IO.
 */
#define folio_test_ordered(folio)	folio_test_owner_2(folio)
#define folio_set_ordered(folio)	folio_set_owner_2(folio)
#define folio_clear_ordered(folio)	folio_clear_owner_2(folio)
/*
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereitgestellten Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */