/*
 * Arbitrary upper bound on the size of a single discard request, chosen to
 * limit the potentially long time spent in blkdev_issue_discard().
 */
#define BTRFS_MAX_DISCARD_CHUNK_SIZE	(SZ_1G)
/*
 * Constant-friendly "find first set": evaluates to the 1-based position of
 * the lowest set bit of @n. Used by the sanity checks for btrfs_raid_types.
 *
 * NOTE(review): the compiler documents __builtin_ctzll(0) as undefined, so
 * @n is expected to be non-zero.
 */
#define const_ffs(n)	(1 + __builtin_ctzll(n))
/* * The conversion from BTRFS_BLOCK_GROUP_* bits to btrfs_raid_type requires * RAID0 always to be the lowest profile bit. * Although it's part of on-disk format and should never change, do extra * compile-time sanity checks.
*/
static_assert(const_ffs(BTRFS_BLOCK_GROUP_RAID0) <
const_ffs(BTRFS_BLOCK_GROUP_PROFILE_MASK & ~BTRFS_BLOCK_GROUP_RAID0));
static_assert(const_ilog2(BTRFS_BLOCK_GROUP_RAID0) >
ilog2(BTRFS_BLOCK_GROUP_TYPE_MASK));
/*
 * Turn a block group profile bit into an index counted relative to the RAID0
 * bit: with RAID0 being a single bit, a @profile equal to
 * BTRFS_BLOCK_GROUP_RAID0 yields 1, the next higher bit yields 2, and so on.
 *
 * ilog2() copes with both constants and variables, so this works at compile
 * time as well as at run time.
 */
#define BTRFS_BG_FLAG_TO_INDEX(profile)					\
	(ilog2((profile) >> (ilog2(BTRFS_BLOCK_GROUP_RAID0) - 1)))
enum btrfs_raid_types { /* SINGLE is the special one as it doesn't have on-disk bit. */
BTRFS_RAID_SINGLE = 0,
/*
 * Use sequence counter to get consistent device stat data on
 * 32-bit processors.
 *
 * Only 32-bit SMP builds get a real seqcount (and the
 * __BTRFS_NEED_DEVICE_DATA_ORDERED marker); every other configuration gets a
 * no-op initializer that is still usable as a single statement.
 */
#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __BTRFS_NEED_DEVICE_DATA_ORDERED
#define btrfs_device_data_ordered_init(device)	\
	seqcount_init(&device->data_seqcount)
#else
#define btrfs_device_data_ordered_init(device) do { } while (0)
#endif
/* * Device's major-minor number. Must be set even if the device is not * opened (bdev == NULL), unless the device is missing.
*/
dev_t devt; unsignedlong dev_state;
blk_status_t last_flush_error;
/*
 * Size of the device in the current transaction.
 *
 * This value is updated when committing the transaction and is
 * protected by the chunk mutex.
*/
u64 commit_total_bytes;
/* bytes used on the current transaction */
u64 commit_bytes_used;
/* Bio used for flushing device barriers */ struct bio flush_bio; struct completion flush_wait;
/* per-device scrub information */ struct scrub_ctx *scrub_ctx;
/* disk I/O failure stats. For detailed description refer to
* enum btrfs_dev_stat_values in ioctl.h */ int dev_stats_valid;
/* Counter to record the change of device stats */
atomic_t dev_stats_ccnt;
atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];
struct extent_io_tree alloc_state;
struct completion kobj_unregister; /* For sysfs/FSID/devinfo/devid/ */ struct kobject devid_kobj;
/* Bandwidth limit for scrub, in bytes */
u64 scrub_speed_max;
};
/*
 * A block group or device that holds an active swapfile; tracked so unsafe
 * operations can be refused while the swapfile is active.
 *
 * Entries are ordered by (ptr, inode); one block group or device may hold
 * several swapfiles. Only the pointer value is compared: what the object
 * actually is does not matter, all that is needed is a fast test for whether
 * it is present in the rbtree.
 */
struct btrfs_swapfile_pin {
	struct rb_node node;
	void *ptr;
	struct inode *inode;
	/*
	 * True when @ptr refers to a struct btrfs_block_group, false when it
	 * refers to a struct btrfs_device.
	 */
	bool is_block_group;
	/*
	 * Number of extents a swapfile uses in the block group that @ptr
	 * refers to. Only used when @is_block_group is true.
	 */
	int bg_extent_count;
};
/*
 * NOTE(review): presumably the default backing the round-robin
 * "minimum contiguous read" setting - confirm against its users.
 */
#define BTRFS_DEFAULT_RR_MIN_CONTIG_READ	(SZ_256K)

/* Keep in sync with raid_attr table, current maximum is RAID1C4. */
#define BTRFS_RAID1_MAX_MIRRORS			(4)

/*
 * Read policies for mirrored block group profiles, read picks the stripe based
 * on these policies.
 */
enum btrfs_read_policy {
	/* Use process PID to choose the stripe */
	BTRFS_READ_POLICY_PID,
#ifdef CONFIG_BTRFS_EXPERIMENTAL
	/* Balancing RAID1 reads across all striped devices (round-robin). */
	BTRFS_READ_POLICY_RR,
	/* Read from a specific device. */
	BTRFS_READ_POLICY_DEVID,
#endif
	/* Number of policies defined above; must remain the last entry. */
	BTRFS_NR_READ_POLICY,
};
#ifdef CONFIG_BTRFS_EXPERIMENTAL
/*
 * Checksum mode - offload it to workqueues or do it synchronously in
 * btrfs_submit_chunk().
 */
enum btrfs_offload_csum_mode {
	/*
	 * Choose offloading checksum or do it synchronously automatically.
	 * Do it synchronously if the checksum is fast, or offload to workqueues
	 * otherwise.
	 */
	BTRFS_OFFLOAD_CSUM_AUTO,
	/* Always offload checksum to workqueues. */
	BTRFS_OFFLOAD_CSUM_FORCE_ON,
	/* Never offload checksum to workqueues. */
	BTRFS_OFFLOAD_CSUM_FORCE_OFF,
};
#endif
/*
 * Per-filesystem (per-fsid) collection of devices together with the
 * counters, lists and policy settings that apply to the set as a whole.
 */
struct btrfs_fs_devices {
	u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */

	/*
	 * UUID written into the btree blocks:
	 *
	 * - If metadata_uuid != fsid then super block must have
	 *   BTRFS_FEATURE_INCOMPAT_METADATA_UUID flag set.
	 *
	 * - Following shall be true at all times:
	 *   - metadata_uuid == btrfs_header::fsid
	 *   - metadata_uuid == btrfs_dev_item::fsid
	 *
	 * - Relations between fsid and metadata_uuid in sb and fs_devices:
	 *   - Normal:
	 *       fs_devices->fsid == fs_devices->metadata_uuid == sb->fsid
	 *       sb->metadata_uuid == 0
	 *
	 *   - When the BTRFS_FEATURE_INCOMPAT_METADATA_UUID flag is set:
	 *       fs_devices->fsid == sb->fsid
	 *       fs_devices->metadata_uuid == sb->metadata_uuid
	 *
	 *   - When in-memory fs_devices->temp_fsid is true
	 *       fs_devices->fsid = random
	 *       fs_devices->metadata_uuid == sb->fsid
	 */
	u8 metadata_uuid[BTRFS_FSID_SIZE];

	struct list_head fs_list;

	/*
	 * Number of devices under this fsid including missing and
	 * replace-target device and excludes seed devices.
	 */
	u64 num_devices;

	/*
	 * The number of devices that successfully opened, including
	 * replace-target, excludes seed devices.
	 */
	u64 open_devices;

	/* The number of devices that are under the chunk allocation list. */
	u64 rw_devices;

	/* Count of missing devices under this fsid excluding seed device. */
	u64 missing_devices;

	u64 total_rw_bytes;

	/*
	 * Count of devices from btrfs_super_block::num_devices for this fsid,
	 * which includes the seed device, excludes the transient replace-target
	 * device.
	 */
	u64 total_devices;

	/* Highest generation number of seen devices */
	u64 latest_generation;

	/*
	 * The mount device or a device with highest generation after removal
	 * or replace.
	 */
	struct btrfs_device *latest_dev;

	/*
	 * All of the devices in the filesystem, protected by a mutex so we can
	 * safely walk it to write out the super blocks without worrying about
	 * adding/removing by the multi-device code. Scrubbing super block can
	 * kick off supers writing by holding this mutex lock.
	 */
	struct mutex device_list_mutex;

	/* List of all devices, protected by device_list_mutex */
	struct list_head devices;

	/* Devices which can satisfy space allocation. Protected by chunk_mutex. */
	struct list_head alloc_list;

	struct list_head seed_list;

	/* Count fs-devices opened. */
	int opened;

	/*
	 * Counter of the processes that are holding this fs_devices but not
	 * yet opened.
	 * This is for mounting handling, as we can only open the fs_devices
	 * after a super block is created. But we cannot take uuid_mutex
	 * during sget_fc(), thus we have to hold the fs_devices (meaning it
	 * cannot be released) until a super block is returned.
	 */
	int holding;

	/* Set when we find or add a device that doesn't have the nonrot flag set. */
	bool rotating;
	/* Devices support TRIM/discard commands. */
	bool discardable;
	/* The filesystem is a seed filesystem. */
	bool seeding;
	/* The mount needs to use a randomly generated fsid. */
	bool temp_fsid;
	/* Enable/disable the filesystem stats tracking. */
	bool collect_fs_stats;

	/* Policy used to read the mirrored stripes. */
	enum btrfs_read_policy read_policy;

#ifdef CONFIG_BTRFS_EXPERIMENTAL
	/*
	 * Minimum contiguous reads before switching to next device, the unit
	 * is one block/sectorsize.
	 */
	u32 rr_min_contig_read;

	/* Device to be used for reading in case of RAID1. */
	u64 read_devid;

	/* Checksum mode - offload it or do it synchronously. */
	enum btrfs_offload_csum_mode offload_csum_mode;
#endif
};
/*
 * Context for IO submission to device stripes. It serves several purposes:
 *
 * - Tracking the unfinished mirrors of mirror based profiles
 *   (SINGLE/DUP/RAID1/RAID10).
 *
 * - Carrying the logical -> physical mapping, which submit_stripe_bio()
 *   uses to turn a logical bio into a physical device address.
 *
 * - Carrying the device replace information, which
 *   handle_ops_on_dev_replace() uses to copy logical bios to the new device.
 *
 * - Carrying the logical bytenr of the RAID56 full stripe.
 */
struct btrfs_io_context {
	refcount_t refs;
	struct btrfs_fs_info *fs_info;
	/* Copied from struct btrfs_chunk_map::type. */
	u64 map_type;
	struct bio *orig_bio;
	atomic_t error;
	u16 max_errors;
	bool use_rst;

	/*
	 * Total stripe count, with the extra duplicated stripe for replace
	 * counted in.
	 */
	u16 num_stripes;

	/*
	 * Mirror number of this bioc.
	 *
	 * Reads may ask for mirror_num 0, in which case a valid (> 0)
	 * mirror_num has to be reported back to the reader.
	 */
	u16 mirror_num;

	/*
	 * The next two members only matter for dev-replace.
	 *
	 * @replace_nr_stripes: How many duplicated stripes have to be written
	 *			to the replace target.
	 *			At most 2 (2 for DUP, at most 1 otherwise).
	 * @replace_stripe_src: Where the duplicated stripe originates from.
	 *
	 * @replace_stripe_src mostly exists for RAID56. Stripes of the other
	 * profiles all carry the same contents for the mapped range, so the
	 * origin of a duplicate is irrelevant there. RAID56 stripes each carry
	 * different contents, so the mapping has to be recorded.
	 *
	 * Example, a RAID5 write:
	 *
	 *   num_stripes:	4 (3 + 1 duplicated write)
	 *   stripes[0]:	dev = devid 1, physical = X
	 *   stripes[1]:	dev = devid 2, physical = Y
	 *   stripes[2]:	dev = devid 3, physical = Z
	 *   stripes[3]:	dev = devid 0, physical = Y
	 *
	 *   replace_nr_stripes = 1
	 *   replace_stripe_src = 1	<- stripes[1] is involved in replace.
	 *				   The duplicated stripe index is
	 *				   (@num_stripes - 1).
	 *
	 * replace_nr_stripes = 2 is still possible for DUP. All stripes then
	 * share the same content and @replace_stripe_src can be ignored.
	 */
	u16 replace_nr_stripes;
	s16 replace_stripe_src;

	/*
	 * Logical bytenr where the full stripe starts, RAID56 only.
	 *
	 * Whenever this is something other than (u64)-1, stripes[] is laid
	 * out as follows:
	 *
	 *   (real_stripes = num_stripes - replace_nr_stripes)
	 *   (data_stripes = (is_raid6) ? (real_stripes - 2) : (real_stripes - 1))
	 *
	 *   stripes[0]:			The first data stripe
	 *   stripes[1]:			The second data stripe
	 *   ...
	 *   stripes[data_stripes - 1]:	The last data stripe
	 *   stripes[data_stripes]:		The P stripe
	 *   stripes[data_stripes + 1]:	The Q stripe (only for RAID6).
	 */
	u64 full_stripe_logical;
	struct btrfs_io_stripe stripes[];
};
struct btrfs_raid_attr {
u8 sub_stripes; /* sub_stripes info for map */
u8 dev_stripes; /* stripes per dev */
u8 devs_max; /* max devs to use */
u8 devs_min; /* min devs needed */
u8 tolerated_failures; /* max tolerated fail devs */
u8 devs_increment; /* ndevs has to be a multiple of this */
u8 ncopies; /* how many copies to data has */
u8 nparity; /* number of stripes worth of bytes to store
* parity information */
u8 mindev_error; /* error code if min devs requisite is unmet */ constchar raid_name[8]; /* name of the raid */
u64 bg_flag; /* block group flag of the raid */
};
/*
 * Mapping information of one chunk, linked through @rb_node, reference
 * counted through @refs and followed by a variable number of stripes.
 *
 * NOTE(review): the individual field meanings below are not documented in
 * this file; confirm them against the users before relying on them.
 */
struct btrfs_chunk_map {
	struct rb_node rb_node;
	/* Used by the mount time dev extent verification. */
	int verified_stripes;
	refcount_t refs;
	u64 start;
	u64 chunk_len;
	u64 stripe_size;
	u64 type;
	int io_align;
	int io_width;
	int num_stripes;
	int sub_stripes;
	/* Flexible array member, must remain the last field. */
	struct btrfs_io_stripe stripes[];
};
/* * Do the type safe conversion from stripe_nr to offset inside the chunk. * * @stripe_nr is u32, with left shift it can overflow u32 for chunks larger * than 4G. This does the proper type cast to avoid overflow.
*/ staticinline u64 btrfs_stripe_nr_to_offset(u32 stripe_nr)
{ return (u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT;
}
/*
 * Atomically fetch the device statistic at @index and reset it to zero, then
 * bump the stats change counter of @dev.
 *
 * Return: the value the statistic held before it was reset.
 */
static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev,
						int index)
{
	int ret;

	ret = atomic_xchg(dev->dev_stat_values + index, 0);
	/*
	 * atomic_xchg implies a full memory barrier as per atomic_t.txt:
	 * - RMW operations that have a return value are fully ordered;
	 *
	 * This implicit memory barrier is paired with the smp_rmb in
	 * btrfs_run_dev_stats
	 */
	atomic_inc(&dev->dev_stats_ccnt);
	return ret;
}
staticinlinevoid btrfs_dev_stat_set(struct btrfs_device *dev, int index, unsignedlong val)
{
atomic_set(dev->dev_stat_values + index, val); /* * This memory barrier orders stores updating statistics before stores * updating dev_stats_ccnt. * * It pairs with smp_rmb() in btrfs_run_dev_stats().
*/
smp_mb__before_atomic();
atomic_inc(&dev->dev_stats_ccnt);
}
/*
 * Notice from the web page this listing was taken from (translated):
 *
 * The information on this website has been carefully compiled to the best of
 * our knowledge. However, neither completeness, nor correctness, nor quality
 * of the information provided is guaranteed.
 *
 * Remark:
 * The colored syntax highlighting and the measurement are still experimental.
 */