/*
 * In-memory form of the version number carried by versioned structures on disk.
 *
 * A version has a major part and a minor part. A format change that needs no explicit upgrade
 * step from the previous version bumps the minor part. A format change that needs an explicit
 * upgrade step, or that cannot be upgraded to at all, bumps the major part and resets the minor
 * part to 0.
 */
struct version_number {
	u32 major_version;
	u32 minor_version;
};
/*
 * The on-disk (packed, machine-independent) counterpart of struct version_number.
 * Each field is kept in little-endian byte order.
 */
struct packed_version_number {
	__le32 major_version;
	__le32 minor_version;
} __packed;
/*
 * The registry of component ids for use in headers.
 *
 * Each id must be defined by its own preprocessor directive, one per line.
 */
#define VDO_SUPER_BLOCK 0
#define VDO_LAYOUT 1
#define VDO_RECOVERY_JOURNAL 2
#define VDO_SLAB_DEPOT 3
#define VDO_BLOCK_MAP 4
#define VDO_GEOMETRY_BLOCK 5
/* In-memory header describing a piece of versioned data stored on disk. */
struct header {
	/* The component this is a header for (one of the registered component ids) */
	u32 id;
	/* The version of the data format */
	struct version_number version;
	/* The size of the data following this header */
	size_t size;
};
/*
 * The on-disk (packed, machine-independent) counterpart of struct header. The fields appear in
 * the same order as in struct header; the size is stored as a 64-bit little-endian value.
 */
struct packed_header {
	__le32 id;
	struct packed_version_number version;
	__le64 size;
} __packed;
/* One region of a volume: its id and where it starts. */
struct volume_region {
	/* The ID of the region */
	enum volume_region_id id;
	/*
	 * The absolute starting offset on the device. A region has no explicit length; it extends
	 * up to the start of the next region.
	 */
	physical_block_number_t start_block;
} __packed;
/* Packed description of a volume's geometry. */
struct volume_geometry {
	/* Retained only for backwards compatibility */
	u32 unused;
	/* This volume's nonce */
	nonce_t nonce;
	/* This volume's uuid */
	uuid_t uuid;
	/* Block offset that is applied to bios */
	block_count_t bio_offset;
	/* The regions, ordered by region ID */
	struct volume_region regions[VDO_VOLUME_REGION_COUNT];
	/* The configuration of the index */
	struct index_config index_config;
} __packed;
/*
 * Sizing-only variant of the volume geometry. It has the same fields as struct volume_geometry
 * except that there is no bio_offset member.
 */
struct volume_geometry_4_0 {
	/* Retained only for backwards compatibility */
	u32 unused;
	/* This volume's nonce */
	nonce_t nonce;
	/* This volume's uuid */
	uuid_t uuid;
	/* The regions, ordered by region ID */
	struct volume_region regions[VDO_VOLUME_REGION_COUNT];
	/* The configuration of the index */
	struct index_config index_config;
} __packed;
/** * DOC: Block map entries * * The entry for each logical block in the block map is encoded into five bytes, which saves space * in both the on-disk and in-memory layouts. It consists of the 36 low-order bits of a * physical_block_number_t (addressing 256 terabytes with a 4KB block size) and a 4-bit encoding of * a block_mapping_state. * * Of the 8 high bits of the 5-byte structure: * * Bits 7..4: The four highest bits of the 36-bit physical block number * Bits 3..0: The 4-bit block_mapping_state * * The following 4 bytes are the low order bytes of the physical block number, in little-endian * order. * * Conversion functions to and from a data location are provided.
*/ struct block_map_entry { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ unsigned mapping_state : 4; unsigned pbn_high_nibble : 4; #else unsigned pbn_high_nibble : 4; unsigned mapping_state : 4; #endif
/* The possible outcomes of validating a block map page. */
enum block_map_page_validity {
	VDO_BLOCK_MAP_PAGE_VALID = 0,
	VDO_BLOCK_MAP_PAGE_INVALID = 1,
	/* Valid page found in the wrong location on disk */
	VDO_BLOCK_MAP_PAGE_BAD = 2,
};
/* Recovery journal state in the form it is encoded into the VDO super block. */
struct recovery_journal_state_7_0 {
	/* The sequence number at which the journal starts */
	sequence_number_t journal_start;
	/* How many logical blocks the VDO is using */
	block_count_t logical_blocks_used;
	/* How many block map pages have been allocated */
	block_count_t block_map_data_blocks;
} __packed;
/*
 * The unpacked form of a recovery journal entry.
 *
 * An entry records three physical locations: a data location which is the value of a single
 * mapping in the block map tree, plus the two locations of the block map pages and slots that
 * are acquiring and releasing a reference to that location. It also carries an operation code
 * telling whether the mapping is for a logical block or for the block map tree itself.
 */
struct recovery_journal_entry {
	struct block_map_slot slot;
	struct data_location mapping;
	struct data_location unmapping;
	enum journal_operation operation;
};
/* The packed, on-disk representation of a recovery journal entry. */ struct packed_recovery_journal_entry { /* * In little-endian bit order: * Bits 15..12: The four highest bits of the 36-bit physical block number of the block map * tree page * Bits 11..2: The 10-bit block map page slot number * Bit 1..0: The journal_operation of the entry (this actually only requires 1 bit, but * it is convenient to keep the extra bit as part of this field.
*/ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ unsigned operation : 2; unsigned slot_low : 6; unsigned slot_high : 4; unsigned pbn_high_nibble : 4; #else unsigned slot_low : 6; unsigned operation : 2; unsigned pbn_high_nibble : 4; unsigned slot_high : 4; #endif
/* * Bits 47..16: The 32 low-order bits of the block map page PBN, in little-endian byte * order
*/
__le32 pbn_low_word;
/* * Bits 87..48: The five-byte block map entry encoding the location that will be stored in * the block map page slot
*/ struct block_map_entry mapping;
/* * Bits 127..88: The five-byte block map entry encoding the location that was stored in the * block map page slot
*/ struct block_map_entry unmapping;
} __packed;
/* The packed, on-disk representation of an old format recovery journal entry. */ struct packed_recovery_journal_entry_1 { /* * In little-endian bit order: * Bits 15..12: The four highest bits of the 36-bit physical block number of the block map * tree page * Bits 11..2: The 10-bit block map page slot number * Bits 1..0: The 2-bit journal_operation of the entry *
*/ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ unsigned operation : 2; unsigned slot_low : 6; unsigned slot_high : 4; unsigned pbn_high_nibble : 4; #else unsigned slot_low : 6; unsigned operation : 2; unsigned pbn_high_nibble : 4; unsigned slot_high : 4; #endif
/* * Bits 47..16: The 32 low-order bits of the block map page PBN, in little-endian byte * order
*/
__le32 pbn_low_word;
/* * Bits 87..48: The five-byte block map entry encoding the location that was or will be * stored in the block map page slot
*/ struct block_map_entry block_map_entry;
} __packed;
/* The unpacked header of a recovery journal block. */
struct recovery_block_header {
	/* Block map head sequence number */
	sequence_number_t block_map_head;
	/* Slab journal head sequence number */
	sequence_number_t slab_journal_head;
	/* Sequence number for this block */
	sequence_number_t sequence_number;
	/* A given VDO instance's nonce */
	nonce_t nonce;
	/* Logical blocks in use */
	block_count_t logical_blocks_used;
	/* Allocated block map pages */
	block_count_t block_map_data_blocks;
	/* Number of entries written */
	journal_entry_count_t entry_count;
	/* The protection check byte */
	u8 check_byte;
	/* Number of recoveries completed */
	u8 recovery_count;
	/* Metadata type */
	enum vdo_metadata_type metadata_type;
};
/*
 * On-disk (packed) layout of a recovery journal block header. Every multi-byte field is stored
 * little-endian.
 */
struct packed_journal_header {
	/* 64-bit sequence number of the block map head */
	__le64 block_map_head;
	/* 64-bit sequence number of the slab journal head */
	__le64 slab_journal_head;
	/* 64-bit sequence number of this block */
	__le64 sequence_number;
	/* 64-bit nonce of a given VDO instance */
	__le64 nonce;
	/* 8-bit metadata type (should always be one for the recovery journal) */
	u8 metadata_type;
	/* 16-bit count of the entries encoded in the block */
	__le16 entry_count;
	/* 64-bit count of the logical blocks used when this block was opened */
	__le64 logical_blocks_used;
	/* 64-bit count of the block map blocks used when this block was opened */
	__le64 block_map_data_blocks;
	/* Protection check byte */
	u8 check_byte;
	/* Count of recoveries completed */
	u8 recovery_count;
} __packed;
/* The number of recoveries completed */
u8 recovery_count;
/* The number of entries in this sector */
u8 entry_count;
/* Journal entries for this sector */ struct packed_recovery_journal_entry entries[];
} __packed;
/* Entry-count constants for current-format and v1 recovery journal blocks. */
enum {
	/* How many entries a filled sector holds (every sector but the last) */
	RECOVERY_JOURNAL_ENTRIES_PER_SECTOR =
		((VDO_SECTOR_SIZE - sizeof(struct packed_journal_sector)) /
		 sizeof(struct packed_recovery_journal_entry)),
	/*
	 * NOTE(review): the factor 7 is presumably the number of entry-bearing sectors in a
	 * block -- confirm against the journal block layout.
	 */
	RECOVERY_JOURNAL_ENTRIES_PER_BLOCK = RECOVERY_JOURNAL_ENTRIES_PER_SECTOR * 7,
	/* How many entries a v1 recovery journal block holds */
	RECOVERY_JOURNAL_1_ENTRIES_PER_BLOCK = 311,
	/* How many entries a filled v1 sector holds (every sector but the last) */
	RECOVERY_JOURNAL_1_ENTRIES_PER_SECTOR =
		((VDO_SECTOR_SIZE - sizeof(struct packed_journal_sector)) /
		 sizeof(struct packed_recovery_journal_entry_1)),
	/* How many entries the last sector holds when a v1 block is full */
	RECOVERY_JOURNAL_1_ENTRIES_IN_LAST_SECTOR =
		(RECOVERY_JOURNAL_1_ENTRIES_PER_BLOCK % RECOVERY_JOURNAL_1_ENTRIES_PER_SECTOR),
};
/* The type used to hold the reference count of a single block (one byte per block). */
typedef u8 vdo_refcount_t;
/*
 * Identifies one entry in a recovery journal or slab journal by absolute position: the sequence
 * number of the block and the index of the entry.
 */
struct journal_point {
	sequence_number_t sequence_number;
	journal_entry_count_t entry_count;
};
/* The packed, platform-independent form of a struct journal_point. */
struct packed_journal_point {
	/*
	 * A little-endian 64-bit value: the low-order 48 bits of the sequence number shifted up
	 * by 16 bits, or'ed with the 16-bit entry count.
	 *
	 * This encoding assumes the top 16 bits of the sequence number are zero, which may not
	 * hold in the very long term--see BZ 1523240.
	 */
	__le64 encoded_point;
} __packed;
/*
 * On-disk layout of one sector of a reference_block: a packed commit point followed by
 * COUNTS_PER_SECTOR reference counts.
 */
struct packed_reference_sector {
	struct packed_journal_point commit_point;
	vdo_refcount_t counts[COUNTS_PER_SECTOR];
} __packed;
/*
 * Slab journal blocks may have one of two formats, depending upon whether or not any of the
 * entries in the block are block map increments. Since the steady state for a VDO is that all of
 * the necessary block map pages will be allocated, most slab journal blocks will have only data
 * entries. Such blocks can hold more entries, hence the two formats.
 */
/* One slab journal entry in its unpacked form. */
struct slab_journal_entry {
	slab_block_number sbn;
	enum journal_operation operation;
	bool increment;
};
/* A single slab journal entry in its on-disk form */ typedefstruct {
u8 offset_low8;
u8 offset_mid8;
/* A slab journal block header in its unpacked form. */
struct slab_journal_block_header {
	/* The sequence number of the head of the journal */
	sequence_number_t head;
	/* The sequence number of this block */
	sequence_number_t sequence_number;
	/* The nonce of a given VDO instance */
	nonce_t nonce;
	/* The recovery journal point of the last entry */
	struct journal_point recovery_point;
	/* The metadata type */
	enum vdo_metadata_type metadata_type;
	/* True when this block contains block map increments */
	bool has_block_map_increments;
	/* How many entries the block holds */
	journal_entry_count_t entry_count;
};
/*
 * On-disk (packed) layout of a slab journal block header. Every multi-byte field is stored
 * little-endian.
 */
struct packed_slab_journal_block_header {
	/* 64-bit sequence number of the head of the journal */
	__le64 head;
	/* 64-bit sequence number of this block */
	__le64 sequence_number;
	/* Recovery journal point of the last entry, packed into 64 bits */
	struct packed_journal_point recovery_point;
	/* 64-bit nonce of a given VDO instance */
	__le64 nonce;
	/* 8-bit metadata type (should always be two, for the slab journal) */
	u8 metadata_type;
	/* True when this block contains block map increments */
	bool has_block_map_increments;
	/* 16-bit count of the entries encoded in the block */
	__le16 entry_count;
} __packed;
/* Payload layout used by a slab journal block that contains block map increments. */
struct full_slab_journal_entries {
	/* The packed entries */
	packed_slab_journal_entry entries[VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK];
	/* Bit map marking which of the entries are block map increments */
	u8 entry_types[VDO_SLAB_JOURNAL_ENTRY_TYPES_SIZE];
} __packed;
typedefunion { /* Entries which include block map increments */ struct full_slab_journal_entries full_entries; /* Entries which are only data updates */
packed_slab_journal_entry entries[VDO_SLAB_JOURNAL_ENTRIES_PER_BLOCK]; /* Ensure the payload fills to the end of the block */
u8 space[VDO_SLAB_JOURNAL_PAYLOAD_SIZE];
} __packed slab_journal_payload;
/* The type used to hold the offset of a slab journal tail block (a single byte). */
typedef u8 tail_block_offset_t;
/*
 * A packed slab summary entry: one byte of tail block offset followed by three bitfields
 * totalling eight bits. The bitfield declaration order depends on the host byte order.
 */
struct slab_summary_entry {
	/* Bits 7..0: The offset of the tail block within the slab journal */
	tail_block_offset_t tail_block_offset;

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* Bits 13..8: A hint about the fullness of the slab */
	unsigned int fullness_hint : 6;
	/* Bit 14: Whether the ref_counts must be loaded from the layer */
	unsigned int load_ref_counts : 1;
	/* Bit 15: The believed cleanliness of this slab */
	unsigned int is_dirty : 1;
#else
	/* Bit 15: The believed cleanliness of this slab */
	unsigned int is_dirty : 1;
	/* Bit 14: Whether the ref_counts must be loaded from the layer */
	unsigned int load_ref_counts : 1;
	/* Bits 13..8: A hint about the fullness of the slab */
	unsigned int fullness_hint : 6;
#endif
} __packed;
/* One partition of a layout; partitions are chained through their next pointers. */
struct partition {
	/* The id of this partition */
	enum partition_id id;
	/* The offset into the layout of this partition */
	physical_block_number_t offset;
	/* The number of blocks in the partition */
	block_count_t count;
	/* A pointer to the next partition in the layout */
	struct partition *next;
};
/* Configuration of the VDO service. */
struct vdo_config {
	/* number of logical blocks */
	block_count_t logical_blocks;
	/* number of physical blocks */
	block_count_t physical_blocks;
	/* number of blocks in a slab */
	block_count_t slab_size;
	/* number of recovery journal blocks */
	block_count_t recovery_journal_size;
	/* number of slab journal blocks */
	block_count_t slab_journal_blocks;
};
/* The vdo fields which are saved as a component of the super block. */
struct vdo_component {
	enum vdo_state state;
	u64 complete_recoveries;
	u64 read_only_recoveries;
	struct vdo_config config;
	nonce_t nonce;
};
/*
 * The on-disk (packed, machine-independent) form of the vdo_config held in the VDO component
 * data of the super block. Each field is a 64-bit little-endian value.
 */
struct packed_vdo_config {
	__le64 logical_blocks;
	__le64 physical_blocks;
	__le64 slab_size;
	__le64 recovery_journal_size;
	__le64 slab_journal_blocks;
} __packed;
/*
 * The on-disk (packed, machine-independent) form of version 41.0 of the VDO component data in
 * the super block.
 */
struct packed_vdo_component_41_0 {
	__le32 state;
	__le64 complete_recoveries;
	__le64 read_only_recoveries;
	struct packed_vdo_config config;
	__le64 nonce;
} __packed;
/*
 * The version of the on-disk format of a VDO volume. This should be incremented any time the
 * on-disk representation of any VDO structure changes. Changes which require only online upgrade
 * steps should increment the minor version. Changes which require an offline upgrade or which can
 * not be upgraded to at all should increment the major version and set the minor version to 0.
 *
 * Only declared here; the object is defined elsewhere.
 */
extern const struct version_number VDO_VOLUME_VERSION_67_0;
/* Our partitioning of the underlying storage */ struct layout layout;
};
/**
 * vdo_are_same_version() - Check whether two version numbers are the same.
 * @version_a: The first version.
 * @version_b: The second version.
 *
 * Return: true if both the major and the minor numbers of the two versions are equal.
 */
static inline bool vdo_are_same_version(struct version_number version_a,
					struct version_number version_b)
{
	return ((version_a.major_version == version_b.major_version) &&
		(version_a.minor_version == version_b.minor_version));
}
/**
 * vdo_is_upgradable_version() - Check whether an actual version is upgradable to an expected
 *                               version.
 * @expected_version: The expected version.
 * @actual_version: The version being validated.
 *
 * An actual version is upgradable if its major number matches the expected major number and the
 * expected version's minor number is strictly greater than the actual version's minor number.
 * Identical versions are therefore not considered upgradable.
 *
 * Return: true if the actual version is upgradable.
 */
static inline bool vdo_is_upgradable_version(struct version_number expected_version,
					     struct version_number actual_version)
{
	return ((expected_version.major_version == actual_version.major_version) &&
		(expected_version.minor_version > actual_version.minor_version));
}
/** * vdo_pack_version_number() - Convert a version_number to its packed on-disk representation. * @version: The version number to convert. * * Return: the platform-independent representation of the version
*/ staticinlinestruct packed_version_number vdo_pack_version_number(struct version_number version)
{ return (struct packed_version_number) {
.major_version = __cpu_to_le32(version.major_version),
.minor_version = __cpu_to_le32(version.minor_version),
};
}
/** * vdo_unpack_version_number() - Convert a packed_version_number to its native in-memory * representation. * @version: The version number to convert. * * Return: The platform-independent representation of the version.
*/ staticinlinestruct version_number vdo_unpack_version_number(struct packed_version_number version)
{ return (struct version_number) {
.major_version = __le32_to_cpu(version.major_version),
.minor_version = __le32_to_cpu(version.minor_version),
};
}
/** * vdo_pack_header() - Convert a component header to its packed on-disk representation. * @header: The header to convert. * * Return: the platform-independent representation of the header
*/ staticinlinestruct packed_header vdo_pack_header(conststruct header *header)
{ return (struct packed_header) {
.id = __cpu_to_le32(header->id),
.version = vdo_pack_version_number(header->version),
.size = __cpu_to_le64(header->size),
};
}
/** * vdo_unpack_header() - Convert a packed_header to its native in-memory representation. * @header: The header to convert. * * Return: The platform-independent representation of the version.
*/ staticinlinestruct header vdo_unpack_header(conststruct packed_header *header)
{ return (struct header) {
.id = __le32_to_cpu(header->id),
.version = vdo_unpack_version_number(header->version),
.size = __le64_to_cpu(header->size),
};
}
/** * vdo_get_index_region_start() - Get the start of the index region from a geometry. * @geometry: The geometry. * * Return: The start of the index region.
*/ staticinline physical_block_number_t __must_check
vdo_get_index_region_start(struct volume_geometry geometry)
{ return geometry.regions[VDO_INDEX_REGION].start_block;
}
/** * vdo_get_data_region_start() - Get the start of the data region from a geometry. * @geometry: The geometry. * * Return: The start of the data region.
*/ staticinline physical_block_number_t __must_check
vdo_get_data_region_start(struct volume_geometry geometry)
{ return geometry.regions[VDO_DATA_REGION].start_block;
}
/** * vdo_get_index_region_size() - Get the size of the index region from a geometry. * @geometry: The geometry. * * Return: The size of the index region.
*/ staticinline physical_block_number_t __must_check
vdo_get_index_region_size(struct volume_geometry geometry)
{ return vdo_get_data_region_start(geometry) -
vdo_get_index_region_start(geometry);
}
/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably decodes the raw @block into @geometry and returns a status code --
 * confirm against the definition.
 */
int __must_check vdo_parse_geometry_block(unsigned char *block,
					  struct volume_geometry *geometry);
/** * vdo_is_valid_recovery_journal_sector() - Determine whether the header of the given sector could * describe a valid sector for the given journal block * header. * @header: The unpacked block header to compare against. * @sector: The packed sector to check. * @sector_number: The number of the sector being checked. * * Return: true if the sector matches the block header.
*/ staticinlinebool __must_check
vdo_is_valid_recovery_journal_sector(conststruct recovery_block_header *header, conststruct packed_journal_sector *sector,
u8 sector_number)
{ if ((header->check_byte != sector->check_byte) ||
(header->recovery_count != sector->recovery_count)) returnfalse;
if (header->metadata_type == VDO_METADATA_RECOVERY_JOURNAL_2) return sector->entry_count <= RECOVERY_JOURNAL_ENTRIES_PER_SECTOR;
if (sector_number == 7) return sector->entry_count <= RECOVERY_JOURNAL_1_ENTRIES_IN_LAST_SECTOR;
/** * vdo_compute_recovery_journal_block_number() - Compute the physical block number of the recovery * journal block which would have a given sequence * number. * @journal_size: The size of the journal. * @sequence_number: The sequence number. * * Return: The pbn of the journal block which would the specified sequence number.
*/ staticinline physical_block_number_t __must_check
vdo_compute_recovery_journal_block_number(block_count_t journal_size,
sequence_number_t sequence_number)
{ /* * Since journal size is a power of two, the block number modulus can just be extracted * from the low-order bits of the sequence.
*/ return (sequence_number & (journal_size - 1));
}
/** * vdo_get_journal_block_sector() - Find the recovery journal sector from the block header and * sector number. * @header: The header of the recovery journal block. * @sector_number: The index of the sector (1-based). * * Return: A packed recovery journal sector.
*/ staticinlinestruct packed_journal_sector * __must_check
vdo_get_journal_block_sector(struct packed_journal_header *header, int sector_number)
{ char *sector_data = ((char *) header) + (VDO_SECTOR_SIZE * sector_number);
/** * vdo_compute_slab_count() - Compute the number of slabs a depot with given parameters would have. * @first_block: PBN of the first data block. * @last_block: PBN of the last data block. * @slab_size_shift: Exponent for the number of blocks per slab. * * Return: The number of slabs.
*/ staticinline slab_count_t vdo_compute_slab_count(physical_block_number_t first_block,
physical_block_number_t last_block, unsignedint slab_size_shift)
{ return (slab_count_t) ((last_block - first_block) >> slab_size_shift);
}
/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably fills in @slab_config from @slab_size and @slab_journal_blocks and
 * returns a status code -- confirm against the definition.
 */
int __must_check vdo_configure_slab(block_count_t slab_size,
block_count_t slab_journal_blocks, struct slab_config *slab_config);
/** * vdo_get_saved_reference_count_size() - Get the number of blocks required to save a reference * counts state covering the specified number of data * blocks. * @block_count: The number of physical data blocks that can be referenced. * * Return: The number of blocks required to save reference counts with the given block count.
*/ staticinline block_count_t vdo_get_saved_reference_count_size(block_count_t block_count)
{ return DIV_ROUND_UP(block_count, COUNTS_PER_BLOCK);
}
/** * vdo_get_slab_journal_start_block() - Get the physical block number of the start of the slab * journal relative to the start block allocator partition. * @slab_config: The slab configuration of the VDO. * @origin: The first block of the slab.
*/ staticinline physical_block_number_t __must_check
vdo_get_slab_journal_start_block(conststruct slab_config *slab_config,
physical_block_number_t origin)
{ return origin + slab_config->data_blocks + slab_config->reference_count_blocks;
}
/**
 * vdo_advance_journal_point() - Move the given journal point forward by one entry.
 * @point: The journal point to adjust.
 * @entries_per_block: The number of entries in one full block.
 *
 * When the entry count reaches @entries_per_block, the point rolls over to entry 0 of the next
 * sequence number.
 */
static inline void vdo_advance_journal_point(struct journal_point *point,
					     journal_entry_count_t entries_per_block)
{
	point->entry_count++;
	if (point->entry_count == entries_per_block) {
		point->sequence_number++;
		point->entry_count = 0;
	}
}
/** * vdo_before_journal_point() - Check whether the first point precedes the second point. * @first: The first journal point. * @second: The second journal point. * * Return: true if the first point precedes the second point.
*/ staticinlinebool vdo_before_journal_point(conststruct journal_point *first, conststruct journal_point *second)
{ return ((first->sequence_number < second->sequence_number) ||
((first->sequence_number == second->sequence_number) &&
(first->entry_count < second->entry_count)));
}
/** * vdo_pack_journal_point() - Encode the journal location represented by a * journal_point into a packed_journal_point. * @unpacked: The unpacked input point. * @packed: The packed output point.
*/ staticinlinevoid vdo_pack_journal_point(conststruct journal_point *unpacked, struct packed_journal_point *packed)
{
packed->encoded_point =
__cpu_to_le64((unpacked->sequence_number << 16) | unpacked->entry_count);
}
/** * vdo_unpack_journal_point() - Decode the journal location represented by a packed_journal_point * into a journal_point. * @packed: The packed input point. * @unpacked: The unpacked output point.
*/ staticinlinevoid vdo_unpack_journal_point(conststruct packed_journal_point *packed, struct journal_point *unpacked)
{
u64 native = __le64_to_cpu(packed->encoded_point);
/** * vdo_get_slab_summary_hint_shift() - Compute the shift for slab summary hints. * @slab_size_shift: Exponent for the number of blocks per slab. * * Return: The hint shift.
*/ staticinline u8 __must_check vdo_get_slab_summary_hint_shift(unsignedint slab_size_shift)
{ return ((slab_size_shift > VDO_SLAB_SUMMARY_FULLNESS_HINT_BITS) ?
(slab_size_shift - VDO_SLAB_SUMMARY_FULLNESS_HINT_BITS) :
0);
}
/*
 * Super block encode and decode entry points; both are only declared here.
 * NOTE(review): presumably vdo_encode_super_block() writes @states into @buffer and
 * vdo_decode_super_block() validates/decodes @buffer and returns a status code -- confirm
 * against the definitions.
 */
void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states);
int __must_check vdo_decode_super_block(u8 *buffer);
/* We start with 0L and postcondition with ~0L to match our historical usage in userspace. */ staticinline u32 vdo_crc32(constvoid *buf, unsignedlong len)
{ return (crc32(0L, buf, len) ^ ~0L);
}
/*
 * Note carried over from the web page this listing was extracted from (not part of the code):
 *
 * The information on this web page was compiled carefully and to the best of our knowledge.
 * However, neither the completeness, nor the correctness, nor the quality of the information
 * provided is guaranteed.
 *
 * Remark: the colored syntax highlighting and the measurement are still experimental.
 */