/*
 * Region and Dirty bitmaps.
 *
 * dm-clone logically splits the source and destination devices in regions of
 * fixed size. The destination device's regions are gradually hydrated, i.e.,
 * we copy (clone) the source's regions to the destination device. Eventually,
 * all regions will get hydrated and all I/O will be served from the
 * destination device.
 *
 * We maintain an on-disk bitmap which tracks the state of each of the
 * destination device's regions, i.e., whether they are hydrated or not.
 *
 * To save constantly doing look ups on disk we keep an in core copy of the
 * on-disk bitmap, the region_map.
 *
 * In order to track which regions are hydrated during a metadata transaction,
 * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
 * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
 * tracks the regions that got hydrated during the current metadata
 * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
 * the dirty_regions bitmap.
 *
 * This allows us to precisely track the regions that were hydrated during the
 * current metadata transaction and update the metadata accordingly, when we
 * commit the current transaction. This is important because dm-clone should
 * only commit the metadata of regions that were properly flushed to the
 * destination device beforehand. Otherwise, in case of a crash, we could end
 * up with a corrupted dm-clone device.
 *
 * When a region finishes hydrating dm-clone calls
 * dm_clone_set_region_hydrated(), or for discard requests
 * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
 * and dmap.
 *
 * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
 * and update the on-disk metadata accordingly. Thus, we don't have to flush to
 * disk the whole region_map. We can just flush the dirty region_map bits.
 *
 * We use the helper dmap->dirty_words bitmap, which is smaller than the
 * original region_map, to reduce the amount of memory accesses during a
 * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
 * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
 * accesses.
 *
 * We could update directly the on-disk bitmap, when dm-clone calls either
 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this
 * inserts significant metadata I/O overhead in dm-clone's I/O path. Also, as
 * these two functions don't block, we can call them in interrupt context,
 * e.g., in a hooked overwrite bio's completion routine, and further reduce the
 * I/O completion latency.
 *
 * We maintain two dirty bitmap sets. During a metadata commit we atomically
 * swap the currently used dmap with the unused one. This allows the metadata
 * update functions to run concurrently with an ongoing commit.
 */
struct dirty_map {
	unsigned long *dirty_words;	/* dirty words of dirty_regions */
	unsigned long *dirty_regions;	/* regions hydrated this transaction */
	unsigned int changed;		/* non-zero iff any bit was set */
};
/*
 * Reading the space map root can fail, so we read it into this
 * buffer before the superblock is locked and updated.
 *
 * NOTE(review): this looks like a field of the enclosing metadata
 * struct (presumably dm_clone_metadata), whose definition is not
 * visible in this chunk — confirm before moving it.
 */
__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
/*
 * Check if the superblock is formatted or not. We consider the superblock to
 * be formatted in case we find non-zero bytes in it.
 *
 * Returns 0 on success and sets *formatted; returns a negative error code if
 * the superblock cannot be read-locked.
 */
static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted)
{
	int r;
	unsigned int i, nr_words;
	struct dm_block *sblock;
	__le64 *data_le, zero = cpu_to_le64(0);

	/*
	 * We don't use a validator here because the superblock could be all
	 * zeroes.
	 */
	r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock);
	if (r) {
		DMERR("Failed to read_lock superblock");
		return r;
	}

	/*
	 * Point at the superblock's raw data. The original read data_le
	 * uninitialized and never initialized *formatted.
	 */
	data_le = dm_block_data(sblock);
	*formatted = false;

	/* This assumes that the block size is a multiple of 8 bytes */
	BUG_ON(dm_bm_block_size(bm) % sizeof(__le64));
	nr_words = dm_bm_block_size(bm) / sizeof(__le64);

	/* Any non-zero word means the superblock has been formatted. */
	for (i = 0; i < nr_words; i++) {
		if (data_le[i] != zero) {
			*formatted = true;
			break;
		}
	}

	/* Release the read lock taken above (the original leaked it). */
	dm_bm_unlock(sblock);

	return r;
}
/*
 * NOTE(review): everything from here down to the closing brace below looks
 * like interleaved fragments of several different helpers whose signatures
 * were lost: a metadata-open path (superblock verification,
 * dm_tm_open_with_sm(), region_map allocation), a metadata-commit helper
 * (the dm_bitset_flush() .. dm_tm_commit() sequence), and a dirty-map flush
 * helper (the dirty_words walk). Several names used here (sb, sblock, cmd,
 * word, dmap, and the out_with_* labels) have no visible declarations in
 * this chunk. The code is kept byte-identical; recover the original
 * function boundaries before changing anything below.
 */
if (r) {
DMERR("Failed to read_lock superblock"); return r;
}
/* Superblock payload — presumably taken after a successful lock; confirm. */
sb = dm_block_data(sblock);
/* Verify that target_size and region_size haven't changed. */
if (cmd->region_size != le64_to_cpu(sb->region_size) ||
cmd->target_size != le64_to_cpu(sb->target_size)) {
DMERR("Region and/or target size don't match the ones in metadata");
r = -EINVAL; goto out_with_lock;
}
/* Open the transaction manager and space map stored in the superblock. */
r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION,
sb->metadata_space_map_root, sizeof(sb->metadata_space_map_root),
&cmd->tm, &cmd->sm);
if (r) {
DMERR("dm_tm_open_with_sm failed"); goto out_with_lock;
}
/* In-core copy of the on-disk region bitmap (one bit per region). */
cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL); if (!cmd->region_map) {
DMERR("Failed to allocate memory for region bitmap");
r = -ENOMEM; goto out_with_md;
}
r = __create_persistent_data_structures(cmd, true); if (r) goto out_with_region_map;
/* Populate region_map from the on-disk bitset. */
r = __load_bitset_in_core(cmd); if (r) {
DMERR("Failed to load on-disk region map"); goto out_with_pds;
}
r = dirty_map_init(cmd); if (r) goto out_with_pds;
/* All regions hydrated: all I/O can be served from the destination. */
if (bitmap_full(cmd->region_map, cmd->nr_regions))
cmd->hydration_done = true;
/* NOTE(review): a commit-path fragment appears to begin here. */
/* Flush bitset cache */
r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root); if (r) {
DMERR("dm_bitset_flush failed"); return r;
}
/* Flush to disk all blocks, except the superblock */
r = dm_tm_pre_commit(cmd->tm); if (r) {
DMERR("dm_tm_pre_commit failed"); return r;
}
/* Save the space map root in cmd->metadata_space_map_root */
r = __copy_sm_root(cmd); if (r) {
DMERR("__copy_sm_root failed"); return r;
}
/* Lock the superblock */
r = superblock_write_lock_zero(cmd, &sblock); if (r) {
DMERR("Failed to write_lock superblock"); return r;
}
/* Save the metadata in superblock */
sb = dm_block_data(sblock);
__prepare_superblock(cmd, sb);
/* Unlock superblock and commit it to disk */
r = dm_tm_commit(cmd->tm, sblock); if (r) {
DMERR("Failed to commit superblock"); return r;
}
/*
 * FIXME: Find a more efficient way to check if the hydration is done.
 */
if (bitmap_full(cmd->region_map, cmd->nr_regions))
cmd->hydration_done = true;
/* NOTE(review): a dirty-map flush fragment appears to begin here. */
/* Walk dirty_words; persist the regions covered by each dirty word. */
word = 0; do {
word = find_next_bit(dmap->dirty_words, cmd->nr_words, word);
if (word == cmd->nr_words) break;
r = __update_metadata_word(cmd, dmap->dirty_regions, word);
if (r) return r;
__clear_bit(word, dmap->dirty_words);
word++;
} while (word < cmd->nr_words);
r = __metadata_commit(cmd);
if (r) return r;
/* Update the changed flag */
spin_lock_irq(&cmd->bitmap_lock);
dmap->changed = 0;
spin_unlock_irq(&cmd->bitmap_lock);
return 0;
}
/*
 * Prepare the next metadata commit: make the unused (clean) dirty bitmap the
 * current one, so in-flight updates land there, and mark the old bitmap as
 * the one being committed.
 *
 * Returns 0 on success, -EPERM if the metadata is failed or read-only, or
 * -EINVAL if the previous commit did not complete cleanly.
 */
int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd)
{
	int r = 0;
	struct dirty_map *dmap, *next_dmap;

	down_write(&cmd->lock);

	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
		r = -EPERM;
		goto out;
	}

	/* Get current dirty bitmap */
	dmap = cmd->current_dmap;

	/* Get next dirty bitmap */
	next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0];

	/*
	 * The last commit failed, so we don't have a clean dirty-bitmap to
	 * use.
	 */
	if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) {
		r = -EINVAL;
		goto out;
	}

	/*
	 * Swap dirty bitmaps. Without this swap (missing in the original),
	 * concurrent updates would keep dirtying the bitmap we are about to
	 * flush to disk.
	 */
	spin_lock_irq(&cmd->bitmap_lock);
	cmd->current_dmap = next_dmap;
	spin_unlock_irq(&cmd->bitmap_lock);

	/* Set old dirty bitmap as currently committing */
	cmd->committing_dmap = dmap;
out:
	up_write(&cmd->lock);

	return r;
}
/*
 * Commit the dirty bitmap previously staged by
 * dm_clone_metadata_pre_commit(): flush it to the on-disk metadata and, on
 * success, clear the committing slot.
 *
 * Returns 0 on success, -EPERM if the metadata is failed or read-only,
 * -EINVAL if no commit was staged, or the error from flushing.
 */
int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
{
	int ret = -EPERM;

	down_write(&cmd->lock);

	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
		goto out;

	if (WARN_ON(!cmd->committing_dmap)) {
		ret = -EINVAL;
		goto out;
	}

	ret = __flush_dmap(cmd, cmd->committing_dmap);
	if (!ret) {
		/* Clear committing dmap */
		cmd->committing_dmap = NULL;
	}
out:
	up_write(&cmd->lock);

	return ret;
}
int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsignedlong region_nr)
{ int r = 0; struct dirty_map *dmap; unsignedlong word, flags;
if (unlikely(region_nr >= cmd->nr_regions)) {
DMERR("Region %lu out of range (total number of regions %lu)",
region_nr, cmd->nr_regions); return -ERANGE;
}
int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsignedlong start, unsignedlong nr_regions)
{ int r = 0; struct dirty_map *dmap; unsignedlong word, region_nr;
if (unlikely(start >= cmd->nr_regions || (start + nr_regions) < start ||
(start + nr_regions) > cmd->nr_regions)) {
DMERR("Invalid region range: start %lu, nr_regions %lu (total number of regions %lu)",
start, nr_regions, cmd->nr_regions); return -ERANGE;
}
/*
 * WARNING: This must not be called concurrently with either
 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes
 * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only
 * exception is after setting the metadata to read-only mode, using
 * dm_clone_metadata_set_read_only().
 *
 * We don't take the spinlock because __load_bitset_in_core() does I/O, so it
 * may block.
 *
 * Returns 0 on success, -EINVAL if the metadata has failed, or the error from
 * reloading the bitset.
 */
int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd)
{
	int r = -EINVAL;

	down_write(&cmd->lock);

	if (cmd->fail_io)
		goto out;

	/*
	 * The original overwrote r after this point with a stray fragment
	 * (reading dmap[].changed under bitmap_lock, using an undeclared
	 * 'flags'), which belongs to a different function and clobbered the
	 * reload result. Return the reload result directly.
	 */
	r = __load_bitset_in_core(cmd);
out:
	up_write(&cmd->lock);

	return r;
}
/*
 * Abort the current metadata transaction: tear down the persistent data
 * structures and recreate them without formatting, discarding uncommitted
 * changes.
 *
 * Returns 0 on success, -EPERM if the metadata is failed or read-only, or the
 * error from recreating the data structures (in which case fail_io is set and
 * the metadata becomes unusable).
 */
int dm_clone_metadata_abort(struct dm_clone_metadata *cmd)
{
	int r = -EPERM;

	down_write(&cmd->lock);

	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
		goto out;

	__destroy_persistent_data_structures(cmd);

	r = __create_persistent_data_structures(cmd, false);
	if (r) {
		/* If something went wrong we can neither write nor read the metadata */
		cmd->fail_io = true;
	}
out:
	up_write(&cmd->lock);

	/* The original fell off the end of this non-void function here. */
	return r;
}