/* List of snapshots per Origin */ struct list_head list;
/* * You can't use a snapshot if this is 0 (e.g. if full). * A snapshot-merge target never clears this.
*/ int valid;
/* * The snapshot overflowed because of a write to the snapshot device. * We don't have to invalidate the snapshot in this case, but we need * to prevent further writes.
*/ int snapshot_overflowed;
/* Origin writes don't trigger exceptions until this is set */ int active;
atomic_t pending_exceptions_count;
spinlock_t pe_allocation_lock;
/* Protected by "pe_allocation_lock" */
sector_t exception_start_sequence;
/* Protected by kcopyd single-threaded callback */
sector_t exception_complete_sequence;
/* * A list of pending exceptions that completed out of order. * Protected by kcopyd single-threaded callback.
*/ struct rb_root out_of_order_tree;
/* Wait for events based on state_bits */ unsignedlong state_bits;
/* Range of chunks currently being merged. */
chunk_t first_merging_chunk; int num_merging_chunks;
/* * The merge operation failed if this flag is set. * Failure modes are handled as follows: * - I/O error reading the header * => don't load the target; abort. * - Header does not have "valid" flag set * => use the origin; forget about the snapshot. * - I/O error when reading exceptions * => don't load the target; abort. * (We can't use the intermediate origin state.) * - I/O error while merging * => stop merging; set merge_failed; process I/O normally.
*/ bool merge_failed:1;
/* * Incoming bios that overlap with chunks being merged must wait * for them to be committed.
*/ struct bio_list bios_queued_during_merge;
};
/* * state_bits: * RUNNING_MERGE - Merge operation is in progress. * SHUTDOWN_MERGE - Set to signal that merge needs to be stopped; * cleared afterwards.
*/ #define RUNNING_MERGE 0 #define SHUTDOWN_MERGE 1
/* * Maximum number of chunks being copied on write. * * The value was decided experimentally as a trade-off between memory * consumption, stalling the kernel's workqueues and maintaining a high enough * throughput.
*/ #define DEFAULT_COW_THRESHOLD 2048
staticunsignedint cow_threshold = DEFAULT_COW_THRESHOLD;
module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644);
MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, "A percentage of time allocated for copy on write");
/*
 * Compare two block devices for identity.
 *
 * There is only ever one instance of a particular block device,
 * so comparing the pointers is safe and sufficient.
 */
static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
	return (lhs == rhs) ? 1 : 0;
}
/* * Origin buffers waiting for this to complete are held * in a bio list
*/ struct bio_list origin_bios; struct bio_list snapshot_bios;
/* Pointer back to snapshot context */ struct dm_snapshot *snap;
/* * 1 indicates the exception has already been sent to * kcopyd.
*/ int started;
/* There was copying error. */ int copy_error;
/* A sequence number, it is used for in-order completion. */
sector_t exception_sequence;
struct rb_node out_of_order_node;
/* * For writing a complete chunk, bypassing the copy.
*/ struct bio *full_bio;
bio_end_io_t *full_bio_end_io;
};
/* * Hash table mapping origin volumes to lists of snapshots and * a lock to protect it
*/ staticstruct kmem_cache *exception_cache; staticstruct kmem_cache *pending_cache;
/*
 * Return 1 if @chunk currently has an I/O tracked against it in
 * s->tracked_chunk_hash, 0 otherwise.
 */
static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
{
	struct dm_snap_tracked_chunk *c;
	int found = 0;

	/* The tracked-chunk hash is protected by tracked_chunk_lock */
	spin_lock_irq(&s->tracked_chunk_lock);

	hlist_for_each_entry(c,
	    &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
		if (c->chunk == chunk) {
			found = 1;
			break;
		}
	}

	spin_unlock_irq(&s->tracked_chunk_lock);

	return found;
}
/* * This conflicting I/O is extremely improbable in the caller, * so fsleep(1000) is sufficient and there is no need for a wait queue.
*/ staticvoid __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk)
{ while (__chunk_is_tracked(s, chunk))
fsleep(1000);
}
/*
 * One of these per registered origin, held in the snapshot_origins hash.
 */
struct origin {
	/* The origin device */
	struct block_device *bdev;

	/* Entry in the _origins hash bucket (presumably keyed on bdev) */
	struct list_head hash_list;

	/* List of snapshots for this origin */
	struct list_head snapshots;
};
/*
 * This structure is allocated for each origin target.
 */
struct dm_origin {
	struct dm_dev *dev;		/* the underlying origin device */
	struct dm_target *ti;		/* owning target */
	unsigned int split_boundary;	/* I/O split granularity -- TODO confirm units (sectors) */
	struct list_head hash_list;	/* entry in the _dm_origins hash */
};
/* * Size of the hash table for origin volumes. If we make this * the size of the minors list then it should be nearly perfect
*/ #define ORIGIN_HASH_SIZE 256 #define ORIGIN_MASK 0xFF staticstruct list_head *_origins; staticstruct list_head *_dm_origins; staticstruct rw_semaphore _origins_lock;
_origins = kmalloc_array(ORIGIN_HASH_SIZE, sizeof(struct list_head),
GFP_KERNEL); if (!_origins) {
DMERR("unable to allocate memory for _origins"); return -ENOMEM;
} for (i = 0; i < ORIGIN_HASH_SIZE; i++)
INIT_LIST_HEAD(_origins + i);
_dm_origins = kmalloc_array(ORIGIN_HASH_SIZE, sizeof(struct list_head),
GFP_KERNEL); if (!_dm_origins) {
DMERR("unable to allocate memory for _dm_origins");
kfree(_origins); return -ENOMEM;
} for (i = 0; i < ORIGIN_HASH_SIZE; i++)
INIT_LIST_HEAD(_dm_origins + i);
/*
 * _origins_lock must be held when calling this function.
 * Returns number of snapshots registered using the supplied cow device, plus:
 * snap_src - a snapshot suitable for use as a source of exception handover
 * snap_dest - a snapshot capable of receiving exception handover.
 * snap_merge - an existing snapshot-merge target linked to the same origin.
 * There can be at most one snapshot-merge target. The parameter is optional.
 *
 * Possible return values and states of snap_src and snap_dest.
 *   0: NULL, NULL  - first new snapshot
 *   1: snap_src, NULL - normal snapshot
 *   2: snap_src, snap_dest  - waiting for handover
 *   2: snap_src, NULL - handed over, waiting for old to be deleted
 *   1: NULL, snap_dest - source got destroyed without handover
 */
static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
					struct dm_snapshot **snap_src,
					struct dm_snapshot **snap_dest,
					struct dm_snapshot **snap_merge)
{
	struct dm_snapshot *s;
	struct origin *o;
	int count = 0;
	int active;

	o = __lookup_origin(snap->origin->bdev);
	if (!o)
		goto out;

	list_for_each_entry(s, &o->snapshots, list) {
		/* Note any merge target sharing this origin */
		if (dm_target_is_snapshot_merge(s->ti) && snap_merge)
			*snap_merge = s;
		/* Only snapshots using the same cow device are of interest */
		if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
			continue;

		/* Sample 'active' under the snapshot's lock */
		down_read(&s->lock);
		active = s->active;
		up_read(&s->lock);

		if (active) {
			if (snap_src)
				*snap_src = s;
		} else if (snap_dest)
			*snap_dest = s;

		count++;
	}

out:
	return count;
}
/* * On success, returns 1 if this snapshot is a handover destination, * otherwise returns 0.
*/ staticint __validate_exception_handover(struct dm_snapshot *snap)
{ struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; struct dm_snapshot *snap_merge = NULL;
/* Does snapshot need exceptions handed over to it? */ if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest,
&snap_merge) == 2) ||
snap_dest) {
snap->ti->error = "Snapshot cow pairing for exception table handover failed"; return -EINVAL;
}
/* * If no snap_src was found, snap cannot become a handover * destination.
*/ if (!snap_src) return 0;
/* * Non-snapshot-merge handover?
*/ if (!dm_target_is_snapshot_merge(snap->ti)) return 1;
/* * Do not allow more than one merging snapshot.
*/ if (snap_merge) {
snap->ti->error = "A snapshot is already merging."; return -EINVAL;
}
if (!snap_src->store->type->prepare_merge ||
!snap_src->store->type->commit_merge) {
snap->ti->error = "Snapshot exception store does not support snapshot-merge."; return -EINVAL;
}
/* Sort the list according to chunk size, largest-first smallest-last */
list_for_each_entry(l, &o->snapshots, list) if (l->store->chunk_size < s->store->chunk_size) break;
list_add_tail(&s->list, &l->list);
}
/*
 * Make a note of the snapshot and its origin so we can look it
 * up when the origin has a write on it.
 *
 * Also validate snapshot exception store handovers.
 * On success, returns 1 if this registration is a handover destination,
 * otherwise returns 0.
 */
static int register_snapshot(struct dm_snapshot *snap)
{
	struct origin *o, *new_o = NULL;
	struct block_device *bdev = snap->origin->bdev;
	int r = 0;

	/* Preallocate before taking the lock, in case a new origin is needed */
	new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
	if (!new_o)
		return -ENOMEM;

	down_write(&_origins_lock);

	/* r carries the handover-destination indication (or a negative error) */
	r = __validate_exception_handover(snap);
	if (r < 0) {
		kfree(new_o);
		goto out;
	}

	o = __lookup_origin(bdev);
	if (o)
		kfree(new_o);	/* origin already registered; drop the spare */
	else {
		/* New origin */
		o = new_o;

		/* Initialise the struct */
		INIT_LIST_HEAD(&o->snapshots);
		o->bdev = bdev;
		__insert_origin(o);
	}

	__insert_snapshot(o, snap);

out:
	up_write(&_origins_lock);
	return r;
}
/* * Move snapshot to correct place in list according to chunk size.
*/ staticvoid reregister_snapshot(struct dm_snapshot *s)
{ struct block_device *bdev = s->origin->bdev;
down_write(&_origins_lock);
o = __lookup_origin(s->origin->bdev);
list_del(&s->list); if (o && list_empty(&o->snapshots)) {
list_del(&o->hash_list);
kfree(o);
}
up_write(&_origins_lock);
}
/*
 * Implementation of the exception hash tables.
 * The lowest hash_shift bits of the chunk number are ignored, allowing
 * some consecutive chunks to be grouped together.
 */
static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk);

/*
 * Lock to protect access to the completed and pending exception hash tables.
 * Each member addresses one bucket (bit-spinlock head) in its table.
 */
struct dm_exception_table_lock {
	struct hlist_bl_head *complete_slot;	/* bucket in the completed table */
	struct hlist_bl_head *pending_slot;	/* bucket in the pending table */
};
/* * Return the exception data for a sector, or NULL if not * remapped.
*/ staticstruct dm_exception *dm_lookup_exception(struct dm_exception_table *et,
chunk_t chunk)
{ struct hlist_bl_head *slot; struct hlist_bl_node *pos; struct dm_exception *e;
l = &eh->table[exception_hash(eh, new_e->old_chunk)];
/* Add immediately if this table doesn't support consecutive chunks */ if (!eh->hash_shift) goto out;
/* List is ordered by old_chunk */
hlist_bl_for_each_entry(e, pos, l, hash_list) { /* Insert after an existing chunk? */ if (new_e->old_chunk == (e->old_chunk +
dm_consecutive_chunk_count(e) + 1) &&
new_e->new_chunk == (dm_chunk_number(e->new_chunk) +
dm_consecutive_chunk_count(e) + 1)) {
dm_consecutive_chunk_count_inc(e);
free_completed_exception(new_e); return;
}
/* Insert before an existing chunk? */ if (new_e->old_chunk == (e->old_chunk - 1) &&
new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) {
dm_consecutive_chunk_count_inc(e);
e->old_chunk--;
e->new_chunk--;
free_completed_exception(new_e); return;
}
if (new_e->old_chunk < e->old_chunk) break;
}
out: if (!e) { /* * Either the table doesn't support consecutive chunks or slot * l is empty.
*/
hlist_bl_add_head(&new_e->hash_list, l);
} elseif (new_e->old_chunk < e->old_chunk) { /* Add before an existing exception */
hlist_bl_add_before(&new_e->hash_list, &e->hash_list);
} else { /* Add to l's tail: e is the last exception in this slot */
hlist_bl_add_behind(&new_e->hash_list, &e->hash_list);
}
}
/* * Callback used by the exception stores to load exceptions when * initialising.
*/ staticint dm_add_exception(void *context, chunk_t old, chunk_t new)
{ struct dm_exception_table_lock lock; struct dm_snapshot *s = context; struct dm_exception *e;
e = alloc_completed_exception(GFP_KERNEL); if (!e) return -ENOMEM;
e->old_chunk = old;
/* Consecutive_count is implicitly initialised to zero */
e->new_chunk = new;
/* * Although there is no need to lock access to the exception tables * here, if we don't then hlist_bl_add_head(), called by * dm_insert_exception(), will complain about accessing the * corresponding list without locking it first.
*/
dm_exception_table_lock_init(s, old, &lock);
/* * Return a minimum chunk size of all snapshots that have the specified origin. * Return zero if the origin has no snapshots.
*/ static uint32_t __minimum_chunk_size(struct origin *o)
{ struct dm_snapshot *snap; unsignedint chunk_size = rounddown_pow_of_two(UINT_MAX);
if (o)
list_for_each_entry(snap, &o->snapshots, list)
chunk_size = min_not_zero(chunk_size,
snap->store->chunk_size);
return (uint32_t) chunk_size;
}
/*
 * Hard coded magic: cap the exception hash table at however many
 * bucket heads fit in a fixed 2MB budget.
 */
static int calc_max_buckets(void)
{
	/* use a fixed size of 2MB */
	const unsigned long mem = 2 * 1024 * 1024;

	return mem / sizeof(struct hlist_bl_head);
}
/* * Allocate room for a suitable hash table.
*/ staticint init_hash_tables(struct dm_snapshot *s)
{
sector_t hash_size, cow_dev_size, max_buckets;
/* * Calculate based on the size of the original volume or * the COW volume...
*/
cow_dev_size = get_dev_size(s->cow->bdev);
max_buckets = calc_max_buckets();
/*
 * Remove one chunk from the index of completed exceptions.
 * Returns 0 on success, -EINVAL on an inconsistent exception table.
 */
static int __remove_single_exception_chunk(struct dm_snapshot *s,
					   chunk_t old_chunk)
{
	struct dm_exception *e;

	e = dm_lookup_exception(&s->complete, old_chunk);
	if (!e) {
		DMERR("Corruption detected: exception for block %llu is on disk but not in memory",
		      (unsigned long long)old_chunk);
		return -EINVAL;
	}

	/*
	 * If this is the only chunk using this exception, remove exception.
	 */
	if (!dm_consecutive_chunk_count(e)) {
		dm_remove_exception(e);
		free_completed_exception(e);
		return 0;
	}

	/*
	 * The chunk may be either at the beginning or the end of a
	 * group of consecutive chunks - never in the middle. We are
	 * removing chunks in the opposite order to that in which they
	 * were added, so this should always be true.
	 * Decrement the consecutive chunk counter and adjust the
	 * starting point if necessary.
	 */
	if (old_chunk == e->old_chunk) {
		/* Removing from the front: advance the group's start */
		e->old_chunk++;
		e->new_chunk++;
	} else if (old_chunk != e->old_chunk +
		   dm_consecutive_chunk_count(e)) {
		/* Neither front nor back of the group: table is corrupt */
		DMERR("Attempt to merge block %llu from the middle of a chunk range [%llu - %llu]",
		      (unsigned long long)old_chunk,
		      (unsigned long long)e->old_chunk,
		      (unsigned long long)
		      e->old_chunk + dm_consecutive_chunk_count(e));
		return -EINVAL;
	}

	dm_consecutive_chunk_count_dec(e);

	return 0;
}
staticvoid flush_bios(struct bio *bio);
staticint remove_single_exception_chunk(struct dm_snapshot *s)
{ struct bio *b = NULL; int r;
chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1;
down_write(&s->lock);
/* * Process chunks (and associated exceptions) in reverse order * so that dm_consecutive_chunk_count_dec() accounting works.
*/ do {
r = __remove_single_exception_chunk(s, old_chunk); if (r) goto out;
} while (old_chunk-- > s->first_merging_chunk);
staticvoid snapshot_merge_next_chunks(struct dm_snapshot *s)
{ int i, linear_chunks;
chunk_t old_chunk, new_chunk; struct dm_io_region src, dest;
sector_t io_size;
uint64_t previous_count;
BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits)); if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits))) goto shut;
/* * valid flag never changes during merge, so no lock required.
*/ if (!s->valid) {
DMERR("Snapshot is invalid: can't merge"); goto shut;
}
linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk,
&new_chunk); if (linear_chunks <= 0) { if (linear_chunks < 0) {
DMERR("Read error in exception store: shutting down merge");
down_write(&s->lock);
s->merge_failed = true;
up_write(&s->lock);
} goto shut;
}
/* Adjust old_chunk and new_chunk to reflect start of linear region */
old_chunk = old_chunk + 1 - linear_chunks;
new_chunk = new_chunk + 1 - linear_chunks;
/* * Use one (potentially large) I/O to copy all 'linear_chunks' * from the exception store to the origin
*/
io_size = linear_chunks * s->store->chunk_size;
/* * Reallocate any exceptions needed in other snapshots then * wait for the pending exceptions to complete. * Each time any pending exception (globally on the system) * completes we are woken and repeat the process to find out * if we can proceed. While this may not seem a particularly * efficient algorithm, it is not expected to have any * significant impact on performance.
*/
previous_count = read_pending_exceptions_done_count(); while (origin_write_extent(s, dest.sector, io_size)) {
wait_event(_pending_exceptions_done,
(read_pending_exceptions_done_count() !=
previous_count)); /* Retry after the wait, until all exceptions are done. */
previous_count = read_pending_exceptions_done_count();
}
if (read_err || write_err) { if (read_err)
DMERR("Read error: shutting down merge."); else
DMERR("Write error: shutting down merge."); goto shut;
}
if (blkdev_issue_flush(s->origin->bdev) < 0) {
DMERR("Flush after merge failed: shutting down merge"); goto shut;
}
if (s->store->type->commit_merge(s->store,
s->num_merging_chunks) < 0) {
DMERR("Write error in exception store: shutting down merge"); goto shut;
}
if (remove_single_exception_chunk(s) < 0) goto shut;
snapshot_merge_next_chunks(s);
return;
shut:
down_write(&s->lock);
s->merge_failed = true;
b = __release_queued_bios_after_merge(s);
up_write(&s->lock);
error_bios(b);
merge_shutdown(s);
}
staticvoid start_merge(struct dm_snapshot *s)
{ if (!test_and_set_bit(RUNNING_MERGE, &s->state_bits))
snapshot_merge_next_chunks(s);
}
/*
 * Stop the merging process and wait until it finishes.
 */
static void stop_merge(struct dm_snapshot *s)
{
	set_bit(SHUTDOWN_MERGE, &s->state_bits);
	/* Block until the merge worker observes SHUTDOWN_MERGE and drops RUNNING_MERGE */
	wait_on_bit(&s->state_bits, RUNNING_MERGE, TASK_UNINTERRUPTIBLE);
	clear_bit(SHUTDOWN_MERGE, &s->state_bits);
}
if (!s->discard_zeroes_cow && s->discard_passdown_origin) { /* * TODO: really these are disjoint.. but ti->num_discard_bios * and dm_bio_get_target_bio_nr() require rigid constraints.
*/
ti->error = "discard_passdown_origin feature depends on discard_zeroes_cow";
r = -EINVAL;
}
if (argc < 4) {
ti->error = "requires 4 or more arguments";
r = -EINVAL; goto bad;
}
if (dm_target_is_snapshot_merge(ti)) {
num_flush_bios = 2;
origin_mode = BLK_OPEN_WRITE;
}
s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) {
ti->error = "Cannot allocate private snapshot structure";
r = -ENOMEM; goto bad;
}
as.argc = argc;
as.argv = argv;
dm_consume_args(&as, 4);
r = parse_snapshot_features(&as, s, ti); if (r) goto bad_features;
origin_path = argv[0];
argv++;
argc--;
r = dm_get_device(ti, origin_path, origin_mode, &s->origin); if (r) {
ti->error = "Cannot get origin device"; goto bad_origin;
}
cow_path = argv[0];
argv++;
argc--;
r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow); if (r) {
ti->error = "Cannot get COW device"; goto bad_cow;
} if (s->cow->bdev && s->cow->bdev == s->origin->bdev) {
ti->error = "COW device cannot be the same as origin device";
r = -EINVAL; goto bad_store;
}
r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store); if (r) {
ti->error = "Couldn't create exception store";
r = -EINVAL; goto bad_store;
}
/* Allocate hash table for COW data */ if (init_hash_tables(s)) {
ti->error = "Unable to allocate hash table space";
r = -ENOMEM; goto bad_hash_tables;
}
init_waitqueue_head(&s->in_progress_wait);
s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) {
r = PTR_ERR(s->kcopyd_client);
ti->error = "Could not create kcopyd client"; goto bad_kcopyd;
}
r = mempool_init_slab_pool(&s->pending_pool, MIN_IOS, pending_cache); if (r) {
ti->error = "Could not allocate mempool for pending exceptions"; goto bad_pending_pool;
}
for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);
/* Add snapshot to the list of snapshots for this origin */ /* Exceptions aren't triggered till snapshot_resume() is called */
r = register_snapshot(s); if (r == -ENOMEM) {
ti->error = "Snapshot origin struct allocation failed"; goto bad_load_and_register;
} elseif (r < 0) { /* invalid handover, register_snapshot has set ti->error */ goto bad_load_and_register;
}
/* * Metadata must only be loaded into one table at once, so skip this * if metadata will be handed over during resume. * Chunk size will be set during the handover - set it to zero to * ensure it's ignored.
*/ if (r > 0) {
s->store->chunk_size = 0; return 0;
}
r = s->store->type->read_metadata(s->store, dm_add_exception,
(void *)s); if (r < 0) {
ti->error = "Failed to read snapshot metadata"; goto bad_read_metadata;
} elseif (r > 0) {
s->valid = 0;
DMWARN("Snapshot is marked invalid.");
}
if (!s->store->chunk_size) {
ti->error = "Chunk size not set";
r = -EINVAL; goto bad_read_metadata;
}
r = dm_set_target_max_io_len(ti, s->store->chunk_size); if (r) goto bad_read_metadata;
/* * Swap all snapshot context information between the two instances.
*/
u.table_swap = snap_dest->complete;
snap_dest->complete = snap_src->complete;
snap_src->complete = u.table_swap;
staticbool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
{ if (unlikely(s->in_progress > cow_threshold)) {
spin_lock(&s->in_progress_wait.lock); if (likely(s->in_progress > cow_threshold)) { /* * NOTE: this throttle doesn't account for whether * the caller is servicing an IO that will trigger a COW * so excess throttling may result for chunks not required * to be COW'd. But if cow_threshold was reached, extra * throttling is unlikely to negatively impact performance.
*/
DECLARE_WAITQUEUE(wait, current);
/*
 * Flush a list of buffers.
 *
 * Walks a bi_next-chained list of bios, detaching and submitting
 * each one in turn.
 */
static void flush_bios(struct bio *bio)
{
	struct bio *next;

	for (; bio; bio = next) {
		next = bio->bi_next;
		bio->bi_next = NULL;
		submit_bio_noacct(bio);
	}
}
staticint do_origin(struct dm_dev *origin, struct bio *bio, bool limit);
/*
 * Resubmit a list of origin bios: re-run each through do_origin()
 * (without the in-progress throttle) and submit those it remaps.
 */
static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
{
	struct bio *next;

	for (; bio; bio = next) {
		next = bio->bi_next;
		bio->bi_next = NULL;
		if (do_origin(s->origin, bio, false) == DM_MAPIO_REMAPPED)
			submit_bio_noacct(bio);
	}
}
/*
 * Error a list of buffers.
 *
 * Detaches each bio from the bi_next chain and completes it with an
 * I/O error.
 */
static void error_bios(struct bio *bio)
{
	struct bio *next;

	for (; bio; bio = next) {
		next = bio->bi_next;
		bio->bi_next = NULL;
		bio_io_error(bio);
	}
}
staticvoid __invalidate_snapshot(struct dm_snapshot *s, int err)
{ if (!s->valid) return;
if (err == -EIO)
DMERR("Invalidating snapshot: Error reading/writing."); elseif (err == -ENOMEM)
DMERR("Invalidating snapshot: Unable to allocate exception.");
if (s->store->type->drop_snapshot)
s->store->type->drop_snapshot(s->store);
down_read(&s->lock);
dm_exception_table_lock(&lock); if (!s->valid) {
up_read(&s->lock);
free_completed_exception(e);
error = 1;
goto out;
}
/* * Add a proper exception. After inserting the completed exception all * subsequent snapshot reads to this chunk will be redirected to the * COW device. This ensures that we do not starve. Moreover, as long * as the pending exception exists, neither origin writes nor snapshot * merging can overwrite the chunk in origin.
*/
dm_insert_exception(&s->complete, e);
up_read(&s->lock);
/* Wait for conflicting reads to drain */ if (__chunk_is_tracked(s, pe->e.old_chunk)) {
dm_exception_table_unlock(&lock);
__check_for_conflicting_io(s, pe->e.old_chunk);
dm_exception_table_lock(&lock);
}
out: /* Remove the in-flight exception from the list */
dm_remove_exception(&pe->e);
BUG_ON(pe->exception_sequence == pe2->exception_sequence); if (pe->exception_sequence < pe2->exception_sequence)
p = &((*p)->rb_left); else
p = &((*p)->rb_right);
}
/* * Looks to see if this snapshot already has a pending exception * for this chunk, otherwise it allocates a new one and inserts * it into the pending table. * * NOTE: a write lock must be held on the chunk's pending exception table slot * before calling this.
*/ staticstruct dm_snap_pending_exception *
__find_pending_exception(struct dm_snapshot *s, struct dm_snap_pending_exception *pe, chunk_t chunk)
{ struct dm_snap_pending_exception *pe2;
if (!s->valid || (unlikely(s->snapshot_overflowed) &&
bio_data_dir(bio) == WRITE)) {
r = DM_MAPIO_KILL; goto out_unlock;
}
if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { if (s->discard_passdown_origin && dm_bio_get_target_bio_nr(bio)) { /* * passdown discard to origin (without triggering * snapshot exceptions via do_origin; doing so would * defeat the goal of freeing space in origin that is * implied by the "discard_passdown_origin" feature)
*/
bio_set_dev(bio, s->origin->bdev);
track_chunk(s, bio, chunk); goto out_unlock;
} /* discard to snapshot (target_bio_nr == 0) zeroes exceptions */
}
/* If the block is already remapped - use that, else remap it */
e = dm_lookup_exception(&s->complete, chunk); if (e) {
remap_exception(s, e, bio, chunk); if (unlikely(bio_op(bio) == REQ_OP_DISCARD) &&
io_overlaps_chunk(s, bio)) {
dm_exception_table_unlock(&lock);
up_read(&s->lock);
zero_exception(s, e, bio, chunk);
r = DM_MAPIO_SUBMITTED; /* discard is not issued */ goto out;
} goto out_unlock;
}
if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { /* * If no exception exists, complete discard immediately * otherwise it'll trigger copy-out.
*/
bio_endio(bio);
r = DM_MAPIO_SUBMITTED; goto out_unlock;
}
/* * Write to snapshot - higher level takes care of RW/RO * flags so we should only get this if we are * writable.
*/ if (bio_data_dir(bio) == WRITE) {
pe = __lookup_pending_exception(s, chunk); if (!pe) {
dm_exception_table_unlock(&lock);
pe = alloc_pending_exception(s);
dm_exception_table_lock(&lock);
e = dm_lookup_exception(&s->complete, chunk); if (e) {
free_pending_exception(pe);
remap_exception(s, e, bio, chunk); goto out_unlock;
}
pe = __find_pending_exception(s, pe, chunk); if (!pe) {
dm_exception_table_unlock(&lock);
up_read(&s->lock);
down_write(&s->lock);
if (s->store->userspace_supports_overflow) { if (s->valid && !s->snapshot_overflowed) {
s->snapshot_overflowed = 1;
DMERR("Snapshot overflowed: Unable to allocate exception.");
}
} else
__invalidate_snapshot(s, -ENOMEM);
up_write(&s->lock);
r = DM_MAPIO_KILL; goto out;
}
}
remap_exception(s, &pe->e, bio, chunk);
r = DM_MAPIO_SUBMITTED;
if (!pe->started && io_overlaps_chunk(s, bio)) {
pe->started = 1;
/* * A snapshot-merge target behaves like a combination of a snapshot * target and a snapshot-origin target. It only generates new * exceptions in other snapshots and not in the one that is being * merged. * * For each chunk, if there is an existing exception, it is used to * redirect I/O to the cow device. Otherwise I/O is sent to the origin, * which in turn might generate exceptions in other snapshots. * If merging is currently taking place on the chunk in question, the * I/O is deferred by adding it to s->bios_queued_during_merge.
*/ staticint snapshot_merge_map(struct dm_target *ti, struct bio *bio)
{ struct dm_exception *e; struct dm_snapshot *s = ti->private; int r = DM_MAPIO_REMAPPED;
chunk_t chunk;
init_tracked_chunk(bio);
if (bio->bi_opf & REQ_PREFLUSH) { if (!dm_bio_get_target_bio_nr(bio))
bio_set_dev(bio, s->origin->bdev); else
bio_set_dev(bio, s->cow->bdev); return DM_MAPIO_REMAPPED;
}
if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { /* Once merging, discards no longer effect change */
bio_endio(bio); return DM_MAPIO_SUBMITTED;
}
/* Full merging snapshots are redirected to the origin */ if (!s->valid) goto redirect_to_origin;
/* If the block is already remapped - use that */
e = dm_lookup_exception(&s->complete, chunk); if (e) { /* Queue writes overlapping with chunks being merged */ if (bio_data_dir(bio) == WRITE &&
chunk >= s->first_merging_chunk &&
chunk < (s->first_merging_chunk +
s->num_merging_chunks)) {
bio_set_dev(bio, s->origin->bdev);
bio_list_add(&s->bios_queued_during_merge, bio);
r = DM_MAPIO_SUBMITTED; goto out_unlock;
}
remap_exception(s, e, bio, chunk);
if (bio_data_dir(bio) == WRITE)
track_chunk(s, bio, chunk); goto out_unlock;
}
down_read(&_origins_lock);
(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); if (snap_src && snap_dest) {
down_read(&snap_src->lock); if (s == snap_src) {
DMERR("Unable to resume snapshot source until handover completes.");
r = -EINVAL;
} elseif (!dm_suspended(snap_src->ti)) {
DMERR("Unable to perform snapshot handover until source is suspended.");
r = -EINVAL;
}
up_read(&snap_src->lock);
}
up_read(&_origins_lock);
case STATUSTYPE_TABLE: /* * kdevname returns a static pointer so we need * to make private copies if the output is to * make sense.
*/
DMEMIT("%s %s", snap->origin->name, snap->cow->name);
sz += snap->store->type->status(snap->store, type, result + sz,
maxlen - sz);
num_features = snap->discard_zeroes_cow + snap->discard_passdown_origin; if (num_features) {
DMEMIT(" %u", num_features); if (snap->discard_zeroes_cow)
DMEMIT(" discard_zeroes_cow"); if (snap->discard_passdown_origin)
DMEMIT(" discard_passdown_origin");
} break;
/* All discards are split on chunk_size boundary */
limits->discard_granularity = snap->store->chunk_size;
limits->max_hw_discard_sectors = snap->store->chunk_size;
up_read(&_origins_lock);
}
}
/* *--------------------------------------------------------------- * Origin methods *---------------------------------------------------------------
*/ /* * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any * supplied bio was ignored. The caller may submit it immediately. * (No remapping actually occurs as the origin is always a direct linear * map.) * * If further exceptions are required, DM_MAPIO_SUBMITTED is returned * and any supplied bio is added to a list to be submitted once all * the necessary exceptions exist.
*/ staticint __origin_write(struct list_head *snapshots, sector_t sector, struct bio *bio)
{ int r = DM_MAPIO_REMAPPED; struct dm_snapshot *snap; struct dm_exception *e; struct dm_snap_pending_exception *pe, *pe2; struct dm_snap_pending_exception *pe_to_start_now = NULL; struct dm_snap_pending_exception *pe_to_start_last = NULL; struct dm_exception_table_lock lock;
chunk_t chunk;
/* Do all the snapshots on this origin */
list_for_each_entry(snap, snapshots, list) { /* * Don't make new exceptions in a merging snapshot * because it has effectively been deleted
*/ if (dm_target_is_snapshot_merge(snap->ti)) continue;
/* Nothing to do if writing beyond end of snapshot */ if (sector >= dm_table_get_size(snap->ti->table)) continue;
/* * Remember, different snapshots can have * different chunk sizes.
*/
chunk = sector_to_chunk(snap->store, sector);
dm_exception_table_lock_init(snap, chunk, &lock);
/* Only deal with valid and active snapshots */ if (!snap->valid || !snap->active) goto next_snapshot;
pe = __lookup_pending_exception(snap, chunk); if (!pe) { /* * Check exception table to see if block is already * remapped in this snapshot and trigger an exception * if not.
*/
e = dm_lookup_exception(&snap->complete, chunk); if (e) goto next_snapshot;
dm_exception_table_unlock(&lock);
pe = alloc_pending_exception(snap);
dm_exception_table_lock(&lock);
pe2 = __lookup_pending_exception(snap, chunk);
if (!pe2) {
e = dm_lookup_exception(&snap->complete, chunk); if (e) {
free_pending_exception(pe); goto next_snapshot;
}
pe = __insert_pending_exception(snap, pe, chunk); if (!pe) {
dm_exception_table_unlock(&lock);
up_read(&snap->lock);
/* * If an origin bio was supplied, queue it to wait for the * completion of this exception, and start this one last, * at the end of the function.
*/ if (bio) {
bio_list_add(&pe->origin_bios, bio);
bio = NULL;
if (pe_to_start_now) {
start_copy(pe_to_start_now);
pe_to_start_now = NULL;
}
}
/* * Submit the exception against which the bio is queued last, * to give the other exceptions a head start.
*/ if (pe_to_start_last)
start_copy(pe_to_start_last);
return r;
}
/*
 * Called on a write from the origin driver.
 * Returns DM_MAPIO_REMAPPED if no exception work was needed, otherwise
 * whatever __origin_write() decided.
 */
static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit)
{
	struct origin *o;
	int r = DM_MAPIO_REMAPPED;

again:
	down_read(&_origins_lock);
	o = __lookup_origin(origin->bdev);
	if (o) {
		if (limit) {
			struct dm_snapshot *s;

			/*
			 * Throttle against each snapshot's in-progress COW
			 * count.  A false return triggers a full restart of
			 * the lookup; presumably wait_for_in_progress() has
			 * dropped _origins_lock in that case -- TODO confirm
			 * against its definition (not visible in this chunk).
			 */
			list_for_each_entry(s, &o->snapshots, list)
				if (unlikely(!wait_for_in_progress(s, true)))
					goto again;
		}

		r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
	}
	up_read(&_origins_lock);

	return r;
}
/*
 * Trigger exceptions in all non-merging snapshots.
 *
 * The chunk size of the merging snapshot may be larger than the chunk
 * size of some other snapshot so we may need to reallocate multiple
 * chunks in other snapshots.
 *
 * We scan all the overlapping exceptions in the other snapshots.
 * Returns 1 if anything was reallocated and must be waited for,
 * otherwise returns 0.
 *
 * size must be a multiple of merging_snap's chunk_size.
 */
static int origin_write_extent(struct dm_snapshot *merging_snap,
			       sector_t sector, unsigned int size)
{
	int must_wait = 0;
	sector_t n;
	struct origin *o;

	/*
	 * The origin's __minimum_chunk_size() got stored in max_io_len
	 * by snapshot_merge_resume().
	 */
	down_read(&_origins_lock);
	o = __lookup_origin(merging_snap->origin->bdev);
	/*
	 * NOTE(review): 'o' is dereferenced without a NULL check below;
	 * this relies on a merging snapshot's origin always being
	 * registered -- confirm.
	 */
	for (n = 0; n < size; n += merging_snap->ti->max_io_len)
		if (__origin_write(&o->snapshots, sector + n, NULL) ==
		    DM_MAPIO_SUBMITTED)
			must_wait = 1;
	up_read(&_origins_lock);

	return must_wait;
}
/* * Origin: maps a linear range of a device, with hooks for snapshotting.
*/
/* * Construct an origin mapping: <dev_path> * The context for an origin is merely a 'struct dm_dev *' * pointing to the real device.
*/ staticint origin_ctr(struct dm_target *ti, unsignedint argc, char **argv)
{ int r; struct dm_origin *o;
if (argc != 1) {
ti->error = "origin: incorrect number of arguments"; return -EINVAL;
}
o = kmalloc(sizeof(struct dm_origin), GFP_KERNEL); if (!o) {
ti->error = "Cannot allocate private origin structure";
r = -ENOMEM; goto bad_alloc;
}
r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &o->dev); if (r) {
ti->error = "Cannot get target device"; goto bad_open;
}
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.0.29Bemerkung:
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.