// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
 * Copyright (C) 2006-2008 Red Hat GmbH
 *
 * This file is released under the GPL.
 */
/*
 *---------------------------------------------------------------
 * Persistent snapshots, by persistent we mean that the snapshot
 * will survive a reboot.
 *---------------------------------------------------------------
 */
/*
 * We need to store a record of which parts of the origin have
 * been copied to the snapshot device. The snapshot code
 * requires that we copy exception chunks to chunk aligned areas
 * of the COW store. It makes sense therefore, to store the
 * metadata in chunk size blocks.
 *
 * There is no backward or forward compatibility implemented,
 * snapshots with different disk versions than the kernel will
 * not be usable. It is expected that "lvcreate" will blank out
 * the start of a fresh COW device before calling the snapshot
 * constructor.
 *
 * The first chunk of the COW device just contains the header.
 * After this there is a chunk filled with exception metadata,
 * followed by as many exception chunks as can fit in the
 * metadata areas.
 *
 * All on disk structures are in little-endian format. The end
 * of the exceptions info is indicated by an exception with a
 * new_chunk of 0, which is invalid since it would point to the
 * header chunk.
 */
/*
 * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
 */
#define SNAP_MAGIC 0x70416e53
/*
 * The on-disk version of the metadata.
 */
#define SNAPSHOT_DISK_VERSION 1
#define NUM_SNAPSHOT_HDR_CHUNKS 1
struct disk_header {
__le32 magic;
/* * Is this snapshot valid. There is no way of recovering * an invalid snapshot.
*/
__le32 valid;
/* * The top level structure for a persistent exception store.
*/ struct pstore { struct dm_exception_store *store; int version; int valid;
uint32_t exceptions_per_area;
/* * Now that we have an asynchronous kcopyd there is no * need for large chunk sizes, so it wont hurt to have a * whole chunks worth of metadata in memory at once.
*/ void *area;
/* * An area of zeros used to clear the next area.
*/ void *zero_area;
/* * An area used for header. The header can be written * concurrently with metadata (when invalidating the snapshot), * so it needs a separate buffer.
*/ void *header_area;
/* * Used to keep track of which metadata area the data in * 'chunk' refers to.
*/
chunk_t current_area;
/* * The next free chunk for an exception. * * When creating exceptions, all the chunks here and above are * free. It holds the next chunk to be allocated. On rare * occasions (e.g. after a system crash) holes can be left in * the exception store because chunks can be committed out of * order. * * When merging exceptions, it does not necessarily mean all the * chunks here and above are free. It holds the value it would * have held if all chunks had been committed in order of * allocation. Consequently the value may occasionally be * slightly too low, but since it's only used for 'status' and * it can never reach its minimum value too early this doesn't * matter.
*/
chunk_t next_free;
/* * The index of next free exception in the current * metadata area.
*/
uint32_t current_committed;
/*
 * Read or write a chunk aligned and sized block of data from a device.
 *
 * @ps:       the persistent exception store the I/O belongs to
 * @area:     kernel virtual buffer of one chunk in size
 * @chunk:    chunk index on the COW device to read or write
 * @opf:      block layer operation and flags (REQ_OP_READ/WRITE, ...)
 * @metadata: non-zero if this is a metadata chunk; metadata I/O is
 *            issued from a dedicated workqueue
 *
 * Returns 0 on success or a negative errno from dm_io().
 */
static int chunk_io(struct pstore *ps, void *area, chunk_t chunk,
		    blk_opf_t opf, int metadata)
{
	struct dm_io_region where = {
		.bdev = dm_snap_cow(ps->store->snap)->bdev,
		.sector = ps->store->chunk_size * chunk,
		.count = ps->store->chunk_size,
	};
	struct dm_io_request io_req = {
		.bi_opf = opf,
		.mem.type = DM_IO_VMA,
		.mem.ptr.vma = area,
		.client = ps->io_client,
		.notify.fn = NULL,	/* NULL notify => synchronous dm_io() */
	};
	struct mdata_req req;

	/* Ordinary data chunks can be submitted directly from this context. */
	if (!metadata)
		return dm_io(&io_req, 1, &where, NULL, IOPRIO_DEFAULT);

	req.where = &where;
	req.io_req = &io_req;

	/*
	 * Issue the synchronous I/O from a different thread
	 * to avoid submit_bio_noacct recursion.
	 */
	INIT_WORK_ONSTACK(&req.work, do_metadata);
	queue_work(ps->metadata_wq, &req.work);
	flush_workqueue(ps->metadata_wq);
	destroy_work_on_stack(&req.work);

	return req.result;
}
/*
 * Convert a metadata area index to a chunk index.
 *
 * Each area occupies one metadata chunk followed by
 * ps->exceptions_per_area data chunks, and the whole layout is
 * offset by the header chunk(s) at the start of the COW device.
 */
static chunk_t area_location(struct pstore *ps, chunk_t area)
{
	chunk_t stride = ps->exceptions_per_area + 1;

	return NUM_SNAPSHOT_HDR_CHUNKS + stride * area;
}
if (sector_div(next_free, stride) == NUM_SNAPSHOT_HDR_CHUNKS)
ps->next_free++;
}
/* * Read or write a metadata area. Remembering to skip the first * chunk which holds the header.
*/ staticint area_io(struct pstore *ps, blk_opf_t opf)
{
chunk_t chunk = area_location(ps, ps->current_area);
/* * Access functions for the disk exceptions, these do the endian conversions.
*/ staticstruct disk_exception *get_exception(struct pstore *ps, void *ps_area,
uint32_t index)
{
BUG_ON(index >= ps->exceptions_per_area);
/*
 * Registers the exceptions that are present in the current area.
 * 'full' is filled in to indicate if the area has been
 * filled.
 *
 * @ps:               the persistent exception store being loaded
 * @ps_area:          in-memory copy of one metadata area
 * @callback:         invoked once per valid exception with its
 *                    (old_chunk, new_chunk) mapping
 * @callback_context: opaque pointer passed through to @callback
 * @full:             out parameter, set to 0 if a terminating
 *                    (new_chunk == 0) entry was found
 *
 * Returns 0 on success or the first non-zero value returned by
 * @callback.
 */
static int insert_exceptions(struct pstore *ps, void *ps_area,
			     int (*callback)(void *callback_context,
					     chunk_t old, chunk_t new),
			     void *callback_context, int *full)
{
	int r;
	unsigned int i;
	struct core_exception e;

	/* presume the area is full */
	*full = 1;

	for (i = 0; i < ps->exceptions_per_area; i++) {
		read_exception(ps, ps_area, i, &e);

		/*
		 * If the new_chunk is pointing at the start of
		 * the COW device, where the first metadata area
		 * is we know that we've hit the end of the
		 * exceptions.  Therefore the area is not full.
		 */
		if (e.new_chunk == 0LL) {
			ps->current_committed = i;
			*full = 0;
			break;
		}

		/*
		 * Keep track of the start of the free chunks.
		 */
		if (ps->next_free <= e.new_chunk)
			ps->next_free = e.new_chunk + 1;

		/*
		 * Otherwise we add the exception to the snapshot.
		 */
		r = callback(callback_context, e.old_chunk, e.new_chunk);
		if (r)
			return r;
	}

	return 0;
}
staticint read_exceptions(struct pstore *ps, int (*callback)(void *callback_context, chunk_t old,
chunk_t new), void *callback_context)
{ int r, full = 1; struct dm_bufio_client *client;
chunk_t prefetch_area = 0;
/* * First chunk is the fixed header. * Then there are (ps->current_area + 1) metadata chunks, each one * separated from the next by ps->exceptions_per_area data chunks.
*/
*metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) *
store->chunk_size;
}
/* * Add the callback to the back of the array. This code * is the only place where the callback array is * manipulated, and we know that it will never be called * multiple times concurrently.
*/
cb = ps->callbacks + ps->callback_count++;
cb->callback = callback;
cb->context = callback_context;
/* * If there are exceptions in flight and we have not yet * filled this metadata area there's nothing more to do.
*/ if (!atomic_dec_and_test(&ps->pending_count) &&
(ps->current_committed != ps->exceptions_per_area)) return;
/* * If we completely filled the current area, then wipe the next one.
*/ if ((ps->current_committed == ps->exceptions_per_area) &&
zero_disk_area(ps, ps->current_area + 1))
ps->valid = 0;
/* * Advance to the next area if this one is full.
*/ if (ps->current_committed == ps->exceptions_per_area) {
ps->current_committed = 0;
ps->current_area++;
zero_memory_area(ps);
}
for (i = 0; i < ps->callback_count; i++) {
cb = ps->callbacks + i;
cb->callback(cb->context, ps->valid);
}
/* * When current area is empty, move back to preceding area.
*/ if (!ps->current_committed) { /* * Have we finished?
*/ if (!ps->current_area) return 0;
ps->current_area--;
r = area_io(ps, REQ_OP_READ); if (r < 0) return r;
ps->current_committed = ps->exceptions_per_area;
}
/* * Find number of consecutive chunks within the current area, * working backwards.
*/ for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
nr_consecutive++) {
read_exception(ps, ps->area,
ps->current_committed - 1 - nr_consecutive, &ce); if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
ce.new_chunk != *last_new_chunk - nr_consecutive) break;
}
return nr_consecutive;
}
staticint persistent_commit_merge(struct dm_exception_store *store, int nr_merged)
{ int r, i; struct pstore *ps = get_info(store);
BUG_ON(nr_merged > ps->current_committed);
for (i = 0; i < nr_merged; i++)
clear_exception(ps, ps->current_committed - 1 - i);
r = area_io(ps, REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA); if (r < 0) return r;
ps->current_committed -= nr_merged;
/* * At this stage, only persistent_usage() uses ps->next_free, so * we make no attempt to keep ps->next_free strictly accurate * as exceptions may have been committed out-of-order originally. * Once a snapshot has become merging, we set it to the value it * would have held had all the exceptions been committed in order. * * ps->current_area does not get reduced by prepare_merge() until * after commit_merge() has removed the nr_merged previous exceptions.
*/
ps->next_free = area_location(ps, ps->current_area) +
ps->current_committed + 1;
r = dm_exception_store_type_register(&_persistent_type); if (r) {
DMERR("Unable to register persistent exception store type"); return r;
}
r = dm_exception_store_type_register(&_persistent_compat_type); if (r) {
DMERR("Unable to register old-style persistent exception store type");
dm_exception_store_type_unregister(&_persistent_type); return r;
}
/*
 * NOTE(review): The text below is website boilerplate (a German
 * disclaimer) picked up during extraction and is not part of the
 * original source file; it should be removed.
 * Translation: "The information on this web page was carefully
 * compiled to the best of our knowledge. However, neither
 * completeness, correctness, nor quality of the provided
 * information is guaranteed. Remark: the colored syntax display
 * and the measurement are still experimental."
 */