/* * Snapshot trees: * * Keys in BTREE_ID_snapshot_trees identify a whole tree of snapshot nodes; they * exist to provide a stable identifier for the whole lifetime of a snapshot * tree.
*/
if (BCH_SNAPSHOT_SUBVOL(s.v))
prt_str(out, "subvol "); if (BCH_SNAPSHOT_WILL_DELETE(s.v))
prt_str(out, "will_delete "); if (BCH_SNAPSHOT_DELETED(s.v))
prt_str(out, "deleted ");
prt_printf(out, "parent %10u children %10u %10u subvol %u tree %u",
le32_to_cpu(s.v->parent),
le32_to_cpu(s.v->children[0]),
le32_to_cpu(s.v->children[1]),
le32_to_cpu(s.v->subvol),
le32_to_cpu(s.v->tree));
if (fsck_err_on(ret ||
root_id != bch2_snapshot_root(c, root_id) ||
st.k->p.offset != le32_to_cpu(s.tree),
trans, snapshot_tree_to_missing_snapshot, "snapshot tree points to missing/incorrect snapshot:\n%s",
(bch2_bkey_val_to_text(&buf, c, st.s_c),
prt_newline(&buf),
ret
? prt_printf(&buf, "(%s)", bch2_err_str(ret))
: bch2_bkey_val_to_text(&buf, c, snapshot_k.s_c),
buf.buf))) {
ret = bch2_btree_delete_at(trans, iter, 0); goto err;
}
if (!st.v->master_subvol) goto out;
ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), false, &subvol); if (ret && !bch2_err_matches(ret, ENOENT)) goto err;
if (fsck_err_on(ret,
trans, snapshot_tree_to_missing_subvol, "snapshot tree points to missing subvolume:\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
fsck_err_on(!bch2_snapshot_is_ancestor(c,
le32_to_cpu(subvol.snapshot),
root_id),
trans, snapshot_tree_to_wrong_subvol, "snapshot tree points to subvolume that does not point to snapshot in this tree:\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol),
trans, snapshot_tree_to_snapshot_subvol, "snapshot tree points to snapshot subvolume:\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { struct bkey_i_snapshot_tree *u;
u32 subvol_id;
ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
bch_err_fn(c, ret);
if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
ret = 0; goto err;
}
if (ret) goto err;
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree);
ret = PTR_ERR_OR_ZERO(u); if (ret) goto err;
/* * For each snapshot_tree, make sure it points to the root of a snapshot tree * and that snapshot entry points back to it, or delete it. * * And, make sure it points to a subvolume within that snapshot tree, or correct * it to point to the oldest subvolume within that snapshot tree.
*/ int bch2_check_snapshot_trees(struct bch_fs *c)
{ int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter,
BTREE_ID_snapshot_trees, POS_MIN,
BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_snapshot_tree(trans, &iter, k)));
bch_err_fn(c, ret); return ret;
}
/* * Look up snapshot tree for @tree_id and find root, * make sure @snap_id is a descendent:
*/ staticint snapshot_tree_ptr_good(struct btree_trans *trans,
u32 snap_id, u32 tree_id)
{ struct bch_snapshot_tree s_t; int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t);
if (bch2_err_matches(ret, ENOENT)) return 0; if (ret) return ret;
for (i = 0; i < 3; i++) if (!s.parent) { if (s.skip[i]) returnfalse;
} else { if (!bch2_snapshot_is_ancestor_early(trans->c, id, le32_to_cpu(s.skip[i]))) returnfalse;
}
returntrue;
}
/* * snapshot_tree pointer was incorrect: look up root snapshot node, make sure * its snapshot_tree pointer is correct (allocate new one if necessary), then * update this node's pointer to root node's pointer:
*/ staticint snapshot_tree_ptr_repair(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, struct bch_snapshot *s)
{ struct bch_fs *c = trans->c; struct btree_iter root_iter; struct bch_snapshot_tree s_t; struct bkey_s_c_snapshot root; struct bkey_i_snapshot *u;
u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id; int ret;
root = bch2_bkey_get_iter_typed(trans, &root_iter,
BTREE_ID_snapshots, POS(0, root_id),
BTREE_ITER_with_updates, snapshot);
ret = bkey_err(root); if (ret) goto err;
tree_id = le32_to_cpu(root.v->tree);
ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); if (ret && !bch2_err_matches(ret, ENOENT)) return ret;
if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) {
u = bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u) ?:
bch2_snapshot_tree_create(trans, root_id,
bch2_snapshot_oldest_subvol(c, root_id, NULL),
&tree_id); if (ret) goto err;
id = le32_to_cpu(s.parent); if (id) {
ret = bch2_snapshot_lookup(trans, id, &v); if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot with nonexistent parent:\n %s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)); if (ret) goto err;
if (le32_to_cpu(v.children[0]) != k.k->p.offset &&
le32_to_cpu(v.children[1]) != k.k->p.offset) {
bch_err(c, "snapshot parent %u missing pointer to child %llu",
id, k.k->p.offset);
ret = -EINVAL; goto err;
}
}
for (i = 0; i < 2 && s.children[i]; i++) {
id = le32_to_cpu(s.children[i]);
ret = bch2_snapshot_lookup(trans, id, &v); if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot node %llu has nonexistent child %u",
k.k->p.offset, id); if (ret) goto err;
if (le32_to_cpu(v.parent) != k.k->p.offset) {
bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)",
id, le32_to_cpu(v.parent), k.k->p.offset);
ret = -EINVAL; goto err;
}
}
if (should_have_subvol) {
id = le32_to_cpu(s.subvol);
ret = bch2_subvolume_get(trans, id, false, &subvol); if (bch2_err_matches(ret, ENOENT))
bch_err(c, "snapshot points to nonexistent subvolume:\n %s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)); if (ret) goto err;
if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) {
bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL",
k.k->p.offset);
ret = -EINVAL; goto err;
}
} else { if (fsck_err_on(s.subvol,
trans, snapshot_should_not_have_subvol, "snapshot should not point to subvol:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u); if (ret) goto err;
u->v.subvol = 0;
s = u->v;
}
}
ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree)); if (ret < 0) goto err;
if (fsck_err_on(!ret,
trans, snapshot_to_bad_snapshot_tree, "snapshot points to missing/incorrect tree:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = snapshot_tree_ptr_repair(trans, iter, k, &s); if (ret) goto err;
}
ret = 0;
real_depth = bch2_snapshot_depth(c, parent_id);
if (fsck_err_on(le32_to_cpu(s.depth) != real_depth,
trans, snapshot_bad_depth, "snapshot with incorrect depth field, should be %u:\n%s",
real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u); if (ret) goto err;
u->v.depth = cpu_to_le32(real_depth);
s = u->v;
}
ret = snapshot_skiplist_good(trans, k.k->p.offset, s); if (ret < 0) goto err;
if (fsck_err_on(!ret,
trans, snapshot_bad_skiplist, "snapshot with bad skiplist field:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u); if (ret) goto err;
for (i = 0; i < ARRAY_SIZE(u->v.skip); i++)
u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id));
bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32);
s = u->v;
}
ret = 0;
err:
fsck_err:
printbuf_exit(&buf); return ret;
}
int bch2_check_snapshots(struct bch_fs *c)
{ /* * We iterate backwards as checking/fixing the depth field requires that * the parent's depth already be correct:
*/ int ret = bch2_trans_run(c,
for_each_btree_key_reverse_commit(trans, iter,
BTREE_ID_snapshots, POS_MAX,
BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_snapshot(trans, &iter, k)));
bch_err_fn(c, ret); return ret;
}
int __bch2_check_key_has_snapshot(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
{ struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; int ret = 0; enum snapshot_id_state state = bch2_snapshot_id_state(c, k.k->p.snapshot);
/* Snapshot was definitively deleted, this error is marked autofix */ if (fsck_err_on(state == SNAPSHOT_ID_deleted,
trans, bkey_in_deleted_snapshot, "key in deleted snapshot %s, delete?",
(bch2_btree_id_to_text(&buf, iter->btree_id),
prt_char(&buf, ' '),
bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node) ?: 1;
if (state == SNAPSHOT_ID_empty) { /* * Snapshot missing: we should have caught this with btree_lost_data and * kicked off reconstruct_snapshots, so if we end up here we have no * idea what happened. * * Do not delete unless we know that subvolumes and snapshots * are consistent: * * XXX: * * We could be smarter here, and instead of using the generic * recovery pass ratelimiting, track if there have been any * changes to the snapshots or inodes btrees since those passes * last ran.
*/
ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_snapshots) ?: ret;
ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_subvols) ?: ret;
if (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots))
ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;
if (!parent_id) { /* * We're deleting the root of a snapshot tree: update the * snapshot_tree entry to point to the new root, or delete it if * this is the last snapshot ID in this tree:
*/ struct bkey_i_snapshot_tree *s_t;
BUG_ON(s->v.children[1]);
s_t = bch2_bkey_get_mut_typed(trans, &tree_iter,
BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s->v.tree)),
0, snapshot_tree);
ret = PTR_ERR_OR_ZERO(s_t); if (ret) goto err;
/* * Create new snapshot IDs as children of an existing snapshot ID:
*/ staticint bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent,
u32 *new_snapids,
u32 *snapshot_subvols, unsigned nr_snapids)
{ struct btree_iter iter; struct bkey_i_snapshot *n_parent; int ret = 0;
n_parent = bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_snapshots, POS(0, parent),
0, snapshot);
ret = PTR_ERR_OR_ZERO(n_parent); if (unlikely(ret)) { if (bch2_err_matches(ret, ENOENT))
bch_err(trans->c, "snapshot %u not found", parent); return ret;
}
if (n_parent->v.children[0] || n_parent->v.children[1]) {
bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children");
ret = -EINVAL; goto err;
}
ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree),
new_snapids, snapshot_subvols, nr_snapids); if (ret) goto err;
/* * Create a snapshot node that is the root of a new tree:
*/ staticint bch2_snapshot_node_create_tree(struct btree_trans *trans,
u32 *new_snapids,
u32 *snapshot_subvols, unsigned nr_snapids)
{ struct bkey_i_snapshot_tree *n_tree; int ret;
n_tree = __bch2_snapshot_tree_create(trans);
ret = PTR_ERR_OR_ZERO(n_tree) ?:
create_snapids(trans, 0, n_tree->k.p.offset,
new_snapids, snapshot_subvols, nr_snapids); if (ret) return ret;
/* * If we have an unlinked inode in an internal snapshot node, and the inode * really has been deleted in all child snapshots, how does this get cleaned up? * * first there is the problem of how keys that have been overwritten in all * child snapshots get deleted (unimplemented?), but inodes may perhaps be * special? * * also: unlinked inode in internal snapshot appears to not be getting deleted * correctly if inode doesn't exist in leaf snapshots * * solution: * * for a key in an interior snapshot node that needs work to be done that * requires it to be mutated: iterate over all descendent leaf nodes and copy * that key to snapshot leaf nodes, where we can mutate it
*/
for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) if (s->children[i] &&
!snapshot_list_has_id(delete_leaves, s->children[i]) &&
!interior_delete_has_id(delete_interior, s->children[i])) return s->children[i];
for (unsigned i = 0; i < ARRAY_SIZE(s->children); i++) {
u32 live_child = s->children[i]
? __live_child(t, s->children[i], delete_leaves, delete_interior)
: 0; if (live_child) return live_child;
}
/* * For a given snapshot, if it doesn't have a subvolume that points to it, and * it doesn't have child snapshot nodes - it's now redundant and we can mark it * as deleted.
*/ staticint check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s_c k)
{ if (k.k->type != KEY_TYPE_snapshot) return 0;
struct bch_fs *c = trans->c; struct snapshot_delete *d = &c->snapshot_delete; struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); unsigned live_children = 0; int ret = 0;
if (BCH_SNAPSHOT_SUBVOL(s.v)) return 0;
if (BCH_SNAPSHOT_DELETED(s.v)) return 0;
mutex_lock(&d->progress_lock); for (unsigned i = 0; i < 2; i++) {
u32 child = le32_to_cpu(s.v->children[i]);
int __bch2_delete_dead_snapshots(struct bch_fs *c)
{ struct snapshot_delete *d = &c->snapshot_delete; int ret = 0;
if (!mutex_trylock(&d->lock)) return 0;
if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) goto out_unlock;
struct btree_trans *trans = bch2_trans_get(c);
/* * For every snapshot node: If we have no live children and it's not * pointed to by a subvolume, delete it:
*/
d->running = true;
d->pos = BBPOS_MIN;
ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
check_should_delete_snapshot(trans, k)); if (!bch2_err_matches(ret, EROFS))
bch_err_msg(c, ret, "walking snapshots"); if (ret) goto err;
if (!d->delete_leaves.nr && !d->delete_interior.nr) goto err;
ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf));
printbuf_exit(&buf); if (ret) goto err;
}
ret = !bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)
? delete_dead_snapshot_keys_v2(trans)
: delete_dead_snapshot_keys_v1(trans); if (!bch2_err_matches(ret, EROFS))
bch_err_msg(c, ret, "deleting keys from dying snapshots"); if (ret) goto err;
darray_for_each(d->delete_leaves, i) {
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, *i)); if (!bch2_err_matches(ret, EROFS))
bch_err_msg(c, ret, "deleting snapshot %u", *i); if (ret) goto err;
}
/* * Fixing children of deleted snapshots can't be done completely * atomically, if we crash between here and when we delete the interior * nodes some depth fields will be off:
*/
ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN,
BTREE_ITER_intent, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &d->delete_interior)); if (ret) goto err;
if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot)) {
ret = 1; break;
}
}
bch2_trans_iter_exit(trans, &iter);
return ret;
}
staticbool interior_snapshot_needs_delete(struct bkey_s_c_snapshot snap)
{ /* If there's one child, it's redundant and keys will be moved to the child */ return !!snap.v->children[0] + !!snap.v->children[1] == 1;
}
int bch2_snapshots_read(struct bch_fs *c)
{ /* * Initializing the is_ancestor bitmaps requires ancestors to already be * initialized - so mark in reverse:
*/ int ret = bch2_trans_run(c,
for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots,
POS_MAX, 0, k,
__bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
bch2_check_snapshot_needs_deletion(trans, k)));
bch_err_fn(c, ret);
/* * It's important that we check if we need to reconstruct snapshots * before going RW, so we mark that pass as required in the superblock - * otherwise, we could end up deleting keys with missing snapshot nodes * instead
*/
BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
test_bit(BCH_FS_may_go_rw, &c->flags));
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.