/* * Returns true if it's a btree we can easily reconstruct, or otherwise won't * cause data loss if it's missing:
*/ staticbool btree_id_important(enum btree_id btree)
{ if (btree_id_is_alloc(btree)) returnfalse;
switch (btree) { case BTREE_ID_quotas: case BTREE_ID_snapshot_trees: case BTREE_ID_logged_ops: case BTREE_ID_rebalance_work: case BTREE_ID_subvolume_children: returnfalse; default: returntrue;
}
}
/*
 * NOTE(review): mid-function fragment — this appears to be the child-node
 * iteration loop of interior-node topology repair; the enclosing function
 * and loop header are outside this chunk.
 */
/* Child node unreadable (I/O error): evict it and delete its journal key. */
if (bch2_err_matches(ret, EIO)) {
bch2_btree_node_evict(trans, cur_k.k);
cur = NULL;
/* Drop the pointer to the dead node, then move on to the next child. */
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p); if (ret) break; continue;
}
/* Any other error fetching the node aborts this pass. */
bch_err_msg(c, ret, "getting btree node"); if (ret) break;
/* Node is older than what node scanning found: discard it. */
if (bch2_btree_node_is_stale(c, cur)) {
bch_info(c, "btree node older than nodes found by scanning\n %s", buf.buf);
six_unlock_read(&cur->c.lock);
bch2_btree_node_evict(trans, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
cur = NULL; if (ret) break; continue;
}
/*
 * Check/repair key-range boundaries between prev and cur; presumably
 * returns DID_FILL_FROM_SCAN / DROP_THIS_NODE / DROP_PREV_NODE (>= 0)
 * or a negative error — TODO confirm against the helper's definition.
 */
ret = lockrestart_do(trans,
btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan)); if (ret < 0) goto err;
/* A gap was filled from scanned nodes: another repair pass is needed. */
if (ret == DID_FILL_FROM_SCAN) {
new_pass = true;
ret = 0;
}
/* Boundary check decided cur must be dropped: evict it and delete its key. */
if (ret == DROP_THIS_NODE) {
six_unlock_read(&cur->c.lock);
bch2_btree_node_evict(trans, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
cur = NULL; if (ret) break; continue;
}
/* Done with the previous node; release our read lock before advancing. */
if (prev)
six_unlock_read(&prev->c.lock);
prev = NULL;
/* Boundary check decided the previous node must be dropped instead. */
if (ret == DROP_PREV_NODE) {
bch_info(c, "dropped prev node");
bch2_btree_node_evict(trans, prev_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, prev_k.k->k.p); if (ret) break;
/*
 * XXX: we're not passing the trans object here because we're not set up
 * to handle a transaction restart - this code needs to be rewritten
 * when we start doing online topology repair.
 */
bch2_trans_unlock_long(trans); if (mustfix_fsck_err_on(!have_child,
c, btree_node_topology_interior_node_empty, "empty interior btree node at %s", buf.buf))
ret = DROP_THIS_NODE;
err:
/* Common exit: drop whatever node read locks are still held. */
fsck_err: if (!IS_ERR_OR_NULL(prev))
six_unlock_read(&prev->c.lock); if (!IS_ERR_OR_NULL(cur))
six_unlock_read(&cur->c.lock);
/*
 * NOTE(review): fragment boundary — the lines below reference r->error
 * and appear to come from btree-root recovery, a different function.
 */
/* Root unreadable: see whether the node scan found anything for this btree. */
if (r->error) {
bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);
ret = bch2_btree_has_scanned_nodes(c, btree); if (ret < 0) goto err;
if (!ret) {
/* Scan found nothing either; ask the user (auto-fix for unimportant btrees). */
__fsck_err(trans,
FSCK_CAN_FIX|(!btree_id_important(btree) ? FSCK_AUTOFIX : 0),
btree_root_unreadable_and_scan_found_nothing, "no nodes found for btree %s, continue?", buf.buf);
/*
 * bch2_check_topology() - check and repair topology of every alive btree.
 * NOTE(review): only the opening of this function is visible in this
 * chunk; the rest of its body is elsewhere.
 */
int bch2_check_topology(struct bch_fs *c)
{ struct btree_trans *trans = bch2_trans_get(c); struct bpos pulled_from_scan = POS_MIN; int ret = 0;
bch2_trans_srcu_unlock(trans);
/* Check (and if needed reconstruct) the root of each alive btree. */
for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) { bool reconstructed_root = false;
recover:
ret = lockrestart_do(trans, bch2_check_root(trans, i, &reconstructed_root)); if (ret) break;
/*
 * NOTE(review): fragment boundary — the lines below reference *prev and
 * b, which are not declared above; they belong to a different function.
 */
/* Only re-check a node's topology the first time we encounter it. */
if (*prev != b) { int ret = bch2_btree_node_check_topology(trans, b); if (ret) return ret;
}
*prev = b;
}
/*
 * NOTE(review): mid-function fragment — appears to be the GC mark-key
 * path; the function signature (trans, btree_id, level, iter, k,
 * initial, ...) is outside this chunk.
 */
struct bkey deleted = KEY(0, 0, 0); struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; struct printbuf buf = PRINTBUF; int ret = 0;
deleted.p = k.k->p;
if (initial) {
/* A key version beyond the journal sequence number indicates corruption. */
BUG_ON(static_branch_unlikely(&bch2_journal_seq_verify) &&
k.k->bversion.lo > atomic64_read(&c->journal.seq));
/* Repair: bump the recorded max key version if this key exceeds it. */
if (fsck_err_on(btree_id != BTREE_ID_accounting &&
k.k->bversion.lo > atomic64_read(&c->key_version),
trans, bkey_version_in_future, "key version number higher than recorded %llu\n%s",
atomic64_read(&c->key_version),
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
atomic64_set(&c->key_version, k.k->bversion.lo);
}
/* Repair: mark interior btree node pointers in the member-info bitmap. */
if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),
trans, btree_bitmap_not_marked, "btree ptr not marked in member info btree allocated bitmap\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) {
mutex_lock(&c->sb_lock);
bch2_dev_btree_bitmap_mark(c, k);
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
}
/*
 * We require a commit before key_trigger() because
 * key_trigger(BTREE_TRIGGER_GC) is not idempotent; we'll calculate the
 * wrong result if we run it multiple times.
 */
unsigned flags = !iter ? BTREE_TRIGGER_is_root : 0;
ret = bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(k),
BTREE_TRIGGER_check_repair|flags); if (ret) goto out;
/* check_repair queued updates: commit them and restart before the trigger runs. */
if (trans->nr_updates) {
ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
-BCH_ERR_transaction_restart_nested; goto out;
}
/*
 * NOTE(review): mid-function fragment — appears to reconcile an alloc
 * key against GC's recomputed bucket state; the function opening is
 * outside this chunk.
 */
/*
 * gc.data_type doesn't yet include need_discard & need_gc_gen states -
 * fix that here:
 */
alloc_data_type_set(&gc, gc.data_type); if (gc.data_type != old_gc.data_type ||
gc.dirty_sectors != old_gc.dirty_sectors) {
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old_gc, &gc, BTREE_TRIGGER_gc); if (ret) return ret;
/*
 * Ugly: alloc_key_to_dev_counters(..., BTREE_TRIGGER_gc) is not
 * safe w.r.t. transaction restarts, so fixup the gc_bucket so
 * we don't run it twice:
 */
struct bucket *gc_m = gc_bucket(ca, iter->pos.offset);
gc_m->data_type = gc.data_type;
gc_m->dirty_sectors = gc.dirty_sectors;
}
/* Repair: on-disk data_type must match what GC recomputed. */
if (fsck_err_on(new.data_type != gc.data_type,
trans, alloc_key_data_type_wrong, "bucket %llu:%llu gen %u has wrong data_type" ": got %s, should be %s",
iter->pos.inode, iter->pos.offset,
gc.gen,
bch2_data_type_str(new.data_type),
bch2_data_type_str(gc.data_type))) new.data_type = gc.data_type;
/* Helper: repair any other alloc-key field that disagrees with GC's value. */
#define copy_bucket_field(_errtype, _f) \ if (fsck_err_on(new._f != gc._f, \
trans, _errtype, \ "bucket %llu:%llu gen %u data type %s has wrong "#_f \ ": got %llu, should be %llu", \
iter->pos.inode, iter->pos.offset, \
gc.gen, \
bch2_data_type_str(gc.data_type), \
(u64) new._f, (u64) gc._f)) \ new._f = gc._f; \
/* Write the repaired alloc key back (triggers suppressed, see below). */
a = bch2_alloc_to_v4_mut(trans, k);
ret = PTR_ERR_OR_ZERO(a); if (ret) return ret;
a->v = new;
/*
 * The trigger normally makes sure these are set, but we're not running
 * triggers:
 */
if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
a->v.io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
ret = bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_norun);
fsck_err: return ret;
}
staticint bch2_gc_alloc_done(struct bch_fs *c)
{ int ret = 0;
/** * bch2_check_allocations - walk all references to buckets, and recompute them: * * @c: filesystem object * * Returns: 0 on success, or standard errcode on failure * * Order matters here: * - Concurrent GC relies on the fact that we have a total ordering for * everything that GC walks - see gc_will_visit_node(), * gc_will_visit_root() * * - also, references move around in the course of index updates and * various other crap: everything needs to agree on the ordering * references are allowed to move around in - e.g., we're allowed to * start with a reference owned by an open_bucket (the allocator) and * move it to the btree, but not the reverse. * * This is necessary to ensure that gc doesn't miss references that * move around - if references move backwards in the ordering GC * uses, GC could skip past them
*/ int bch2_check_allocations(struct bch_fs *c)
{ int ret;
ret = bch2_gc_accounting_start(c) ?:
bch2_gc_start(c) ?:
bch2_gc_alloc_start(c) ?:
bch2_gc_reflink_start(c); if (ret) goto out;
gc_pos_set(c, gc_phase(GC_PHASE_start));
ret = bch2_mark_superblocks(c);
bch_err_msg(c, ret, "marking superblocks"); if (ret) goto out;
ret = bch2_gc_btrees(c); if (ret) goto out;
c->gc_count++;
ret = bch2_gc_alloc_done(c) ?:
bch2_gc_accounting_done(c) ?:
bch2_gc_stripes_done(c) ?:
bch2_gc_reflink_done(c);
out:
percpu_down_write(&c->mark_lock); /* Indicates that gc is no longer in progress: */
__gc_pos_set(c, gc_phase(GC_PHASE_not_running));
bch2_gc_free(c);
percpu_up_write(&c->mark_lock);
up_write(&c->gc_lock);
up_read(&c->state_lock);
/* * At startup, allocations can happen directly instead of via the * allocator thread - issue wakeup in case they blocked on gc_lock:
*/
closure_wake_up(&c->freelist_wait);
if (!ret && !test_bit(BCH_FS_errors_not_fixed, &c->flags))
bch2_sb_members_clean_deleted(c);
/*
 * bch2_gc_gens() - garbage-collect bucket generation numbers.
 * NOTE(review): only the opening of this function is visible in this
 * chunk; the rest of its body is elsewhere.
 */
int bch2_gc_gens(struct bch_fs *c)
{
u64 b, start_time = local_clock(); int ret;
/* Only one gens GC at a time; bail quietly if one is already running. */
if (!mutex_trylock(&c->gc_gens_lock)) return 0;
trace_and_count(c, gc_gens_start, c);
/*
 * We have to use trylock here. Otherwise, we would
 * introduce a deadlock in the RO path - we take the
 * state lock at the start of going RO.
 */
if (!down_read_trylock(&c->state_lock)) {
mutex_unlock(&c->gc_gens_lock); return 0;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.