if (unlikely(btree_node_just_written(b)) &&
bch2_btree_post_write_cleanup(c, b))
bch2_trans_node_reinit_iter(trans, b);
/* * If the last bset has been written, or if it's gotten too big - start * a new bset to insert into:
*/ if (want_new_bset(c, b))
bch2_btree_init_next(trans, b);
}
static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
{ while (--i >= trans->updates) { if (same_leaf_as_prev(trans, i)) continue;
if ((i->flags & BTREE_TRIGGER_norun) ||
!btree_node_type_has_trans_triggers(i->bkey_type)) return 0;
/* * Transactional triggers create new btree_insert_entries, so we can't * pass them a pointer to a btree_insert_entry, that memory is going to * move:
*/ struct bkey old_k = i->old_k; struct bkey_s_c old = { &old_k, i->old_v }; conststruct bkey_ops *old_ops = bch2_bkey_type_ops(old.k->type); conststruct bkey_ops *new_ops = bch2_bkey_type_ops(i->k->k.type); unsigned flags = i->flags|BTREE_TRIGGER_transactional;
while (sort_id_start < trans->nr_updates) { unsigned i, sort_id = trans->updates[sort_id_start].sort_order; bool trans_trigger_run;
/* * For a given btree, this algorithm runs insert triggers before * overwrite triggers: this is so that when extents are being * moved (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop * references before they are re-added. * * Running triggers will append more updates to the list of * updates as we're walking it:
*/ do {
trans_trigger_run = false;
for (i = sort_id_start;
i < trans->nr_updates && trans->updates[i].sort_order <= sort_id;
i++) { if (trans->updates[i].sort_order < sort_id) {
sort_id_start = i; continue;
}
int ret = run_one_trans_trigger(trans, trans->updates + i); if (ret < 0) return ret; if (ret)
trans_trigger_run = true;
}
} while (trans_trigger_run);
/*
 * Re-run mark-and-sweep (gc) mem triggers for updates whose position has
 * already been visited by the in-progress gc pass, so gc's accounting stays
 * coherent with the commit.
 *
 * Returns 0 on success, or the first nonzero error from
 * run_one_mem_trigger().
 */
static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans)
{
	trans_for_each_update(trans, i)
		if (btree_node_type_has_triggers(i->bkey_type) &&
		    gc_visited(trans->c, gc_pos_btree(i->btree_id, i->level, i->k->k.p))) {
			int ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_gc);
			if (ret)
				return ret;
		}

	/*
	 * Fix: the function previously fell off the end without returning a
	 * value on the success path — undefined behavior for a non-void
	 * function whose result is used by the caller.
	 */
	return 0;
}
bch2_trans_verify_not_unlocked_or_in_restart(trans); #if 0 /* todo: bring back dynamic fault injection */ if (race_fault()) {
trace_and_count(c, trans_restart_fault_inject, trans, trace_ip); return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject);
} #endif /* * Check if the insert will fit in the leaf node with the write lock * held, otherwise another thread could write the node changing the * amount of space available:
*/
prefetch(&trans->c->journal.flags);
trans_for_each_update(trans, i) { /* Multiple inserts might go to same leaf: */ if (!same_leaf_as_prev(trans, i))
u64s = 0;
/* * Don't get journal reservation until after we know insert will * succeed:
*/ if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) {
ret = bch2_trans_journal_res_get(trans,
(flags & BCH_WATERMARK_MASK)|
JOURNAL_RES_GET_NONBLOCK); if (ret) return ret;
if (unlikely(trans->journal_transaction_names))
journal_transaction_name(trans);
}
/* * Not allowed to fail after we've gotten our journal reservation - we * have to use it:
*/
h = trans->hooks; while (h) {
ret = h->fn(trans, h); if (ret) return ret;
h = h->next;
}
struct bkey_i *accounting;
percpu_down_read(&c->mark_lock); for (accounting = btree_trans_subbuf_base(trans, &trans->accounting);
accounting != btree_trans_subbuf_top(trans, &trans->accounting);
accounting = bkey_next(accounting)) {
ret = bch2_accounting_trans_commit_hook(trans,
bkey_i_to_accounting(accounting), flags); if (ret) goto revert_fs_usage;
}
percpu_up_read(&c->mark_lock);
/* XXX: we only want to run this if deltas are nonzero */
bch2_trans_account_disk_usage_change(trans);
trans_for_each_update(trans, i) if (btree_node_type_has_atomic_triggers(i->bkey_type)) {
ret = run_one_mem_trigger(trans, i, BTREE_TRIGGER_atomic|i->flags); if (ret) goto fatal_err;
}
if (unlikely(c->gc_pos.phase)) {
ret = bch2_trans_commit_run_gc_triggers(trans); if (ret) goto fatal_err;
}
if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
validate_context.flags = BCH_VALIDATE_write|BCH_VALIDATE_commit;
for (struct jset_entry *i = btree_trans_journal_entries_start(trans);
i != btree_trans_journal_entries_top(trans);
i = vstruct_next(i)) {
ret = bch2_journal_entry_validate(c, NULL, i,
bcachefs_metadata_version_current,
CPU_BIG_ENDIAN, validate_context); if (unlikely(ret)) {
bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n",
trans->fn); goto fatal_err;
}
}
return 0;
fatal_err:
bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret));
percpu_down_read(&c->mark_lock);
revert_fs_usage: for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting);
i != accounting;
i = bkey_next(i))
bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags);
percpu_up_read(&c->mark_lock); return ret;
}
static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
{ /* * Accounting keys aren't deduped in the journal: we have to compare * each individual update against what's in the btree to see if it has * been applied yet, and accounting updates also don't overwrite, * they're deltas that accumulate.
*/
trans_for_each_update(trans, i) if (i->k->k.type != KEY_TYPE_accounting)
bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p);
}
if (!same_leaf_as_next(trans, i)) { if (u64s_delta <= 0) {
ret = bch2_foreground_maybe_merge(trans, i->path,
i->level, flags); if (unlikely(ret)) return ret;
}
u64s_delta = 0;
}
}
ret = bch2_trans_lock_write(trans); if (unlikely(ret)) return ret;
ret = bch2_trans_commit_write_locked(trans, flags, stopped_at, trace_ip);
if (!ret && unlikely(trans->journal_replay_not_finished))
bch2_drop_overwrites_from_journal(trans);
bch2_trans_unlock_updates_write(trans);
if (!ret && trans->journal_pin)
bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
trans->journal_pin,
bch2_trans_commit_journal_pin_flush);
/* * Drop journal reservation after dropping write locks, since dropping * the journal reservation may kick off a journal write:
*/ if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res)))
bch2_journal_res_put(&c->journal, &trans->journal_res);
return ret;
}
/*
 * Wait-condition helper: reports whether journal reclaim / key cache flushing
 * has made enough progress (or the journal has errored) for the commit path
 * to stop waiting.
 *
 * Returns a journal error if one is set, otherwise the key cache wait status;
 * while still waiting (0), kicks journal reclaim so progress continues.
 *
 * Fix: the original read "staticint" — fused tokens that do not compile;
 * split into "static int".
 */
static int journal_reclaim_wait_done(struct bch_fs *c)
{
	int ret = bch2_journal_error(&c->journal) ?:
		bch2_btree_key_cache_wait_done(c);

	if (!ret)
		journal_reclaim_kick(&c->journal);
	return ret;
}
if (bch2_err_matches(ret, BCH_ERR_journal_res_blocked)) { /* * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK * flag
*/ if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark < BCH_WATERMARK_reclaim) {
ret = bch_err_throw(c, journal_reclaim_would_deadlock); goto out;
}
/*
 * This is for updates done in the early part of fsck - btree_gc - before we've
 * gone RW. we only add the new key to the list of keys for journal replay to
 * do.
 */
static noinline int
do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;

	/* Only the recovery task may take this path */
	BUG_ON(current != c->recovery_task);

	/* Queue every btree update for journal replay: */
	trans_for_each_update(trans, i) {
		int ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k);
		if (ret)
			return ret;
	}

	/* Also queue keys carried in raw journal entries: */
	for (struct jset_entry *i = btree_trans_journal_entries_start(trans);
	     i != btree_trans_journal_entries_top(trans);
	     i = vstruct_next(i)) {
		if (i->type == BCH_JSET_ENTRY_btree_keys ||
		    i->type == BCH_JSET_ENTRY_write_buffer_keys) {
			jset_entry_for_each_key(i, k) {
				int ret = bch2_journal_key_insert(c, i->btree_id, i->level, k);
				if (ret)
					return ret;
			}
		}

		/*
		 * Fix: this if-block and the enclosing for-loop were never
		 * closed in the original, leaving the braces unbalanced (the
		 * accounting loop below was nested inside them and the
		 * function could not compile).
		 *
		 * NOTE(review): the body that applies btree_root entries under
		 * btree_root_lock appears truncated here — confirm the intended
		 * root-update logic against the upstream source before relying
		 * on this path.
		 */
		if (i->type == BCH_JSET_ENTRY_btree_root) {
			guard(mutex)(&c->btree_root_lock);
		}
	}

	/* Accounting updates go to the dedicated accounting btree: */
	for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting);
	     i != btree_trans_subbuf_top(trans, &trans->accounting);
	     i = bkey_next(i)) {
		int ret = bch2_journal_key_insert(c, BTREE_ID_accounting, 0, i);
		if (ret)
			return ret;
	}

	return 0;
}
int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
{ struct btree_insert_entry *errored_at = NULL; struct bch_fs *c = trans->c; unsigned journal_u64s = 0; int ret = 0;
ret = do_bch2_trans_commit(trans, flags, &errored_at, _RET_IP_);
/* make sure we didn't drop or screw up locks: */
bch2_trans_verify_locks(trans);
if (ret) goto err;
trace_and_count(c, transaction_commit, trans, _RET_IP_);
out: if (likely(!(flags & BCH_TRANS_COMMIT_no_check_rw)))
enumerated_ref_put(&c->writes, BCH_WRITE_REF_trans);
out_reset: if (!ret)
bch2_trans_downgrade(trans);
bch2_trans_reset_updates(trans);
return ret;
err:
ret = bch2_trans_commit_error(trans, flags, errored_at, ret, _RET_IP_); if (ret) goto out;
/* * We might have done another transaction commit in the error path - * i.e. btree write buffer flush - which will have made use of * trans->journal_res, but with BCH_TRANS_COMMIT_no_journal_res that is * how the journal sequence number to pin is passed in - so we must * restart:
*/ if (flags & BCH_TRANS_COMMIT_no_journal_res) {
ret = bch_err_throw(c, transaction_restart_nested); goto out;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.