/* * bch2_varint_decode can read past the end of the buffer by at * most 7 bytes (it won't be used):
*/ unsigned key_u64s = k.k->u64s + 1;
/* * Allocate some extra space so that the transaction commit path is less * likely to have to reallocate, since that requires a transaction * restart:
*/
key_u64s = min(256U, (key_u64s * 3) / 2);
key_u64s = roundup_pow_of_two(key_u64s);
struct bkey_cached *ck = bkey_cached_alloc(trans, ck_path, key_u64s); int ret = PTR_ERR_OR_ZERO(ck); if (ret) return ret;
if (unlikely(!ck)) {
ck = bkey_cached_reuse(bc); if (unlikely(!ck)) {
bch_err(c, "error allocating memory for key cache item, btree %s",
bch2_btree_id_str(ck_path->btree_id)); return bch_err_throw(c, ENOMEM_btree_key_cache_create);
}
}
bch2_trans_iter_init(trans, &iter, ck_path->btree_id, ck_path->pos,
BTREE_ITER_intent|
BTREE_ITER_key_cache_fill|
BTREE_ITER_cached_nofill);
iter.flags &= ~BTREE_ITER_with_journal;
k = bch2_btree_iter_peek_slot(trans, &iter);
ret = bkey_err(k); if (ret) goto err;
/* Recheck after btree lookup, before allocating: */
ck_path = trans->paths + ck_path_idx;
ret = bch2_btree_key_cache_find(c, ck_path->btree_id, ck_path->pos) ? -EEXIST : 0; if (unlikely(ret)) goto out;
ret = btree_key_cache_create(trans, btree_iter_path(trans, &iter), ck_path, k); if (ret) goto err;
if (trace_key_cache_fill_enabled())
do_trace_key_cache_fill(trans, ck_path, k);
out: /* We're not likely to need this iterator again: */
bch2_set_btree_iter_dontneed(trans, &iter);
err:
bch2_trans_iter_exit(trans, &iter); return ret;
}
int bch2_btree_path_traverse_cached(struct btree_trans *trans,
btree_path_idx_t path_idx, unsigned flags)
{
EBUG_ON(trans->paths[path_idx].level);
int ret; do {
ret = btree_path_traverse_cached_fast(trans, path_idx); if (unlikely(ret == -ENOENT))
ret = btree_key_cache_fill(trans, path_idx, flags);
} while (ret == -EEXIST);
ret = bch2_btree_iter_traverse(trans, &c_iter); if (ret) goto out;
ck = (void *) btree_iter_path(trans, &c_iter)->l[0].b; if (!ck) goto out;
if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { if (evict) goto evict; goto out;
}
if (journal_seq && ck->journal.seq != journal_seq) goto out;
trans->journal_res.seq = ck->journal.seq;
/* * If we're at the end of the journal, we really want to free up space * in the journal right away - we don't want to pin that old journal * sequence number with a new btree node write, we want to re-journal * the update
*/ if (ck->journal.seq == journal_last_seq(j))
commit_flags |= BCH_WATERMARK_reclaim;
if (ck->journal.seq != journal_last_seq(j) ||
!test_bit(JOURNAL_space_low, &c->journal.flags))
commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(trans, &b_iter);
ret = bkey_err(btree_k); if (ret) goto err;
/* * Check that we're not violating cache coherency rules: */
BUG_ON(bkey_deleted(btree_k.k));
if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
set_bit(BKEY_CACHED_DIRTY, &ck->flags);
atomic_long_inc(&c->btree_key_cache.nr_dirty);
if (bch2_nr_btree_keys_need_flush(c))
kick_reclaim = true;
}
/* * To minimize lock contention, we only add the journal pin here and * defer pin updates to the flush callback via ->seq. Be careful not to * update ->seq on nojournal commits because we don't want to update the * pin to a seq that doesn't include journal updates on disk. Otherwise * we risk losing the update after a crash. * * The only exception is if the pin is not active in the first place. We * have to add the pin because journal reclaim drives key cache * flushing. The flush callback will not proceed unless ->seq matches * the latest pin, so make sure it starts with a consistent value.
*/ if (!(insert_entry->flags & BTREE_UPDATE_nojournal) ||
!journal_pin_active(&ck->journal)) {
ck->seq = trans->journal_res.seq;
}
bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
&ck->journal, bch2_btree_key_cache_journal_flush);
if (kick_reclaim)
journal_reclaim_kick(&c->journal); returntrue;
}
/* * We just did an update to the btree, bypassing the key cache: the key * cache key is now stale and must be dropped, even if dirty:
*/ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
atomic_long_dec(&c->btree_key_cache.nr_dirty);
bch2_journal_pin_drop(&c->journal, &ck->journal);
}
struct btree_path *path2; unsigned i;
trans_for_each_path(trans, path2, i) if (path2->l[0].b == (void *) ck) { /* * It's safe to clear should_be_locked here because * we're evicting from the key cache, and we still have * the underlying btree locked: filling into the key * cache would require taking a write lock on the btree * node
*/
path2->should_be_locked = false;
__bch2_btree_path_unlock(trans, path2);
path2->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_drop);
btree_path_set_dirty(trans, path2, BTREE_ITER_NEED_TRAVERSE);
}
/* * Scanning is expensive while a rehash is in progress - most elements * will be on the new hashtable, if it's in progress * * A rehash could still start while we're scanning - that's ok, we'll * still see most elements.
*/ if (unlikely(tbl->nest)) {
rcu_read_unlock();
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); return SHRINK_STOP;
}
iter = bc->shrink_iter; if (iter >= tbl->size)
iter = 0;
start = iter;
do { struct rhash_head *pos, *next;
pos = rht_ptr_rcu(&tbl->buckets[iter]);
while (!rht_is_a_nulls(pos)) {
next = rht_dereference_bucket_rcu(pos->next, tbl, iter);
ck = container_of(pos, struct bkey_cached, hash);
/* * Avoid hammering our shrinker too much if it's nearly empty - the * shrinker code doesn't take into account how big our cache is, if it's * mostly empty but the system is under memory pressure it causes nasty * lock contention:
*/
nr -= 128;
/* * The loop is needed to guard against racing with rehash:
*/ while (atomic_long_read(&bc->nr_keys)) {
rcu_read_lock();
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); if (tbl) { if (tbl->nest) { /* wait for in progress rehash */
rcu_read_unlock();
mutex_lock(&bc->table.mutex);
mutex_unlock(&bc->table.mutex); continue;
} for (i = 0; i < tbl->size; i++) while (pos = rht_ptr_rcu(&tbl->buckets[i]), !rht_is_a_nulls(pos)) {
ck = container_of(pos, struct bkey_cached, hash);
BUG_ON(!bkey_cached_evict(bc, ck));
kfree(ck->k);
kmem_cache_free(bch2_key_cache, ck);
}
}
rcu_read_unlock();
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.