/*
 * NOTE(review): this listing is a garbled web extraction — keywords have
 * lost their separating spaces ("staticint") and the binary-search body
 * of bsearch() is cut off after the mid-key load, with lines from an
 * unrelated insert-time bounds check spliced in below.  Restore from
 * upstream drivers/md/persistent-data/dm-btree.c before building.
 */
/* makes the assumption that no two keys are the same. */ staticint bsearch(struct btree_node *n, uint64_t key, int want_hi)
{ int lo = -1, hi = le32_to_cpu(n->header.nr_entries);
while (hi - lo > 1) { int mid = lo + ((hi - lo) / 2);
uint64_t mid_key = le64_to_cpu(n->keys[mid]);
/*
 * NOTE(review): from here on the lines belong to a different (insert)
 * function — 'index', 'nr_entries', 'max_entries' and 'value' are not
 * declared in bsearch().  Presumably a node-full sanity check that
 * unblesses the prepared value and fails with -ENOMEM; verify upstream.
 */
if (index > nr_entries ||
index >= max_entries ||
nr_entries >= max_entries) {
DMERR("too many entries in btree node for insert");
__dm_unbless_for_disk(value); return -ENOMEM;
}
/* * Deletion uses a recursive algorithm, since we have limited stack space * we explicitly manage our own stack on the heap.
*/ #define MAX_SPINE_DEPTH 64 struct frame { struct dm_block *b; struct btree_node *n; unsignedint level; unsignedint nr_children; unsignedint current_child;
};
/*
 * NOTE(review): the loop below is the tail of a cleanup helper
 * (unlock_all_frames in upstream dm-btree.c) whose header was lost in
 * extraction.  It pops every frame remaining on the del stack and
 * releases its transaction-manager lock.
 */
while (unprocessed_frames(s)) {
f = s->spine + s->top--;
dm_tm_unlock(s->tm, f->b);
}
}
/*
 * Deletes an entire btree rooted at @root, calling
 * info->value_type.dec (if set) on the values of every bottom-level
 * leaf so reference counts stay correct.
 *
 * Deletion is iterative: a heap-allocated del_stack of frames replaces
 * recursion, since kernel stack space is limited.
 *
 * Returns 0 on success or a negative errno (e.g. -ENOMEM).
 *
 * NOTE(review): the original listing was garbled ("elseif", missing
 * final "return r;" and closing brace); restored per upstream
 * dm-btree.c.
 */
int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
{
	int r;
	struct del_stack *s;

	/*
	 * dm_btree_del() is called via an ioctl, as such should be
	 * considered an FS op.  We can't recurse back into the FS, so we
	 * allocate GFP_NOFS.
	 */
	s = kmalloc(sizeof(*s), GFP_NOFS);
	if (!s)
		return -ENOMEM;
	s->info = info;
	s->tm = info->tm;
	s->top = -1;

	r = push_frame(s, root, 0);
	if (r)
		goto out;

	while (unprocessed_frames(s)) {
		uint32_t flags;
		struct frame *f;
		dm_block_t b;

		r = top_frame(s, &f);
		if (r)
			goto out;

		if (f->current_child >= f->nr_children) {
			/* every child of this frame has been visited */
			pop_frame(s);
			continue;
		}

		flags = le32_to_cpu(f->n->header.flags);
		if (flags & INTERNAL_NODE) {
			/* descend into the next child of this internal node */
			b = value64(f->n, f->current_child);
			f->current_child++;
			r = push_frame(s, b, f->level);
			if (r)
				goto out;

		} else if (is_internal_level(info, f)) {
			/* leaf of an internal level: values are roots of the next level */
			b = value64(f->n, f->current_child);
			f->current_child++;
			r = push_frame(s, b, f->level + 1);
			if (r)
				goto out;

		} else {
			/* bottom-level leaf: drop refs on all its values at once */
			if (info->value_type.dec)
				info->value_type.dec(info->value_type.context,
						     value_ptr(f->n, 0), f->nr_children);
			pop_frame(s);
		}
	}
out:
	if (r) {
		/* cleanup all frames of del_stack */
		unlock_all_frames(s);
	}
	kfree(s);

	return r;
}
/*
 * NOTE(review): fragment — this is the interior of a lookup-next style
 * routine (dm_btree_lookup_next_single in upstream dm-btree.c).  Its
 * function header, the leaf-node result copy and the out:/unlock tail
 * were lost in extraction; restore from upstream.
 */
r = bn_read_lock(info, root, &node); if (r) return r;
n = dm_block_data(node);
flags = le32_to_cpu(n->header.flags);
nr_entries = le32_to_cpu(n->header.nr_entries);
if (flags & INTERNAL_NODE) {
i = lower_bound(n, key); if (i < 0) { /* * avoid early -ENODATA return when all entries are * higher than the search @key.
*/
i = 0;
} if (i >= nr_entries) {
r = -ENODATA; goto out;
}
/* retry the next child if this subtree holds nothing >= @key */
r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le); if (r == -ENODATA && i < (nr_entries - 1)) {
i++;
r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le);
}
} else {
i = upper_bound(n, key); if (i < 0 || i >= nr_entries) {
r = -ENODATA; goto out;
}
/* * Copies entries from one region of a btree node to another. The regions * must not overlap.
*/ staticvoid copy_entries(struct btree_node *dest, unsignedint dest_offset, struct btree_node *src, unsignedint src_offset, unsignedint count)
{
size_t value_size = le32_to_cpu(dest->header.value_size);
/*
 * NOTE(review): body truncated by extraction — the actual key/value
 * memcpy calls and closing brace are missing; restore from upstream.
 */
/* * Moves entries from one region of a btree node to another. The regions * may overlap.
*/ staticvoid move_entries(struct btree_node *dest, unsignedint dest_offset, struct btree_node *src, unsignedint src_offset, unsignedint count)
{
size_t value_size = le32_to_cpu(dest->header.value_size);
/*
 * NOTE(review): body truncated by extraction — the overlap-safe
 * key/value moves and closing brace are missing; restore from upstream.
 */
/* * Erases the first 'count' entries of a btree node, shifting following * entries down into their place.
*/ staticvoid shift_down(struct btree_node *n, unsignedint count)
{
move_entries(n, 0, n, count, le32_to_cpu(n->header.nr_entries) - count);
}
/* * Moves entries in a btree node up 'count' places, making space for * new entries at the start of the node.
*/ staticvoid shift_up(struct btree_node *n, unsignedint count)
{
move_entries(n, count, n, 0, le32_to_cpu(n->header.nr_entries));
}
/* * Redistributes entries between two btree nodes to make them * have similar numbers of entries.
*/ staticvoid redistribute2(struct btree_node *left, struct btree_node *right)
{ unsignedint nr_left = le32_to_cpu(left->header.nr_entries); unsignedint nr_right = le32_to_cpu(right->header.nr_entries); unsignedint total = nr_left + nr_right; unsignedint target_left = total / 2; unsignedint target_right = total - target_left;
/*
 * NOTE(review): redistribute2()'s body is missing from here on; the
 * lines below belong to the tail of a different function (the spine
 * patch-up after a sibling split — it keeps whichever of left/right
 * the @key falls into locked as spine node 1 and unlocks the other).
 */
/* patch up the spine */ if (key < le64_to_cpu(rn->keys[0])) {
unlock_block(s->info, right);
s->nodes[1] = left;
} else {
unlock_block(s->info, left);
s->nodes[1] = right;
}
return 0;
}
/* * We often need to modify a sibling node. This function shadows a particular * child of the given parent node. Making sure to update the parent to point * to the new shadow.
*/ staticint shadow_child(struct dm_btree_info *info, struct dm_btree_value_type *vt, struct btree_node *parent, unsignedint index, struct dm_block **result)
{ int r, inc;
dm_block_t root; struct btree_node *node;
root = value64(parent, index);
r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
result, &inc); if (r) return r;
/*
 * NOTE(review): truncated — the ref-count increment on @inc, the
 * parent-pointer update promised by the comment above, and the return
 * are missing; restore from upstream dm-btree.c.
 */
/*
 * NOTE(review): orphaned fragment — appears to be the interior of the
 * split-beneath-root path, re-initialising the (shadowed) parent node
 * 'pn' as an internal node holding exactly two __le64 child pointers.
 * Verify against upstream btree_split_beneath().
 */
/* new_parent should just point to l and r now */
pn->header.flags = cpu_to_le32(INTERNAL_NODE);
pn->header.nr_entries = cpu_to_le32(2);
pn->header.max_entries = cpu_to_le32(
calc_max_entries(sizeof(__le64),
dm_bm_block_size(
dm_tm_get_bm(s->info->tm))));
pn->header.value_size = cpu_to_le32(sizeof(__le64));
/* * Make space in a node, either by moving some entries to a sibling, * or creating a new sibling node. SPACE_THRESHOLD defines the minimum * number of free entries that must be in the sibling to make the move * worth while. If the siblings are shared (eg, part of a snapshot), * then they are not touched, since this break sharing and so consume * more space than we save.
*/ #define SPACE_THRESHOLD 8 staticint rebalance_or_split(struct shadow_spine *s, struct dm_btree_value_type *vt, unsignedint parent_index, uint64_t key)
{ int r; struct btree_node *parent = dm_block_data(shadow_parent(s)); unsignedint nr_parent = le32_to_cpu(parent->header.nr_entries); unsignedint free_space; int left_shared = 0, right_shared = 0;
/* Should we move entries to the left sibling? */ if (parent_index > 0) {
dm_block_t left_b = value64(parent, parent_index - 1);
r = dm_tm_block_is_shared(s->info->tm, left_b, &left_shared); if (r) return r;
if (!left_shared) {
r = get_node_free_space(s->info, left_b, &free_space); if (r) return r;
/*
 * NOTE(review): the middle of this function is missing from the
 * listing — the free_space >= SPACE_THRESHOLD rebalance-left call,
 * the symmetric right-sibling check, and several closing braces were
 * lost in extraction; restore from upstream dm-btree.c.
 */
/* * We need to split the node, normally we split two nodes * into three. But when inserting a sequence that is either * monotonically increasing or decreasing it's better to split * a single node into two.
*/ if (left_shared || right_shared || (nr_parent <= 2) ||
(parent_index == 0) || (parent_index + 1 == nr_parent)) { return split_one_into_two(s, parent_index, vt, key);
} else { return split_two_into_three(s, parent_index, vt, key);
}
}
/* * Does the node contain a particular key?
*/ staticbool contains_key(struct btree_node *node, uint64_t key)
{ int i = lower_bound(node, key);
if (i >= 0 && le64_to_cpu(node->keys[i]) == key) returntrue;
returnfalse;
}
/* * In general we preemptively make sure there's a free entry in every * node on the spine when doing an insert. But we can avoid that with * leaf nodes if we know it's an overwrite.
*/ staticbool has_space_for_insert(struct btree_node *node, uint64_t key)
{ if (node->header.nr_entries == node->header.max_entries) { if (le32_to_cpu(node->header.flags) & LEAF_NODE) { /* we don't need space if it's an overwrite */ return contains_key(node, key);
}
returnfalse;
}
returntrue;
}
/*
 * Walks the spine from @root down to the leaf level for @key,
 * shadowing each node on the way and making room (splitting or
 * rebalancing) wherever a node is full.  On success *index is the slot
 * in the final leaf where @key lives or should be inserted.
 *
 * Returns 0 on success, negative errno on failure.
 *
 * NOTE(review): the listing had dropped the parent patch-up lines
 * (__dm_bless_for_disk/memcpy_disk) and a closing brace; restored per
 * upstream dm-btree.c.
 */
static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
			    struct dm_btree_value_type *vt,
			    uint64_t key, unsigned int *index)
{
	int r, i = *index, top = 1;
	struct btree_node *node;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			return r;

		node = dm_block_data(shadow_current(s));

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unness. */
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			__dm_bless_for_disk(&location);
			memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i),
				    &location, sizeof(__le64));
		}

		if (!has_space_for_insert(node, key)) {
			if (top)
				r = btree_split_beneath(s, key);
			else
				r = rebalance_or_split(s, vt, i, key);

			if (r < 0)
				return r;

			/* making space can cause the current node to change */
			node = dm_block_data(shadow_current(s));
		}

		i = lower_bound(node, key);

		if (le32_to_cpu(node->header.flags) & LEAF_NODE)
			break;

		if (i < 0) {
			/* change the bounds on the lowest key */
			node->keys[0] = cpu_to_le64(key);
			i = 0;
		}

		root = value64(node, i);
		top = 0;
	}

	/* point at the insertion slot: one past the predecessor unless exact match */
	if (i < 0 || le64_to_cpu(node->keys[i]) != key)
		i++;

	*index = i;
	return 0;
}
/*
 * NOTE(review): truncated — this function's body is cut off inside the
 * parent patch-up block; the memcpy of the shadow location, the leaf
 * detection/exit and the closing braces are missing.  Restore from
 * upstream dm-btree.c.
 */
staticint __btree_get_overwrite_leaf(struct shadow_spine *s, dm_block_t root,
uint64_t key, int *index)
{ int r, i = -1; struct btree_node *node;
*index = 0; for (;;) {
r = shadow_step(s, root, &s->info->value_type); if (r < 0) return r;
node = dm_block_data(shadow_current(s));
/* * We have to patch up the parent node, ugly, but I don't * see a way to do this automatically as part of the spine * op.
*/ if (shadow_has_parent(s) && i >= 0) {
__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
/*
 * FIXME: We shouldn't use a recursive algorithm when we have limited
 * stack space.  Also this only works for single level trees.
 *
 * Read-locks @block, invokes @fn on every leaf entry (recursing into
 * children for internal nodes), and stops at the first non-zero return
 * from @fn.
 *
 * NOTE(review): the out:/unlock/return tail was missing from the
 * garbled listing (the 'goto out's had no target); restored per
 * upstream dm-btree.c.
 */
static int walk_node(struct dm_btree_info *info, dm_block_t block,
		     int (*fn)(void *context, uint64_t *keys, void *leaf),
		     void *context)
{
	int r;
	unsigned int i, nr;
	struct dm_block *node;
	struct btree_node *n;
	uint64_t keys;

	r = bn_read_lock(info, block, &node);
	if (r)
		return r;

	n = dm_block_data(node);

	nr = le32_to_cpu(n->header.nr_entries);
	for (i = 0; i < nr; i++) {
		if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) {
			/* recurse into the child subtree */
			r = walk_node(info, value64(n, i), fn, context);
			if (r)
				goto out;
		} else {
			/* copy the key out so @fn gets a cpu-order uint64_t */
			keys = le64_to_cpu(*key_ptr(n, i));
			r = fn(context, &keys, value_ptr(n, i));
			if (r)
				goto out;
		}
	}

out:
	dm_tm_unlock(info->tm, node);
	return r;
}
/*
 * NOTE(review): orphaned fragment — the tail of a prefetch helper that
 * reads each __le64 value of node 'bn' and asks the block manager to
 * prefetch the block it names.  Its header and the declarations of
 * 'bn', 'bm', 'value_le' and 'i' were lost in extraction.
 */
nr = le32_to_cpu(bn->header.nr_entries); for (i = 0; i < nr; i++) {
memcpy(&value_le, value_ptr(bn, i), sizeof(value_le));
dm_bm_prefetch(bm, le64_to_cpu(value_le));
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.