/*
 * Removing an entry from a btree
 * ==============================
 *
 * A very important constraint for our btree is that no node, except the
 * root, may have fewer than a certain number of entries.
 * (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES).
 *
 * Ensuring this is complicated by the way we want to only ever hold the
 * locks on 2 nodes concurrently, and only change nodes in a top to bottom
 * fashion.
 *
 * Each node may have a left or right sibling.  When descending the spine,
 * if a node contains only MIN_ENTRIES then we try to increase this to at
 * least MIN_ENTRIES + 1.  We do this in the following ways:
 *
 * [A] No siblings => this can only happen if the node is the root, in which
 *     case we copy the child's contents over the root.
 *
 * [B] No left sibling
 *     ==> rebalance(node, right sibling)
 *
 * [C] No right sibling
 *     ==> rebalance(left sibling, node)
 *
 * [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD
 *     ==> delete node adding its contents to left and right
 *
 * [E] Both siblings, total_entries(left, node, right) > DEL_THRESHOLD
 *     ==> rebalance(left, node, right)
 *
 * After these operations it's possible that our original node no
 * longer contains the desired sub tree.  For this reason this rebalancing
 * is performed on the children of the current node.  This also avoids
 * having a special case for the root.
 *
 * Once this rebalancing has occurred we can then step into the child node
 * for internal nodes.  Or delete the entry for leaf nodes.
 */
/*
 * Some little utilities for moving node data around.
 */
/*
 * NOTE(review): this fragment is truncated — node_shift() opens and
 * declares two locals, but its body and closing brace are missing from
 * this chunk (the next visible line begins a different function).  The
 * fused token "staticvoid" also looks like extraction damage and would
 * not compile.  Code below is kept byte-identical pending recovery of
 * the full function from the original file.
 */
staticvoid node_shift(struct btree_node *n, int shift)
{
/* Current entry count of the node (stored little-endian on disk). */
uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
/* Size in bytes of each value slot in this node. */
uint32_t value_size = le32_to_cpu(n->header.value_size);
/*
 * NOTE(review): __rebalance2() — rebalances (or merges) two sibling
 * children l and r under 'parent'.  This fragment is missing a large
 * middle portion: the text jumps from the threshold computation straight
 * to dm_tm_dec() of the right block, so the merge branch that this
 * decrement presumably belongs to (and the "if" whose "else" appears
 * below) is not visible here.  Fused tokens ("staticint",
 * "unsignedint") are extraction damage.  Code kept byte-identical.
 */
staticint __rebalance2(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *r)
{ int ret; struct btree_node *left = l->n; struct btree_node *right = r->n;
/* Entry counts of the two siblings being rebalanced. */
uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
uint32_t nr_right = le32_to_cpu(right->header.nr_entries); /* * Ensure the number of entries in each child will be greater * than or equal to (max_entries / 3 + 1), so no matter which * child is used for removal, the number will still be not * less than (max_entries / 3).
*/ unsignedint threshold = 2 * (merge_threshold(left) + 1);
/* * We need to decrement the right block, but not its * children, since they're still referenced by left.
*/
dm_tm_dec(info->tm, dm_block_location(r->block));
} else { /* * Rebalance: split the combined entries roughly in half. */
unsignedint target_left = (nr_left + nr_right) / 2;
ret = shift(left, right, nr_left - target_left); if (ret) return ret;
/* Parent's separator key must track right's new first key. */
*key_ptr(parent, r->index) = right->keys[0];
} return 0;
}
/* * We dump as many entries from center as possible into left, then the rest * in right, then rebalance2. This wastes some cpu, but I want something * simple atm.
*/
/*
 * NOTE(review): delete_center_node() fragment — empties the middle of
 * three siblings into its neighbours so it can be deleted.  The fragment
 * is cut off after the second bounds check; the remainder of the body
 * (moving the leftover entries into right and the actual deletion) is
 * not visible in this chunk.  Code kept byte-identical.
 */
staticint delete_center_node(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r, struct btree_node *left, struct btree_node *center, struct btree_node *right,
uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
/* Move as many of center's entries as will fit into left. */
uint32_t max_entries = le32_to_cpu(left->header.max_entries); unsignedint shift = min(max_entries - nr_left, nr_center);
if (nr_left + shift > max_entries) {
DMERR("node shift out of bounds"); return -EINVAL;
}
node_copy(left, center, -shift);
left->header.nr_entries = cpu_to_le32(nr_left + shift);
/* Whatever did not fit in left goes into right. */
if (shift != nr_center) {
shift = nr_center - shift;
if ((nr_right + shift) > max_entries) {
DMERR("node shift out of bounds"); return -EINVAL;
}
/*
 * NOTE(review): this span uses 's', 'target_left', 'target_right' and
 * 'ret', none of which are declared in the visible delete_center_node()
 * above — it appears to be the core of a separate three-way
 * redistribution function that the extraction fused into this location.
 * The logic moves entries among left/center/right so each ends up near
 * its target count, routing through the center node when the direct
 * shift would drain it.  Code kept byte-identical.
 */
if (nr_left < nr_right) {
/* left is short: pull entries rightward through center. */
s = nr_left - target_left;
if (s < 0 && nr_center < -s) { /* not enough in central node */
ret = shift(left, center, -nr_center); if (ret) return ret;
s += nr_center;
ret = shift(left, right, s); if (ret) return ret;
nr_right += s;
} else {
ret = shift(left, center, s); if (ret) return ret;
}
ret = shift(center, right, target_right - nr_right); if (ret) return ret;
} else {
/* right is short: pull entries leftward through center. */
s = target_right - nr_right; if (s > 0 && nr_center < s) { /* not enough in central node */
ret = shift(center, right, nr_center); if (ret) return ret;
s -= nr_center;
ret = shift(left, right, s); if (ret) return ret;
nr_left -= s;
} else {
ret = shift(center, right, s); if (ret) return ret;
}
ret = shift(left, center, nr_left - target_left); if (ret) return ret;
}
/*
 * NOTE(review): tail of a function whose signature is not visible in
 * this chunk.  It locates the child index for 'key' and dispatches to
 * the appropriate rebalancing strategy based on which siblings exist:
 * no left sibling  -> rebalance2 with the right sibling,
 * no right sibling -> rebalance2 with the left sibling,
 * both siblings    -> rebalance3.
 * "elseif" is a fused token from the extraction.  Code kept
 * byte-identical.
 */
i = lower_bound(n, key); if (i < 0) return -ENODATA;
has_left_sibling = i > 0;
has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);
if (!has_left_sibling)
r = rebalance2(s, info, vt, i);
elseif (!has_right_sibling)
r = rebalance2(s, info, vt, i - 1);
else
r = rebalance3(s, info, vt, i - 1);
return r;
}
/*
 * do_leaf - look up @key in leaf node @n.
 *
 * On success stores the entry's index in *@index and returns 0.
 * Returns -ENODATA when the key is absent: lower_bound() found no
 * candidate slot (i < 0), the slot is past the last entry, or the key
 * stored there is not an exact match.
 *
 * (Fix: de-fused the extraction-damaged "staticint"/"unsignedint"
 * tokens and restored conventional formatting; logic unchanged.)
 */
static int do_leaf(struct btree_node *n, uint64_t key, unsigned int *index)
{
	int i = lower_bound(n, key);

	/*
	 * The i < 0 test runs first, so the signed/unsigned comparison
	 * against nr_entries below only ever sees a non-negative i.
	 */
	if ((i < 0) ||
	    (i >= le32_to_cpu(n->header.nr_entries)) ||
	    (le64_to_cpu(n->keys[i]) != key))
		return -ENODATA;

	*index = i;
	return 0;
}
/* * Prepares for removal from one level of the hierarchy. The caller must * call delete_at() to remove the entry at index.
*/
/*
 * NOTE(review): remove_raw() fragment — truncated mid-body.  The
 * shadow_has_parent() branch opens but the rest of the loop body, the
 * loop exit and the closing brace are missing from this chunk.  Code
 * kept byte-identical.
 */
staticint remove_raw(struct shadow_spine *s, struct dm_btree_info *info, struct dm_btree_value_type *vt, dm_block_t root,
uint64_t key, unsignedint *index)
{ int i = *index, r; struct btree_node *n;
for (;;) {
/* Shadow (copy-on-write) the next node on the path down. */
r = shadow_step(s, root, vt); if (r < 0) break;
/* * We have to patch up the parent node, ugly, but I don't * see a way to do this automatically as part of the spine * op.
*/ if (shadow_has_parent(s)) {
__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
/*
 * NOTE(review): remove_nearest() fragment — same descent shape as
 * remove_raw() above but takes a signed index (judging by the 'int
 * *index' parameter, it can report a nearest rather than exact slot).
 * Truncated mid-body at the parent patch-up, like remove_raw().  Code
 * kept byte-identical.
 */
staticint remove_nearest(struct shadow_spine *s, struct dm_btree_info *info, struct dm_btree_value_type *vt, dm_block_t root,
uint64_t key, int *index)
{ int i = *index, r; struct btree_node *n;
for (;;) {
/* Shadow (copy-on-write) the next node on the path down. */
r = shadow_step(s, root, vt); if (r < 0) break;
/* * We have to patch up the parent node, ugly, but I don't * see a way to do this automatically as part of the spine * op.
*/ if (shadow_has_parent(s)) {
__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
/*
 * NOTE(review): mid-function fragment — neither the signature nor the
 * closing brace is visible.  It descends the internal btree levels with
 * remove_raw() (values at those levels are 64-bit child-block pointers,
 * hence the le64 value type), then at the last level uses
 * remove_nearest(), clamps a negative index to 0, bounds-checks it,
 * and — if the key at that slot falls inside [keys[last_level],
 * end_key) — invokes the value type's dec callback on the entry before
 * (presumably) deleting it.  Code kept byte-identical.
 */
init_le64_type(info->tm, &le64_vt);
init_shadow_spine(&spine, info); for (level = 0; level < last_level; level++) {
r = remove_raw(&spine, info, &le64_vt,
root, keys[level], (unsignedint *) &index); if (r < 0) goto out;
/* The removed value at an internal level is the next level's root. */
n = dm_block_data(shadow_current(&spine));
root = value64(n, index);
}
r = remove_nearest(&spine, info, &info->value_type,
root, keys[last_level], &index); if (r < 0) goto out;
n = dm_block_data(shadow_current(&spine));
/* remove_nearest() may return a slot before the first entry. */
if (index < 0)
index = 0;
if (index >= le32_to_cpu(n->header.nr_entries)) {
r = -ENODATA; goto out;
}
/* Only act on the entry if it lies within the requested key range. */
k = le64_to_cpu(n->keys[index]); if (k >= keys[last_level] && k < end_key) { if (info->value_type.dec)
info->value_type.dec(info->value_type.context,
value_ptr(n, index), 1);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.