/*
 * Simple helper to search for an existing data extent item at a given offset.
 *
 * Returns 0 if an (start, EXTENT_ITEM, len) key exists in the extent tree,
 * > 0 if it does not, and < 0 on error.
 *
 * NOTE(review): the original fragment was truncated right after the path
 * allocation; the key setup and btrfs_search_slot() call below are restored
 * from the upstream implementation — confirm against the canonical file.
 */
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
{
	struct btrfs_root *root = btrfs_extent_root(fs_info, start);
	struct btrfs_key key;
	BTRFS_PATH_AUTO_FREE(path);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = start;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = len;
	/* Read-only search: no transaction handle, no insertion (cow == 0). */
	return btrfs_search_slot(NULL, root, &key, path, 0, 0);
}
/*
 * NOTE(review): corrupted fragment. The body of btrfs_lookup_extent_info()
 * below is missing the extent-tree search section (btrfs_search_slot(),
 * the "search_again:" label targeted by the goto further down, and the
 * leaf/item_size lookup that defines 'leaf', 'ei' and 'item_size'), as well
 * as the tail of the delayed-ref-head accounting (num_refs += head->ref_mod
 * and the matching unlocks). Restore from the upstream file before editing.
 */
/* * helper function to lookup reference count and flags of a tree block. * * the head node for delayed ref is used to store the sum of all the * reference count modifications queued up in the rbtree. the head * node may also store the extent flags to set. This way you can check * to see what the reference count and extent flags would be if all of * the delayed refs are not processed.
*/ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr,
u64 offset, int metadata, u64 *refs, u64 *flags,
u64 *owning_root)
{ struct btrfs_root *extent_root; struct btrfs_delayed_ref_head *head; struct btrfs_delayed_ref_root *delayed_refs;
BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key;
u64 num_refs;
u64 extent_flags;
u64 owner = 0; int ret;
/* * If we don't have skinny metadata, don't bother doing anything * different
*/ if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
offset = fs_info->nodesize;
metadata = 0;
}
path = btrfs_alloc_path(); if (!path) return -ENOMEM;
/* NOTE(review): the "search_again:" label, the btrfs_search_slot() call,
 * and the "if (ret == 0) { ... item_size = btrfs_item_size(...) ... }"
 * opening that declares 'leaf', 'ei' and 'item_size' are missing here. */
if (unlikely(item_size < sizeof(*ei))) {
ret = -EUCLEAN;
btrfs_err(fs_info, "unexpected extent item size, has %u expect >= %zu",
item_size, sizeof(*ei));
btrfs_abort_transaction(trans, ret); return ret;
}
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
num_refs = btrfs_extent_refs(leaf, ei); if (unlikely(num_refs == 0)) {
ret = -EUCLEAN;
btrfs_err(fs_info, "unexpected zero reference count for extent item (%llu %u %llu)",
key.objectid, key.type, key.offset);
btrfs_abort_transaction(trans, ret); return ret;
}
extent_flags = btrfs_extent_flags(leaf, ei);
owner = btrfs_get_extent_owner_root(fs_info, leaf, path->slots[0]);
} else {
num_refs = 0;
extent_flags = 0;
ret = 0;
}
/* Fold in pending delayed-ref modifications so callers see the values as
 * they would be after all queued refs run. */
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
head = btrfs_find_delayed_ref_head(fs_info, delayed_refs, bytenr); if (head) { if (!mutex_trylock(&head->mutex)) {
refcount_inc(&head->refs);
spin_unlock(&delayed_refs->lock);
btrfs_release_path(path);
/* * Mutex was contended, block until it's released and try * again
*/
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref_head(head); goto search_again;
}
spin_lock(&head->lock); if (head->extent_op && head->extent_op->update_flags)
extent_flags |= head->extent_op->flags_to_set;
/* NOTE(review): "num_refs += head->ref_mod;", the head/delayed_refs
 * unlocks, and the closing of the "if (head)" block are missing here. */
WARN_ON(num_refs == 0); if (refs)
*refs = num_refs; if (flags)
*flags = extent_flags; if (owning_root)
*owning_root = owner;
return ret;
}
/* * Back reference rules. Back refs have three main goals: * * 1) differentiate between all holders of references to an extent so that * when a reference is dropped we can make sure it was a valid reference * before freeing the extent. * * 2) Provide enough information to quickly find the holders of an extent * if we notice a given block is corrupted or bad. * * 3) Make it easy to migrate blocks for FS shrinking or storage pool * maintenance. This is actually the same as #2, but with a slightly * different use case. * * There are two kinds of back refs. The implicit back refs is optimized * for pointers in non-shared tree blocks. For a given pointer in a block, * back refs of this kind provide information about the block's owner tree * and the pointer's key. These information allow us to find the block by * b-tree searching. The full back refs is for pointers in tree blocks not * referenced by their owner trees. The location of tree block is recorded * in the back refs. Actually the full back refs is generic, and can be * used in all cases the implicit back refs is used. The major shortcoming * of the full back refs is its overhead. Every time a tree block gets * COWed, we have to update back refs entry for all pointers in it. * * For a newly allocated tree block, we use implicit back refs for * pointers in it. This means most tree related operations only involve * implicit back refs. For a tree block created in old transaction, the * only way to drop a reference to it is COW it. So we can detect the * event that tree block loses its owner tree's reference and do the * back refs conversion. * * When a tree block is COWed through a tree, there are four cases: * * The reference count of the block is one and the tree is the block's * owner tree. Nothing to do in this case. * * The reference count of the block is one and the tree is not the * block's owner tree. In this case, full back refs is used for pointers * in the block. 
Remove these full back refs, add implicit back refs for * every pointers in the new block. * * The reference count of the block is greater than one and the tree is * the block's owner tree. In this case, implicit back refs is used for * pointers in the block. Add full back refs for every pointers in the * block, increase lower level extents' reference counts. The original * implicit back refs are entailed to the new block. * * The reference count of the block is greater than one and the tree is * not the block's owner tree. Add implicit back refs for every pointer in * the new block, increase lower level extents' reference count. * * Back Reference Key composing: * * The key objectid corresponds to the first byte in the extent, * The key type is used to differentiate between types of back refs. * There are different meanings of the key offset for different types * of back refs. * * File extents can be referenced by: * * - multiple snapshots, subvolumes, or different generations in one subvol * - different files inside a single subvolume * - different offsets inside a file (bookend extents in file.c) * * The extent ref structure for the implicit back refs has fields for: * * - Objectid of the subvolume root * - objectid of the file holding the reference * - original offset in the file * - how many bookend extents * * The key offset for the implicit back refs is hash of the first * three fields. * * The extent ref structure for the full back refs has field for: * * - number of pointers in the tree leaf * * The key offset for the implicit back refs is the first byte of * the tree leaf * * When a file extent is allocated, The implicit back refs is used. 
* the fields are filled in: * * (root_key.objectid, inode objectid, offset in file, 1) * * When a file extent is removed file truncation, we find the * corresponding implicit back refs and check the following fields: * * (btrfs_header_owner(leaf), inode objectid, offset in file) * * Btree extents can be referenced by: * * - Different subvolumes * * Both the implicit back refs and the full back refs for tree blocks * only consist of key. The key offset for the implicit back refs is * objectid of block's owner tree. The key offset for the full back refs * is the first byte of parent block. * * When implicit back refs is used, information about the lowest key and * level of the tree block are required. These information are stored in * tree block info structure.
*/
/* * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required, * is_data == BTRFS_REF_TYPE_DATA, data type is requiried, * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
*/ int btrfs_get_extent_inline_ref_type(conststruct extent_buffer *eb, conststruct btrfs_extent_inline_ref *iref, enum btrfs_inline_ref_type is_data)
{ struct btrfs_fs_info *fs_info = eb->fs_info; int type = btrfs_extent_inline_ref_type(eb, iref);
u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
if (type == BTRFS_EXTENT_OWNER_REF_KEY) {
ASSERT(btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)); return type;
}
if (type == BTRFS_TREE_BLOCK_REF_KEY ||
type == BTRFS_SHARED_BLOCK_REF_KEY ||
type == BTRFS_SHARED_DATA_REF_KEY ||
type == BTRFS_EXTENT_DATA_REF_KEY) { if (is_data == BTRFS_REF_TYPE_BLOCK) { if (type == BTRFS_TREE_BLOCK_REF_KEY) return type; if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
ASSERT(fs_info); /* * Every shared one has parent tree block, * which must be aligned to sector size.
*/ if (offset && IS_ALIGNED(offset, fs_info->sectorsize)) return type;
}
} elseif (is_data == BTRFS_REF_TYPE_DATA) { if (type == BTRFS_EXTENT_DATA_REF_KEY) return type; if (type == BTRFS_SHARED_DATA_REF_KEY) {
ASSERT(fs_info); /* * Every shared one has parent tree block, * which must be aligned to sector size.
*/ if (offset &&
IS_ALIGNED(offset, fs_info->sectorsize)) return type;
}
} else {
ASSERT(is_data == BTRFS_REF_TYPE_ANY); return type;
}
}
/*
 * Pick the backref key type for a new reference.
 *
 * An owner below BTRFS_FIRST_FREE_OBJECTID means the extent is a tree
 * block (the owner is a tree level), otherwise it is a data extent.
 * A non-zero @parent selects the shared (full) backref variant, zero
 * selects the implicit one.
 *
 * NOTE(review): fixed the mangled "staticinlineint" declaration, which
 * does not compile; no logic changes.
 */
static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}
staticint find_next_key(conststruct btrfs_path *path, int level, struct btrfs_key *key)
{ for (; level < BTRFS_MAX_LEVEL; level++) { if (!path->nodes[level]) break; if (path->slots[level] + 1 >=
btrfs_header_nritems(path->nodes[level])) continue; if (level == 0)
btrfs_item_key_to_cpu(path->nodes[level], key,
path->slots[level] + 1); else
btrfs_node_key_to_cpu(path->nodes[level], key,
path->slots[level] + 1); return 0;
} return 1;
}
/*
 * NOTE(review): corrupted fragment. The body of lookup_inline_extent_backref()
 * below is missing the full key setup (key.objectid and the non-skinny
 * branch), the extra_size/want/needed computation, the extent-item size
 * validation, and the main loop that scans the inline refs (which defines
 * 'ptr', 'end', 'iref' and decides between the -ENOENT/-EAGAIN/found cases,
 * including the "out_no_entry" fallthrough). Restore from the upstream file
 * before editing.
 */
/* * look for inline back ref. if back ref is found, *ref_ret is set * to the address of inline back ref, and 0 is returned. * * if back ref isn't found, *ref_ret is set to the address where it * should be inserted, and -ENOENT is returned. * * if insert is true and there are too many inline back refs, the path * points to the extent item, and -EAGAIN is returned. * * NOTE: inline back refs are ordered in the same way that back ref * items in the tree are ordered.
*/ static noinline_for_stack int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_extent_inline_ref **ref_ret,
u64 bytenr, u64 num_bytes,
u64 parent, u64 root_objectid,
u64 owner, u64 offset, int insert)
{ struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr); struct btrfs_key key; struct extent_buffer *leaf; struct btrfs_extent_item *ei; struct btrfs_extent_inline_ref *iref;
u64 flags;
u64 item_size; unsignedlong ptr; unsignedlong end; int extra_size; int type; int want; int ret; bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA); int needed;
/* * Owner is our level, so we can just add one to get the level for the * block we are interested in.
*/ if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
key.type = BTRFS_METADATA_ITEM_KEY;
key.offset = owner;
}
/* NOTE(review): key.objectid assignment, the non-skinny key setup, and
 * the extra_size computation are missing before this point — 'extra_size'
 * is used uninitialized in the search below as the text stands. */
again:
ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); if (ret < 0) goto out;
/* * We may be a newly converted file system which still has the old fat * extent entries for metadata, so try and see if we have one of those.
*/ if (ret > 0 && skinny_metadata) {
skinny_metadata = false; if (path->slots[0]) {
path->slots[0]--;
btrfs_item_key_to_cpu(path->nodes[0], &key,
path->slots[0]); if (key.objectid == bytenr &&
key.type == BTRFS_EXTENT_ITEM_KEY &&
key.offset == num_bytes)
ret = 0;
} if (ret) {
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
btrfs_release_path(path); goto again;
}
}
if (ret && !insert) {
ret = -ENOENT; goto out;
} elseif (WARN_ON(ret)) {
btrfs_print_leaf(path->nodes[0]);
btrfs_err(fs_info, "extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu",
bytenr, num_bytes, parent, root_objectid, owner,
offset);
ret = -EUCLEAN; goto out;
}
/* NOTE(review): the extent-item size validation and the inline-ref scan
 * loop (defining 'leaf', 'item_size', 'ptr', 'end', 'flags', 'want',
 * 'needed' usage) are missing between here and the -EAGAIN check. */
if (!path->keep_locks) {
btrfs_release_path(path);
path->keep_locks = 1; goto again;
}
/* * To add new inline back ref, we have to make sure * there is no corresponding back ref item. * For simplicity, we just do not add new inline back * ref if there is any kind of item for this block
*/ if (find_next_key(path, 0, &key) == 0 &&
key.objectid == bytenr &&
key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
ret = -EAGAIN; goto out;
}
}
out_no_entry:
*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out: if (path->keep_locks) {
path->keep_locks = 0;
btrfs_unlock_up_safe(path, 1);
} if (insert)
path->search_for_extension = 0; return ret;
}
/*
 * NOTE(review): corrupted region containing pieces of THREE different
 * functions spliced together:
 *   1. the signature and locals of setup_inline_extent_backref()
 *      (its real body is missing entirely),
 *   2. interior fragments of update_inline_extent_backref()
 *      (the refs_to_mod validation and the inline ref-count update),
 *   3. interior fragments of do_discard_extent() / the discard helper
 *      (sector alignment, superblock skipping, zone reset handling).
 * None of this compiles as-is; restore each function from upstream.
 */
/* * helper to add new inline back ref
*/ static noinline_for_stack void setup_inline_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_extent_inline_ref *iref,
u64 parent, u64 root_objectid,
u64 owner, u64 offset, int refs_to_add, struct btrfs_delayed_extent_op *extent_op)
{ struct extent_buffer *leaf; struct btrfs_extent_item *ei; unsignedlong ptr; unsignedlong end; unsignedlong item_offset;
u64 refs; int size; int type;
/* NOTE(review): from here down the text belongs to a different function
 * (update_inline_extent_backref): it validates refs_to_mod against the
 * current inline ref count and applies the delta. */
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); if (key.type == BTRFS_METADATA_ITEM_KEY)
extent_size = fs_info->nodesize; else
extent_size = key.offset;
btrfs_print_leaf(leaf);
btrfs_err(fs_info, "invalid refs_to_mod for extent %llu num_bytes %u, has %d expect >= -%llu",
key.objectid, extent_size, refs_to_mod, refs); return -EUCLEAN;
}
refs += refs_to_mod;
btrfs_set_extent_refs(leaf, ei, refs); if (extent_op)
__run_delayed_extent_op(extent_op, leaf, ei);
type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY); /* * Function btrfs_get_extent_inline_ref_type() has already printed * error messages.
*/ if (unlikely(type == BTRFS_REF_TYPE_INVALID)) return -EUCLEAN;
if (type == BTRFS_EXTENT_DATA_REF_KEY) {
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
refs = btrfs_extent_data_ref_count(leaf, dref);
} elseif (type == BTRFS_SHARED_DATA_REF_KEY) {
sref = (struct btrfs_shared_data_ref *)(iref + 1);
refs = btrfs_shared_data_ref_count(leaf, sref);
} else {
refs = 1; /* * For tree blocks we can only drop one ref for it, and tree * blocks should not have refs > 1. * * Furthermore if we're inserting a new inline backref, we * won't reach this path either. That would be * setup_inline_extent_backref().
*/ if (unlikely(refs_to_mod != -1)) { struct btrfs_key key;
/* NOTE(review): from here down the text belongs to the discard helper
 * (do_discard_extent and its 512B-alignment / superblock-skip logic). */
/* Adjust the range to be aligned to 512B sectors if necessary. */ if (start != aligned_start) {
len -= aligned_start - start;
len = round_down(len, SECTOR_SIZE);
start = aligned_start;
}
*discarded_bytes = 0;
if (!len) return 0;
end = start + len;
bytes_left = len;
/* Skip any superblocks on this device. */ for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
u64 sb_start = btrfs_sb_offset(j);
u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
u64 size = sb_start - start;
/* Zone reset on a zoned filesystem */ if (btrfs_can_zone_reset(dev, phys, len)) {
u64 src_disc;
ret = btrfs_reset_device_zone(dev, phys, len, &discarded); if (ret) goto out;
if (!btrfs_dev_replace_is_ongoing(dev_replace) ||
dev != dev_replace->srcdev) goto out;
src_disc = discarded;
/* Send to replace target as well */
ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
&discarded);
discarded += src_disc;
} elseif (bdev_max_discard_sectors(stripe->dev->bdev)) {
ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
} else {
ret = 0;
*bytes = 0;
}
out:
*bytes = discarded; return ret;
}
/*
 * Discard (TRIM / zone reset) the physical stripes backing a logical range.
 *
 * @fs_info:      Filesystem to discard from.
 * @bytenr:       Logical start of the range.
 * @num_bytes:    Length of the range.
 * @actual_bytes: Optional out parameter; total bytes actually discarded.
 *
 * Unsupported discard (-EOPNOTSUPP) is silently ignored, both from the
 * mapping lookup and from individual devices. Returns 0 or negative errno.
 *
 * NOTE(review): fixed the mangled "unsignedint" token, which does not
 * compile; no logic changes.
 */
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes)
{
	int ret = 0;
	u64 discarded_bytes = 0;
	u64 end = bytenr + num_bytes;
	u64 cur = bytenr;

	/*
	 * Avoid races with device replace and make sure the devices in the
	 * stripes don't go away while we are discarding.
	 */
	btrfs_bio_counter_inc_blocked(fs_info);
	while (cur < end) {
		struct btrfs_discard_stripe *stripes;
		unsigned int num_stripes;
		int i;

		num_bytes = end - cur;
		stripes = btrfs_map_discard(fs_info, cur, &num_bytes, &num_stripes);
		if (IS_ERR(stripes)) {
			ret = PTR_ERR(stripes);
			if (ret == -EOPNOTSUPP)
				ret = 0;
			break;
		}

		for (i = 0; i < num_stripes; i++) {
			struct btrfs_discard_stripe *stripe = stripes + i;
			u64 bytes;

			/* Missing device is only OK when mounted degraded. */
			if (!stripe->dev->bdev) {
				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
				continue;
			}

			if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
				      &stripe->dev->dev_state))
				continue;

			ret = do_discard_extent(stripe, &bytes);
			if (ret) {
				/*
				 * Keep going if discard is not supported by
				 * the device.
				 */
				if (ret != -EOPNOTSUPP)
					break;
				ret = 0;
			} else {
				discarded_bytes += bytes;
			}
		}
		kfree(stripes);
		if (ret)
			break;
		cur += num_bytes;
	}
	btrfs_bio_counter_dec(fs_info);
	if (actual_bytes)
		*actual_bytes = discarded_bytes;
	return ret;
}
/* Can return -ENOMEM */ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref)
{ struct btrfs_fs_info *fs_info = trans->fs_info; int ret;
if (generic_ref->type == BTRFS_REF_METADATA)
ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL); else
ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0);
btrfs_ref_tree_mod(fs_info, generic_ref);
return ret;
}
/*
 * NOTE(review): corrupted region. __btrfs_inc_extent_ref() below is mostly
 * intact through the backref insertion, but its tail (the final return) has
 * been replaced by interior fragments of run_delayed_data_ref() /
 * run_delayed_tree_ref() and of run_one_delayed_ref()'s dispatch. Also note
 * the mangled "staticint"/"conststruct" tokens in the signature. Restore
 * from upstream before editing.
 */
/* * Insert backreference for a given extent. * * The counterpart is in __btrfs_free_extent(), with examples and more details * how it works. * * @trans: Handle of transaction * * @node: The delayed ref node used to get the bytenr/length for * extent whose references are incremented. * * @extent_op Pointer to a structure, holding information necessary when * updating a tree block's flags *
*/ staticint __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, conststruct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extent_op)
{
BTRFS_PATH_AUTO_FREE(path); struct extent_buffer *leaf; struct btrfs_extent_item *item; struct btrfs_key key;
u64 bytenr = node->bytenr;
u64 num_bytes = node->num_bytes;
u64 owner = btrfs_delayed_ref_owner(node);
u64 offset = btrfs_delayed_ref_offset(node);
u64 refs; int refs_to_add = node->ref_mod; int ret;
path = btrfs_alloc_path(); if (!path) return -ENOMEM;
/* this will setup the path even if it fails to insert the back ref */
ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
node->parent, node->ref_root, owner,
offset, refs_to_add, extent_op); if ((ret < 0 && ret != -EAGAIN) || !ret) return ret;
/* * Ok we had -EAGAIN which means we didn't have space to insert and * inline extent ref, so just update the reference count and add a * normal backref.
*/
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, item);
btrfs_set_extent_refs(leaf, item, refs + refs_to_add); if (extent_op)
__run_delayed_extent_op(extent_op, leaf, item);
btrfs_release_path(path);
/* now insert the actual backref */ if (owner < BTRFS_FIRST_FREE_OBJECTID) {
ret = insert_tree_block_ref(trans, path, node, bytenr); if (ret)
btrfs_abort_transaction(trans, ret);
} else {
ret = insert_extent_data_ref(trans, path, node, bytenr); if (ret)
btrfs_abort_transaction(trans, ret);
}
/* NOTE(review): from here down the text belongs to other functions
 * (run_delayed_data_ref's squota handling and run_delayed_tree_ref's
 * action dispatch); __btrfs_inc_extent_ref's own "return ret;" is lost. */
/* * Don't check must_insert_reserved, as this is called from contexts * where it has already been unset.
*/ if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE ||
!href->is_data || !btrfs_is_fstree(root)) return;
ret = alloc_reserved_tree_block(trans, node, extent_op); if (!ret)
btrfs_record_squota_delta(fs_info, &delta);
} elseif (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, node, extent_op);
} elseif (node->action == BTRFS_DROP_DELAYED_REF) {
ret = __btrfs_free_extent(trans, href, node, extent_op);
} else {
BUG();
} return ret;
}
/*
 * Helper function to actually process a single delayed ref entry.
 *
 * Dispatches on the ref's key type: tree-block refs and data refs go to
 * their respective handlers, owner refs need no processing. If the
 * transaction is already aborted, or the handler fails, a reserved extent
 * is pinned so its space is not leaked.
 *
 * NOTE(review): fixed the mangled "staticint"/"elseif" tokens, which do
 * not compile; no logic changes.
 */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_delayed_ref_head *href,
			       const struct btrfs_delayed_ref_node *node,
			       struct btrfs_delayed_extent_op *extent_op,
			       bool insert_reserved)
{
	int ret = 0;

	if (TRANS_ABORTED(trans)) {
		if (insert_reserved) {
			btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
			free_head_ref_squota_rsv(trans->fs_info, href);
		}
		return 0;
	}

	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = run_delayed_tree_ref(trans, href, node, extent_op,
					   insert_reserved);
	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		 node->type == BTRFS_SHARED_DATA_REF_KEY)
		ret = run_delayed_data_ref(trans, href, node, extent_op,
					   insert_reserved);
	else if (node->type == BTRFS_EXTENT_OWNER_REF_KEY)
		ret = 0;
	else
		BUG();

	if (ret && insert_reserved)
		btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
	if (ret < 0)
		btrfs_err(trans->fs_info,
"failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d",
			  node->bytenr, node->num_bytes, node->type,
			  node->action, node->ref_mod, ret);
	return ret;
}
/*
 * NOTE(review): corrupted region containing interleaved interior fragments
 * of cleanup_ref_head() (csum rsv accounting, extent-op run, head deletion
 * under both locks, pinning of must-insert-reserved extents) and of
 * btrfs_run_delayed_refs_for_head() (the per-ref processing loop). Neither
 * function's signature nor its full body is present; restore both from
 * upstream before editing.
 */
/* * We had csum deletions accounted for in our delayed refs rsv, we need * to drop the csum leaves for this update from our delayed_refs_rsv.
*/ if (head->total_ref_mod < 0 && head->is_data) { int nr_csums;
ret = btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums);
} /* must_insert_reserved can be set only if we didn't run the head ref. */ if (head->must_insert_reserved)
free_head_ref_squota_rsv(fs_info, head);
/* NOTE(review): the following locals belong to cleanup_ref_head()'s
 * missing opening. */
struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_ref_root *delayed_refs; int ret;
delayed_refs = &trans->transaction->delayed_refs;
ret = run_and_cleanup_extent_op(trans, head); if (ret < 0) {
btrfs_unselect_ref_head(delayed_refs, head);
btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); return ret;
} elseif (ret) { return ret;
}
/* * Need to drop our head ref lock and re-acquire the delayed ref lock * and then re-check to make sure nobody got added.
*/
spin_unlock(&head->lock);
spin_lock(&delayed_refs->lock);
spin_lock(&head->lock); if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
spin_unlock(&head->lock);
spin_unlock(&delayed_refs->lock); return 1;
}
btrfs_delete_ref_head(fs_info, delayed_refs, head);
spin_unlock(&head->lock);
spin_unlock(&delayed_refs->lock);
if (head->must_insert_reserved) {
btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1); if (head->is_data) { struct btrfs_root *csum_root;
/* NOTE(review): from here down the text belongs to
 * btrfs_run_delayed_refs_for_head()'s ref-processing loop. */
while ((ref = btrfs_select_delayed_ref(locked_ref))) { if (ref->seq &&
btrfs_check_delayed_seq(fs_info, ref->seq)) {
spin_unlock(&locked_ref->lock);
btrfs_unselect_ref_head(delayed_refs, locked_ref); return -EAGAIN;
}
rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
RB_CLEAR_NODE(&ref->ref_node); if (!list_empty(&ref->add_list))
list_del(&ref->add_list); /* * When we play the delayed ref, also correct the ref_mod on * head
*/ switch (ref->action) { case BTRFS_ADD_DELAYED_REF: case BTRFS_ADD_DELAYED_EXTENT:
locked_ref->ref_mod -= ref->ref_mod; break; case BTRFS_DROP_DELAYED_REF:
locked_ref->ref_mod += ref->ref_mod; break; default:
WARN_ON(1);
}
/* * Record the must_insert_reserved flag before we drop the * spin lock.
*/
must_insert_reserved = locked_ref->must_insert_reserved; /* * Unsetting this on the head ref relinquishes ownership of * the rsv_bytes, so it is critical that every possible code * path from here forward frees all reserves including qgroup * reserve.
*/
locked_ref->must_insert_reserved = false;
/*
 * NOTE(review): corrupted fragment. __btrfs_run_delayed_refs() below is
 * missing the tail of its do/while loop (the loop never closes: the
 * "} while ((min_bytes != U64_MAX && bytes_processed < min_bytes) || ...)"
 * termination condition and the final "return 0;" are absent). Restore
 * from upstream before editing.
 */
/* * Returns 0 on success or if called with an already aborted transaction. * Returns -ENOMEM or -EIO on failure and will abort the transaction.
*/ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
u64 min_bytes)
{ struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_head *locked_ref = NULL; int ret; unsignedlong count = 0; unsignedlong max_count = 0;
u64 bytes_processed = 0;
delayed_refs = &trans->transaction->delayed_refs; if (min_bytes == 0) { /* * We may be subject to a harmless race if some task is * concurrently adding or removing a delayed ref, so silence * KCSAN and similar tools.
*/
max_count = data_race(delayed_refs->num_heads_ready);
min_bytes = U64_MAX;
}
do { if (!locked_ref) {
locked_ref = btrfs_select_ref_head(fs_info, delayed_refs); if (IS_ERR_OR_NULL(locked_ref)) { if (PTR_ERR(locked_ref) == -EAGAIN) { continue;
} else { break;
}
}
count++;
} /* * We need to try and merge add/drops of the same ref since we * can run into issues with relocate dropping the implicit ref * and then it being added back again before the drop can * finish. If we merged anything we need to re-loop so we can * get a good ref. * Or we can get node references of the same type that weren't * merged when created due to bumps in the tree mod seq, and * we need to merge them to prevent adding an inline extent * backref before dropping it (triggering a BUG_ON at * insert_inline_extent_backref()).
*/
spin_lock(&locked_ref->lock);
btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
ret = btrfs_run_delayed_refs_for_head(trans, locked_ref, &bytes_processed); if (ret < 0 && ret != -EAGAIN) { /* * Error, btrfs_run_delayed_refs_for_head already * unlocked everything so just bail out
*/ return ret;
} elseif (!ret) { /* * Success, perform the usual cleanup of a processed * head
*/
ret = cleanup_ref_head(trans, locked_ref, &bytes_processed); if (ret > 0 ) { /* We dropped our lock, we need to loop. */
ret = 0; continue;
} elseif (ret) { return ret;
}
}
/* * Either success case or btrfs_run_delayed_refs_for_head * returned -EAGAIN, meaning we need to select another head
*/
#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order. This
 * correlates in most cases to the order added. To expose dependencies on this
 * order, we start to process the tree in the middle instead of the beginning
 */
static u64 find_middle(struct rb_root *root)
{
	struct rb_node *n = root->rb_node;
	struct btrfs_delayed_ref_node *entry;
	int alt = 1;
	/*
	 * NOTE(review): initialized to 0 — the original left 'middle'
	 * uninitialized, so an empty tree returned garbage (UB).
	 */
	u64 middle = 0;
	u64 first = 0, last = 0;

	n = rb_first(root);
	if (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		first = entry->bytenr;
	}
	n = rb_last(root);
	if (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		last = entry->bytenr;
	}
	n = root->rb_node;

	/* Walk down, alternating left/right, to land near the middle entry. */
	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		WARN_ON(!entry->in_tree);

		middle = entry->bytenr;

		if (alt)
			n = n->rb_left;
		else
			n = n->rb_right;

		alt = 1 - alt;
	}
	return middle;
}
#endif
/*
 * NOTE(review): corrupted region. Only the doc comment, signature, and the
 * first two early-return guards of btrfs_run_delayed_refs() are present;
 * the rest of its body (the __btrfs_run_delayed_refs() call and error
 * handling) has been replaced by the interior of check_delayed_ref()
 * (mutex retry, ref-tree walk comparing root/inode/offset). Restore both
 * functions from upstream before editing.
 */
/* * Start processing the delayed reference count updates and extent insertions * we have queued up so far. * * @trans: Transaction handle. * @min_bytes: How many bytes of delayed references to process. After this * many bytes we stop processing delayed references if there are * any more. If 0 it means to run all existing delayed references, * but not new ones added after running all existing ones. * Use (u64)-1 (U64_MAX) to run all existing delayed references * plus any new ones that are added. * * Returns 0 on success or if called with an aborted transaction * Returns <0 on error and aborts the transaction
*/ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, u64 min_bytes)
{ struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_ref_root *delayed_refs; int ret;
/* We'll clean this up in btrfs_cleanup_transaction */ if (TRANS_ABORTED(trans)) return 0;
if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags)) return 0;
/* NOTE(review): from here down the text belongs to check_delayed_ref(). */
/* * Mutex was contended, block until it's released and let * caller try again
*/
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref_head(head);
btrfs_put_transaction(cur_trans); return -EAGAIN;
}
spin_unlock(&delayed_refs->lock);
spin_lock(&head->lock); /* * XXX: We should replace this with a proper search function in the * future.
*/ for (node = rb_first_cached(&head->ref_tree); node;
node = rb_next(node)) {
u64 ref_owner;
u64 ref_offset;
ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); /* If it's a shared ref we know a cross reference exists */ if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
ret = 1; break;
}
/* NOTE(review): the btrfs_delayed_ref_owner()/offset extraction that
 * should populate ref_owner/ref_offset is missing here — they are
 * compared uninitialized as the text stands. */
/* * If our ref doesn't match the one we're currently looking at * then we have a cross reference.
*/ if (ref->ref_root != btrfs_root_id(root) ||
ref_owner != btrfs_ino(inode) || ref_offset != offset) {
ret = 1; break;
}
}
spin_unlock(&head->lock);
mutex_unlock(&head->mutex);
btrfs_put_transaction(cur_trans); return ret;
}
/* * Check if there are references for a data extent other than the one belonging * to the given inode and offset. * * @inode: The only inode we expect to find associated with the data extent. * @path: A path to use for searching the extent tree. * @offset: The only offset we expect to find associated with the data extent. * @bytenr: The logical address of the data extent. * * When the extent does not have any other references other than the one we * expect to find, we always return a value of 0 with the path having a locked * leaf that contains the extent's extent item - this is necessary to ensure * we don't race with a task running delayed references, and our caller must * have such a path when calling check_delayed_ref() - it must lock a delayed * ref head while holding the leaf locked. In case the extent item is not found * in the extent tree, we return -ENOENT with the path having the leaf (locked) * where the extent item should be, in order to prevent races with another task * running delayed references, so that we don't miss any reference when calling * check_delayed_ref(). * * Note: this may return false positives, and this is because we want to be * quick here as we're called in write paths (when flushing delalloc and * in the direct IO write path). For example we can have an extent with * a single reference but that reference is not inlined, or we may have * many references in the extent tree but we also have delayed references * that cancel all the reference except the one for our inode and offset, * but it would be expensive to do such checks and complex due to all * locking to avoid races between the checks and flushing delayed refs,
/*
 * NOTE(review): the text below is website boilerplate (a German content
 * disclaimer) captured together with this source by a scraper; it is not
 * part of the original file. Kept here, commented out and translated, so
 * nothing is silently dropped:
 *
 * "The information on this website has been carefully compiled to the best
 *  of our knowledge. However, neither completeness, nor correctness, nor
 *  quality of the provided information is guaranteed.
 *  Remark: the colored syntax highlighting and the measurement are still
 *  experimental."
 */