/* * HOW DO BLOCK RESERVES WORK * * Think of block_rsv's as buckets for logically grouped metadata * reservations. Each block_rsv has a ->size and a ->reserved. ->size is * how large we want our block rsv to be, ->reserved is how much space is * currently reserved for this block reserve. * * ->failfast exists for the truncate case, and is described below. * * NORMAL OPERATION * * -> Reserve * Entrance: btrfs_block_rsv_add, btrfs_block_rsv_refill * * We call into btrfs_reserve_metadata_bytes() with our bytes, which is * accounted for in space_info->bytes_may_use, and then add the bytes to * ->reserved, and ->size in the case of btrfs_block_rsv_add. * * ->size is an over-estimation of how much we may use for a particular * operation. * * -> Use * Entrance: btrfs_use_block_rsv * * When we do a btrfs_alloc_tree_block() we call into btrfs_use_block_rsv() * to determine the appropriate block_rsv to use, and then verify that * ->reserved has enough space for our tree block allocation. Once * successful we subtract fs_info->nodesize from ->reserved. * * -> Finish * Entrance: btrfs_block_rsv_release * * We are finished with our operation, subtract our individual reservation * from ->size, and then subtract ->size from ->reserved and free up the * excess if there is any. * * There is some logic here to refill the delayed refs rsv or the global rsv * as needed, otherwise the excess is subtracted from * space_info->bytes_may_use. * * TYPES OF BLOCK RESERVES * * BLOCK_RSV_TRANS, BLOCK_RSV_DELOPS, BLOCK_RSV_CHUNK * These behave normally, as described above, just within the confines of the * lifetime of their particular operation (transaction for the whole trans * handle lifetime, for example). * * BLOCK_RSV_GLOBAL * It is impossible to properly account for all the space that may be required * to make our extent tree updates. This block reserve acts as an overflow * buffer in case our delayed refs reserve does not reserve enough space to * update the extent tree. * * We can steal from this in some cases as well, notably on evict() or * truncate() in order to help users recover from ENOSPC conditions. * * BLOCK_RSV_DELALLOC * The individual item sizes are determined by the per-inode size * calculations, which are described with the delalloc code. This is pretty * straightforward, it's just the calculation of ->size encodes a lot of * different items, and thus it gets used when updating inodes, inserting file * extents, and inserting checksums. * * BLOCK_RSV_DELREFS * We keep a running tally of how many delayed refs we have on the system. * We assume each one of these delayed refs are going to use a full * reservation. We use the transaction items and pre-reserve space for every * operation, and use this reservation to refill any gap between ->size and * ->reserved that may exist. * * From there it's straightforward, removing a delayed ref means we remove its * count from ->size and free up reservations as necessary. Since this is * the most dynamic block reserve in the system, we will try to refill this * block reserve first with any excess returned by any other block reserve. * * BLOCK_RSV_EMPTY * This is the fallback block reserve to make us try to reserve space if we * don't have a specific bucket for this allocation. It is mostly used for * updating the device tree and such, since that is a separate pool we're * content to just reserve space from the space_info on demand. * * BLOCK_RSV_TEMP * This is used by things like truncate and iput. We will temporarily * allocate a block reserve, set it to some size, and then truncate bytes * until we have no space left. With ->failfast set we'll simply return * ENOSPC from btrfs_use_block_rsv() to signal that we need to unwind and try * to make a new reservation. This is because these operations are * unbounded, so we want to do as much work as we can, and then back off and * re-reserve.
*/
/* * If we are a delayed block reserve then push to the global rsv, * otherwise dump into the global delayed reserve if it is not full.
*/ if (block_rsv->type == BTRFS_BLOCK_RSV_DELOPS)
target = global_rsv; elseif (block_rsv != global_rsv && !btrfs_block_rsv_full(delayed_rsv))
target = delayed_rsv;
if (target && block_rsv->space_info != target->space_info)
target = NULL;
/* * The global block rsv is based on the size of the extent tree, the * checksum tree and the root tree. If the fs is empty we want to set * it to a minimal amount for safety. * * We also are going to need to modify the minimum of the tree root and * any global roots we could touch.
*/
read_lock(&fs_info->global_root_lock);
rbtree_postorder_for_each_entry_safe(root, tmp, &fs_info->global_root_tree,
rb_node) { if (btrfs_root_id(root) == BTRFS_EXTENT_TREE_OBJECTID ||
btrfs_root_id(root) == BTRFS_CSUM_TREE_OBJECTID ||
btrfs_root_id(root) == BTRFS_FREE_SPACE_TREE_OBJECTID) {
num_bytes += btrfs_root_used(&root->root_item);
min_items++;
}
}
read_unlock(&fs_info->global_root_lock);
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) {
num_bytes += btrfs_root_used(&fs_info->block_group_root->root_item);
min_items++;
}
if (btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE)) {
num_bytes += btrfs_root_used(&fs_info->stripe_root->root_item);
min_items++;
}
/* * But we also want to reserve enough space so we can do the fallback * global reserve for an unlink, which is an additional * BTRFS_UNLINK_METADATA_UNITS items. * * But we also need space for the delayed ref updates from the unlink, * so add BTRFS_UNLINK_METADATA_UNITS units for delayed refs, one for * each unlink metadata item.
*/
min_items += BTRFS_UNLINK_METADATA_UNITS;
switch (btrfs_root_id(root)) { case BTRFS_CSUM_TREE_OBJECTID: case BTRFS_EXTENT_TREE_OBJECTID: case BTRFS_FREE_SPACE_TREE_OBJECTID: case BTRFS_BLOCK_GROUP_TREE_OBJECTID: case BTRFS_RAID_STRIPE_TREE_OBJECTID:
root->block_rsv = &fs_info->delayed_refs_rsv; break; case BTRFS_ROOT_TREE_OBJECTID: case BTRFS_DEV_TREE_OBJECTID: case BTRFS_QUOTA_TREE_OBJECTID:
root->block_rsv = &fs_info->global_block_rsv; break; case BTRFS_CHUNK_TREE_OBJECTID:
root->block_rsv = &fs_info->chunk_block_rsv; break; case BTRFS_TREE_LOG_OBJECTID:
root->block_rsv = &fs_info->treelog_rsv; break; default:
root->block_rsv = NULL; break;
}
}
if (unlikely(btrfs_block_rsv_size(block_rsv) == 0)) goto try_reserve;
again:
ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize); if (!ret) return block_rsv;
/* * The global reserve still exists to save us from ourselves, so don't * warn_on if we are short on our delayed refs reserve.
*/ if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS &&
btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { static DEFINE_RATELIMIT_STATE(_rs,
DEFAULT_RATELIMIT_INTERVAL * 10, /*DEFAULT_RATELIMIT_BURST*/ 1); if (__ratelimit(&_rs))
WARN(1, KERN_DEBUG "BTRFS: block rsv %d returned %d\n",
block_rsv->type, ret);
}
try_reserve:
ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info,
blocksize, BTRFS_RESERVE_NO_FLUSH); if (!ret) return block_rsv; /* * If we couldn't reserve metadata bytes try and use some from * the global reserve if its space type is the same as the global * reservation.
*/ if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
block_rsv->space_info == global_rsv->space_info) {
ret = btrfs_block_rsv_use_bytes(global_rsv, blocksize); if (!ret) return global_rsv;
}
/* * All hope is lost, but of course our reservations are overly * pessimistic, so instead of possibly having an ENOSPC abort here, try * one last time to force a reservation if there's enough actual space * on disk to make the reservation.
*/
ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, blocksize,
BTRFS_RESERVE_FLUSH_EMERGENCY); if (!ret) return block_rsv;
return ERR_PTR(ret);
}
int btrfs_check_trunc_cache_free_space(conststruct btrfs_fs_info *fs_info, struct btrfs_block_rsv *rsv)
{
u64 needed_bytes; int ret;
/* 1 for slack space, 1 for updating the inode */
needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
btrfs_calc_metadata_size(fs_info, 1);
spin_lock(&rsv->lock); if (rsv->reserved < needed_bytes)
ret = -ENOSPC; else
ret = 0;
spin_unlock(&rsv->lock); return ret;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.