// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com * Written by Alex Tomas <alex@clusterfs.com> * * Architecture independence: * Copyright (c) 2005, Bull S.A. * Written by Pierre Peiffer <pierre.peiffer@bull.net>
*/
/* * Extents support for EXT4 * * TODO: * - ext4*_error() should be used in some situations * - analyze all BUG()/BUG_ON(), use -EIO where appropriate * - smart tree reduction
*/
/* * used by extent splitting.
*/ #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
due to ENOSPC */ #define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */ #define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */
#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ #define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
if (!ext4_has_feature_metadata_csum(inode->i_sb)) return;
et = find_ext4_extent_tail(eh);
et->et_checksum = ext4_extent_block_csum(inode, eh);
}
staticstruct ext4_ext_path *ext4_split_extent_at(handle_t *handle, struct inode *inode, struct ext4_ext_path *path,
ext4_lblk_t split, int split_flag, int flags);
/*
 * Restart helper used while truncating the extent tree: release
 * i_data_sem (after discarding preallocations) so the running
 * transaction can be restarted without deadlocking against
 * ext4_map_blocks.  This is safe because the page cache has already
 * been dropped and i_rwsem blocks new writes, so get_block can only
 * be called for blocks inside i_size.
 */
static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
{
	BUG_ON(EXT4_JOURNAL(inode) == NULL);

	ext4_discard_preallocations(inode);
	up_write(&EXT4_I(inode)->i_data_sem);
	*dropped = 1;

	return 0;
}
/*
 * Make sure 'handle' has at least 'check_cred' credits.  If not, restart
 * the transaction with 'restart_cred' credits.  i_data_sem is dropped
 * around the restart (see ext4_ext_trunc_restart_fn()) and re-acquired
 * once the transaction has been restarted.
 *
 * Returns 0 on success, 1 if the transaction had to be restarted,
 * and < 0 in case of a fatal error.
 */
int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
				int check_cred, int restart_cred,
				int revoke_cred)
{
	int dropped = 0;
	int ret;

	/*
	 * ext4_journal_ensure_credits_fn() is a macro: the restart
	 * expression is only evaluated when a restart is required.
	 */
	ret = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred,
			revoke_cred, ext4_ext_trunc_restart_fn(inode, &dropped));
	if (dropped)
		down_write(&EXT4_I(inode)->i_data_sem);
	return ret;
}
if (path->p_bh) { /* path points to block */
BUFFER_TRACE(path->p_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, inode->i_sb,
path->p_bh, EXT4_JTR_NONE); /* * The extent buffer's verified bit will be set again in * __ext4_ext_dirty(). We could leave an inconsistent * buffer if the extents updating procudure break off du * to some error happens, force to check it again.
*/ if (!err)
clear_buffer_verified(path->p_bh);
} /* path points to leaf/index in inode body */ /* we use in-core data, no need to protect them */ return err;
}
/* * Try to predict block placement assuming that we are * filling in a file which will eventually be * non-sparse --- i.e., in the case of libbfd writing * an ELF object sections out-of-order but in a way * that eventually results in a contiguous object or * executable file, or some database extending a table * space file. However, this is actually somewhat * non-ideal if we are writing a sparse file such as * qemu or KVM writing a raw image file that is going * to stay fairly sparse, since it will end up * fragmenting the file system's free space. Maybe we * should have some heuristics or some way to allow * userspace to pass a hint to the file system, * especially if the latter case turns out to be * common.
*/
ex = path[depth].p_ext; if (ex) {
ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
/*
 * Return the maximum number of entries that fit in an extent tree
 * node at @depth.  The root node lives in the inode body and is
 * smaller than an on-disk block; leaf nodes (depth 0) hold extents
 * while interior nodes hold indexes.
 */
static int ext4_ext_max_entries(struct inode *inode, int depth)
{
	int is_root = (depth == ext_depth(inode));
	int max;

	if (is_root)
		max = depth ? ext4_ext_space_root_idx(inode, 1) :
			      ext4_ext_space_root(inode, 1);
	else
		max = depth ? ext4_ext_space_block_idx(inode, 1) :
			      ext4_ext_space_block(inode, 1);

	return max;
}
/*
 * Validate a single on-disk extent: reject zero-length extents and
 * logical block wrap-around, then check that the physical range lies
 * within the filesystem.
 */
static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
	ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
	ext4_fsblk_t block = ext4_ext_pblock(ext);
	int len = ext4_ext_get_actual_len(ext);

	/*
	 * We allow neither:
	 *  - zero length
	 *  - overflow/wrap-around
	 */
	if (lblock + len <= lblock)
		return 0;

	return ext4_inode_block_valid(inode, block, len);
}
/* * The logical block in the first entry should equal to * the number in the index block.
*/ if (depth != ext_depth(inode) &&
lblk != le32_to_cpu(ext->ee_block)) return 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0;
/* * The logical block in the first entry should equal to * the number in the parent index block.
*/ if (depth != ext_depth(inode) &&
lblk != le32_to_cpu(ext_idx->ei_block)) return 0; while (entries) { if (!ext4_valid_extent_idx(inode, ext_idx)) return 0;
/* Check for overlapping index extents */
lblock = le32_to_cpu(ext_idx->ei_block); if (lblock < cur) {
*pblk = ext4_idx_pblock(ext_idx); return 0;
}
ext_idx++;
entries--;
cur = lblock + 1;
}
} return 1;
}
staticint __ext4_ext_check(constchar *function, unsignedint line, struct inode *inode, struct ext4_extent_header *eh, int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk)
{ constchar *error_msg; int max = 0, err = -EFSCORRUPTED;
if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
error_msg = "invalid magic"; goto corrupted;
} if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
error_msg = "unexpected eh_depth"; goto corrupted;
} if (unlikely(eh->eh_max == 0)) {
error_msg = "invalid eh_max"; goto corrupted;
}
max = ext4_ext_max_entries(inode, depth); if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
error_msg = "too large eh_max"; goto corrupted;
} if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
error_msg = "invalid eh_entries"; goto corrupted;
} if (unlikely((eh->eh_entries == 0) && (depth > 0))) {
error_msg = "eh_entries is 0 but eh_depth is > 0"; goto corrupted;
} if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
error_msg = "invalid extent entries"; goto corrupted;
} if (unlikely(depth > 32)) {
error_msg = "too large eh_depth"; goto corrupted;
} /* Verify checksum on non-root extent tree nodes */ if (ext_depth(inode) != depth &&
!ext4_extent_block_csum_verify(inode, eh)) {
error_msg = "extent tree corrupted";
err = -EFSBADCRC; goto corrupted;
} return 0;
for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { unsignedint status = EXTENT_STATUS_WRITTEN;
ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); int len = ext4_ext_get_actual_len(ex);
/*
 * NOTE(review): this block is garbled by extraction -- the lines that
 * take ei->i_data_sem for reading, initialize 'depth' and allocate
 * 'path' appear to be missing (as shown, 'path' would still be NULL
 * and 'depth' uninitialized when first used below).  The comments
 * describe the visible control flow only; confirm against pristine
 * source before relying on them.
 */
/* * This function is called to cache a file's extent information in the * extent status tree
*/ int ext4_ext_precache(struct inode *inode)
{ struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_ext_path *path = NULL; struct buffer_head *bh; int i = 0, depth, ret = 0;
/* Nothing to do unless the inode stores its mapping as an extent tree. */
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return 0; /* not an extent-mapped inode */
/* The root node lives in the inode body; validate it before walking. */
path[0].p_hdr = ext_inode_hdr(inode);
ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0); if (ret) goto out;
/*
 * Depth-first walk over every index node.  Leaf blocks are pulled in
 * with EXT4_EX_FORCE_CACHE so their extents land in the extent status
 * tree as a side effect of read_extent_tree_block().
 */
path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr); while (i >= 0) { /* * If this is a leaf block or we've reached the end of * the index block, go up
*/ if ((i == depth) ||
path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) {
ext4_ext_path_brelse(path + i);
i--; continue;
}
/* Descend into the next child; p_idx is advanced past it here. */
bh = read_extent_tree_block(inode, path[i].p_idx++,
depth - i - 1,
EXT4_EX_FORCE_CACHE); if (IS_ERR(bh)) {
ret = PTR_ERR(bh); break;
}
i++;
path[i].p_bh = bh;
path[i].p_hdr = ext_block_hdr(bh);
path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr);
}
/* Mark the whole tree as cached so later lookups can trust the cache. */
ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
out:
up_read(&ei->i_data_sem);
ext4_free_ext_path(path); return ret;
}
#ifdef EXT_DEBUG staticvoid ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{ int k, l = path->p_depth;
/* Dump every extent in the leaf that @path points to (debug builds only). */
static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
{
	struct ext4_extent_header *eh;
	struct ext4_extent *ex;
	int depth = ext_depth(inode);
	int i;

	if (IS_ERR_OR_NULL(path))
		return;

	eh = path[depth].p_hdr;
	ext_debug(inode, "Displaying leaf extents\n");

	for (i = 0, ex = EXT_FIRST_EXTENT(eh);
	     i < le16_to_cpu(eh->eh_entries); i++, ex++) {
		ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
			  ext4_ext_is_unwritten(ex),
			  ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
	}
	ext_debug(inode, "\n");
}
/*
 * Dump the entries at @level that are about to be moved into the
 * freshly allocated block @newblock during a split (debug builds only).
 */
static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
			       ext4_fsblk_t newblock, int level)
{
	int depth = ext_depth(inode);
	struct ext4_extent *ex;

	if (depth != level) {
		/* interior node: the entries being moved are indexes */
		struct ext4_extent_idx *idx = path[level].p_idx;

		while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
			ext_debug(inode, "%d: move %d:%llu in new index %llu\n",
				  level, le32_to_cpu(idx->ei_block),
				  ext4_idx_pblock(idx), newblock);
			idx++;
		}
		return;
	}

	/* leaf node: the entries being moved are extents */
	for (ex = path[depth].p_ext;
	     ex <= EXT_MAX_EXTENT(path[depth].p_hdr); ex++) {
		ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n",
			  le32_to_cpu(ex->ee_block),
			  ext4_ext_pblock(ex),
			  ext4_ext_is_unwritten(ex),
			  ext4_ext_get_actual_len(ex),
			  newblock);
	}
}
/* * ext4_ext_binsearch_idx: * binary search for the closest index of the given block * the header must be checked before calling this
*/ staticvoid
ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t block)
{ struct ext4_extent_header *eh = path->p_hdr; struct ext4_extent_idx *r, *l, *m;
ext_debug(inode, "binsearch for %u(idx): ", block);
l = EXT_FIRST_INDEX(eh) + 1;
r = EXT_LAST_INDEX(eh); while (l <= r) {
m = l + (r - l) / 2;
ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
r, le32_to_cpu(r->ei_block));
if (block < le32_to_cpu(m->ei_block))
r = m - 1; else
l = m + 1;
}
/* * ext4_ext_binsearch: * binary search for closest extent of the given block * the header must be checked before calling this
*/ staticvoid
ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t block)
{ struct ext4_extent_header *eh = path->p_hdr; struct ext4_extent *r, *l, *m;
if (eh->eh_entries == 0) { /* * this leaf is empty: * we get such a leaf in split/add case
*/ return;
}
ext_debug(inode, "binsearch for %u: ", block);
l = EXT_FIRST_EXTENT(eh) + 1;
r = EXT_LAST_EXTENT(eh);
while (l <= r) {
m = l + (r - l) / 2;
ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
r, le32_to_cpu(r->ee_block));
if (block < le32_to_cpu(m->ee_block))
r = m - 1; else
l = m + 1;
}
if (flags & EXT4_EX_NOFAIL)
gfp_flags |= __GFP_NOFAIL;
eh = ext_inode_hdr(inode);
depth = ext_depth(inode); if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) {
EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d",
depth);
ret = -EFSCORRUPTED; goto err;
}
if (path) {
ext4_ext_drop_refs(path); if (depth > path[0].p_maxdepth) {
kfree(path);
path = NULL;
}
} if (!path) { /* account possible depth increase */
path = kcalloc(depth + 2, sizeof(struct ext4_ext_path),
gfp_flags); if (unlikely(!path)) return ERR_PTR(-ENOMEM);
path[0].p_maxdepth = depth + 1;
}
path[0].p_hdr = eh;
path[0].p_bh = NULL;
i = depth; if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
ext4_cache_extents(inode, eh); /* walk through the tree */ while (i) {
ext_debug(inode, "depth %d: num %d, max %d\n",
ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
/* * ext4_ext_insert_index: * insert new index [@logical;@ptr] into the block at @curp; * check where to insert: before @curp or after @curp
*/ staticint ext4_ext_insert_index(handle_t *handle, struct inode *inode, struct ext4_ext_path *curp, int logical, ext4_fsblk_t ptr)
{ struct ext4_extent_idx *ix; int len, err;
err = ext4_ext_get_access(handle, inode, curp); if (err) return err;
if (logical > le32_to_cpu(curp->p_idx->ei_block)) { /* insert after */
ext_debug(inode, "insert new index %d after: %llu\n",
logical, ptr);
ix = curp->p_idx + 1;
} else { /* insert before */
ext_debug(inode, "insert new index %d before: %llu\n",
logical, ptr);
ix = curp->p_idx;
}
len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
BUG_ON(len < 0); if (len > 0) {
ext_debug(inode, "insert new index %d: " "move %d indices from 0x%p to 0x%p\n",
logical, len, ix, ix + 1);
memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
}
/* * ext4_ext_split: * inserts new subtree into the path, using free index entry * at depth @at: * - allocates all needed blocks (new leaf and all intermediate index blocks) * - makes decision where to split * - moves remaining extents and index entries (right to the split point) * into the newly allocated blocks * - initializes subtree
*/ staticint ext4_ext_split(handle_t *handle, struct inode *inode, unsignedint flags, struct ext4_ext_path *path, struct ext4_extent *newext, int at)
{ struct buffer_head *bh = NULL; int depth = ext_depth(inode); struct ext4_extent_header *neh; struct ext4_extent_idx *fidx; int i = at, k, m, a;
ext4_fsblk_t newblock, oldblock;
__le32 border;
ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
gfp_t gfp_flags = GFP_NOFS; int err = 0;
size_t ext_size = 0;
if (flags & EXT4_EX_NOFAIL)
gfp_flags |= __GFP_NOFAIL;
/* make decision: where to split? */ /* FIXME: now decision is simplest: at current extent */
/* if current leaf will be split, then we should use
* border from split point */ if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!"); return -EFSCORRUPTED;
} if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
border = path[depth].p_ext[1].ee_block;
ext_debug(inode, "leaf will be split." " next leaf starts at %d\n",
le32_to_cpu(border));
} else {
border = newext->ee_block;
ext_debug(inode, "leaf will be added." " next leaf starts at %d\n",
le32_to_cpu(border));
}
/* * If error occurs, then we break processing * and mark filesystem read-only. index won't * be inserted and tree will be in consistent * state. Next mount will repair buffers too.
*/
/* * Get array to track all allocated blocks. * We need this to handle errors and free blocks * upon them.
*/
ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), gfp_flags); if (!ablocks) return -ENOMEM;
/* allocate all needed blocks */
ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at); for (a = 0; a < depth - at; a++) {
newblock = ext4_ext_new_meta_block(handle, inode, path,
newext, &err, flags); if (newblock == 0) goto cleanup;
ablocks[a] = newblock;
}
/* correct old index */ if (m) {
err = ext4_ext_get_access(handle, inode, path + i); if (err) goto cleanup;
le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
err = ext4_ext_dirty(handle, inode, path + i); if (err) goto cleanup;
}
i--;
}
/* insert new index */
err = ext4_ext_insert_index(handle, inode, path + at,
le32_to_cpu(border), newblock);
cleanup: if (bh) { if (buffer_locked(bh))
unlock_buffer(bh);
brelse(bh);
}
if (err) { /* free all allocated blocks in error case */ for (i = 0; i < depth; i++) { if (!ablocks[i]) continue;
ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
EXT4_FREE_BLOCKS_METADATA);
}
}
kfree(ablocks);
return err;
}
/* * ext4_ext_grow_indepth: * implements tree growing procedure: * - allocates new block * - moves top-level data (index block or leaf) into the new block * - initializes new top-level, creating index that points to the * just created block
*/ staticint ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, unsignedint flags)
{ struct ext4_extent_header *neh; struct buffer_head *bh;
ext4_fsblk_t newblock, goal = 0; struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; int err = 0;
size_t ext_size = 0;
/* Try to prepend new index to old one */ if (ext_depth(inode))
goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode))); if (goal > le32_to_cpu(es->s_first_data_block)) {
flags |= EXT4_MB_HINT_TRY_GOAL;
goal--;
} else
goal = ext4_inode_to_goal_block(inode);
newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
NULL, &err); if (newblock == 0) return err;
ext_size = sizeof(EXT4_I(inode)->i_data); /* move top-level index/leaf into new block */
memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size); /* zero out unused area in the extent block */
memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
/* set size of new block */
neh = ext_block_hdr(bh); /* old root could have indexes or leaves
* so calculate e_max right way */ if (ext_depth(inode))
neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); else
neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
neh->eh_magic = EXT4_EXT_MAGIC;
ext4_extent_block_csum_set(inode, neh);
set_buffer_uptodate(bh);
set_buffer_verified(bh);
unlock_buffer(bh);
err = ext4_handle_dirty_metadata(handle, inode, bh); if (err) goto out;
/* * ext4_ext_create_new_leaf: * finds empty index and adds new leaf. * if no free index is found, then it requests in-depth growing.
*/ staticstruct ext4_ext_path *
ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, unsignedint mb_flags, unsignedint gb_flags, struct ext4_ext_path *path, struct ext4_extent *newext)
{ struct ext4_ext_path *curp; int depth, i, err = 0;
ext4_lblk_t ee_block = le32_to_cpu(newext->ee_block);
repeat:
i = depth = ext_depth(inode);
/* walk up to the tree and look for free index entry */
curp = path + depth; while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
i--;
curp--;
}
/* we use already allocated block for index block,
* so subsequent data blocks should be contiguous */ if (EXT_HAS_FREE_INDEX(curp)) { /* if we found index with free entry, then use that
* entry: create all needed subtree and add new leaf */
err = ext4_ext_split(handle, inode, mb_flags, path, newext, i); if (err) goto errout;
/* * only first (depth 0 -> 1) produces free space; * in all other cases we have to split the grown tree
*/
depth = ext_depth(inode); if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { /* now we need to split */ goto repeat;
}
/* * search the closest allocated block to the left for *logical * and returns it at @logical + it's physical address at @phys * if *logical is the smallest allocated block, the function * returns 0 at @phys * return value contains 0 (success) or error code
*/ staticint ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
ext4_lblk_t *logical, ext4_fsblk_t *phys)
{ struct ext4_extent_idx *ix; struct ext4_extent *ex; int depth, ee_len;
/* * Search the closest allocated block to the right for *logical * and returns it at @logical + it's physical address at @phys. * If not exists, return 0 and @phys is set to 0. We will return * 1 which means we found an allocated block and ret_ex is valid. * Or return a (< 0) error code.
*/ staticint ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
ext4_lblk_t *logical, ext4_fsblk_t *phys, struct ext4_extent *ret_ex, int flags)
{ struct buffer_head *bh = NULL; struct ext4_extent_header *eh; struct ext4_extent_idx *ix; struct ext4_extent *ex; int depth; /* Note, NOT eh_depth; depth from top of tree */ int ee_len;
if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { /* next allocated block in this leaf */
ex++; goto found_extent;
}
/* go up and search for index to the right */ while (--depth >= 0) {
ix = path[depth].p_idx; if (ix != EXT_LAST_INDEX(path[depth].p_hdr)) goto got_index;
}
/* we've gone up to the root and found no index to the right */ return 0;
got_index: /* we've found index to the right, let's * follow it and find the closest allocated
* block to the right */
ix++; while (++depth < path->p_depth) { /* subtract from p_depth to get proper eh_depth */
bh = read_extent_tree_block(inode, ix, path->p_depth - depth,
flags); if (IS_ERR(bh)) return PTR_ERR(bh);
eh = ext_block_hdr(bh);
ix = EXT_FIRST_INDEX(eh);
put_bh(bh);
}
bh = read_extent_tree_block(inode, ix, path->p_depth - depth, flags); if (IS_ERR(bh)) return PTR_ERR(bh);
eh = ext_block_hdr(bh);
ex = EXT_FIRST_EXTENT(eh);
found_extent:
*logical = le32_to_cpu(ex->ee_block);
*phys = ext4_ext_pblock(ex); if (ret_ex)
*ret_ex = *ex; if (bh)
put_bh(bh); return 1;
}
/* * ext4_ext_next_allocated_block: * returns allocated block in subsequent extent or EXT_MAX_BLOCKS. * NOTE: it considers block number from index entry as * allocated block. Thus, index entries have to be consistent * with leaves.
*/
ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{
	int depth;

	BUG_ON(path == NULL);
	depth = path->p_depth;

	/* empty zero-depth tree: nothing is allocated at all */
	if (depth == 0 && path->p_ext == NULL)
		return EXT_MAX_BLOCKS;

	/* walk upward until some level has an entry to our right */
	for (; depth >= 0; depth--) {
		struct ext4_ext_path *p = &path[depth];

		if (depth == path->p_depth) {
			/* leaf level: next extent in this leaf, if any */
			if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr))
				return le32_to_cpu(p->p_ext[1].ee_block);
		} else {
			/* index level: next index entry, if any */
			if (p->p_idx != EXT_LAST_INDEX(p->p_hdr))
				return le32_to_cpu(p->p_idx[1].ei_block);
		}
	}

	return EXT_MAX_BLOCKS;
}
/*
 * ext4_ext_next_leaf_block:
 * returns the first allocated block of the next leaf, or EXT_MAX_BLOCKS
 * if there is no leaf to the right.
 */
static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
{
	int depth;

	BUG_ON(path == NULL);
	depth = path->p_depth;

	/* a zero-depth tree has no leaf blocks at all */
	if (depth == 0)
		return EXT_MAX_BLOCKS;

	/* start at the index level just above the leaf and walk up */
	for (depth--; depth >= 0; depth--) {
		if (path[depth].p_idx != EXT_LAST_INDEX(path[depth].p_hdr))
			return (ext4_lblk_t)
				le32_to_cpu(path[depth].p_idx[1].ei_block);
	}

	return EXT_MAX_BLOCKS;
}
/* * ext4_ext_correct_indexes: * if leaf gets modified and modified extent is first in the leaf, * then we have to correct all indexes above. * TODO: do we need to correct tree in all cases?
*/ staticint ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, struct ext4_ext_path *path)
{ struct ext4_extent_header *eh; int depth = ext_depth(inode); struct ext4_extent *ex;
__le32 border; int k, err = 0;
if (depth == 0) { /* there is no tree at all */ return 0;
}
if (ex != EXT_FIRST_EXTENT(eh)) { /* we correct tree if first leaf got modified only */ return 0;
}
/* * TODO: we need correction if border is smaller than current one
*/
k = depth - 1;
border = path[depth].p_ext->ee_block;
err = ext4_ext_get_access(handle, inode, path + k); if (err) return err;
path[k].p_idx->ei_block = border;
err = ext4_ext_dirty(handle, inode, path + k); if (err) return err;
while (k--) { /* change all left-side indexes */ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) break;
err = ext4_ext_get_access(handle, inode, path + k); if (err) goto clean;
path[k].p_idx->ei_block = border;
err = ext4_ext_dirty(handle, inode, path + k); if (err) goto clean;
} return 0;
clean: /* * The path[k].p_bh is either unmodified or with no verified bit * set (see ext4_ext_get_access()). So just clear the verified bit * of the successfully modified extents buffers, which will force * these extents to be checked to avoid using inconsistent data.
*/ while (++k < depth)
clear_buffer_verified(path[k].p_bh);
/*
 * NOTE(review): extraction appears to have dropped the lines that set
 * 'depth' and 'eh' (presumably depth = ext_depth(inode) and
 * eh = path[depth].p_hdr) -- as shown, 'eh' is used uninitialized in
 * the loop below.  Confirm against pristine source.
 */
/* * This function tries to merge the "ex" extent to the next extent in the tree. * It always tries to merge towards right. If you want to merge towards * left, pass "ex - 1" as argument instead of "ex". * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns * 1 if they got merged.
*/ staticint ext4_ext_try_to_merge_right(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *ex)
{ struct ext4_extent_header *eh; unsignedint depth, len; int merge_done = 0, unwritten;
/* Keep absorbing the right-hand neighbour while the pair is mergeable. */
while (ex < EXT_LAST_EXTENT(eh)) { if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) break; /* merge with next extent! */
/* Extend ex by the neighbour's length, preserving the unwritten flag
 * (writing ee_len clears it, so it must be re-marked afterwards). */
unwritten = ext4_ext_is_unwritten(ex);
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(ex + 1)); if (unwritten)
ext4_ext_mark_unwritten(ex);
/* Close the gap left by the absorbed extent. */
if (ex + 1 < EXT_LAST_EXTENT(eh)) {
len = (EXT_LAST_EXTENT(eh) - ex - 1)
* sizeof(struct ext4_extent);
memmove(ex + 1, ex + 2, len);
}
le16_add_cpu(&eh->eh_entries, -1);
merge_done = 1;
/* A leaf must never end up empty here; report corruption if it does. */
WARN_ON(eh->eh_entries == 0); if (!eh->eh_entries)
EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
}
return merge_done;
}
/* * This function does a very simple check to see if we can collapse * an extent tree with a single extent tree leaf block into the inode.
*/ staticvoid ext4_ext_try_to_merge_up(handle_t *handle, struct inode *inode, struct ext4_ext_path *path)
{
size_t s; unsigned max_root = ext4_ext_space_root(inode, 0);
ext4_fsblk_t blk;
/* * We need to modify the block allocation bitmap and the block * group descriptor to release the extent tree block. If we * can't get the journal credits, give up.
*/ if (ext4_journal_extend(handle, 2,
ext4_free_metadata_revoke_credits(inode->i_sb, 1))) return;
/* * Copy the extent data up to the inode
*/
blk = ext4_idx_pblock(path[0].p_idx);
s = le16_to_cpu(path[1].p_hdr->eh_entries) * sizeof(struct ext4_extent_idx);
s += sizeof(struct ext4_extent_header);
/*
 * NOTE(review): extraction appears to have dropped the lines that set
 * 'depth' and 'eh' -- as shown, 'eh' is used uninitialized below.
 * Confirm against pristine source.
 */
/* * This function tries to merge the @ex extent to neighbours in the tree, then * tries to collapse the extent tree into the inode.
*/ staticvoid ext4_ext_try_to_merge(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *ex)
{ struct ext4_extent_header *eh; unsignedint depth; int merge_done = 0;
/* First try to merge @ex into its left neighbour (pass ex - 1). */
if (ex > EXT_FIRST_EXTENT(eh))
merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
/* If that did nothing, try merging the right neighbour into @ex. */
if (!merge_done)
(void) ext4_ext_try_to_merge_right(inode, path, ex);
/* Finally see whether the whole tree can collapse into the inode. */
ext4_ext_try_to_merge_up(handle, inode, path);
}
/*
 * NOTE(review): extraction appears to have dropped the lines that
 * compute b1 (newext's cluster-masked start), len1 (newext's length)
 * and b2 (start of the extent found in @path) -- as shown, all three
 * are used uninitialized below.  Confirm against pristine source.
 */
/* * check if a portion of the "newext" extent overlaps with an * existing extent. * * If there is an overlap discovered, it updates the length of the newext * such that there will be no overlap, and then returns 1. * If there is no overlap found, it returns 0.
*/ staticunsignedint ext4_ext_check_overlap(struct ext4_sb_info *sbi, struct inode *inode, struct ext4_extent *newext, struct ext4_ext_path *path)
{
ext4_lblk_t b1, b2; unsignedint depth, len1; unsignedint ret = 0;
/* * get the next allocated block if the extent in the path * is before the requested block(s)
*/ if (b2 < b1) {
b2 = ext4_ext_next_allocated_block(path); if (b2 == EXT_MAX_BLOCKS) goto out;
/* round down to the cluster boundary for bigalloc filesystems */
b2 = EXT4_LBLK_CMASK(sbi, b2);
}
/* check for wrap through zero on extent logical start block*/ if (b1 + len1 < b1) {
len1 = EXT_MAX_BLOCKS - b1;
newext->ee_len = cpu_to_le16(len1);
ret = 1;
}
/* check for overlap */ if (b1 + len1 > b2) {
newext->ee_len = cpu_to_le16(b2 - b1);
ret = 1;
}
out: return ret;
}
/* * ext4_ext_insert_extent: * tries to merge requested extent into the existing extent or * inserts requested extent as new one into the tree, * creating new leaf in the no-space case.
*/ struct ext4_ext_path *
ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *newext, int gb_flags)
{ struct ext4_extent_header *eh; struct ext4_extent *ex, *fex; struct ext4_extent *nearex; /* nearest extent */ int depth, len, err = 0;
ext4_lblk_t next; int mb_flags = 0, unwritten;
/* try to insert block into found extent and return */ if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {
/* * Try to see whether we should rather test the extent on * right from ex, or from the left of ex. This is because * ext4_find_extent() can return either extent on the * left, or on the right from the searched position. This * will make merging more effective.
*/ if (ex < EXT_LAST_EXTENT(eh) &&
(le32_to_cpu(ex->ee_block) +
ext4_ext_get_actual_len(ex) <
le32_to_cpu(newext->ee_block))) {
ex += 1; goto prepend;
} elseif ((ex > EXT_FIRST_EXTENT(eh)) &&
(le32_to_cpu(newext->ee_block) +
ext4_ext_get_actual_len(newext) <
le32_to_cpu(ex->ee_block)))
ex -= 1;
/* Try to append newex to the ex */ if (ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug(inode, "append [%d]%d block to %u:[%d]%d" "(from %llu)\n",
ext4_ext_is_unwritten(newext),
ext4_ext_get_actual_len(newext),
le32_to_cpu(ex->ee_block),
ext4_ext_is_unwritten(ex),
ext4_ext_get_actual_len(ex),
ext4_ext_pblock(ex));
err = ext4_ext_get_access(handle, inode,
path + depth); if (err) goto errout;
unwritten = ext4_ext_is_unwritten(ex);
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(newext)); if (unwritten)
ext4_ext_mark_unwritten(ex);
nearex = ex; goto merge;
}
prepend: /* Try to prepend newex to the ex */ if (ext4_can_extents_be_merged(inode, newext, ex)) {
ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d" "(from %llu)\n",
le32_to_cpu(newext->ee_block),
ext4_ext_is_unwritten(newext),
ext4_ext_get_actual_len(newext),
le32_to_cpu(ex->ee_block),
ext4_ext_is_unwritten(ex),
ext4_ext_get_actual_len(ex),
ext4_ext_pblock(ex));
err = ext4_ext_get_access(handle, inode,
path + depth); if (err) goto errout;
/* probably next leaf has space for us? */
fex = EXT_LAST_EXTENT(eh);
next = EXT_MAX_BLOCKS; if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
next = ext4_ext_next_leaf_block(path); if (next != EXT_MAX_BLOCKS) { struct ext4_ext_path *npath;
/* * There is no free space in the found leaf. * We're gonna add a new leaf in the tree.
*/ if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
mb_flags |= EXT4_MB_USE_RESERVED;
path = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
path, newext); if (IS_ERR(path)) return path;
depth = ext_depth(inode);
eh = path[depth].p_hdr;
while (block <= end) {
next = 0;
flags = 0; if (!ext4_es_lookup_extent(inode, block, &next, &es)) break; if (ext4_es_is_unwritten(&es))
flags |= FIEMAP_EXTENT_UNWRITTEN; if (ext4_es_is_delayed(&es))
flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN); if (ext4_es_is_hole(&es))
flags |= EXT4_FIEMAP_EXTENT_HOLE; if (next == 0)
flags |= FIEMAP_EXTENT_LAST; if (flags & (FIEMAP_EXTENT_DELALLOC|
EXT4_FIEMAP_EXTENT_HOLE))
es.es_pblk = 0; else
es.es_pblk = ext4_es_pblock(&es);
err = fiemap_fill_next_extent(fieinfo,
(__u64)es.es_lblk << blksize_bits,
(__u64)es.es_pblk << blksize_bits,
(__u64)es.es_len << blksize_bits,
flags); if (next == 0) break;
block = next; if (err < 0) return err; if (err == 1) return 0;
} return 0;
}
/*
 * ext4_ext_find_hole - find hole around given block according to the given path
 * @inode:	inode we lookup in
 * @path:	path in extent tree to @lblk
 * @lblk:	pointer to logical block around which we want to determine hole
 *
 * Determine hole length (and start if easily possible) around given logical
 * block.  We don't try too hard to find the beginning of the hole but @path
 * actually points to extent before @lblk, we provide it.
 *
 * The function returns the length of a hole starting at @lblk.  We update
 * @lblk to the beginning of the hole if we managed to find it.
 */
static ext4_lblk_t ext4_ext_find_hole(struct inode *inode,
				      struct ext4_ext_path *path,
				      ext4_lblk_t *lblk)
{
	int depth = ext_depth(inode);
	struct ext4_extent *ex = path[depth].p_ext;
	ext4_lblk_t len;

	if (ex == NULL) {
		/* no extent at all: the whole logical range is a hole */
		*lblk = 0;
		len = EXT_MAX_BLOCKS;
	} else if (*lblk < le32_to_cpu(ex->ee_block)) {
		/* hole runs from *lblk up to the extent on the right */
		len = le32_to_cpu(ex->ee_block) - *lblk;
	} else if (*lblk >= le32_to_cpu(ex->ee_block)
			+ ext4_ext_get_actual_len(ex)) {
		/* hole starts immediately after the extent on the left */
		ext4_lblk_t next;

		*lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
		next = ext4_ext_next_allocated_block(path);
		BUG_ON(next == *lblk);
		len = next - *lblk;
	} else {
		/* *lblk falls inside the extent: the caller's path is bogus */
		BUG();
	}
	return len;
}
/*
 * NOTE(review): most of this function's body is missing from the
 * extracted source -- only the declarations and the 'clean:' error
 * path survive; 'err' and 'leaf' are never assigned in the visible
 * text.  Confirm against pristine source.
 */
/* * ext4_ext_rm_idx: * removes index from the index block.
*/ staticint ext4_ext_rm_idx(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, int depth)
{ int err;
ext4_fsblk_t leaf; int k = depth - 1;
clean: /* * The path[k].p_bh is either unmodified or with no verified bit * set (see ext4_ext_get_access()). So just clear the verified bit * of the successfully modified extents buffers, which will force * these extents to be checked to avoid using inconsistent data.
*/ while (++k < depth)
clear_buffer_verified(path[k].p_bh);
return err;
}
/* * ext4_ext_calc_credits_for_single_extent: * This routine returns max. credits that needed to insert an extent * to the extent tree. * When pass the actual path, the caller should calculate credits * under i_data_sem.
*/ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, struct ext4_ext_path *path)
{ if (path) { int depth = ext_depth(inode); int ret = 0;
/* probably there is space in leaf? */ if (le16_to_cpu(path[depth].p_hdr->eh_entries)
< le16_to_cpu(path[depth].p_hdr->eh_max)) {
/* * There are some space in the leaf tree, no * need to account for leaf block credit * * bitmaps and block group descriptor blocks * and other metadata blocks still need to be * accounted.
*/ /* 1 bitmap, 1 block group descriptor */
ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); return ret;
}
}
/* * How many index/leaf blocks need to change/allocate to add @extents extents? * * If we add a single extent, then in the worse case, each tree level * index/leaf need to be changed in case of the tree split. * * If more extents are inserted, they could cause the whole tree split more * than once, but this is really rare.
*/ int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
{ int index;
/* If we are converting the inline data, only one is needed here. */ if (ext4_has_inline_data(inode)) return 1;
/* * Extent tree can change between the time we estimate credits and * the time we actually modify the tree. Assume the worst case.
*/ if (extents <= 1)
index = (EXT4_MAX_EXTENT_DEPTH * 2) + extents; else
index = (EXT4_MAX_EXTENT_DEPTH * 3) +
DIV_ROUND_UP(extents, ext4_ext_space_block(inode, 0));
/* * ext4_rereserve_cluster - increment the reserved cluster count when * freeing a cluster with a pending reservation * * @inode - file containing the cluster * @lblk - logical block in cluster to be reserved * * Increments the reserved cluster count and adjusts quota in a bigalloc * file system when freeing a partial cluster containing at least one * delayed and unwritten block. A partial cluster meeting that * requirement will have a pending reservation. If so, the * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to * defer reserved and allocated space accounting to a subsequent call * to this function.
*/ staticvoid ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
{ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode);
/*
 * NOTE(review): the following trailing text is extraneous website
 * boilerplate picked up during extraction, not part of the source.
 * Original German, translated to English:
 * "The information on this web page was carefully compiled to the best
 * of our knowledge.  However, neither completeness, nor correctness,
 * nor quality of the provided information is guaranteed.
 * Note: the colored syntax display and the measurement are still
 * experimental."
 */