// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2017-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org>
*/ #include"xfs.h" #include"xfs_fs.h" #include"xfs_shared.h" #include"xfs_format.h" #include"xfs_trans_resv.h" #include"xfs_mount.h" #include"xfs_inode.h" #include"xfs_btree.h" #include"scrub/scrub.h" #include"scrub/common.h" #include"scrub/btree.h" #include"scrub/trace.h"
/* btree scrubbing */
/* * Check for btree operation errors. See the section about handling * operational errors in common.c.
*/ staticbool
__xchk_btree_process_error( struct xfs_scrub *sc, struct xfs_btree_cur *cur, int level, int *error,
__u32 errflag, void *ret_ip)
{ if (*error == 0) returntrue;
switch (*error) { case -EDEADLOCK: case -ECHRNG: /* Used to restart an op with deadlock avoidance. */
trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); break; case -EFSBADCRC: case -EFSCORRUPTED: /* Note the badness but don't abort. */
sc->sm->sm_flags |= errflag;
*error = 0;
fallthrough; default: if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
trace_xchk_ifork_btree_op_error(sc, cur, level,
*error, ret_ip); else
trace_xchk_btree_op_error(sc, cur, level,
*error, ret_ip); break;
} returnfalse;
}
/* * Make sure this record is in order and doesn't stray outside of the parent * keys.
*/ STATICvoid
xchk_btree_rec( struct xchk_btree *bs)
{ struct xfs_btree_cur *cur = bs->cur; union xfs_btree_rec *rec; union xfs_btree_key key; union xfs_btree_key hkey; union xfs_btree_key *keyp; struct xfs_btree_block *block; struct xfs_btree_block *keyblock; struct xfs_buf *bp;
/* Are all records across all record blocks in order? */ if (bs->lastrec_valid &&
!cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
xchk_btree_set_corrupt(bs->sc, cur, 0);
memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
bs->lastrec_valid = true;
if (cur->bc_nlevels == 1) return;
/* Is low_key(rec) at least as large as the parent low key? */
cur->bc_ops->init_key_from_rec(&key, rec);
keyblock = xfs_btree_get_block(cur, 1, &bp);
keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock); if (xfs_btree_keycmp_lt(cur, &key, keyp))
xchk_btree_set_corrupt(bs->sc, cur, 1);
if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) return;
/* Is high_key(rec) no larger than the parent high key? */
cur->bc_ops->init_high_key_from_rec(&hkey, rec);
keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock); if (xfs_btree_keycmp_lt(cur, keyp, &hkey))
xchk_btree_set_corrupt(bs->sc, cur, 1);
}
/* * Make sure this key is in order and doesn't stray outside of the parent * keys.
*/ STATICvoid
xchk_btree_key( struct xchk_btree *bs, int level)
{ struct xfs_btree_cur *cur = bs->cur; union xfs_btree_key *key; union xfs_btree_key *keyp; struct xfs_btree_block *block; struct xfs_btree_block *keyblock; struct xfs_buf *bp;
/* Are all low keys across all node blocks in order? */ if (bs->lastkey[level - 1].valid &&
!cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1].key, key))
xchk_btree_set_corrupt(bs->sc, cur, level);
memcpy(&bs->lastkey[level - 1].key, key, cur->bc_ops->key_len);
bs->lastkey[level - 1].valid = true;
if (level + 1 >= cur->bc_nlevels) return;
/* Is this block's low key at least as large as the parent low key? */
keyblock = xfs_btree_get_block(cur, level + 1, &bp);
keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock); if (xfs_btree_keycmp_lt(cur, key, keyp))
xchk_btree_set_corrupt(bs->sc, cur, level);
if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) return;
/* Is this block's high key no larger than the parent high key? */
key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block);
keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
keyblock); if (xfs_btree_keycmp_lt(cur, keyp, key))
xchk_btree_set_corrupt(bs->sc, cur, level);
}
/* * Check a btree pointer. Returns true if it's ok to use this pointer. * Callers do not need to set the corrupt flag.
*/ staticbool
xchk_btree_ptr_ok( struct xchk_btree *bs, int level, union xfs_btree_ptr *ptr)
{ /* A btree rooted in an inode has no block pointer to the root. */ if (bs->cur->bc_ops->type == XFS_BTREE_TYPE_INODE &&
level == bs->cur->bc_nlevels) returntrue;
/* Otherwise, check the pointers. */ if (__xfs_btree_check_ptr(bs->cur, ptr, 0, level)) {
xchk_btree_set_corrupt(bs->sc, bs->cur, level); returnfalse;
}
returntrue;
}
/* Check that a btree block's sibling matches what we expect it. */ STATICint
xchk_btree_block_check_sibling( struct xchk_btree *bs, int level, int direction, union xfs_btree_ptr *sibling)
{ struct xfs_btree_cur *cur = bs->cur; struct xfs_btree_block *pblock; struct xfs_buf *pbp; struct xfs_btree_cur *ncur = NULL; union xfs_btree_ptr *pp; int success; int error;
/* Check the siblings of a btree block. */ STATICint
xchk_btree_block_check_siblings( struct xchk_btree *bs, struct xfs_btree_block *block)
{ struct xfs_btree_cur *cur = bs->cur; union xfs_btree_ptr leftsib; union xfs_btree_ptr rightsib; int level; int error = 0;
/* Root block should never have siblings. */ if (level == cur->bc_nlevels - 1) { if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
!xfs_btree_ptr_is_null(cur, &rightsib))
xchk_btree_set_corrupt(bs->sc, cur, level); goto out;
}
/* * Does the left & right sibling pointers match the adjacent * parent level pointers? * (These function absorbs error codes for us.)
*/
error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib); if (error) return error;
error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib); if (error) return error;
out: return error;
}
/* * Make sure this btree block isn't in the free list and that there's * an rmap record for it.
*/ STATICint
xchk_btree_check_block_owner( struct xchk_btree *bs, int level,
xfs_daddr_t daddr)
{
xfs_agnumber_t agno;
xfs_agblock_t agbno; bool init_sa; int error = 0;
/* * If the btree being examined is not itself a per-AG btree, initialize * sc->sa so that we can check for the presence of an ownership record * in the rmap btree for the AG containing the block.
*/
init_sa = bs->cur->bc_ops->type != XFS_BTREE_TYPE_AG; if (init_sa) {
error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa); if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
level, &error)) goto out_free;
}
xchk_xref_is_used_space(bs->sc, agbno, 1); /* * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we * have to nullify it (to shut down further block owner checks) if * self-xref encounters problems.
*/ if (!bs->sc->sa.bno_cur && xfs_btree_is_bno(bs->cur->bc_ops))
bs->cur = NULL;
out_free: if (init_sa)
xchk_ag_free(bs->sc, &bs->sc->sa);
return error;
}
/* Check the owner of a btree block. */ STATICint
xchk_btree_check_owner( struct xchk_btree *bs, int level, struct xfs_buf *bp)
{ struct xfs_btree_cur *cur = bs->cur;
/* * In theory, xfs_btree_get_block should only give us a null buffer * pointer for the root of a root-in-inode btree type, but we need * to check defensively here in case the cursor state is also screwed * up.
*/ if (bp == NULL) { if (cur->bc_ops->type != XFS_BTREE_TYPE_INODE)
xchk_btree_set_corrupt(bs->sc, bs->cur, level); return 0;
}
/* * We want to cross-reference each btree block with the bnobt * and the rmapbt. We cannot cross-reference the bnobt or * rmapbt while scanning the bnobt or rmapbt, respectively, * because we cannot alter the cursor and we'd prefer not to * duplicate cursors. Therefore, save the buffer daddr for * later scanning.
*/ if (xfs_btree_is_bno(cur->bc_ops) || xfs_btree_is_rmap(cur->bc_ops)) { struct check_owner *co;
co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS); if (!co) return -ENOMEM;
/* Decide if we want to check minrecs of a btree block in the inode root. */ staticinlinebool
xchk_btree_check_iroot_minrecs( struct xchk_btree *bs)
{ /* * xfs_bmap_add_attrfork_btree had an implementation bug wherein it * would miscalculate the space required for the data fork bmbt root * when adding an attr fork, and promote the iroot contents to an * external block unnecessarily. This went unnoticed for many years * until scrub found filesystems in this state. Inode rooted btrees are * not supposed to have immediate child blocks that are small enough * that the contents could fit in the inode root, but we can't fail * existing filesystems, so instead we disable the check for data fork * bmap btrees when there's an attr fork.
*/ if (xfs_btree_is_bmap(bs->cur->bc_ops) &&
bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
xfs_inode_has_attr_fork(bs->sc->ip)) returnfalse;
returntrue;
}
/* * Check that this btree block has at least minrecs records or is one of the * special blocks that don't require that.
*/ STATICvoid
xchk_btree_check_minrecs( struct xchk_btree *bs, int level, struct xfs_btree_block *block)
{ struct xfs_btree_cur *cur = bs->cur; unsignedint root_level = cur->bc_nlevels - 1; unsignedint numrecs = be16_to_cpu(block->bb_numrecs);
/* More records than minrecs means the block is ok. */ if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) return;
/* * For btrees rooted in the inode, it's possible that the root block * contents spilled into a regular ondisk block because there wasn't * enough space in the inode root. The number of records in that * child block might be less than the standard minrecs, but that's ok * provided that there's only one direct child of the root.
*/ if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE &&
level == cur->bc_nlevels - 2) { struct xfs_btree_block *root_block; struct xfs_buf *root_bp; int root_maxrecs;
/* * Otherwise, only the root level is allowed to have fewer than minrecs * records or keyptrs.
*/ if (level < root_level)
xchk_btree_set_corrupt(bs->sc, cur, level);
}
/* * If this btree block has a parent, make sure that the parent's keys capture * the keyspace contained in this block.
*/ STATICvoid
xchk_btree_block_check_keys( struct xchk_btree *bs, int level, struct xfs_btree_block *block)
{ union xfs_btree_key block_key; union xfs_btree_key *block_high_key; union xfs_btree_key *parent_low_key, *parent_high_key; struct xfs_btree_cur *cur = bs->cur; struct xfs_btree_block *parent_block; struct xfs_buf *bp;
if (level == cur->bc_nlevels - 1) return;
xfs_btree_get_keys(cur, block, &block_key);
/* Make sure the low key of this block matches the parent. */
parent_block = xfs_btree_get_block(cur, level + 1, &bp);
parent_low_key = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
parent_block); if (xfs_btree_keycmp_ne(cur, &block_key, parent_low_key)) {
xchk_btree_set_corrupt(bs->sc, bs->cur, level); return;
}
if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) return;
/* Make sure the high key of this block matches the parent. */
parent_high_key = xfs_btree_high_key_addr(cur,
cur->bc_levels[level + 1].ptr, parent_block);
block_high_key = xfs_btree_high_key_from_key(cur, &block_key); if (xfs_btree_keycmp_ne(cur, block_high_key, parent_high_key))
xchk_btree_set_corrupt(bs->sc, bs->cur, level);
}
/* * Grab and scrub a btree block given a btree pointer. Returns block * and buffer pointers (if applicable) if they're ok to use.
*/ STATICint
xchk_btree_get_block( struct xchk_btree *bs, int level, union xfs_btree_ptr *pp, struct xfs_btree_block **pblock, struct xfs_buf **pbp)
{ int error;
xfs_btree_get_block(bs->cur, level, pbp); if (__xfs_btree_check_block(bs->cur, *pblock, level, *pbp)) {
xchk_btree_set_corrupt(bs->sc, bs->cur, level); return 0;
} if (*pbp)
xchk_buffer_recheck(bs->sc, *pbp);
xchk_btree_check_minrecs(bs, level, *pblock);
/* * Check the block's owner; this function absorbs error codes * for us.
*/
error = xchk_btree_check_owner(bs, level, *pbp); if (error) return error;
/* * Check the block's siblings; this function absorbs error codes * for us.
*/
error = xchk_btree_block_check_siblings(bs, *pblock); if (error) return error;
/* * Check that the low and high keys of this block match the keys stored * in the parent block.
*/ STATICvoid
xchk_btree_block_keys( struct xchk_btree *bs, int level, struct xfs_btree_block *block)
{ union xfs_btree_key block_keys; struct xfs_btree_cur *cur = bs->cur; union xfs_btree_key *high_bk; union xfs_btree_key *parent_keys; union xfs_btree_key *high_pk; struct xfs_btree_block *parent_block; struct xfs_buf *bp;
if (level >= cur->bc_nlevels - 1) return;
/* Calculate the keys for this block. */
xfs_btree_get_keys(cur, block, &block_keys);
/* Obtain the parent's copy of the keys for this block. */
parent_block = xfs_btree_get_block(cur, level + 1, &bp);
parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
parent_block);
if (xfs_btree_keycmp_ne(cur, &block_keys, parent_keys))
xchk_btree_set_corrupt(bs->sc, cur, 1);
if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) return;
/* Get high keys */
high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
parent_block);
if (xfs_btree_keycmp_ne(cur, high_bk, high_pk))
xchk_btree_set_corrupt(bs->sc, cur, 1);
}
/* * Visit all nodes and leaves of a btree. Check that all pointers and * records are in order, that the keys reflect the records, and use a callback * so that the caller can verify individual records.
*/ int
xchk_btree( struct xfs_scrub *sc, struct xfs_btree_cur *cur,
xchk_btree_rec_fn scrub_fn, conststruct xfs_owner_info *oinfo, void *private)
{ union xfs_btree_ptr ptr; struct xchk_btree *bs; union xfs_btree_ptr *pp; union xfs_btree_rec *recp; struct xfs_btree_block *block; struct xfs_buf *bp; struct check_owner *co; struct check_owner *n;
size_t cur_sz; int level; int error = 0;
/* * Allocate the btree scrub context from the heap, because this * structure can get rather large. Don't let a caller feed us a * totally absurd size.
*/
cur_sz = xchk_btree_sizeof(cur->bc_nlevels); if (cur_sz > PAGE_SIZE) {
xchk_btree_set_corrupt(sc, cur, 0); return 0;
}
bs = kzalloc(cur_sz, XCHK_GFP_FLAGS); if (!bs) return -ENOMEM;
bs->cur = cur;
bs->scrub_rec = scrub_fn;
bs->oinfo = oinfo;
bs->private = private;
bs->sc = sc;
/* Initialize scrub state */
INIT_LIST_HEAD(&bs->to_check);
/* * Load the root of the btree. The helper function absorbs * error codes for us.
*/
level = cur->bc_nlevels - 1;
xfs_btree_init_ptr_from_cur(cur, &ptr); if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr)) goto out;
error = xchk_btree_get_block(bs, level, &ptr, &block, &bp); if (error || !block) goto out;
cur->bc_levels[level].ptr = 1;
while (level < cur->bc_nlevels) {
block = xfs_btree_get_block(cur, level, &bp);
if (level == 0) { /* End of leaf, pop back towards the root. */ if (cur->bc_levels[level].ptr >
be16_to_cpu(block->bb_numrecs)) {
xchk_btree_block_keys(bs, level, block); if (level < cur->bc_nlevels - 1)
cur->bc_levels[level + 1].ptr++;
level++; continue;
}
/* Records in order for scrub? */
xchk_btree_rec(bs);
/* Call out to the record checker. */
recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
block);
error = bs->scrub_rec(bs, recp); if (error) break; if (xchk_should_terminate(sc, &error) ||
(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) break;
cur->bc_levels[level].ptr++; continue;
}
/* End of node, pop back towards the root. */ if (cur->bc_levels[level].ptr >
be16_to_cpu(block->bb_numrecs)) {
xchk_btree_block_keys(bs, level, block); if (level < cur->bc_nlevels - 1)
cur->bc_levels[level + 1].ptr++;
level++; continue;
}
/* Keys in order for scrub? */
xchk_btree_key(bs, level);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.