/*
 * Inode Record Repair
 * ===================
 *
 * Roughly speaking, inode problems can be classified based on whether or not
 * they trip the dinode verifiers.  If those trip, then we won't be able to
 * xfs_iget ourselves the inode.
 *
 * Therefore, the xrep_dinode_* functions fix anything that will cause the
 * inode buffer verifier or the dinode verifier.  The xrep_inode_* functions
 * fix things on live incore inodes.  The inode repair functions make decisions
 * with security and usability implications when reviving a file:
 *
 * - Files with zero di_mode or a garbage di_mode are converted to regular file
 *   that only root can read.  This file may not actually contain user data,
 *   if the file was not previously a regular file.  Setuid and setgid bits
 *   are cleared.
 *
 * - Zero-size directories can be truncated to look empty.  It is necessary to
 *   run the bmapbtd and directory repair functions to fully rebuild the
 *   directory.
 *
 * - Zero-size symbolic link targets can be truncated to '?'.  It is necessary
 *   to run the bmapbtd and symlink repair functions to salvage the symlink.
 *
 * - Invalid extent size hints will be removed.
 *
 * - Quotacheck will be scheduled if we repaired an inode that was so badly
 *   damaged that the ondisk inode had to be rebuilt.
 *
 * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
 *   Setuid and setgid bits are cleared.
 *
 * - Data and attr forks are reset to extents format with zero extents if the
 *   fork data is inconsistent.  It is necessary to run the bmapbtd or bmapbta
 *   repair functions to recover the space mapping.
 *
 * - ACLs will not be recovered if the attr fork is zapped or the extended
 *   attribute structure itself requires salvaging.
 *
 * - If the attr fork is zapped, the user and group ids are reset to root and
 *   the setuid and setgid bits are removed.
 */
/*
 * All the information we need to repair the ondisk inode if we can't iget the
 * incore inode.  We don't allocate this buffer unless we're going to perform
 * a repair to the ondisk inode cluster buffer.
 */
struct xrep_inode {
	/* Inode mapping that we saved from the initial lookup attempt. */
	struct xfs_imap		imap;

	struct xfs_scrub	*sc;

	/* Blocks in use on the data device by data extents or bmbt blocks. */
	xfs_rfsblock_t		data_blocks;

	/* Blocks in use on the rt device. */
	xfs_rfsblock_t		rt_blocks;

	/* Blocks in use by the attr fork. */
	xfs_rfsblock_t		attr_blocks;

	/* Number of data device extents for the data fork. */
	xfs_extnum_t		data_extents;

	/*
	 * Number of realtime device extents for the data fork.  If
	 * data_extents and rt_extents indicate that the data fork has extents
	 * on both devices, we'll just back away slowly.
	 */
	xfs_extnum_t		rt_extents;

	/* Number of (data device) extents for the attr fork. */
	xfs_aextnum_t		attr_extents;

	/* Sick state to set after zapping parts of the inode. */
	unsigned int		ino_sick_mask;

	/* Must we remove all access from this file? */
	bool			zap_acls;

	/* Inode scanner to see if we can find the ftype from dirents */
	struct xchk_iscan	ftype_iscan;
	uint8_t			alleged_ftype;
};
/* * Setup function for inode repair. @imap contains the ondisk inode mapping * information so that we can correct the ondisk inode cluster buffer if * necessary to make iget work.
*/ int
xrep_setup_inode( struct xfs_scrub *sc, conststruct xfs_imap *imap)
{ struct xrep_inode *ri;
sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS); if (!sc->buf) return -ENOMEM;
/* Make sure this inode cluster buffer can pass the inode buffer verifier. */ STATICvoid
xrep_dinode_buf( struct xfs_scrub *sc, struct xfs_buf *bp)
{ struct xfs_mount *mp = sc->mp; int i; int ni;
ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; for (i = 0; i < ni; i++)
xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog);
}
/* Reinitialize things that never change in an inode. */ STATICvoid
xrep_dinode_header( struct xfs_scrub *sc, struct xfs_dinode *dip)
{
trace_xrep_dinode_header(sc, dip);
/* * If this directory entry points to the scrub target inode, then the directory * we're scanning is the parent of the scrub target inode.
*/ STATICint
xrep_dinode_findmode_dirent( struct xfs_scrub *sc, struct xfs_inode *dp,
xfs_dir2_dataptr_t dapos, conststruct xfs_name *name,
xfs_ino_t ino, void *priv)
{ struct xrep_inode *ri = priv; int error = 0;
if (xchk_should_terminate(ri->sc, &error)) return error;
/* Don't pick up dot or dotdot entries; we only want child dirents. */ if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
xfs_dir2_samename(name, &xfs_name_dot)) return 0;
/* * Uhoh, more than one parent for this inode and they don't agree on * the file type?
*/ if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN &&
ri->alleged_ftype != name->type) {
trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type,
ri->alleged_ftype); return -EFSCORRUPTED;
}
/* We found a potential parent; remember the ftype. */
trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type);
ri->alleged_ftype = name->type; return 0;
}
/* Try to lock a directory, or wait a jiffy. */ staticinlineint
xrep_dinode_ilock_nowait( struct xfs_inode *dp, unsignedint lock_mode)
{ if (xfs_ilock_nowait(dp, lock_mode)) returntrue;
schedule_timeout_killable(1); returnfalse;
}
/* * Try to lock a directory to look for ftype hints. Since we already hold the * AGI buffer, we cannot block waiting for the ILOCK because rename can take * the ILOCK and then try to lock AGIs.
*/ STATICint
xrep_dinode_trylock_directory( struct xrep_inode *ri, struct xfs_inode *dp, unsignedint *lock_modep)
{ unsignedlong deadline = jiffies + msecs_to_jiffies(30000); unsignedint lock_mode; int error = 0;
do { if (xchk_should_terminate(ri->sc, &error)) return error;
if (xfs_need_iread_extents(&dp->i_df))
lock_mode = XFS_ILOCK_EXCL; else
lock_mode = XFS_ILOCK_SHARED;
if (xrep_dinode_ilock_nowait(dp, lock_mode)) {
*lock_modep = lock_mode; return 0;
}
} while (!time_is_before_jiffies(deadline)); return -EBUSY;
}
/* * If this is a directory, walk the dirents looking for any that point to the * scrub target inode.
*/ STATICint
xrep_dinode_findmode_walk_directory( struct xrep_inode *ri, struct xfs_inode *dp)
{ struct xfs_scrub *sc = ri->sc; unsignedint lock_mode; int error = 0;
/* Ignore temporary repair directories. */ if (xrep_is_tempfile(dp)) return 0;
/* * Scan the directory to see if there it contains an entry pointing to * the directory that we are repairing.
*/
error = xrep_dinode_trylock_directory(ri, dp, &lock_mode); if (error) return error;
/* * If this directory is known to be sick, we cannot scan it reliably * and must abort.
*/ if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
XFS_SICK_INO_BMBTD |
XFS_SICK_INO_DIR)) {
error = -EFSCORRUPTED; goto out_unlock;
}
/* * We cannot complete our parent pointer scan if a directory looks as * though it has been zapped by the inode record repair code.
*/ if (xchk_dir_looks_zapped(dp)) {
error = -EBUSY; goto out_unlock;
}
error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri); if (error) goto out_unlock;
/* * Try to find the mode of the inode being repaired by looking for directories * that point down to this file.
*/ STATICint
xrep_dinode_find_mode( struct xrep_inode *ri,
uint16_t *mode)
{ struct xfs_scrub *sc = ri->sc; struct xfs_inode *dp; int error;
/* No ftype means we have no other metadata to consult. */ if (!xfs_has_ftype(sc->mp)) {
*mode = S_IFREG; return 0;
}
/* * Scan all directories for parents that might point down to this * inode. Skip the inode being repaired during the scan since it * cannot be its own parent. Note that we still hold the AGI locked * so there's a real possibility that _iscan_iter can return EBUSY.
*/
xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan);
xchk_iscan_set_agi_trylock(&ri->ftype_iscan);
ri->ftype_iscan.skip_ino = sc->sm->sm_ino;
ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN; while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) { if (S_ISDIR(VFS_I(dp)->i_mode))
error = xrep_dinode_findmode_walk_directory(ri, dp);
xchk_iscan_mark_visited(&ri->ftype_iscan, dp);
xchk_irele(sc, dp); if (error < 0) break; if (xchk_should_terminate(sc, &error)) break;
}
xchk_iscan_iter_finish(&ri->ftype_iscan);
xchk_iscan_teardown(&ri->ftype_iscan);
if (error == -EBUSY) { if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) { /* * If we got an EBUSY after finding at least one * dirent, that means the scan found an inode on the * inactivation list and could not open it. Accept the * alleged ftype and install a new mode below.
*/
error = 0;
} elseif (!(sc->flags & XCHK_TRY_HARDER)) { /* * Otherwise, retry the operation one time to see if * the reason for the delay is an inode from the same * cluster buffer waiting on the inactivation list.
*/
error = -EDEADLOCK;
}
} if (error) return error;
/* * Convert the discovered ftype into the file mode. If all else fails, * return S_IFREG.
*/ switch (ri->alleged_ftype) { case XFS_DIR3_FT_DIR:
*mode = S_IFDIR; break; case XFS_DIR3_FT_WHT: case XFS_DIR3_FT_CHRDEV:
*mode = S_IFCHR; break; case XFS_DIR3_FT_BLKDEV:
*mode = S_IFBLK; break; case XFS_DIR3_FT_FIFO:
*mode = S_IFIFO; break; case XFS_DIR3_FT_SOCK:
*mode = S_IFSOCK; break; case XFS_DIR3_FT_SYMLINK:
*mode = S_IFLNK; break; default:
*mode = S_IFREG; break;
} return 0;
}
/* Turn di_mode into /something/ recognizable. Returns true if we succeed. */ STATICint
xrep_dinode_mode( struct xrep_inode *ri, struct xfs_dinode *dip)
{ struct xfs_scrub *sc = ri->sc;
uint16_t mode = be16_to_cpu(dip->di_mode); int error;
trace_xrep_dinode_mode(sc, dip);
if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN) return 0;
/* Try to fix the mode. If we cannot, then leave everything alone. */
error = xrep_dinode_find_mode(ri, &mode); switch (error) { case -EINTR: case -EBUSY: case -EDEADLOCK: /* temporary failure or fatal signal */ return error; case 0: /* found mode */ break; default: /* some other error, assume S_IFREG */
mode = S_IFREG; break;
}
/* bad mode, so we set it to a file that only root can read */
dip->di_mode = cpu_to_be16(mode);
dip->di_uid = 0;
dip->di_gid = 0;
ri->zap_acls = true; return 0;
}
/* Fix unused link count fields having nonzero values. */ STATICvoid
xrep_dinode_nlinks( struct xfs_dinode *dip)
{ if (dip->di_version < 2) {
dip->di_nlink = 0; return;
}
if (xfs_dinode_is_metadir(dip)) { if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
dip->di_metatype = cpu_to_be16(XFS_METAFILE_UNKNOWN);
} else {
dip->di_metatype = 0;
}
}
if (isrt)
flags |= XFS_DIFLAG_REALTIME; else
flags &= ~XFS_DIFLAG_REALTIME;
/* * For regular files on a reflink filesystem, set the REFLINK flag to * protect shared extents. A later stage will actually check those * extents and clear the flag if possible.
*/ if (xfs_has_reflink(mp) && S_ISREG(mode))
flags2 |= XFS_DIFLAG2_REFLINK; else
flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE); if (!xfs_has_bigtime(mp))
flags2 &= ~XFS_DIFLAG2_BIGTIME; if (!xfs_has_large_extent_counts(mp))
flags2 &= ~XFS_DIFLAG2_NREXT64; if (flags2 & XFS_DIFLAG2_NREXT64)
dip->di_nrext64_pad = 0; elseif (dip->di_version >= 3)
dip->di_v3_pad = 0;
if (flags2 & XFS_DIFLAG2_METADATA) {
xfs_failaddr_t fa;
fa = xfs_dinode_verify_metadir(sc->mp, dip, mode, flags,
flags2); if (fa)
flags2 &= ~XFS_DIFLAG2_METADATA;
}
/* * Blow out symlink; now it points nowhere. We don't have to worry about * incore state because this inode is failing the verifiers.
*/ STATICvoid
xrep_dinode_zap_symlink( struct xrep_inode *ri, struct xfs_dinode *dip)
{ struct xfs_scrub *sc = ri->sc; char *p;
/* * Blow out dir, make the parent point to the root. In the future repair will * reconstruct this directory for us. Note that there's no in-core directory * inode because the sf verifier tripped, so we don't have to worry about the * dentry cache.
*/ STATICvoid
xrep_dinode_zap_dir( struct xrep_inode *ri, struct xfs_dinode *dip)
{ struct xfs_scrub *sc = ri->sc; struct xfs_mount *mp = sc->mp; struct xfs_dir2_sf_hdr *sfp; int i8count;
/* Make sure we don't have a garbage file size. */ STATICvoid
xrep_dinode_size( struct xrep_inode *ri, struct xfs_dinode *dip)
{ struct xfs_scrub *sc = ri->sc;
uint64_t size = be64_to_cpu(dip->di_size);
uint16_t mode = be16_to_cpu(dip->di_mode);
trace_xrep_dinode_size(sc, dip);
switch (mode & S_IFMT) { case S_IFIFO: case S_IFCHR: case S_IFBLK: case S_IFSOCK: /* di_size can't be nonzero for special files */
dip->di_size = 0; break; case S_IFREG: /* Regular files can't be larger than 2^63-1 bytes. */
dip->di_size = cpu_to_be64(size & ~(1ULL << 63)); break; case S_IFLNK: /* * Truncate ridiculously oversized symlinks. If the size is * zero, reset it to point to the current directory. Both of * these conditions trigger dinode verifier errors, so there * is no in-core state to reset.
*/ if (size > XFS_SYMLINK_MAXLEN)
dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN); elseif (size == 0)
xrep_dinode_zap_symlink(ri, dip); break; case S_IFDIR: /* * Directories can't have a size larger than 32G. If the size * is zero, reset it to an empty directory. Both of these * conditions trigger dinode verifier errors, so there is no * in-core state to reset.
*/ if (size > XFS_DIR2_SPACE_SIZE)
dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE); elseif (size == 0)
xrep_dinode_zap_dir(ri, dip); break;
}
}
fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
mode, flags, flags2); if (fa) {
dip->di_cowextsize = 0;
dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
}
}
/* Count extents and blocks for an inode given an rmap. */ STATICint
xrep_dinode_walk_rmap( struct xfs_btree_cur *cur, conststruct xfs_rmap_irec *rec, void *priv)
{ struct xrep_inode *ri = priv; int error = 0;
if (xchk_should_terminate(ri->sc, &error)) return error;
/* We only care about this inode. */ if (rec->rm_owner != ri->sc->sm->sm_ino) return 0;
if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
ri->attr_blocks += rec->rm_blockcount; if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
ri->attr_extents++;
return 0;
}
ri->data_blocks += rec->rm_blockcount; if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
ri->data_extents++;
return 0;
}
/* Count extents and blocks for an inode from all AG rmap data. */ STATICint
xrep_dinode_count_ag_rmaps( struct xrep_inode *ri, struct xfs_perag *pag)
{ struct xfs_btree_cur *cur; struct xfs_buf *agf; int error;
error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf); if (error) return error;
/* Count extents and blocks for an inode from all realtime rmap data. */ STATICint
xrep_dinode_count_rtgroup_rmaps( struct xrep_inode *ri, struct xfs_rtgroup *rtg)
{ struct xfs_scrub *sc = ri->sc; int error;
error = xrep_rtgroup_init(sc, rtg, &sc->sr, XFS_RTGLOCK_RMAP); if (error) return error;
/* Count extents and blocks for a given inode from all rmap data. */ STATICint
xrep_dinode_count_rmaps( struct xrep_inode *ri)
{ struct xfs_perag *pag = NULL; struct xfs_rtgroup *rtg = NULL; int error;
if (!xfs_has_rmapbt(ri->sc->mp)) return -EOPNOTSUPP;
while ((rtg = xfs_rtgroup_next(ri->sc->mp, rtg))) {
error = xrep_dinode_count_rtgroup_rmaps(ri, rtg); if (error) {
xfs_rtgroup_rele(rtg); return error;
}
}
while ((pag = xfs_perag_next(ri->sc->mp, pag))) {
error = xrep_dinode_count_ag_rmaps(ri, pag); if (error) {
xfs_perag_rele(pag); return error;
}
}
/* Can't have extents on both the rt and the data device. */ if (ri->data_extents && ri->rt_extents) return -EFSCORRUPTED;
if (level > sc->mp->m_rtrefc_maxlevels) returntrue; if (xfs_rtrefcount_droot_space_calc(level, nrecs) > dfork_size) returntrue; if (level > 0 && nrecs == 0) returntrue;
returnfalse;
}
/* Check a metadata-btree fork. */ STATICbool
xrep_dinode_bad_metabt_fork( struct xfs_scrub *sc, struct xfs_dinode *dip, unsignedint dfork_size, int whichfork)
{ if (whichfork != XFS_DATA_FORK) returntrue;
switch (be16_to_cpu(dip->di_metatype)) { case XFS_METAFILE_RTRMAP: return xrep_dinode_bad_rtrmapbt_fork(sc, dip, dfork_size); case XFS_METAFILE_RTREFCOUNT: return xrep_dinode_bad_rtrefcountbt_fork(sc, dip, dfork_size); default: returntrue;
}
returnfalse;
}
/* * Check the data fork for things that will fail the ifork verifiers or the * ifork formatters.
*/ STATICbool
xrep_dinode_check_dfork( struct xfs_scrub *sc, struct xfs_dinode *dip,
uint16_t mode)
{ void *dfork_ptr;
int64_t data_size; unsignedint fmt; unsignedint dfork_size;
/* * Verifier functions take signed int64_t, so check for bogus negative * values first.
*/
data_size = be64_to_cpu(dip->di_size); if (data_size < 0) returntrue;
fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK); switch (mode & S_IFMT) { case S_IFIFO: case S_IFCHR: case S_IFBLK: case S_IFSOCK: if (fmt != XFS_DINODE_FMT_DEV) returntrue; break; case S_IFREG: switch (fmt) { case XFS_DINODE_FMT_LOCAL: returntrue; case XFS_DINODE_FMT_EXTENTS: case XFS_DINODE_FMT_BTREE: case XFS_DINODE_FMT_META_BTREE: break; default: returntrue;
} break; case S_IFLNK: case S_IFDIR: switch (fmt) { case XFS_DINODE_FMT_LOCAL: case XFS_DINODE_FMT_EXTENTS: case XFS_DINODE_FMT_BTREE: break; default: returntrue;
} break; default: returntrue;
}
/* Special files always get reset to DEV */ switch (mode & S_IFMT) { case S_IFIFO: case S_IFCHR: case S_IFBLK: case S_IFSOCK:
dip->di_format = XFS_DINODE_FMT_DEV;
dip->di_size = 0; return;
}
/* * If we have data extents, reset to an empty map and hope the user * will run the bmapbtd checker next.
*/ if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
dip->di_format = XFS_DINODE_FMT_EXTENTS; return;
}
/* Otherwise, reset the local format to the minimum. */ switch (mode & S_IFMT) { case S_IFLNK:
xrep_dinode_zap_symlink(ri, dip); break; case S_IFDIR:
xrep_dinode_zap_dir(ri, dip); break;
}
}
/* * Check the attr fork for things that will fail the ifork verifiers or the * ifork formatters.
*/ STATICbool
xrep_dinode_check_afork( struct xfs_scrub *sc, struct xfs_dinode *dip)
{ struct xfs_attr_sf_hdr *afork_ptr;
size_t attr_size; unsignedint afork_size;
switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) { case XFS_DINODE_FMT_LOCAL: /* Fork has to be large enough to extract the xattr size. */ if (afork_size < sizeof(struct xfs_attr_sf_hdr)) returntrue;
/* xattr structure cannot be larger than the fork */
attr_size = be16_to_cpu(afork_ptr->totsize); if (attr_size > afork_size) returntrue;
/* xattr structure must pass verification. */ return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL; case XFS_DINODE_FMT_EXTENTS: if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
XFS_ATTR_FORK)) returntrue; break; case XFS_DINODE_FMT_BTREE: if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
XFS_ATTR_FORK)) returntrue; break; case XFS_DINODE_FMT_META_BTREE: if (xrep_dinode_bad_metabt_fork(sc, dip, afork_size,
XFS_ATTR_FORK)) returntrue; break; default: returntrue;
}
returnfalse;
}
/* * Reset the attr fork to empty. Since the attr fork could have contained * ACLs, make the file readable only by root.
*/ STATICvoid
xrep_dinode_zap_afork( struct xrep_inode *ri, struct xfs_dinode *dip,
uint16_t mode)
{ struct xfs_scrub *sc = ri->sc;
/* * If the data fork is in btree format, removing the attr fork entirely * might cause verifier failures if the next level down in the bmbt * could now fit in the data fork area.
*/ if (dip->di_format != XFS_DINODE_FMT_BTREE)
dip->di_forkoff = 0;
dip->di_mode = cpu_to_be16(mode & ~0777);
dip->di_uid = 0;
dip->di_gid = 0;
}
/* * Before calling this function, xrep_dinode_core ensured that both * forks actually fit inside their respective literal areas. If this * was not the case, the fork was reset to FMT_EXTENTS with zero * records. If the rmapbt scan found attr or data fork blocks, this * will be noted in the dinode_stats, and we must leave enough room * for the bmap repair code to reconstruct the mapping structure. * * First, compute the minimum space required for the attr fork.
*/ switch (dip->di_aformat) { case XFS_DINODE_FMT_LOCAL: /* * If we still have a shortform xattr structure at all, that * means the attr fork area was exactly large enough to fit * the sf structure.
*/
afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK); break; case XFS_DINODE_FMT_EXTENTS:
attr_extents = xfs_dfork_attr_extents(dip); if (attr_extents) { /* * We must maintain sufficient space to hold the entire * extent map array in the data fork. Note that we * previously zapped the fork if it had no chance of * fitting in the inode.
*/
afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
} elseif (ri->attr_extents > 0) { /* * The attr fork thinks it has zero extents, but we * found some xattr extents. We need to leave enough * empty space here so that the incore attr fork will * get created (and hence trigger the attr fork bmap * repairer).
*/
afork_min = bmdr_minsz;
} else { /* No extents on disk or found in rmapbt. */
afork_min = 0;
} break; case XFS_DINODE_FMT_BTREE: /* Must have space for btree header and key/pointers. */
bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
afork_min = xfs_bmap_broot_space(sc->mp, bmdr); break; default: /* We should never see any other formats. */
afork_min = 0; break;
}
/* Compute the minimum space required for the data fork. */ switch (dip->di_format) { case XFS_DINODE_FMT_DEV:
dfork_min = sizeof(__be32); break; case XFS_DINODE_FMT_UUID:
dfork_min = sizeof(uuid_t); break; case XFS_DINODE_FMT_LOCAL: /* * If we still have a shortform data fork at all, that means * the data fork area was large enough to fit whatever was in * there.
*/
dfork_min = be64_to_cpu(dip->di_size); break; case XFS_DINODE_FMT_EXTENTS:
data_extents = xfs_dfork_data_extents(dip); if (data_extents) { /* * We must maintain sufficient space to hold the entire * extent map array in the data fork. Note that we * previously zapped the fork if it had no chance of * fitting in the inode.
*/
dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
} elseif (ri->data_extents > 0 || ri->rt_extents > 0) { /* * The data fork thinks it has zero extents, but we * found some data extents. We need to leave enough * empty space here so that the data fork bmap repair * will recover the mappings.
*/
dfork_min = bmdr_minsz;
} else { /* No extents on disk or found in rmapbt. */
dfork_min = 0;
} break; case XFS_DINODE_FMT_BTREE: /* Must have space for btree header and key/pointers. */
bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
dfork_min = xfs_bmap_broot_space(sc->mp, bmdr); break; case XFS_DINODE_FMT_META_BTREE: switch (be16_to_cpu(dip->di_metatype)) { case XFS_METAFILE_RTRMAP:
rmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
dfork_min = xfs_rtrmap_broot_space(sc->mp, rmdr); break; case XFS_METAFILE_RTREFCOUNT:
rcdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
dfork_min = xfs_rtrefcount_broot_space(sc->mp, rcdr); break; default:
dfork_min = 0; break;
} break; default:
dfork_min = 0; break;
}
/* * Round all values up to the nearest 8 bytes, because that is the * precision of di_forkoff.
*/
afork_min = roundup(afork_min, 8);
dfork_min = roundup(dfork_min, 8);
bmdr_minsz = roundup(bmdr_minsz, 8);
/* * If the data fork was zapped and we don't have enough space for the * recovery fork, move the attr fork up.
*/ if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
xfs_dfork_data_extents(dip) == 0 &&
(ri->data_extents > 0 || ri->rt_extents > 0) &&
bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) { if (bmdr_minsz + afork_min > lit_sz) { /* * The attr for and the stub fork we need to recover * the data fork won't both fit. Zap the attr fork.
*/
xrep_dinode_zap_afork(ri, dip, mode);
afork_min = bmdr_minsz;
} else { void *before, *after;
/* Otherwise, just slide the attr fork up. */
before = XFS_DFORK_APTR(dip);
dip->di_forkoff = bmdr_minsz >> 3;
after = XFS_DFORK_APTR(dip);
memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
}
}
/* * If the attr fork was zapped and we don't have enough space for the * recovery fork, move the attr fork down.
*/ if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
xfs_dfork_attr_extents(dip) == 0 &&
ri->attr_extents > 0 &&
bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) { if (dip->di_format == XFS_DINODE_FMT_BTREE) { /* * If the data fork is in btree format then we can't * adjust forkoff because that runs the risk of * violating the extents/btree format transition rules.
*/
} elseif (bmdr_minsz + dfork_min > lit_sz) { /* * If we can't move the attr fork, too bad, we lose the * attr fork and leak its blocks.
*/
xrep_dinode_zap_afork(ri, dip, mode);
} else { /* * Otherwise, just slide the attr fork down. The attr * fork is empty, so we don't have any old contents to * move here.
*/
dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
}
}
}
/* * Zap the data/attr forks if we spot anything that isn't going to pass the * ifork verifiers or the ifork formatters, because we need to get the inode * into good enough shape that the higher level repair functions can run.
*/ STATICvoid
xrep_dinode_zap_forks( struct xrep_inode *ri, struct xfs_dinode *dip)
{ struct xfs_scrub *sc = ri->sc;
xfs_extnum_t data_extents;
xfs_extnum_t attr_extents;
xfs_filblks_t nblocks;
uint16_t mode; bool zap_datafork = false; bool zap_attrfork = ri->zap_acls;
/* Inode counters don't make sense? */ if (data_extents > nblocks)
zap_datafork = true; if (attr_extents > nblocks)
zap_attrfork = true; if (data_extents + attr_extents > nblocks)
zap_datafork = zap_attrfork = true;
if (!zap_datafork)
zap_datafork = xrep_dinode_check_dfork(sc, dip, mode); if (!zap_attrfork)
zap_attrfork = xrep_dinode_check_afork(sc, dip);
/* Zap whatever's bad. */ if (zap_attrfork)
xrep_dinode_zap_afork(ri, dip, mode); if (zap_datafork)
xrep_dinode_zap_dfork(ri, dip, mode);
xrep_dinode_ensure_forkoff(ri, dip, mode);
/* * Zero di_nblocks if we don't have any extents at all to satisfy the * buffer verifier.
*/
data_extents = xfs_dfork_data_extents(dip);
attr_extents = xfs_dfork_attr_extents(dip); if (data_extents + attr_extents == 0)
dip->di_nblocks = 0;
}
/* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */ STATICint
xrep_dinode_core( struct xrep_inode *ri)
{ struct xfs_scrub *sc = ri->sc; struct xfs_buf *bp; struct xfs_dinode *dip;
xfs_ino_t ino = sc->sm->sm_ino; int error; int iget_error;
/* Figure out what this inode had mapped in both forks. */
error = xrep_dinode_count_rmaps(ri); if (error) return error;
/* * In theory, we've fixed the ondisk inode record enough that we should * be able to load the inode into the cache. Try to iget that inode * now while we hold the AGI and the inode cluster buffer and take the * IOLOCK so that we can continue with repairs without anyone else * accessing the inode. If iget fails, we still need to commit the * changes.
*/ if (!iget_error)
iget_error = xchk_iget(sc, ino, &sc->ip); if (!iget_error)
xchk_ilock(sc, XFS_IOLOCK_EXCL);
/* * Commit the inode cluster buffer updates and drop the AGI buffer that * we've been holding since scrub setup. From here on out, repairs * deal only with the cached inode.
*/
error = xrep_trans_commit(sc); if (error) return error;
if (iget_error) return iget_error;
error = xchk_trans_alloc(sc, 0); if (error) return error;
error = xrep_ino_dqattach(sc); if (error) return error;
xchk_ilock(sc, XFS_ILOCK_EXCL); if (ri->ino_sick_mask)
xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask); return 0;
}
error = xrep_dinode_core(ri); if (error) return error;
/* We had to fix a totally busted inode, schedule quotacheck. */ if (XFS_IS_UQUOTA_ON(sc->mp))
xrep_force_quotacheck(sc, XFS_DQTYPE_USER); if (XFS_IS_GQUOTA_ON(sc->mp))
xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP); if (XFS_IS_PQUOTA_ON(sc->mp))
xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
return 0;
}
/* * Fix problems that the verifiers don't care about. In general these are * errors that don't cause problems elsewhere in the kernel that we can easily * detect, so we don't check them all that rigorously.
*/
/* Make sure block and extent counts are ok. */ STATICint
xrep_inode_blockcounts( struct xfs_scrub *sc)
{ struct xfs_ifork *ifp;
xfs_filblks_t count;
xfs_filblks_t acount;
xfs_extnum_t nextents; int error;
trace_xrep_inode_blockcounts(sc);
/* Set data fork counters from the data fork mappings. */
error = xchk_inode_count_blocks(sc, XFS_DATA_FORK, &nextents, &count); if (error) return error; if (xfs_is_reflink_inode(sc->ip)) { /* * data fork blockcount can exceed physical storage if a user * reflinks the same block over and over again.
*/
;
} elseif (XFS_IS_REALTIME_INODE(sc->ip)) { if (count >= sc->mp->m_sb.sb_rblocks) return -EFSCORRUPTED;
} else { if (count >= sc->mp->m_sb.sb_dblocks) return -EFSCORRUPTED;
}
error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents); if (error) return error;
sc->ip->i_df.if_nextents = nextents;
/* Set attr fork counters from the attr fork mappings. */
ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK); if (ifp) {
error = xchk_inode_count_blocks(sc, XFS_ATTR_FORK, &nextents,
&acount); if (error) return error; if (count >= sc->mp->m_sb.sb_dblocks) return -EFSCORRUPTED;
error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
nextents); if (error) return error;
ifp->if_nextents = nextents;
} else {
acount = 0;
}
/* NEWRTBM only applies to realtime bitmaps */ if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM; else
sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;
/* These only make sense for directories. */ if (!S_ISDIR(mode))
sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
XFS_DIFLAG_EXTSZINHERIT |
XFS_DIFLAG_PROJINHERIT |
XFS_DIFLAG_NOSYMLINKS);
/* These only make sense for files. */ if (!S_ISREG(mode))
sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
XFS_DIFLAG_EXTSIZE);
/* These only make sense for non-rt files. */ if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;
/* Immutable and append only? Drop the append. */ if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
(sc->ip->i_diflags & XFS_DIFLAG_APPEND))
sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;
/* No reflink flag unless we support it and it's a file. */ if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
/* DAX only applies to files and dirs. */ if (!(S_ISREG(mode) || S_ISDIR(mode)))
sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
}
/* * Fix size problems with block/node format directories. If we fail to find * the extent list, just bail out and let the bmapbtd repair functions clean * up that mess.
*/ STATICvoid
xrep_inode_blockdir_size( struct xfs_scrub *sc)
{ struct xfs_iext_cursor icur; struct xfs_bmbt_irec got; struct xfs_ifork *ifp;
xfs_fileoff_t off; int error;
trace_xrep_inode_blockdir_size(sc);
error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK); if (error) return;
/* Find the last block before 32G; this is the dir size. */
ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE); if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) { /* zero-extents directory? */ return;
}
/* * Fix any irregularities in a directory inode's size now that we can iterate * extent maps and access other regular inode data.
*/ STATICvoid
xrep_inode_dir_size( struct xfs_scrub *sc)
{
trace_xrep_inode_dir_size(sc);
switch (sc->ip->i_df.if_format) { case XFS_DINODE_FMT_EXTENTS: case XFS_DINODE_FMT_BTREE:
xrep_inode_blockdir_size(sc); break; case XFS_DINODE_FMT_LOCAL:
xrep_inode_sfdir_size(sc); break;
}
}
/* Ensure this file has an attr fork if it needs to hold a parent pointer. */ STATICint
xrep_inode_pptr( struct xfs_scrub *sc)
{ struct xfs_mount *mp = sc->mp; struct xfs_inode *ip = sc->ip; struct inode *inode = VFS_I(ip);
if (!xfs_has_parent(mp)) return 0;
/* * Unlinked inodes that cannot be added to the directory tree will not * have a parent pointer.
*/ if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE)) return 0;
/* Children of the superblock do not have parent pointers. */ if (xchk_inode_is_sb_rooted(ip)) return 0;
/* Inode already has an attr fork; no further work possible here. */ if (xfs_inode_has_attr_fork(ip)) return 0;
/* Fix any irregularities in an inode that the verifiers don't catch. */ STATICint
xrep_inode_problems( struct xfs_scrub *sc)
{ int error;
error = xrep_inode_blockcounts(sc); if (error) return error;
error = xrep_inode_pptr(sc); if (error) return error;
xrep_inode_timestamps(sc->ip);
xrep_inode_flags(sc);
xrep_inode_ids(sc); /* * We can now do a better job fixing the size of a directory now that * we can scan the data fork extents than we could in xrep_dinode_size.
*/ if (S_ISDIR(VFS_I(sc->ip)->i_mode))
xrep_inode_dir_size(sc);
xrep_inode_extsize(sc);
xrep_inode_cowextsize(sc);
/* * Make sure this inode's unlinked list pointers are consistent with its * link count.
*/ STATICint
xrep_inode_unlinked( struct xfs_scrub *sc)
{ unsignedint nlink = VFS_I(sc->ip)->i_nlink; int error;
/* * If this inode is linked from the directory tree and on the unlinked * list, remove it from the unlinked list.
*/ if (nlink > 0 && xfs_inode_on_unlinked_list(sc->ip)) { struct xfs_perag *pag; int error;
/* * If this inode is not linked from the directory tree yet not on the * unlinked list, put it on the unlinked list.
*/ if (nlink == 0 && !xfs_inode_on_unlinked_list(sc->ip)) {
error = xfs_iunlink(sc->tp, sc->ip); if (error) return error;
}
return 0;
}
/* Repair an inode's fields. */ int
xrep_inode( struct xfs_scrub *sc)
{ int error = 0;
/* * No inode? That means we failed the _iget verifiers. Repair all * the things that the inode verifiers care about, then retry _iget.
*/ if (!sc->ip) { struct xrep_inode *ri = sc->buf;
ASSERT(ri != NULL);
error = xrep_dinode_problems(ri); if (error == -EBUSY) { /* * Directory scan to recover inode mode encountered a * busy inode, so we did not continue repairing things.
*/ return 0;
} if (error) return error;
/* By this point we had better have a working incore inode. */ if (!sc->ip) return -EFSCORRUPTED;
}
xfs_trans_ijoin(sc->tp, sc->ip, 0);
/* If we found corruption of any kind, try to fix it. */ if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
(sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
error = xrep_inode_problems(sc); if (error) return error;
}
/* See if we can clear the reflink flag. */ if (xfs_is_reflink_inode(sc->ip)) {
error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp); if (error) return error;
}
/* Reconnect incore unlinked list */
error = xrep_inode_unlinked(sc); if (error) return error;
return xrep_defer_finish(sc);
}
/*
 * NOTE(review): trailing text here was a German web-viewer footer (processing
 * time, disclaimer) accidentally appended by the extraction tool; it was not
 * part of this source file and has been removed.
 */