/*
 * Look up a record by inode number in the btree that @cur refers to.
 *
 * The cursor's lookup record is seeded with @ino as the start inode and
 * with every other field zeroed; the search itself is delegated to
 * xfs_btree_lookup() using the comparison direction @dir.  The value
 * returned is whatever xfs_btree_lookup() returns, and @stat is filled in
 * by that call.
 */
int					/* error */
xfs_inobt_lookup(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agino_t		ino,	/* starting inode of chunk */
	xfs_lookup_t		dir,	/* <=, >=, == */
	int			*stat)	/* success/failure */
{
	/* Seed the lookup record: caller's start inode, all else zero. */
	cur->bc_rec.i.ir_free = 0;
	cur->bc_rec.i.ir_freecount = 0;
	cur->bc_rec.i.ir_count = 0;
	cur->bc_rec.i.ir_holemask = 0;
	cur->bc_rec.i.ir_startino = ino;

	return xfs_btree_lookup(cur, dir, stat);
}
/* * Update the record referred to by cur to the value given. * This either works (return 0) or gets an EFSCORRUPTED error.
*/ STATICint/* error */
xfs_inobt_update( struct xfs_btree_cur *cur, /* btree cursor */
xfs_inobt_rec_incore_t *irec) /* btree record */
{ union xfs_btree_rec rec;
/* Convert on-disk btree record to incore inobt record. */ void
xfs_inobt_btrec_to_irec( struct xfs_mount *mp, constunion xfs_btree_rec *rec, struct xfs_inobt_rec_incore *irec)
{
irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); if (xfs_has_sparseinodes(mp)) {
irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
irec->ir_count = rec->inobt.ir_u.sp.ir_count;
irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
} else { /* * ir_holemask/ir_count not supported on-disk. Fill in hardcoded * values for full inode chunks.
*/
irec->ir_holemask = XFS_INOBT_HOLEMASK_FULL;
irec->ir_count = XFS_INODES_PER_CHUNK;
irec->ir_freecount =
be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
}
irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
}
/* Compute the freecount of an incore inode record. */
uint8_t
xfs_inobt_rec_freecount( conststruct xfs_inobt_rec_incore *irec)
{
uint64_t realfree = irec->ir_free;
if (xfs_inobt_issparse(irec->ir_holemask))
realfree &= xfs_inobt_irec_to_allocmask(irec); return hweight64(realfree);
}
/* Simple checks for inode records. */
xfs_failaddr_t
xfs_inobt_check_irec( struct xfs_perag *pag, conststruct xfs_inobt_rec_incore *irec)
{ /* Record has to be properly aligned within the AG. */ if (!xfs_verify_agino(pag, irec->ir_startino)) return __this_address; if (!xfs_verify_agino(pag,
irec->ir_startino + XFS_INODES_PER_CHUNK - 1)) return __this_address; if (irec->ir_count < XFS_INODES_PER_HOLEMASK_BIT ||
irec->ir_count > XFS_INODES_PER_CHUNK) return __this_address; if (irec->ir_freecount > XFS_INODES_PER_CHUNK) return __this_address;
if (xfs_inobt_rec_freecount(irec) != irec->ir_freecount) return __this_address;
xfs_warn(mp, "%sbt record corruption in AG %d detected at %pS!",
cur->bc_ops->name, cur->bc_group->xg_gno, fa);
xfs_warn(mp, "start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x",
irec->ir_startino, irec->ir_count, irec->ir_freecount,
irec->ir_free, irec->ir_holemask);
xfs_btree_mark_sick(cur); return -EFSCORRUPTED;
}
/* * Get the data from the pointed-to record.
*/ int
xfs_inobt_get_rec( struct xfs_btree_cur *cur, struct xfs_inobt_rec_incore *irec, int *stat)
{ struct xfs_mount *mp = cur->bc_mp; union xfs_btree_rec *rec;
xfs_failaddr_t fa; int error;
/* * Verify that the number of free inodes in the AGI is correct.
*/ #ifdef DEBUG staticint
xfs_check_agi_freecount( struct xfs_btree_cur *cur)
{ if (cur->bc_nlevels == 1) {
xfs_inobt_rec_incore_t rec; int freecount = 0; int error; int i;
error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); if (error) return error;
do {
error = xfs_inobt_get_rec(cur, &rec, &i); if (error) return error;
if (i) {
freecount += rec.ir_freecount;
error = xfs_btree_increment(cur, 0, &i); if (error) return error;
}
} while (i == 1);
/* * Initialise a new set of inodes. When called without a transaction context * (e.g. from recovery) we initiate a delayed write of the inode buffers rather * than logging them (which in a transaction context puts them into the AIL * for writeback rather than the xfsbufd queue).
*/ int
xfs_ialloc_inode_init( struct xfs_mount *mp, struct xfs_trans *tp, struct list_head *buffer_list, int icount,
xfs_agnumber_t agno,
xfs_agblock_t agbno,
xfs_agblock_t length, unsignedint gen)
{ struct xfs_buf *fbuf; struct xfs_dinode *free; int nbufs; int version; int i, j;
xfs_daddr_t d;
xfs_ino_t ino = 0; int error;
/* * Loop over the new block(s), filling in the inodes. For small block * sizes, manipulate the inodes in buffers which are multiples of the * blocks size.
*/
nbufs = length / M_IGEO(mp)->blocks_per_cluster;
/*
 * Figure out what version number to use in the inodes we create. If
 * the superblock version has caught up to the one that supports the new
 * inode format, then use the new inode version. Otherwise use the old
 * version so that old kernels will continue to be able to use the file
 * system.
 *
 * For v3 inodes, we also need to write the inode number into the inode,
 * so calculate the first inode number of the chunk here as
 * XFS_AGB_TO_AGINO() only works within a filesystem block, not
 * across multiple filesystem blocks (such as a cluster) and so cannot
 * be used in the cluster buffer loop below.
 *
 * Further, because we are writing the inode directly into the buffer
 * and calculating a CRC on the entire inode, we have to log the entire
 * inode so that the entire range the CRC covers is present in the log.
 * That means for v3 inodes we log the entire buffer rather than just the
 * inode cores.
*/ if (xfs_has_v3inodes(mp)) {
version = 3;
ino = XFS_AGINO_TO_INO(mp, agno, XFS_AGB_TO_AGINO(mp, agbno));
/* * log the initialisation that is about to take place as an * logical operation. This means the transaction does not * need to log the physical changes to the inode buffers as log * recovery will know what initialisation is actually needed. * Hence we only need to log the buffers as "ordered" buffers so * they track in the AIL as if they were physically logged.
*/ if (tp)
xfs_icreate_log(tp, agno, agbno, icount,
mp->m_sb.sb_inodesize, length, gen);
} else
version = 2;
for (j = 0; j < nbufs; j++) { /* * Get the block.
*/
d = XFS_AGB_TO_DADDR(mp, agno, agbno +
(j * M_IGEO(mp)->blocks_per_cluster));
error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
mp->m_bsize * M_IGEO(mp)->blocks_per_cluster,
0, &fbuf); if (error) return error;
/* Initialize the inode buffers and log them appropriately. */
fbuf->b_ops = &xfs_inode_buf_ops;
xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { int ioffset = i << mp->m_sb.sb_inodelog;
if (tp) { /* * Mark the buffer as an inode allocation buffer so it * sticks in AIL at the point of this allocation * transaction. This ensures the they are on disk before * the tail of the log can be moved past this * transaction (i.e. by preventing relogging from moving * it forward in the log).
*/
xfs_trans_inode_alloc_buf(tp, fbuf); if (version == 3) { /* * Mark the buffer as ordered so that they are * not physically logged in the transaction but * still tracked in the AIL as part of the * transaction and pin the log appropriately.
*/
xfs_trans_ordered_buf(tp, fbuf);
}
} else {
fbuf->b_flags |= XBF_DONE;
xfs_buf_delwri_queue(fbuf, buffer_list);
xfs_buf_relse(fbuf);
}
} return 0;
}
/* * Align startino and allocmask for a recently allocated sparse chunk such that * they are fit for insertion (or merge) into the on-disk inode btrees. * * Background: * * When enabled, sparse inode support increases the inode alignment from cluster * size to inode chunk size. This means that the minimum range between two * non-adjacent inode records in the inobt is large enough for a full inode * record. This allows for cluster sized, cluster aligned block allocation * without need to worry about whether the resulting inode record overlaps with * another record in the tree. Without this basic rule, we would have to deal * with the consequences of overlap by potentially undoing recent allocations in * the inode allocation codepath. * * Because of this alignment rule (which is enforced on mount), there are two * inobt possibilities for newly allocated sparse chunks. One is that the * aligned inode record for the chunk covers a range of inodes not already * covered in the inobt (i.e., it is safe to insert a new sparse record). The * other is that a record already exists at the aligned startino that considers * the newly allocated range as sparse. In the latter case, record content is * merged in hope that sparse inode chunks fill to full chunks over time.
*/ STATICvoid
xfs_align_sparse_ino( struct xfs_mount *mp,
xfs_agino_t *startino,
uint16_t *allocmask)
{
xfs_agblock_t agbno;
xfs_agblock_t mod; int offset;
agbno = XFS_AGINO_TO_AGBNO(mp, *startino);
mod = agbno % mp->m_sb.sb_inoalignmt; if (!mod) return;
/* calculate the inode offset and align startino */
offset = XFS_AGB_TO_AGINO(mp, mod);
*startino -= offset;
/* * Since startino has been aligned down, left shift allocmask such that * it continues to represent the same physical inodes relative to the * new startino.
*/
*allocmask <<= offset / XFS_INODES_PER_HOLEMASK_BIT;
}
/* * Determine whether the source inode record can merge into the target. Both * records must be sparse, the inode ranges must match and there must be no * allocation overlap between the records.
*/ STATICbool
__xfs_inobt_can_merge( struct xfs_inobt_rec_incore *trec, /* tgt record */ struct xfs_inobt_rec_incore *srec) /* src record */
{
uint64_t talloc;
uint64_t salloc;
/* records must cover the same inode range */ if (trec->ir_startino != srec->ir_startino) returnfalse;
/* both records must be sparse */ if (!xfs_inobt_issparse(trec->ir_holemask) ||
!xfs_inobt_issparse(srec->ir_holemask)) returnfalse;
/* both records must track some inodes */ if (!trec->ir_count || !srec->ir_count) returnfalse;
/* can't exceed capacity of a full record */ if (trec->ir_count + srec->ir_count > XFS_INODES_PER_CHUNK) returnfalse;
/* verify there is no allocation overlap */
talloc = xfs_inobt_irec_to_allocmask(trec);
salloc = xfs_inobt_irec_to_allocmask(srec); if (talloc & salloc) returnfalse;
returntrue;
}
/* * Merge the source inode record into the target. The caller must call * __xfs_inobt_can_merge() to ensure the merge is valid.
*/ STATICvoid
__xfs_inobt_rec_merge( struct xfs_inobt_rec_incore *trec, /* target */ struct xfs_inobt_rec_incore *srec) /* src */
{
ASSERT(trec->ir_startino == srec->ir_startino);
/* * Merge the holemask and free mask. For both fields, 0 bits refer to * allocated inodes. We combine the allocated ranges with bitwise AND.
*/
trec->ir_holemask &= srec->ir_holemask;
trec->ir_free &= srec->ir_free;
}
/* * Insert a new sparse inode chunk into the associated inode allocation btree. * The inode record for the sparse chunk is pre-aligned to a startino that * should match any pre-existing sparse inode record in the tree. This allows * sparse chunks to fill over time. * * If no preexisting record exists, the provided record is inserted. * If there is a preexisting record, the provided record is merged with the * existing record and updated in place. The merged record is returned in nrec. * * It is considered corruption if a merge is requested and not possible. Given * the sparse inode alignment constraints, this should never happen.
*/ STATICint
xfs_inobt_insert_sprec( struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, struct xfs_inobt_rec_incore *nrec) /* in/out: new/merged rec. */
{ struct xfs_mount *mp = pag_mount(pag); struct xfs_btree_cur *cur; int error; int i; struct xfs_inobt_rec_incore rec;
cur = xfs_inobt_init_cursor(pag, tp, agbp);
/* the new record is pre-aligned so we know where to look */
error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i); if (error) goto error; /* if nothing there, insert a new record and return */ if (i == 0) {
error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,
nrec->ir_count, nrec->ir_freecount,
nrec->ir_free, &i); if (error) goto error; if (XFS_IS_CORRUPT(mp, i != 1)) {
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED; goto error;
}
goto out;
}
/* * A record exists at this startino. Merge the records.
*/
error = xfs_inobt_get_rec(cur, &rec, &i); if (error) goto error; if (XFS_IS_CORRUPT(mp, i != 1)) {
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED; goto error;
} if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) {
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED; goto error;
}
/* * This should never fail. If we have coexisting records that * cannot merge, something is seriously wrong.
*/ if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) {
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED; goto error;
}
trace_xfs_irec_merge_pre(pag, &rec, nrec);
/* merge to nrec to output the updated record */
__xfs_inobt_rec_merge(nrec, &rec);
trace_xfs_irec_merge_post(pag, nrec);
error = xfs_inobt_rec_check_count(mp, nrec); if (error) goto error;
error = xfs_inobt_update(cur, nrec); if (error) goto error;
/* * Insert a new sparse inode chunk into the free inode btree. The inode * record for the sparse chunk is pre-aligned to a startino that should match * any pre-existing sparse inode record in the tree. This allows sparse chunks * to fill over time. * * The new record is always inserted, overwriting a pre-existing record if * there is one.
*/ STATICint
xfs_finobt_insert_sprec( struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, struct xfs_inobt_rec_incore *nrec) /* in/out: new rec. */
{ struct xfs_mount *mp = pag_mount(pag); struct xfs_btree_cur *cur; int error; int i;
cur = xfs_finobt_init_cursor(pag, tp, agbp);
/* the new record is pre-aligned so we know where to look */
error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i); if (error) goto error; /* if nothing there, insert a new record and return */ if (i == 0) {
error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,
nrec->ir_count, nrec->ir_freecount,
nrec->ir_free, &i); if (error) goto error; if (XFS_IS_CORRUPT(mp, i != 1)) {
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED; goto error;
}
} else {
error = xfs_inobt_update(cur, nrec); if (error) goto error;
}
/* * Allocate new inodes in the allocation group specified by agbp. Returns 0 if * inodes were allocated in this AG; -EAGAIN if there was no space in this AG so * the caller knows it can try another AG, a hard -ENOSPC when over the maximum * inode count threshold, or the usual negative error code for other errors.
*/ STATICint
xfs_ialloc_ag_alloc( struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp)
{ struct xfs_agi *agi; struct xfs_alloc_arg args; int error;
xfs_agino_t newino; /* new first inode's number */
xfs_agino_t newlen; /* new number of inodes */ int isaligned = 0; /* inode allocation at stripe */ /* unit boundary */ /* init. to full chunk */ struct xfs_inobt_rec_incore rec; struct xfs_ino_geometry *igeo = M_IGEO(tp->t_mountp);
uint16_t allocmask = (uint16_t) -1; int do_sparse = 0;
#ifdef DEBUG /* randomly do sparse inode allocations */ if (xfs_has_sparseinodes(tp->t_mountp) &&
igeo->ialloc_min_blks < igeo->ialloc_blks)
do_sparse = get_random_u32_below(2); #endif
/* * Locking will ensure that we don't have two callers in here * at one time.
*/
newlen = igeo->ialloc_inos; if (igeo->maxicount &&
percpu_counter_read_positive(&args.mp->m_icount) + newlen >
igeo->maxicount) return -ENOSPC;
args.minlen = args.maxlen = igeo->ialloc_blks; /* * First try to allocate inodes contiguous with the last-allocated * chunk of inodes. If the filesystem is striped, this will fill * an entire stripe unit with inodes.
*/
agi = agbp->b_addr;
newino = be32_to_cpu(agi->agi_newino);
args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
igeo->ialloc_blks; if (do_sparse) goto sparse_alloc; if (likely(newino != NULLAGINO &&
(args.agbno < be32_to_cpu(agi->agi_length)))) {
args.prod = 1;
/*
 * We need to take into account alignment here to ensure that
 * we don't modify the free list if we fail to have an exact
 * block. If we don't have an exact match, and every other
 * allocation attempt fails, we'll end up cancelling
 * a dirty transaction and shutting down.
 *
 * For an exact allocation, alignment must be 1,
 * however we need to take cluster alignment into account when
 * fixing up the freelist. Use the minalignslop field to
 * indicate that extra blocks might be required for alignment,
 * but not to use them in the actual exact allocation.
*/
args.alignment = 1;
args.minalignslop = igeo->cluster_align - 1;
/* Allow space for the inode btree to split. */
args.minleft = igeo->inobt_maxlevels;
error = xfs_alloc_vextent_exact_bno(&args,
xfs_agbno_to_fsb(pag, args.agbno)); if (error) return error;
/* * This request might have dirtied the transaction if the AG can * satisfy the request, but the exact block was not available. * If the allocation did fail, subsequent requests will relax * the exact agbno requirement and increase the alignment * instead. It is critical that the total size of the request * (len + alignment + slop) does not increase from this point * on, so reset minalignslop to ensure it is not included in * subsequent requests.
*/
args.minalignslop = 0;
}
if (unlikely(args.fsbno == NULLFSBLOCK)) { /* * Set the alignment for the allocation. * If stripe alignment is turned on then align at stripe unit * boundary. * If the cluster size is smaller than a filesystem block * then we're doing I/O for inodes in filesystem block size * pieces, so don't need alignment anyway.
*/
isaligned = 0; if (igeo->ialloc_align) {
ASSERT(!xfs_has_noalign(args.mp));
args.alignment = args.mp->m_dalign;
isaligned = 1;
} else
args.alignment = igeo->cluster_align; /* * Allocate a fixed-size extent of inodes.
*/
args.prod = 1; /* * Allow space for the inode btree to split.
*/
args.minleft = igeo->inobt_maxlevels;
error = xfs_alloc_vextent_near_bno(&args,
xfs_agbno_to_fsb(pag,
be32_to_cpu(agi->agi_root))); if (error) return error;
}
/* * If stripe alignment is turned on, then try again with cluster * alignment.
*/ if (isaligned && args.fsbno == NULLFSBLOCK) {
args.alignment = igeo->cluster_align;
error = xfs_alloc_vextent_near_bno(&args,
xfs_agbno_to_fsb(pag,
be32_to_cpu(agi->agi_root))); if (error) return error;
}
/* * Finally, try a sparse allocation if the filesystem supports it and * the sparse allocation length is smaller than a full chunk.
*/ if (xfs_has_sparseinodes(args.mp) &&
igeo->ialloc_min_blks < igeo->ialloc_blks &&
args.fsbno == NULLFSBLOCK) {
sparse_alloc:
args.alignment = args.mp->m_sb.sb_spino_align;
args.prod = 1;
/* * The inode record will be aligned to full chunk size. We must * prevent sparse allocation from AG boundaries that result in * invalid inode records, such as records that start at agbno 0 * or extend beyond the AG. * * Set min agbno to the first aligned, non-zero agbno and max to * the last aligned agbno that is at least one full chunk from * the end of the AG.
*/
args.min_agbno = args.mp->m_sb.sb_inoalignmt;
args.max_agbno = round_down(xfs_ag_block_count(args.mp,
pag_agno(pag)),
args.mp->m_sb.sb_inoalignmt) -
igeo->ialloc_blks;
error = xfs_alloc_vextent_near_bno(&args,
xfs_agbno_to_fsb(pag,
be32_to_cpu(agi->agi_root))); if (error) return error;
/* * Stamp and write the inode buffers. * * Seed the new inode cluster with a random generation number. This * prevents short-term reuse of generation numbers if a chunk is * freed and then immediately reallocated. We use random numbers * rather than a linear progression to prevent the next generation * number from being easily guessable.
*/
error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag_agno(pag),
args.agbno, args.len, get_random_u32());
if (error) return error; /* * Convert the results.
*/
newino = XFS_AGB_TO_AGINO(args.mp, args.agbno);
if (xfs_inobt_issparse(~allocmask)) { /* * We've allocated a sparse chunk. Align the startino and mask.
*/
xfs_align_sparse_ino(args.mp, &newino, &allocmask);
/* * Insert the sparse record into the inobt and allow for a merge * if necessary. If a merge does occur, rec is updated to the * merged record.
*/
error = xfs_inobt_insert_sprec(pag, tp, agbp, &rec); if (error == -EFSCORRUPTED) {
xfs_alert(args.mp, "invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
xfs_agino_to_ino(pag, rec.ir_startino),
rec.ir_holemask, rec.ir_count);
xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);
} if (error) return error;
/* * We can't merge the part we've just allocated as for the inobt * due to finobt semantics. The original record may or may not * exist independent of whether physical inodes exist in this * sparse chunk. * * We must update the finobt record based on the inobt record. * rec contains the fully merged and up to date inobt record * from the previous call. Set merge false to replace any * existing record with this one.
*/ if (xfs_has_finobt(args.mp)) {
error = xfs_finobt_insert_sprec(pag, tp, agbp, &rec); if (error) return error;
}
} else { /* full chunk - insert new records to both btrees */
error = xfs_inobt_insert(pag, tp, agbp, newino, newlen, false); if (error) return error;
if (xfs_has_finobt(args.mp)) {
error = xfs_inobt_insert(pag, tp, agbp, newino,
newlen, true); if (error) return error;
}
}
/* * Try to retrieve the next record to the left/right from the current one.
*/ STATICint
xfs_ialloc_next_rec( struct xfs_btree_cur *cur,
xfs_inobt_rec_incore_t *rec, int *done, int left)
{ int error; int i;
if (error) return error;
*done = !i; if (i) {
error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
xfs_btree_mark_sick(cur); return -EFSCORRUPTED;
}
}
return 0;
}
STATICint
xfs_ialloc_get_rec( struct xfs_btree_cur *cur,
xfs_agino_t agino,
xfs_inobt_rec_incore_t *rec, int *done)
{ int error; int i;
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i); if (error) return error;
*done = !i; if (i) {
error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
xfs_btree_mark_sick(cur); return -EFSCORRUPTED;
}
}
return 0;
}
/* * Return the offset of the first free inode in the record. If the inode chunk * is sparsely allocated, we convert the record holemask to inode granularity * and mask off the unallocated regions from the inode free mask.
*/ STATICint
xfs_inobt_first_free_inode( struct xfs_inobt_rec_incore *rec)
{
xfs_inofree_t realfree;
/* if there are no holes, return the first available offset */ if (!xfs_inobt_issparse(rec->ir_holemask)) return xfs_lowbit64(rec->ir_free);
/* * If this AG has corrupt inodes, check if allocating this inode would fail * with corruption errors. Returns 0 if we're clear, or EAGAIN to try again * somewhere else.
*/ staticint
xfs_dialloc_check_ino( struct xfs_perag *pag, struct xfs_trans *tp,
xfs_ino_t ino)
{ struct xfs_imap imap; struct xfs_buf *bp; int error;
restart_pagno:
cur = xfs_inobt_init_cursor(pag, tp, agbp); /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some.
*/ if (!pagino)
pagino = be32_to_cpu(agi->agi_newino);
error = xfs_check_agi_freecount(cur); if (error) goto error0;
/* * If in the same AG as the parent, try to get near the parent.
*/ if (pagno == pag_agno(pag)) { int doneleft; /* done, to the left */ int doneright; /* done, to the right */
error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); if (error) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) {
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED; goto error0;
}
if (rec.ir_freecount > 0) { /* * Found a free inode in the same chunk * as the parent, done.
*/ goto alloc_inode;
}
/* * In the same AG as parent, but parent's chunk is full.
*/
/* duplicate the cursor, search left & right simultaneously */
error = xfs_btree_dup_cursor(cur, &tcur); if (error) goto error0;
/* * Skip to last blocks looked up if same parent inode.
*/ if (pagino != NULLAGINO &&
pag->pagl_pagino == pagino &&
pag->pagl_leftrec != NULLAGINO &&
pag->pagl_rightrec != NULLAGINO) {
error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec,
&trec, &doneleft); if (error) goto error1;
error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec,
&rec, &doneright); if (error) goto error1;
} else { /* search left with tcur, back up 1 record */
error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1); if (error) goto error1;
/* search right with cur, go forward 1 record. */
error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0); if (error) goto error1;
}
/* * Loop until we find an inode chunk with a free inode.
*/ while (--searchdistance > 0 && (!doneleft || !doneright)) { int useleft; /* using left inode chunk this time */
/* figure out the closer block if both are valid. */ if (!doneleft && !doneright) {
useleft = pagino -
(trec.ir_startino + XFS_INODES_PER_CHUNK - 1) <
rec.ir_startino - pagino;
} else {
useleft = !doneleft;
}
/* free inodes to the left? */ if (useleft && trec.ir_freecount) {
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
cur = tcur;
/* get next record to check */ if (useleft) {
error = xfs_ialloc_next_rec(tcur, &trec,
&doneleft, 1);
} else {
error = xfs_ialloc_next_rec(cur, &rec,
&doneright, 0);
} if (error) goto error1;
}
if (searchdistance <= 0) { /* * Not in range - save last search * location and allocate a new inode
*/
xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
pag->pagl_leftrec = trec.ir_startino;
pag->pagl_rightrec = rec.ir_startino;
pag->pagl_pagino = pagino;
} else { /* * We've reached the end of the btree. because * we are only searching a small chunk of the * btree each search, there is obviously free * inodes closer to the parent inode than we * are now. restart the search again.
*/
pag->pagl_pagino = NULLAGINO;
pag->pagl_leftrec = NULLAGINO;
pag->pagl_rightrec = NULLAGINO;
xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); goto restart_pagno;
}
}
/* * In a different AG from the parent. * See if the most recently allocated block has any free.
*/ if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
XFS_LOOKUP_EQ, &i); if (error) goto error0;
if (i == 1) {
error = xfs_inobt_get_rec(cur, &rec, &j); if (error) goto error0;
if (j == 1 && rec.ir_freecount > 0) { /* * The last chunk allocated in the group * still has a free inode.
*/ goto alloc_inode;
}
}
}
/* * None left in the last group, search the whole AG
*/
error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); if (error) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) {
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED; goto error0;
}
for (;;) {
error = xfs_inobt_get_rec(cur, &rec, &i); if (error) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) {
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED; goto error0;
} if (rec.ir_freecount > 0) break;
error = xfs_btree_increment(cur, 0, &i); if (error) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) {
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED; goto error0;
}
}
/* * Use the free inode btree to allocate an inode based on distance from the * parent. Note that the provided cursor may be deleted and replaced.
*/ STATICint
xfs_dialloc_ag_finobt_near(
xfs_agino_t pagino, struct xfs_btree_cur **ocur, struct xfs_inobt_rec_incore *rec)
{ struct xfs_btree_cur *lcur = *ocur; /* left search cursor */ struct xfs_btree_cur *rcur; /* right search cursor */ struct xfs_inobt_rec_incore rrec; int error; int i, j;
error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i); if (error) return error;
if (i == 1) {
error = xfs_inobt_get_rec(lcur, rec, &i); if (error) return error; if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1)) {
xfs_btree_mark_sick(lcur); return -EFSCORRUPTED;
}
/* * See if we've landed in the parent inode record. The finobt * only tracks chunks with at least one free inode, so record * existence is enough.
*/ if (pagino >= rec->ir_startino &&
pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK)) return 0;
}
error = xfs_btree_dup_cursor(lcur, &rcur); if (error) return error;
error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j); if (error) goto error_rcur; if (j == 1) {
error = xfs_inobt_get_rec(rcur, &rrec, &j); if (error) goto error_rcur; if (XFS_IS_CORRUPT(lcur->bc_mp, j != 1)) {
xfs_btree_mark_sick(lcur);
error = -EFSCORRUPTED; goto error_rcur;
}
}
if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1 && j != 1)) {
xfs_btree_mark_sick(lcur);
error = -EFSCORRUPTED; goto error_rcur;
} if (i == 1 && j == 1) { /* * Both the left and right records are valid. Choose the closer * inode chunk to the target.
*/ if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) >
(rrec.ir_startino - pagino)) {
*rec = rrec;
xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
*ocur = rcur;
} else {
xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
}
} elseif (j == 1) { /* only the right record is valid */
*rec = rrec;
xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
*ocur = rcur;
} elseif (i == 1) { /* only the left record is valid */
xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
}
/* * Use the free inode btree to find a free inode based on a newino hint. If * the hint is NULL, find the first free inode in the AG.
*/ STATICint
xfs_dialloc_ag_finobt_newino( struct xfs_agi *agi, struct xfs_btree_cur *cur, struct xfs_inobt_rec_incore *rec)
{ int error; int i;
if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
XFS_LOOKUP_EQ, &i); if (error) return error; if (i == 1) {
error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
xfs_btree_mark_sick(cur); return -EFSCORRUPTED;
} return 0;
}
}
/* * Find the first inode available in the AG.
*/
error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); if (error) return error; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
xfs_btree_mark_sick(cur); return -EFSCORRUPTED;
}
error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
xfs_btree_mark_sick(cur); return -EFSCORRUPTED;
}
return 0;
}
/* * Update the inobt based on a modification made to the finobt. Also ensure that * the records from both trees are equivalent post-modification.
*/ STATICint
xfs_dialloc_ag_update_inobt( struct xfs_btree_cur *cur, /* inobt cursor */ struct xfs_inobt_rec_incore *frec, /* finobt record */ int offset) /* inode offset */
{ struct xfs_inobt_rec_incore rec; int error; int i;
error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); if (error) return error; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
xfs_btree_mark_sick(cur); return -EFSCORRUPTED;
}
error = xfs_inobt_get_rec(cur, &rec, &i); if (error) return error; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
xfs_btree_mark_sick(cur); return -EFSCORRUPTED;
}
ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
/*
 * Allocate an inode using the free inode btree, if available. Otherwise, fall
 * back to the inobt search algorithm.
 *
 * The caller selected an AG for us, and made sure that free inodes are
 * available.
 *
 * pag:    perag of the AG to allocate from
 * tp:     transaction; its t_mountp supplies the mount
 * agbp:   buffer whose b_addr is read as the AGI header
 * parent: parent inode number; split below into an AG number and an
 *         AG-relative inode number
 * inop:   output pointer, forwarded to xfs_dialloc_ag_inobt() when the
 *         filesystem has no finobt
 *
 * NOTE(review): as it stands this body looks incomplete. `ino` and `offset`
 * are read but never assigned, the error_cur/error_icur labels are not
 * defined, and the tail uses agibp, dqinfo and tpp which are not declared
 * here. Confirm against the upstream source before relying on it.
 */ staticint
xfs_dialloc_ag( struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp,
xfs_ino_t parent,
xfs_ino_t *inop)
{ struct xfs_mount *mp = tp->t_mountp; struct xfs_agi *agi = agbp->b_addr;
xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent);
xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); struct xfs_btree_cur *cur; /* finobt cursor */ struct xfs_btree_cur *icur; /* inobt cursor */ struct xfs_inobt_rec_incore rec;
xfs_ino_t ino; int error; int offset; int i;
/* No free inode btree on this filesystem: hand off to the inobt allocator. */
if (!xfs_has_finobt(mp)) return xfs_dialloc_ag_inobt(pag, tp, agbp, parent, inop);
/*
 * If pagino is 0 (this is the root inode allocation) use newino.
 * This must work because we've just allocated some.
 */ if (!pagino)
pagino = be32_to_cpu(agi->agi_newino);
cur = xfs_finobt_init_cursor(pag, tp, agbp);
/* Validate the finobt free count before touching the tree. */
error = xfs_check_agi_freecount(cur); if (error) goto error_cur;
/*
 * The search algorithm depends on whether we're in the same AG as the
 * parent. If so, find the closest available inode to the parent. If
 * not, consider the agi hint or find the first free inode in the AG.
 */ if (pag_agno(pag) == pagno)
error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec); else
error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec); if (error) goto error_cur;
/*
 * Extra check of the chosen inode, done only when the AG is already
 * flagged with XFS_SICK_AG_INODES.
 * NOTE(review): no assignment to `ino` (or to `offset`, used just below)
 * is visible before this point - verify the computation was not lost.
 */
if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
error = xfs_dialloc_check_ino(pag, tp, ino); if (error) goto error_cur;
}
/*
 * Modify or remove the finobt record: clear the free bit for the chosen
 * offset, then update the record if free inodes remain or delete it if
 * that was the last one.
 */
rec.ir_free &= ~XFS_INOBT_MASK(offset);
rec.ir_freecount--; if (rec.ir_freecount)
error = xfs_inobt_update(cur, &rec); else
error = xfs_btree_delete(cur, &i); if (error) goto error_cur;
/*
 * The finobt has now been updated appropriately. We haven't updated the
 * agi and superblock yet, so we can create an inobt cursor and validate
 * the original freecount. If all is well, make the equivalent update to
 * the inobt using the finobt record and offset information.
 */
icur = xfs_inobt_init_cursor(pag, tp, agbp);
error = xfs_check_agi_freecount(icur); if (error) goto error_icur;
error = xfs_dialloc_ag_update_inobt(icur, &rec, offset); if (error) goto error_icur;
/*
 * Both trees have now been updated. We must update the perag and
 * superblock before we can check the freecount for each btree.
 */
be32_add_cpu(&agi->agi_freecount, -1);
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
pag->pagi_freecount--;
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
error = xfs_check_agi_freecount(icur); if (error) goto error_icur;
error = xfs_check_agi_freecount(cur); if (error) goto error_icur;
/*
 * Hold on to the agibp across the commit so no other allocation can
 * come in and take the free inodes we just allocated for our caller.
 *
 * NOTE(review): from here to the closing brace the code uses agibp,
 * dqinfo and tpp, none of which are declared in this function, and the
 * cursors are never released. This looks like the tail of a
 * transaction-rolling helper fused onto this body - confirm.
 */
xfs_trans_bhold(tp, agibp);
/*
 * We want the quota changes to be associated with the next transaction,
 * NOT this one. So, detach the dqinfo from this and attach it to the
 * next transaction.
 */
dqinfo = tp->t_dqinfo;
tp->t_dqinfo = NULL;
error = xfs_trans_roll(&tp);
/* Re-attach the quota info that we detached from prev trx. */
tp->t_dqinfo = dqinfo;
/*
 * Join the buffer even on commit error so that the buffer is released
 * when the caller cancels the transaction and doesn't have to handle
 * this error case specially.
 */
xfs_trans_bjoin(tp, agibp);
*tpp = tp; return error;
}
/*
 * Decide whether an AG is worth trying for an inode allocation.
 *
 * pag:      perag of the candidate AG; NULL is rejected
 * tp:       transaction used when the AGI/AGF has to be read in
 * mode:     file type bits of the inode being created
 * flags:    passed to xfs_alloc_read_agf(); a non-zero value also adds
 *           cluster alignment slack to the space estimate
 * ok_alloc: whether allocating a new inode chunk is acceptable
 *
 * The visible checks return false when the AG does not allow inodes, when
 * reading the AGI or AGF fails, or when the AG has no free inodes and
 * ok_alloc is false; they return true as soon as the AG reports free inodes.
 *
 * NOTE(review): the final free-space comparison and the closing brace are not
 * visible, and the text after the `needspace` assignment uses names that are
 * not declared here - see the note further down.
 */
staticbool
xfs_dialloc_good_ag( struct xfs_perag *pag, struct xfs_trans *tp,
umode_t mode, int flags, bool ok_alloc)
{ struct xfs_mount *mp = tp->t_mountp;
xfs_extlen_t ineed;
xfs_extlen_t longest = 0; int needspace; int error;
if (!pag) returnfalse; if (!xfs_perag_allows_inodes(pag)) returnfalse;
/* Read the AGI on first use; a read failure disqualifies the AG. */
if (!xfs_perag_initialised_agi(pag)) {
error = xfs_ialloc_read_agi(pag, tp, 0, NULL); if (error) returnfalse;
}
/* Free inodes already present: good. Otherwise we need permission to allocate. */
if (pag->pagi_freecount) returntrue; if (!ok_alloc) returnfalse;
/* Read the AGF on first use so the free space fields below are populated. */
if (!xfs_perag_initialised_agf(pag)) {
error = xfs_alloc_read_agf(pag, tp, flags, NULL); if (error) returnfalse;
}
/*
 * Check that there is enough free space for the file plus a chunk of
 * inodes if we need to allocate some. If this is the first pass across
 * the AGs, take into account the potential space needed for alignment
 * of inode chunks when checking the longest contiguous free space in
 * the AG - this prevents us from getting ENOSPC because we have free
 * space larger than ialloc_blks but alignment constraints prevent us
 * from using it.
 *
 * If we can't find an AG with space for full alignment slack to be
 * taken into account, we must be near ENOSPC in all AGs. Hence we
 * don't include alignment for the second pass and so if we fail
 * allocation due to alignment issues then it is most likely a real
 * ENOSPC condition.
 *
 * XXX(dgc): this calculation is now bogus thanks to the per-ag
 * reservations that xfs_alloc_fix_freelist() now does via
 * xfs_alloc_space_available(). When the AG fills up, pagf_freeblks will
 * be more than large enough for the check below to succeed, but
 * xfs_alloc_space_available() will fail because of the non-zero
 * metadata reservation and hence we won't actually be able to allocate
 * more inodes in this AG. We do soooo much unnecessary work near ENOSPC
 * because of this.
 */
ineed = M_IGEO(mp)->ialloc_min_blks; if (flags && ineed > 1)
ineed += M_IGEO(mp)->cluster_align;
/* With no longest-extent value, count one block if the freelist is non-empty. */
longest = pag->pagf_longest; if (!longest)
longest = pag->pagf_flcount > 0;
/* Directories, regular files and symlinks are expected to need a data block. */
needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
/*
 * Then read in the AGI buffer and recheck with the AGI buffer
 * lock held.
 *
 * NOTE(review): everything from here on uses tpp, agbp, ino, new_ino and
 * parent, none of which are declared in this function, jumps to an
 * out_release label that is not visible, and returns an int error from
 * a bool function. It looks like the middle of a separate "try this AG"
 * routine was fused in here - confirm against upstream.
 */
error = xfs_ialloc_read_agi(pag, *tpp, 0, &agbp); if (error) return error;
if (!pag->pagi_freecount) { if (!ok_alloc) {
error = -EAGAIN; goto out_release;
}
/*
 * We successfully allocated space for an inode cluster in this
 * AG. Roll the transaction so that we can allocate one of the
 * new inodes.
 */
ASSERT(pag->pagi_freecount > 0);
error = xfs_dialloc_roll(tpp, agbp); if (error) goto out_release;
}
/* Allocate an inode in the found AG */
error = xfs_dialloc_ag(pag, *tpp, agbp, parent, &ino); if (!error)
*new_ino = ino; return error;
/*
 * Pick an AG for the new inode.
 *
 * Directories, symlinks, and regular files frequently allocate at least one
 * block, so factor that potential expansion when we examine whether an AG has
 * enough space for file creation. Try to keep metadata files all in the same
 * AG.
 *
 * mp:   mount
 * dp:   parent directory inode; NULL selects AG 0
 * mode: file type bits of the inode being created
 *
 * NOTE(review): only the NULL-parent, metadir and directory cases are visible.
 * The handling of other file types, any use of `start_agno`, and the closing
 * brace are missing from this text - confirm against upstream.
 */ staticinline xfs_agnumber_t
xfs_dialloc_pick_ag( struct xfs_mount *mp, struct xfs_inode *dp,
umode_t mode)
{
xfs_agnumber_t start_agno;
/*
 * No parent: AG 0. Metadata directory parent: the AG holding
 * sb_logstart when that field is non-zero, otherwise AG 0.
 */
if (!dp) return 0; if (xfs_is_metadir_inode(dp)) { if (mp->m_sb.sb_logstart) return XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); return 0;
}
/* New directories rotate through the AGs using the m_agirotor counter. */
if (S_ISDIR(mode)) return (atomic_inc_return(&mp->m_agirotor) - 1) % mp->m_maxagi;
/*
 * Allocate an on-disk inode.
 *
 * Mode is used to tell whether the new inode is a directory and hence where to
 * locate it. The on-disk inode that is allocated will be returned in @new_ino
 * on success, otherwise an error will be set to indicate the failure (e.g.
 * -ENOSPC).
 *
 * @tpp:     transaction pointer, handed by reference to xfs_dialloc_try_ag()
 * @args:    creation arguments; only ->pip and ->mode are consulted here
 * @new_ino: receives the allocated inode number on success
 *
 * Returns 0 on success; -ENOSPC when both passes over the AGs find nothing;
 * -EFSCORRUPTED when the filesystem is shut down during the scan or the
 * allocated inode number fails the sanity checks; otherwise the error
 * reported by xfs_dialloc_try_ag().
 */
int
xfs_dialloc(
	struct xfs_trans	**tpp,
	const struct xfs_icreate_args *args,
	xfs_ino_t		*new_ino)
{
	struct xfs_mount	*mp = (*tpp)->t_mountp;
	struct xfs_perag	*pag;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_ino_t		ino = NULLFSINO;
	xfs_ino_t		parent = args->pip ? args->pip->i_ino : 0;
	xfs_agnumber_t		agno;
	xfs_agnumber_t		start_agno;
	umode_t			mode = args->mode & S_IFMT;
	bool			ok_alloc = true;
	bool			low_space = false;
	int			flags;
	int			error = 0;

	/*
	 * Choose the AG at which the scan below starts. Without this the
	 * wrap-around iteration would begin at an indeterminate AG number.
	 */
	start_agno = xfs_dialloc_pick_ag(mp, args->pip, mode);

	/*
	 * If we have already hit the ceiling of inode blocks then clear
	 * ok_alloc so we scan all available agi structures for a free
	 * inode.
	 *
	 * Read rough value of mp->m_icount by percpu_counter_read_positive,
	 * which will sacrifice the preciseness but improve the performance.
	 */
	if (igeo->maxicount &&
	    percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos
							> igeo->maxicount) {
		ok_alloc = false;
	}

	/*
	 * If we are near to ENOSPC, we want to prefer allocation from AGs that
	 * have free inodes in them rather than use up free space allocating new
	 * inode chunks. Hence we turn off allocation for the first non-blocking
	 * pass through the AGs if we are near ENOSPC to consume free inodes
	 * that we can immediately allocate, but then we allow allocation on the
	 * second pass if we fail to find an AG with free inodes in it.
	 */
	if (xfs_estimate_freecounter(mp, XC_FREE_BLOCKS) <
			mp->m_low_space[XFS_LOWSP_1_PCNT]) {
		ok_alloc = false;
		low_space = true;
	}

	/*
	 * Loop until we find an allocation group that either has free inodes
	 * or in which we can allocate some inodes. Iterate through the
	 * allocation groups upward, wrapping at the end.
	 */
	flags = XFS_ALLOC_FLAG_TRYLOCK;
retry:
	for_each_perag_wrap_at(mp, start_agno, mp->m_maxagi, agno, pag) {
		if (xfs_dialloc_good_ag(pag, *tpp, mode, flags, ok_alloc)) {
			error = xfs_dialloc_try_ag(pag, tpp, parent,
					&ino, ok_alloc);
			/* -EAGAIN means "try the next AG"; anything else ends the scan. */
			if (error != -EAGAIN)
				break;
			error = 0;
		}

		if (xfs_is_shutdown(mp)) {
			error = -EFSCORRUPTED;
			break;
		}
	}
	/* Leaving the loop early keeps a perag reference that we must drop. */
	if (pag)
		xfs_perag_rele(pag);
	if (error)
		return error;
	if (ino == NULLFSINO) {
		/*
		 * First (trylock) pass found nothing: retry without the
		 * trylock flag, and permit chunk allocation if it was only
		 * disabled because of low space.
		 */
		if (flags) {
			flags = 0;
			if (low_space)
				ok_alloc = true;
			goto retry;
		}
		return -ENOSPC;
	}

	/*
	 * Protect against obviously corrupt allocation btree records. Later
	 * xfs_iget checks will catch re-allocation of other active in-memory
	 * and on-disk inodes. If we don't catch reallocating the parent inode
	 * here we will deadlock in xfs_iget() so we have to do these checks
	 * first.
	 */
	if (ino == parent || !xfs_verify_dir_ino(mp, ino)) {
		xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino);
		xfs_agno_mark_sick(mp, XFS_INO_TO_AGNO(mp, ino),
				XFS_SICK_AG_INOBT);
		return -EFSCORRUPTED;
	}

	*new_ino = ino;
	return 0;
}
/*
 * Free the blocks of an inode chunk. We must consider that the inode chunk
 * might be sparse and only free the regions that are allocated as part of the
 * chunk.
 *
 * tp:  transaction; its t_mountp supplies the mount
 * pag: perag of the AG that owns the chunk
 * rec: incore inobt record describing the chunk (start inode and holemask)
 *
 * NOTE(review): the while loop below is never closed (its `next` label and
 * the call that frees each extent are missing), and the text then continues
 * with statements that use names not declared here - see the note further
 * down. Confirm against upstream.
 */ staticint
xfs_difree_inode_chunk( struct xfs_trans *tp, struct xfs_perag *pag, struct xfs_inobt_rec_incore *rec)
{ struct xfs_mount *mp = tp->t_mountp;
xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp,
rec->ir_startino); int startidx, endidx; int nextbit;
xfs_agblock_t agbno; int contigblk;
DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ return xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, sagbno),
M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
XFS_AG_RESV_NONE, 0);
}
/* holemask is only 16-bits (fits in an unsigned long) */
ASSERT(sizeof(rec->ir_holemask) <= sizeof(holemask[0]));
holemask[0] = rec->ir_holemask;
/*
 * Find contiguous ranges of zeroes (i.e., allocated regions) in the
 * holemask and convert the start/end index of each range to an extent.
 * We start with the start and end index both pointing at the first 0 in
 * the mask.
 */
startidx = endidx = find_first_zero_bit(holemask,
XFS_INOBT_HOLEMASK_BITS);
nextbit = startidx + 1; while (startidx < XFS_INOBT_HOLEMASK_BITS) { int error;
nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
nextbit); /*
 * If the next zero bit is contiguous, update the end index of
 * the current range and continue.
 */ if (nextbit != XFS_INOBT_HOLEMASK_BITS &&
nextbit == endidx + 1) {
endidx = nextbit; goto next;
}
/*
 * nextbit is not contiguous with the current end index. Convert
 * the current start/end to an extent and add it to the free
 * list. Holemask bit indexes are scaled to inodes by
 * XFS_INODES_PER_HOLEMASK_BIT and then to blocks by sb_inopblock.
 */
agbno = sagbno + (startidx * XFS_INODES_PER_HOLEMASK_BIT) /
mp->m_sb.sb_inopblock;
contigblk = ((endidx - startidx + 1) *
XFS_INODES_PER_HOLEMASK_BIT) /
mp->m_sb.sb_inopblock;
/*
 * Initialize the cursor.
 *
 * NOTE(review): from here on the code uses cur, agbp, agino, i, off,
 * xic, agi and ilen, none of which are declared in this function, and
 * jumps to an error0 label that is not visible. It reads like the body
 * of a separate routine that frees an inode in the inobt, fused onto
 * this one - confirm.
 */
cur = xfs_inobt_init_cursor(pag, tp, agbp);
error = xfs_check_agi_freecount(cur); if (error) goto error0;
/*
 * Look for the entry describing this inode.
 */ if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.",
__func__, error); goto error0;
} if (XFS_IS_CORRUPT(mp, i != 1)) {
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED; goto error0;
}
error = xfs_inobt_get_rec(cur, &rec, &i); if (error) {
xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
__func__, error); goto error0;
} if (XFS_IS_CORRUPT(mp, i != 1)) {
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED; goto error0;
} /*
 * Get the offset in the inode chunk.
 */
off = agino - rec.ir_startino;
ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off))); /*
 * Mark the inode free & increment the count.
 */
rec.ir_free |= XFS_INOBT_MASK(off);
rec.ir_freecount++;
/*
 * When an inode chunk is free, it becomes eligible for removal. Don't
 * remove the chunk if the block size is large enough for multiple inode
 * chunks (that might not be free).
 */ if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
xic->deleted = true;
xic->first_ino = xfs_agino_to_ino(pag, rec.ir_startino);
xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
/*
 * Remove the inode cluster from the AGI B+Tree, adjust the
 * AGI and Superblock inode counts, and mark the disk space
 * to be freed when the transaction is committed.
 *
 * The free counters drop by ilen - 1 rather than ilen because
 * rec.ir_freecount was just incremented above for the inode
 * being freed, and that increment has not been applied to the
 * AGI, perag or superblock counters in the visible code.
 */
ilen = rec.ir_freecount;
be32_add_cpu(&agi->agi_count, -ilen);
be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
pag->pagi_freecount -= ilen - 1;
pag->pagi_count -= ilen;
xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
if ((error = xfs_btree_delete(cur, &i))) {
xfs_warn(mp, "%s: xfs_btree_delete returned error %d.",
__func__, error); goto error0;
}
/*
 * Free an inode in the free inode btree.
 *
 * pag:    perag of the AG that owns the inode
 * tp:     transaction
 * agbp:   AGI buffer handed to the finobt cursor
 * agino:  AG-relative number of the inode being freed
 * ibtrec: inobt record for the chunk; its ir_startino is the lookup key
 *
 * NOTE(review): this body looks incomplete. The `if (i == 0) {` block is
 * never closed, no statement that inserts a record or modifies `rec` is
 * visible, `offset` is computed but not used, and neither the `error:` label
 * nor the function's closing brace appears. Confirm against upstream.
 */ STATICint
xfs_difree_finobt( struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp,
xfs_agino_t agino, struct xfs_inobt_rec_incore *ibtrec) /* inobt record */
{ struct xfs_mount *mp = pag_mount(pag); struct xfs_btree_cur *cur; struct xfs_inobt_rec_incore rec; int offset = agino - ibtrec->ir_startino; int error; int i;
cur = xfs_finobt_init_cursor(pag, tp, agbp);
error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i); if (error) goto error; if (i == 0) { /*
 * If the record does not exist in the finobt, we must have just
 * freed an inode in a previously fully allocated chunk. If not,
 * something is out of sync.
 */ if (XFS_IS_CORRUPT(mp, ibtrec->ir_freecount != 1)) {
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED; goto error;
}
/*
 * Read and update the existing record. We could just copy the ibtrec
 * across here, but that would defeat the purpose of having redundant
 * metadata. By making the modifications independently, we can catch
 * corruptions that we wouldn't see if we just copied from one record
 * to another.
 */
error = xfs_inobt_get_rec(cur, &rec, &i); if (error) goto error; if (XFS_IS_CORRUPT(mp, i != 1)) {
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED; goto error;
}
/*
 * The content of inobt records should always match between the inobt
 * and finobt. The lifecycle of records in the finobt is different from
 * the inobt in that the finobt only tracks records with at least one
 * free inode. Hence, if all of the inodes are free and we aren't
 * keeping inode chunks permanently on disk, remove the record.
 * Otherwise, update the record with the new information.
 *
 * Note that we currently can't free chunks when the block size is large
 * enough for multiple chunks. Leave the finobt record to remain in sync
 * with the inobt.
 */ if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
error = xfs_btree_delete(cur, &i); if (error) goto error;
ASSERT(i == 1);
} else {
error = xfs_inobt_update(cur, &rec); if (error) goto error;
}
out:
/* Re-validate the free count once the finobt has been changed. */
error = xfs_check_agi_freecount(cur); if (error) goto error;
/*
 * Free disk inode. Carefully avoids touching the incore inode, all
 * manipulations incore are the caller's responsibility.
 * The on-disk inode is not changed by this operation, only the
 * btree (free inode mask) is changed.
 *
 * tp:    transaction; its t_mountp supplies the mount
 * pag:   perag the inode is expected to belong to
 * inode: inode number to free
 * xic:   cluster information, handed to xfs_difree_inobt()
 *
 * The visible part returns -EINVAL when the inode number does not belong to
 * @pag, does not round-trip through its AG-relative form, or maps to a block
 * at or beyond the end of the AG. Errors from reading the AGI are returned
 * as-is.
 *
 * NOTE(review): the success return, the error0 label and the closing brace
 * are not visible, and the text after the finobt fix-up uses names that are
 * not declared here - see the note further down. Confirm against upstream.
 */ int
xfs_difree( struct xfs_trans *tp, struct xfs_perag *pag,
xfs_ino_t inode, struct xfs_icluster *xic)
{ /* REFERENCED */
xfs_agblock_t agbno; /* block number containing inode */ struct xfs_buf *agbp; /* buffer for allocation group header */
xfs_agino_t agino; /* allocation group inode number */ int error; /* error return value */ struct xfs_mount *mp = tp->t_mountp; struct xfs_inobt_rec_incore rec;/* btree record */
/*
 * Break up inode number into its components, rejecting values that do
 * not belong to this AG.
 */ if (pag_agno(pag) != XFS_INO_TO_AGNO(mp, inode)) {
xfs_warn(mp, "%s: agno != pag_agno(pag) (%d != %d).",
__func__, XFS_INO_TO_AGNO(mp, inode), pag_agno(pag));
ASSERT(0); return -EINVAL;
}
/* The AG-relative number must convert back to the same inode number. */
agino = XFS_INO_TO_AGINO(mp, inode); if (inode != xfs_agino_to_ino(pag, agino)) {
xfs_warn(mp, "%s: inode != xfs_agino_to_ino() (%llu != %llu).",
__func__, (unsignedlonglong)inode,
(unsignedlonglong)xfs_agino_to_ino(pag, agino));
ASSERT(0); return -EINVAL;
}
/* The block holding the inode must lie inside the AG. */
agbno = XFS_AGINO_TO_AGBNO(mp, agino); if (agbno >= xfs_ag_block_count(mp, pag_agno(pag))) {
xfs_warn(mp, "%s: agbno >= xfs_ag_block_count (%d >= %d).",
__func__, agbno, xfs_ag_block_count(mp, pag_agno(pag)));
ASSERT(0); return -EINVAL;
} /*
 * Get the allocation group header.
 */
error = xfs_ialloc_read_agi(pag, tp, 0, &agbp); if (error) {
xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
__func__, error); return error;
}
/*
 * Fix up the inode allocation btree.
 */
error = xfs_difree_inobt(pag, tp, agbp, agino, xic, &rec); if (error) goto error0;
/*
 * Fix up the free inode btree.
 */ if (xfs_has_finobt(mp)) {
error = xfs_difree_finobt(pag, tp, agbp, agino, &rec); if (error) goto error0;
}
/*
 * Lookup the inode record for the given agino. If the record cannot be
 * found, then it's an invalid inode number and we should abort. Once
 * we have a record, we need to ensure it contains the inode number
 * we are looking up.
 *
 * NOTE(review): from here on the code uses cur, i and flags, none of
 * which are declared in this function, and it releases agbp. It reads
 * like the body of a separate inode-record lookup routine fused onto
 * this one - confirm.
 */
cur = xfs_inobt_init_cursor(pag, tp, agbp);
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); if (!error) { if (i)
error = xfs_inobt_get_rec(cur, &rec, &i); if (!error && i == 0)
error = -EINVAL;
}
xfs_trans_brelse(tp, agbp);
xfs_btree_del_cursor(cur, error); if (error) return error;
/* check that the returned record contains the required inode */ if (rec.ir_startino > agino ||
rec.ir_startino + M_IGEO(mp)->ialloc_inos <= agino) return -EINVAL;
/* for untrusted inodes check it is allocated first */ if ((flags & XFS_IGET_UNTRUSTED) &&
(rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) return -EINVAL;
/* * Return the location of the inode in imap, for mapping it into a buffer.
*/ int
xfs_imap( struct xfs_perag *pag, struct xfs_trans *tp,
xfs_ino_t ino, /* inode to locate */ struct xfs_imap *imap, /* location map structure */
uint flags) /* flags for inode btree lookup */
{ struct xfs_mount *mp = pag_mount(pag);
xfs_agblock_t agbno; /* block number of inode in the alloc group */
xfs_agino_t agino; /* inode number within alloc group */
xfs_agblock_t chunk_agbno; /* first block in inode chunk */
--> --------------------
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.