/*
 * Search for a busy extent within the range of the extent we are about to
 * allocate.  You need to be holding the busy extent tree lock when calling
 * xfs_extent_busy_search().  This function returns 0 for no overlapping busy
 * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
 * match.  This is done so that a non-zero return indicates an overlap that
 * will require a synchronous transaction, but it can still be used to
 * distinguish between a partial or exact match.
 */
int
xfs_extent_busy_search(
	struct xfs_group	*xg,
	xfs_agblock_t		bno,
	xfs_extlen_t		len)
{
	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
	struct rb_node		*rbp;
	struct xfs_extent_busy	*busyp;
	int			match = 0;

	/* find closest start bno overlap */
	spin_lock(&eb->eb_lock);
	rbp = eb->eb_tree.rb_node;
	while (rbp) {
		busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node);
		if (bno < busyp->bno) {
			/* may overlap, but exact start block is lower */
			if (bno + len > busyp->bno)
				match = -1;
			rbp = rbp->rb_left;
		} else if (bno > busyp->bno) {
			/* may overlap, but exact start block is higher */
			if (bno < busyp->bno + busyp->length)
				match = -1;
			rbp = rbp->rb_right;
		} else {
			/* bno matches busyp, length determines exact match */
			match = (busyp->length == len) ? 1 : -1;
			break;
		}
	}
	spin_unlock(&eb->eb_lock);
	return match;
}
/* * The found free extent [fbno, fend] overlaps part or all of the given busy * extent. If the overlap covers the beginning, the end, or all of the busy * extent, the overlapping portion can be made unbusy and used for the * allocation. We can't split a busy extent because we can't modify a * transaction/CIL context busy list, but we can update an entry's block * number or length. * * Returns true if the extent can safely be reused, or false if the search * needs to be restarted.
*/ STATICbool
xfs_extent_busy_update_extent( struct xfs_group *xg, struct xfs_extent_busy *busyp,
xfs_agblock_t fbno,
xfs_extlen_t flen, bool userdata)
__releases(&eb->eb_lock)
__acquires(&eb->eb_lock)
{ struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
xfs_agblock_t fend = fbno + flen;
xfs_agblock_t bbno = busyp->bno;
xfs_agblock_t bend = bbno + busyp->length;
/* * This extent is currently being discarded. Give the thread * performing the discard a chance to mark the extent unbusy * and retry.
*/ if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) {
spin_unlock(&eb->eb_lock);
delay(1);
spin_lock(&eb->eb_lock); returnfalse;
}
/* * If there is a busy extent overlapping a user allocation, we have * no choice but to force the log and retry the search. * * Fortunately this does not happen during normal operation, but * only if the filesystem is very low on space and has to dip into * the AGFL for normal allocations.
*/ if (userdata) goto out_force_log;
if (bbno < fbno && bend > fend) { /* * Case 1: * bbno bend * +BBBBBBBBBBBBBBBBB+ * +---------+ * fbno fend
*/
/* * We would have to split the busy extent to be able to track * it correct, which we cannot do because we would have to * modify the list of busy extents attached to the transaction * or CIL context, which is immutable. * * Force out the log to clear the busy extent and retry the * search.
*/ goto out_force_log;
} elseif (bbno >= fbno && bend <= fend) { /* * Case 2: * bbno bend * +BBBBBBBBBBBBBBBBB+ * +-----------------+ * fbno fend * * Case 3: * bbno bend * +BBBBBBBBBBBBBBBBB+ * +--------------------------+ * fbno fend * * Case 4: * bbno bend * +BBBBBBBBBBBBBBBBB+ * +--------------------------+ * fbno fend * * Case 5: * bbno bend * +BBBBBBBBBBBBBBBBB+ * +-----------------------------------+ * fbno fend *
*/
/* * The busy extent is fully covered by the extent we are * allocating, and can simply be removed from the rbtree. * However we cannot remove it from the immutable list * tracking busy extents in the transaction or CIL context, * so set the length to zero to mark it invalid. * * We also need to restart the busy extent search from the * tree root, because erasing the node can rearrange the * tree topology.
*/
rb_erase(&busyp->rb_node, &eb->eb_tree);
busyp->length = 0; returnfalse;
} elseif (fend < bend) { /* * Case 6: * bbno bend * +BBBBBBBBBBBBBBBBB+ * +---------+ * fbno fend * * Case 7: * bbno bend * +BBBBBBBBBBBBBBBBB+ * +------------------+ * fbno fend *
*/
busyp->bno = fend;
busyp->length = bend - fend;
} elseif (bbno < fbno) { /* * Case 8: * bbno bend * +BBBBBBBBBBBBBBBBB+ * +-------------+ * fbno fend * * Case 9: * bbno bend * +BBBBBBBBBBBBBBBBB+ * +----------------------+ * fbno fend
*/
busyp->length = fbno - busyp->bno;
} else {
ASSERT(0);
}
/*
 * For a given extent [fbno, flen], search the busy extent list to find a
 * subset of the extent that is not busy.  If *rlen is smaller than
 * args->minlen no suitable extent could be found, and the higher level
 * code needs to force out the log and retry the allocation.
 *
 * Return the current busy generation for the group if the extent is busy.
 * This value can be used to wait for at least one of the currently busy
 * extents to be cleared.  Note that the busy list is not guaranteed to be
 * empty after the gen is woken.  The state of a specific extent must always
 * be confirmed with another call to xfs_extent_busy_trim() before it can be
 * used.
 */
bool
xfs_extent_busy_trim(
	struct xfs_group	*xg,
	xfs_extlen_t		minlen,
	xfs_extlen_t		maxlen,
	xfs_agblock_t		*bno,
	xfs_extlen_t		*len,
	unsigned		*busy_gen)
{
	struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
	xfs_agblock_t		fbno;
	xfs_extlen_t		flen;
	struct rb_node		*rbp;
	bool			ret = false;

	ASSERT(*len > 0);

	/*
	 * NOTE(review): the tree-walk preamble below was lost in the source
	 * capture and has been reconstructed from the upstream XFS pattern:
	 * walk the rbtree, skipping non-overlapping nodes, and trim the
	 * candidate extent against each overlapping busy extent.
	 */
	spin_lock(&eb->eb_lock);
	fbno = *bno;
	flen = *len;
	rbp = eb->eb_tree.rb_node;
	while (rbp && flen >= minlen) {
		struct xfs_extent_busy *busyp =
			rb_entry(rbp, struct xfs_extent_busy, rb_node);
		xfs_agblock_t	fend = fbno + flen;
		xfs_agblock_t	bbno = busyp->bno;
		xfs_agblock_t	bend = bbno + busyp->length;

		if (fend <= bbno) {
			rbp = rbp->rb_left;
			continue;
		} else if (fbno >= bend) {
			rbp = rbp->rb_right;
			continue;
		}

		if (bbno <= fbno) {
			/* start overlap */

			/*
			 * Case 1:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *        +---------+
			 *        fbno   fend
			 *
			 * Case 2:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *    +-------------+
			 *    fbno       fend
			 *
			 * Case 3:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *        +-------------+
			 *        fbno       fend
			 *
			 * Case 4:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *    +-----------------+
			 *    fbno           fend
			 *
			 * No unbusy region in extent, return failure.
			 */
			if (fend <= bend)
				goto fail;

			/*
			 * Case 5:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *        +----------------------+
			 *        fbno                fend
			 *
			 * Case 6:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *    +--------------------------+
			 *    fbno                    fend
			 *
			 * Needs to be trimmed to:
			 *                       +-------+
			 *                       fbno fend
			 */
			fbno = bend;
		} else if (bend >= fend) {
			/* end overlap */

			/*
			 * Case 7:
			 *             bbno           bend
			 *             +BBBBBBBBBBBBBBBBB+
			 *    +------------------+
			 *    fbno            fend
			 *
			 * Case 8:
			 *             bbno           bend
			 *             +BBBBBBBBBBBBBBBBB+
			 *    +--------------------------+
			 *    fbno                    fend
			 *
			 * Needs to be trimmed to:
			 *    +-------+
			 *    fbno fend
			 */
			fend = bbno;
		} else {
			/* middle overlap */

			/*
			 * Case 9:
			 *             bbno           bend
			 *             +BBBBBBBBBBBBBBBBB+
			 *    +-----------------------------------+
			 *    fbno                             fend
			 *
			 * Can be trimmed to:
			 *    +-------+        OR         +-------+
			 *    fbno fend                   fbno fend
			 *
			 * Backward allocation leads to significant
			 * fragmentation of directories, which degrades
			 * directory performance, therefore we always want to
			 * choose the option that produces forward allocation
			 * patterns.
			 * Preferring the lower bno extent will make the next
			 * request use "fend" as the start of the next
			 * allocation; if the segment is no longer busy at
			 * that point, we'll get a contiguous allocation, but
			 * even if it is still busy, we will get a forward
			 * allocation.
			 * We try to avoid choosing the segment at "bend",
			 * because that can lead to the next allocation
			 * taking the segment at "fbno", which would be a
			 * backward allocation.  We only use the segment at
			 * "fbno" if it is much larger than the current
			 * requested size, because in that case there's a
			 * good chance subsequent allocations will be
			 * contiguous.
			 */
			if (bbno - fbno >= maxlen) {
				/* left candidate fits perfect */
				fend = bbno;
			} else if (fend - bend >= maxlen * 4) {
				/* right candidate has enough free space */
				fbno = bend;
			} else if (bbno - fbno >= minlen) {
				/* left candidate fits minimum requirement */
				fend = bbno;
			} else {
				goto fail;
			}
		}

		flen = fend - fbno;
	}
out:
	if (fbno != *bno || flen != *len) {
		trace_xfs_extent_busy_trim(xg, *bno, *len, fbno, flen);
		*bno = fbno;
		*len = flen;
		*busy_gen = eb->eb_gen;
		ret = true;
	}
	spin_unlock(&eb->eb_lock);
	return ret;
fail:
	/*
	 * Return a zero extent length as failure indications.  All callers
	 * re-check if the trimmed extent satisfies the minlen requirement.
	 */
	flen = 0;
	goto out;
}
/*
 * Remove all extents on the passed in list from the busy extents tree.
 * If do_discard is set skip extents that need to be discarded, and mark
 * these as undergoing a discard operation instead.
 */
void
xfs_extent_busy_clear(
	struct list_head	*list,
	bool			do_discard)
{
	struct xfs_extent_busy	*busyp, *next;

	busyp = list_first_entry_or_null(list, typeof(*busyp), list);
	if (!busyp)
		return;

	/*
	 * The list is sorted by group, so process one group's extents per
	 * lock acquisition.
	 *
	 * NOTE(review): the outer loop header declaring xg/eb/wakeup was
	 * lost in the source capture; reconstructed from the upstream XFS
	 * pattern.  We take our own group reference because clearing a busy
	 * extent drops the reference it holds.  Confirm xfs_group_hold()
	 * against the tree's xfs_group.h.
	 */
	do {
		struct xfs_group	*xg = xfs_group_hold(busyp->group);
		struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
		bool			wakeup = false;

		spin_lock(&eb->eb_lock);
		do {
			next = list_next_entry(busyp, list);
			if (xfs_extent_busy_clear_one(busyp, do_discard))
				wakeup = true;
			busyp = next;
		} while (!list_entry_is_head(busyp, list, list) &&
			 busyp->group == xg);

		/* bump the generation so sleepers in _flush() can recheck */
		if (wakeup) {
			eb->eb_gen++;
			wake_up_all(&eb->eb_wait);
		}
		spin_unlock(&eb->eb_lock);
		xfs_group_put(xg);
	} while (!list_entry_is_head(busyp, list, list));
}
/* * Flush out all busy extents for this group. * * If the current transaction is holding busy extents, the caller may not want * to wait for committed busy extents to resolve. If we are being told just to * try a flush or progress has been made since we last skipped a busy extent, * return immediately to allow the caller to try again. * * If we are freeing extents, we might actually be holding the only free extents * in the transaction busy list and the log force won't resolve that situation. * In this case, we must return -EAGAIN to avoid a deadlock by informing the * caller it needs to commit the busy extents it holds before retrying the * extent free operation.
*/ int
xfs_extent_busy_flush( struct xfs_trans *tp, struct xfs_group *xg, unsigned busy_gen,
uint32_t alloc_flags)
{ struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
DEFINE_WAIT (wait); int error;
error = xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); if (error) return error;
/* Avoid deadlocks on uncommitted busy extents. */ if (!list_empty(&tp->t_busy)) { if (alloc_flags & XFS_ALLOC_FLAG_TRYFLUSH) return 0;
if (busy_gen != READ_ONCE(eb->eb_gen)) return 0;
if (alloc_flags & XFS_ALLOC_FLAG_FREEING) return -EAGAIN;
}
/* Wait for committed busy extents to resolve. */ do {
prepare_to_wait(&eb->eb_wait, &wait, TASK_KILLABLE); if (busy_gen != READ_ONCE(eb->eb_gen)) break;
schedule();
} while (1);
do {
prepare_to_wait(&eb->eb_wait, &wait, TASK_KILLABLE); if (RB_EMPTY_ROOT(&eb->eb_tree)) break;
schedule();
} while (1);
finish_wait(&eb->eb_wait, &wait);
}
/* Are there any busy extents in this group? */ bool
xfs_extent_busy_list_empty( struct xfs_group *xg, unsigned *busy_gen)
{ struct xfs_extent_busy_tree *eb = xg->xg_busy_extents; bool res;