/*
 * Subpage (block size < folio size) support overview:
 *
 * Limitations:
 *
 * - Only support 64K page size for now
 *   This is to make metadata handling easier, as 64K page would ensure
 *   all nodesize would fit inside one page, thus we don't need to handle
 *   cases where a tree block crosses several pages.
 *
 * - Only metadata read-write for now
 *   The data read-write part is in development.
 *
 * - Metadata can't cross 64K page boundary
 *   btrfs-progs and kernel have done that for a while, thus only ancient
 *   filesystems could have such problem.  For such case, do a graceful
 *   rejection.
 *
 * Special behavior:
 *
 * - Metadata
 *   Metadata read is fully supported.
 *   Meaning when reading one tree block will only trigger the read for the
 *   needed range, other unrelated range in the same page will not be touched.
 *
 *   Metadata write support is partial.
 *   The writeback is still for the full page, but we will only submit
 *   the dirty extent buffers in the page.
 *
 *   This means, if we have a metadata page like this:
 *
 *   Page offset
 *   0         16K         32K         48K        64K
 *   |/////////|           |///////////|
 *       \- Tree block A        \- Tree block B
 *
 *   Even if we just want to writeback tree block A, we will also writeback
 *   tree block B if it's also dirty.
 *
 *   This may cause extra metadata writeback which results in more COW.
 *
 * Implementation:
 *
 * - Common
 *   Both metadata and data will use a new structure, btrfs_folio_state, to
 *   record the status of each sector inside a page.  This provides the extra
 *   granularity needed.
 *
 * - Metadata
 *   Since we have multiple tree blocks inside one page, we can't rely on page
 *   locking anymore, or we will have greatly reduced concurrency or even
 *   deadlocks (hold one tree lock while trying to lock another tree lock in
 *   the same page).
 *
 *   Thus for metadata locking, subpage support relies on io_tree locking only.
 *   This means a slightly higher tree locking latency.
 */
/* For metadata we don't support large folio yet. */ if (type == BTRFS_SUBPAGE_METADATA)
ASSERT(!folio_test_large(folio));
/* * We have cases like a dummy extent buffer page, which is not mapped * and doesn't need to be locked.
*/ if (folio->mapping)
ASSERT(folio_test_locked(folio));
/* Either not subpage, or the folio already has private attached. */ if (folio_test_private(folio)) return 0; if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info)) return 0; if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio)) return 0;
bfs = btrfs_alloc_folio_state(fs_info, folio_size(folio), type); if (IS_ERR(bfs)) return PTR_ERR(bfs);
/* Either not subpage, or the folio already has private attached. */ if (!folio_test_private(folio)) return; if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info)) return; if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio)) return;
/* * Increase the eb_refs of current subpage. * * This is important for eb allocation, to prevent race with last eb freeing * of the same page. * With the eb_refs increased before the eb inserted into radix tree, * detach_extent_buffer_page() won't detach the folio private while we're still * allocating the extent buffer.
*/ void btrfs_folio_inc_eb_refs(conststruct btrfs_fs_info *fs_info, struct folio *folio)
{ struct btrfs_folio_state *bfs;
*start = max_t(u64, folio_pos(folio), orig_start); /* * For certain call sites like btrfs_drop_pages(), we may have pages * beyond the target range. In that case, just set @len to 0, subpage * helpers can handle @len == 0 without any problem.
*/ if (folio_pos(folio) >= orig_start + orig_len)
*len = 0; else
*len = min_t(u64, folio_end(folio), orig_start + orig_len) - *start;
}
spin_lock_irqsave(&bfs->lock, flags); /* * We have call sites passing @lock_page into * extent_clear_unlock_delalloc() for compression path. * * This @locked_page is locked by plain lock_page(), thus its * subpage::locked is 0. Handle them in a special way.
*/ if (atomic_read(&bfs->nr_locked) == 0) {
spin_unlock_irqrestore(&bfs->lock, flags); returntrue;
}
/*
 * Handle different locked folios:
 *
 * - Non-subpage folio
 *   Just unlock it.
 *
 * - folio locked but without any subpage locked
 *   This happens either before writepage_delalloc() or the delalloc range is
 *   already handled by previous folio.
 *   We can simply unlock it.
 *
 * - folio locked with subpage range locked.
 *   We go through the locked sectors inside the range and clear their locked
 *   bitmap, reduce the writer lock number, and unlock the page if that's
 *   the last locked range.
 */
void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
			  struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);

	ASSERT(folio_test_locked(folio));

	/* Non-subpage folios have no per-sector lock bitmap; plain unlock. */
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio)) {
		folio_unlock(folio);
		return;
	}

	/*
	 * For subpage case, there are two types of locked page.  With or
	 * without locked number.
	 *
	 * Since we own the page lock, no one else could touch subpage::locked
	 * and we are safe to do several atomic operations without spinlock.
	 */
	if (atomic_read(&bfs->nr_locked) == 0) {
		/* No subpage lock, locked by plain lock_page(). */
		folio_unlock(folio);
		return;
	}

	/* Clamp to this folio, then drop the subpage locks for the range. */
	btrfs_subpage_clamp_range(folio, &start, &len);
	if (btrfs_subpage_end_and_test_lock(fs_info, folio, start, len))
		folio_unlock(folio);
}
/*
 * Extra clear_and_test function for subpage dirty bitmap.
 *
 * Return true if we're the last bits in the dirty_bitmap and clear the
 * dirty_bitmap.
 * Return false otherwise.
 *
 * NOTE: Callers should manually clear page dirty for true case, as we have
 * extra handling for tree blocks.
 */
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
					struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							dirty, start, len);
	unsigned long flags;
	bool last = false;

	/* Bitmap updates must be serialized against concurrent modifiers. */
	spin_lock_irqsave(&bfs->lock, flags);
	bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	/* True only when no dirty sector remains in the whole folio. */
	if (subpage_test_bitmap_all_zero(fs_info, folio, dirty))
		last = true;
	spin_unlock_irqrestore(&bfs->lock, flags);
	return last;
}
spin_lock_irqsave(&bfs->lock, flags);
bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
/* * Don't clear the TOWRITE tag when starting writeback on a still-dirty * folio. Doing so can cause WB_SYNC_ALL writepages() to overlook it, * assume writeback is complete, and exit too early — violating sync * ordering guarantees.
*/ if (!folio_test_writeback(folio))
__folio_start_writeback(folio, true); if (!folio_test_dirty(folio)) { struct address_space *mapping = folio_mapping(folio);
XA_STATE(xas, &mapping->i_pages, folio->index); unsignedlong flags;
/* * Make sure not only the page dirty bit is cleared, but also subpage dirty bit * is cleared.
*/ void btrfs_folio_assert_not_dirty(conststruct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len)
{ struct btrfs_folio_state *bfs; unsignedint start_bit; unsignedint nbits; unsignedlong flags;
if (!IS_ENABLED(CONFIG_BTRFS_ASSERT)) return;
if (!btrfs_is_subpage(fs_info, folio)) {
ASSERT(!folio_test_dirty(folio)); return;
}
/*
 * This is for folio already locked by plain lock_page()/folio_lock(), which
 * doesn't have any subpage awareness.
 *
 * This populates the involved subpage ranges so that subpage helpers can
 * properly unlock them.
 */
void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
			  struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs;
	unsigned long flags;
	unsigned int start_bit;
	unsigned int nbits;
	int ret;

	ASSERT(folio_test_locked(folio));
	/* Non-subpage folios need no per-sector lock tracking. */
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio))
		return;

	bfs = folio_get_private(folio);
	start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
	nbits = len >> fs_info->sectorsize_bits;
	spin_lock_irqsave(&bfs->lock, flags);
	/* Target range should not yet be locked. */
	if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) {
		SUBPAGE_DUMP_BITMAP(fs_info, folio, locked, start, len);
		ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
	}
	bitmap_set(bfs->bitmaps, start_bit, nbits);
	ret = atomic_add_return(nbits, &bfs->nr_locked);
	/* nr_locked can never exceed the number of blocks in the folio. */
	ASSERT(ret <= btrfs_blocks_per_folio(fs_info, folio));
	spin_unlock_irqrestore(&bfs->lock, flags);
}
/* * Clear the dirty flag for the folio. * * If the affected folio is no longer dirty, return true. Otherwise return false.
*/ bool btrfs_meta_folio_clear_and_test_dirty(struct folio *folio, conststruct extent_buffer *eb)
{ bool last;
if (!btrfs_meta_is_subpage(eb->fs_info)) {
folio_clear_dirty_for_io(folio); returntrue;
}
last = btrfs_subpage_clear_and_test_dirty(eb->fs_info, folio, eb->start, eb->len); if (last) {
folio_clear_dirty_for_io(folio); returntrue;
} returnfalse;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.