// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/resize.c
 *
 * Support for resizing an ext4 filesystem while it is mounted.
 *
 * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
 *
 * This could probably be made into a module, because it is not often in use.
 */
/*
 * ext4_resize_begin() - check that an online resize may start and claim the
 * EXT4_FLAGS_RESIZING bit in sbi->s_ext4_flags.
 *
 * @sb: super block of the filesystem to be resized
 *
 * Returns 0 on success (the resizing bit is then set), or a negative errno:
 *   -EPERM         the caller lacks CAP_SYS_RESOURCE, the superblock buffer
 *                  in use is not at s_first_data_block, or EXT4_ERROR_FS is
 *                  set in the mount state
 *   -EFSCORRUPTED  s_reserved_gdt_blocks is non-zero but the resize_inode
 *                  feature is not set
 *   -EOPNOTSUPP    the sparse_super2 feature is set
 *   -EBUSY         the resizing bit was already set
 */
int ext4_resize_begin(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int ret = 0;

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;

	/*
	 * If the reserved GDT blocks is non-zero, the resize_inode feature
	 * should always be set.
	 */
	if (sbi->s_es->s_reserved_gdt_blocks &&
	    !ext4_has_feature_resize_inode(sb)) {
		ext4_error(sb, "resize_inode disabled but reserved GDT blocks non-zero");
		return -EFSCORRUPTED;
	}

	/*
	 * If we are not using the primary superblock/GDT copy don't resize,
	 * because the user tools have no way of handling this.  Probably a
	 * bad time to do it anyways.
	 */
	if (EXT4_B2C(sbi, sbi->s_sbh->b_blocknr) !=
	    le32_to_cpu(sbi->s_es->s_first_data_block)) {
		ext4_warning(sb, "won't resize using backup superblock at %llu",
			     (unsigned long long)sbi->s_sbh->b_blocknr);
		return -EPERM;
	}

	/*
	 * We are not allowed to do online-resizing on a filesystem mounted
	 * with error, because it can destroy the filesystem easily.
	 */
	if (sbi->s_mount_state & EXT4_ERROR_FS) {
		ext4_warning(sb, "There are errors in the filesystem, " "so online resizing is not allowed");
		return -EPERM;
	}

	if (ext4_has_feature_sparse_super2(sb)) {
		ext4_msg(sb, KERN_ERR, "Online resizing not supported with sparse_super2");
		return -EOPNOTSUPP;
	}

	/* Only one resize may run at a time; the bit doubles as the lock. */
	if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING,
				  &sbi->s_ext4_flags))
		ret = -EBUSY;

	return ret;
}
/*
 * ext4_resize_end() - drop the EXT4_FLAGS_RESIZING bit that
 * ext4_resize_begin() set.
 *
 * @sb:             super block of the filesystem
 * @update_backups: when true, finish by calling ext4_update_overhead()
 *
 * Returns the result of ext4_update_overhead(sb, true) when @update_backups
 * is set, otherwise 0.
 */
int ext4_resize_end(struct super_block *sb, bool update_backups)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	clear_bit_unlock(EXT4_FLAGS_RESIZING, &sbi->s_ext4_flags);
	smp_mb__after_atomic();

	return update_backups ? ext4_update_overhead(sb, true) : 0;
}
ext4_get_group_no_and_offset(sb, start, NULL, &offset); if (offset != 0)
ext4_warning(sb, "Last group not full"); elseif (input->reserved_blocks > input->blocks_count / 5)
ext4_warning(sb, "Reserved blocks too high (%u)",
input->reserved_blocks); elseif (free_blocks_count < 0)
ext4_warning(sb, "Bad blocks count %u",
input->blocks_count); elseif (IS_ERR(bh = ext4_sb_bread(sb, end - 1, 0))) {
err = PTR_ERR(bh);
bh = NULL;
ext4_warning(sb, "Cannot read last block (%llu)",
end - 1);
} elseif (outside(input->block_bitmap, start, end))
ext4_warning(sb, "Block bitmap not in group (block %llu)",
(unsignedlonglong)input->block_bitmap); elseif (outside(input->inode_bitmap, start, end))
ext4_warning(sb, "Inode bitmap not in group (block %llu)",
(unsignedlonglong)input->inode_bitmap); elseif (outside(input->inode_table, start, end) ||
outside(itend - 1, start, end))
ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)",
(unsignedlonglong)input->inode_table, itend - 1); elseif (input->inode_bitmap == input->block_bitmap)
ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)",
(unsignedlonglong)input->block_bitmap); elseif (inside(input->block_bitmap, input->inode_table, itend))
ext4_warning(sb, "Block bitmap (%llu) in inode table " "(%llu-%llu)",
(unsignedlonglong)input->block_bitmap,
(unsignedlonglong)input->inode_table, itend - 1); elseif (inside(input->inode_bitmap, input->inode_table, itend))
ext4_warning(sb, "Inode bitmap (%llu) in inode table " "(%llu-%llu)",
(unsignedlonglong)input->inode_bitmap,
(unsignedlonglong)input->inode_table, itend - 1); elseif (inside(input->block_bitmap, start, metaend))
ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)",
(unsignedlonglong)input->block_bitmap,
start, metaend - 1); elseif (inside(input->inode_bitmap, start, metaend))
ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)",
(unsignedlonglong)input->inode_bitmap,
start, metaend - 1); elseif (inside(input->inode_table, start, metaend) ||
inside(itend - 1, start, metaend))
ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table " "(%llu-%llu)",
(unsignedlonglong)input->inode_table,
itend - 1, start, metaend - 1); else
err = 0;
brelse(bh);
return err;
}
/*
 * ext4_new_flex_group_data is used by 64bit-resize interface to add a flex
 * group each time.
 *
 * @groups:    new_group_data for groups in the flex group
 * @bg_flags:  block group flags of groups in @groups
 * @resize_bg: number of allocated new_group_data
 * @count:     number of groups in @groups
 */
struct ext4_new_flex_group_data {
	struct ext4_new_group_data *groups;
	__u16 *bg_flags;
	ext4_group_t resize_bg;
	ext4_group_t count;
};
/*
 * Avoid memory allocation failures due to too many groups being added
 * each time.
 */
#define MAX_RESIZE_BG 16384
/* * alloc_flex_gd() allocates an ext4_new_flex_group_data that satisfies the * resizing from @o_group to @n_group, its size is typically @flexbg_size. * * Returns NULL on failure otherwise address of the allocated structure.
*/ staticstruct ext4_new_flex_group_data *alloc_flex_gd(unsignedint flexbg_size,
ext4_group_t o_group, ext4_group_t n_group)
{
ext4_group_t last_group; unsignedint max_resize_bg; struct ext4_new_flex_group_data *flex_gd;
flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS); if (flex_gd == NULL) goto out3;
/* * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps * and inode tables for a flex group. * * This function is used by 64bit-resize. Note that this function allocates * group tables from the 1st group of groups contained by @flexgd, which may * be a partial of a flex group. * * @sb: super block of fs to which the groups belongs * * Returns 0 on a successful allocation of the metadata blocks in the * block group.
*/ staticint ext4_alloc_group_tables(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, unsignedint flexbg_size)
{ struct ext4_new_group_data *group_data = flex_gd->groups;
ext4_fsblk_t start_blk;
ext4_fsblk_t last_blk;
ext4_group_t src_group;
ext4_group_t bb_index = 0;
ext4_group_t ib_index = 0;
ext4_group_t it_index = 0;
ext4_group_t group;
ext4_group_t last_group; unsigned overhead;
__u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0; int i;
/*
 * Set up the block and inode bitmaps, and the inode table for the new groups.
 * This doesn't need to be part of the main transaction, since we are only
 * changing blocks outside the actual filesystem.  We still do journaling to
 * ensure the recovery is correct in case of a failure just after resize.
 * If any part of this fails, we simply abort the resize.
 *
 * setup_new_flex_group_blocks handles a flex group as follows:
 *  1. copy super block and GDT, and initialize group tables if necessary.
 *     In this step, we only set bits in block bitmaps for blocks taken by
 *     super block and GDT.
 *  2. allocate group tables in block bitmaps, that is, set bits in block
 *     bitmap for blocks taken by group tables.
*/ staticint setup_new_flex_group_blocks(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd)
{ int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group};
ext4_fsblk_t start;
ext4_fsblk_t block; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct ext4_new_group_data *group_data = flex_gd->groups;
__u16 *bg_flags = flex_gd->bg_flags;
handle_t *handle;
ext4_group_t group, count; struct buffer_head *bh = NULL; int reserved_gdb, i, j, err = 0, err2; int meta_bg;
/* This transaction may be extended/restarted along the way */
handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA); if (IS_ERR(handle)) return PTR_ERR(handle);
group = group_data[0].group; for (i = 0; i < flex_gd->count; i++, group++) { unsignedlong gdblocks;
ext4_grpblk_t overhead;
if (meta_bg == 0 && !ext4_bg_has_super(sb, group)) goto handle_itb;
if (meta_bg == 1) goto handle_itb;
block = start + ext4_bg_has_super(sb, group); /* Copy all of the GDT blocks into the backup in this group */ for (j = 0; j < gdblocks; j++, block++) { struct buffer_head *gdb;
ext4_debug("update backup group %#04llx\n", block);
err = ext4_resize_ensure_credits_batch(handle, 1); if (err < 0) goto out;
/* Zero out all of the reserved backup group descriptor * table blocks
*/ if (ext4_bg_has_super(sb, group)) {
err = sb_issue_zeroout(sb, gdblocks + start + 1,
reserved_gdb, GFP_NOFS); if (err) goto out;
}
handle_itb: /* Initialize group tables of the group @group */ if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) goto handle_bb;
/* Zero out all of the inode table blocks */
block = group_data[i].inode_table;
ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
block, sbi->s_itb_per_group);
err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
GFP_NOFS); if (err) goto out;
handle_bb: if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT) goto handle_ib;
/* Initialize block bitmap of the @group */
block = group_data[i].block_bitmap;
err = ext4_resize_ensure_credits_batch(handle, 1); if (err < 0) goto out;
/* * Iterate through the groups which hold BACKUP superblock/GDT copies in an * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before * calling this for the first time. In a sparse filesystem it will be the * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
*/ unsignedint ext4_list_backups(struct super_block *sb, unsignedint *three, unsignedint *five, unsignedint *seven)
{ struct ext4_super_block *es = EXT4_SB(sb)->s_es; unsignedint *min = three; int mult = 3; unsignedint ret;
if (ext4_has_feature_sparse_super2(sb)) { do { if (*min > 2) return UINT_MAX;
ret = le32_to_cpu(es->s_backup_bgs[*min - 1]);
*min += 1;
} while (!ret); return ret;
}
if (!ext4_has_feature_sparse_super(sb)) {
ret = *min;
*min += 1; return ret;
}
if (*five < *min) {
min = five;
mult = 5;
} if (*seven < *min) {
min = seven;
mult = 7;
}
ret = *min;
*min *= mult;
return ret;
}
/* * Check that all of the backup GDT blocks are held in the primary GDT block. * It is assumed that they are stored in group order. Returns the number of * groups in current filesystem that have BACKUPS, or -ve error code.
*/ staticint verify_reserved_gdb(struct super_block *sb,
ext4_group_t end, struct buffer_head *primary)
{ const ext4_fsblk_t blk = primary->b_blocknr; unsigned three = 1; unsigned five = 5; unsigned seven = 7; unsigned grp;
__le32 *p = (__le32 *)primary->b_data; int gdbackups = 0;
/* * Called when we need to bring a reserved group descriptor table block into * use from the resize inode. The primary copy of the new GDT block currently * is an indirect block (under the double indirect block in the resize inode). * The new backup GDT blocks will be stored as leaf blocks in this indirect * block, in group order. Even though we know all the block numbers we need, * we check to ensure that the resize inode has actually reserved these blocks. * * Don't need to update the block bitmaps because the blocks are still in use. * * We get all of the error cases out of the way, so that we are sure to not * fail once we start modifying the data on disk, because JBD has no rollback.
*/ staticint add_new_gdb(handle_t *handle, struct inode *inode,
ext4_group_t group)
{ struct super_block *sb = inode->i_sb; struct ext4_super_block *es = EXT4_SB(sb)->s_es; unsignedlong gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; struct buffer_head **o_group_desc, **n_group_desc = NULL; struct buffer_head *dind = NULL; struct buffer_head *gdb_bh = NULL; int gdbackups; struct ext4_iloc iloc = { .bh = NULL };
__le32 *data; int err;
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
gdb_num);
gdb_bh = ext4_sb_bread(sb, gdblock, 0); if (IS_ERR(gdb_bh)) return PTR_ERR(gdb_bh);
/* * Finally, we have all of the possible failures behind us... * * Remove new GDT block from inode double-indirect block and clear out * the new GDT block for use (which also "frees" the backup GDT blocks * from the reserved inode). We don't need to change the bitmaps for * these blocks, because they are marked as in-use from being in the * reserved inode, and will become GDT blocks (primary and backup).
*/
data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
err = ext4_handle_dirty_metadata(handle, NULL, dind); if (unlikely(err)) {
ext4_std_error(sb, err); goto errout;
}
inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >>
(9 - EXT4_SB(sb)->s_cluster_bits);
ext4_mark_iloc_dirty(handle, inode, &iloc);
memset(gdb_bh->b_data, 0, sb->s_blocksize);
err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) {
ext4_std_error(sb, err);
iloc.bh = NULL; goto errout;
}
brelse(dind);
ext4_debug("leaving with error %d\n", err); return err;
}
/* * If there is no available space in the existing block group descriptors for * the new block group and there are no reserved block group descriptors, then * the meta_bg feature will get enabled, and es->s_first_meta_bg will get set * to the first block group that is managed using meta_bg and s_first_meta_bg * must be a multiple of EXT4_DESC_PER_BLOCK(sb). * This function will be called when first group of meta_bg is added to bring * new group descriptors block of new added meta_bg.
*/ staticint add_new_gdb_meta_bg(struct super_block *sb,
handle_t *handle, ext4_group_t group) {
ext4_fsblk_t gdblock; struct buffer_head *gdb_bh; struct buffer_head **o_group_desc, **n_group_desc; unsignedlong gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int err;
/* * Called when we are adding a new group which has a backup copy of each of * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. * We need to add these reserved backup GDT blocks to the resize inode, so * that they are kept for future resizing and not allocated to files. * * Each reserved backup GDT block will go into a different indirect block. * The indirect blocks are actually the primary reserved GDT blocks, * so we know in advance what their block numbers are. We only get the * double-indirect block to verify it is pointing to the primary reserved * GDT blocks so we don't overwrite a data block by accident. The reserved * backup GDT blocks are stored in their reserved primary GDT block.
*/ staticint reserve_backup_gdb(handle_t *handle, struct inode *inode,
ext4_group_t group)
{ struct super_block *sb = inode->i_sb; int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); int cluster_bits = EXT4_SB(sb)->s_cluster_bits; struct buffer_head **primary; struct buffer_head *dind; struct ext4_iloc iloc;
ext4_fsblk_t blk;
__le32 *data, *end; int gdbackups = 0; int res, i; int err;
primary = kmalloc_array(reserved_gdb, sizeof(*primary), GFP_NOFS); if (!primary) return -ENOMEM;
/* Get each reserved primary GDT block and verify it holds backups */ for (res = 0; res < reserved_gdb; res++, blk++) { if (le32_to_cpu(*data) != blk) {
ext4_warning(sb, "reserved block %llu" " not at offset %ld",
blk,
(long)(data - (__le32 *)dind->b_data));
err = -EINVAL; goto exit_bh;
}
primary[res] = ext4_sb_bread(sb, blk, 0); if (IS_ERR(primary[res])) {
err = PTR_ERR(primary[res]);
primary[res] = NULL; goto exit_bh;
}
gdbackups = verify_reserved_gdb(sb, group, primary[res]); if (gdbackups < 0) {
brelse(primary[res]);
err = gdbackups; goto exit_bh;
} if (++data >= end)
data = (__le32 *)dind->b_data;
}
for (i = 0; i < reserved_gdb; i++) {
BUFFER_TRACE(primary[i], "get_write_access"); if ((err = ext4_journal_get_write_access(handle, sb, primary[i],
EXT4_JTR_NONE))) goto exit_bh;
}
if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) goto exit_bh;
/* * Finally we can add each of the reserved backup GDT blocks from * the new group to its reserved primary GDT block.
*/
blk = group * EXT4_BLOCKS_PER_GROUP(sb); for (i = 0; i < reserved_gdb; i++) { int err2;
data = (__le32 *)primary[i]->b_data;
data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]); if (!err)
err = err2;
}
es->s_block_group_nr = cpu_to_le16(group); if (ext4_has_feature_metadata_csum(sb))
es->s_checksum = ext4_superblock_csum(es);
}
/*
 * Update the backup copies of the ext4 metadata.  These don't need to be part
 * of the main resize transaction, because e2fsck will re-write them if there
 * is a problem (basically only OOM will cause a problem).  However, we
 * _should_ update the backups if possible, in case the primary gets trashed
 * for some reason and we need to run e2fsck from a backup superblock.  The
 * important part is that the new block and inode counts are in the backup
 * superblocks, and the location of the new group metadata in the GDT backups.
 *
 * We do not need take the s_resize_lock for this, because these
 * blocks are not otherwise touched by the filesystem code when it is
 * mounted.  We don't need to worry about last changing from
 * sbi->s_groups_count, because the worst that can happen is that we
 * do not copy the full number of backups at this time.  The resize
 * which changed s_groups_count will backup again.
 *
 * @sb:      super block of the filesystem
 * @blk_off: block offset; with @meta_bg set it selects the starting group
 * @data:    source buffer for the backup contents
 * @size:    number of bytes of @data
 * @meta_bg: zero to walk the groups returned by ext4_list_backups(),
 *           non-zero to visit only the group after the one containing
 *           @blk_off and then `last`
 *
 * On any error the filesystem is marked as needing fsck (EXT4_VALID_FS is
 * cleared in both the in-memory and on-disk state) and a warning is logged.
 *
 * NOTE(review): in the body as it stands, bpg, rest, bh, backup_block,
 * has_super and first_block are computed but never used, and @data is never
 * copied anywhere - confirm that the per-group copy step has not been lost
 * from the loop.
 */
static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
			   int size, int meta_bg)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_group_t last;
	const int bpg = EXT4_BLOCKS_PER_GROUP(sb);
	unsigned three = 1;
	unsigned five = 5;
	unsigned seven = 7;
	ext4_group_t group = 0;
	int rest = sb->s_blocksize - size;
	handle_t *handle;
	int err = 0, err2;

	handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
	if (IS_ERR(handle)) {
		group = 1;
		err = PTR_ERR(handle);
		goto exit_err;
	}

	if (meta_bg == 0) {
		group = ext4_list_backups(sb, &three, &five, &seven);
		last = sbi->s_groups_count;
	} else {
		group = ext4_get_group_number(sb, blk_off) + 1;
		last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2);
	}

	while (group < sbi->s_groups_count) {
		struct buffer_head *bh;
		ext4_fsblk_t backup_block;
		int has_super = ext4_bg_has_super(sb, group);
		ext4_fsblk_t first_block = ext4_group_first_block_no(sb, group);

		/* Out of journal space, and can't get more - abort - so sad */
		err = ext4_resize_ensure_credits_batch(handle, 1);
		if (err < 0)
			break;

		if (meta_bg == 0)
			group = ext4_list_backups(sb, &three, &five, &seven);
		else if (group == last)
			break;
		else
			group = last;
	}
	if ((err2 = ext4_journal_stop(handle)) && !err)
		err = err2;

	/*
	 * Ugh! Need to have e2fsck write the backup copies.  It is too
	 * late to revert the resize, we shouldn't fail just because of
	 * the backup copies (they are only needed in case of corruption).
	 *
	 * However, if we got here we have a journal problem too, so we
	 * can't really start a transaction to mark the superblock.
	 * Chicken out and just set the flag on the hope it will be written
	 * to disk, and if not - we will simply wait until next fsck.
	 */
exit_err:
	if (err) {
		ext4_warning(sb, "can't update backup for group %u (err %d), " "forcing fsck on next reboot", group, err);
		sbi->s_mount_state &= ~EXT4_VALID_FS;
		sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
		mark_buffer_dirty(sbi->s_sbh);
	}
}
/* * ext4_add_new_descs() adds @count group descriptor of groups * starting at @group * * @handle: journal handle * @sb: super block * @group: the group no. of the first group desc to be added * @resize_inode: the resize inode * @count: number of group descriptors to be added
*/ staticint ext4_add_new_descs(handle_t *handle, struct super_block *sb,
ext4_group_t group, struct inode *resize_inode,
ext4_group_t count)
{ struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct buffer_head *gdb_bh; int i, gdb_off, gdb_num, err = 0; int meta_bg;
meta_bg = ext4_has_feature_meta_bg(sb); for (i = 0; i < count; i++, group++) { int reserved_gdb = ext4_bg_has_super(sb, group) ?
le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
/*
 * We will only either add reserved group blocks to a backup group
 * or remove reserved blocks for the first group in a new group block.
 * Doing both would mean more complex code, and sane people don't
 * use non-sparse filesystems anymore.  This is already checked above.
*/ if (gdb_off) {
gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
gdb_num);
BUFFER_TRACE(gdb_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, sb, gdb_bh,
EXT4_JTR_NONE);
/*
 * ext4_setup_new_descs() will set up the group descriptors of a flex bg
 *
 * @handle:  journal handle
 * @sb:      super block
 * @flex_gd: the new groups whose descriptors are processed
 *
 * For each group in @flex_gd the descriptor is located inside its group
 * descriptor block and passed to ext4_mb_add_groupinfo().  Returns 0, or the
 * first non-zero value returned by ext4_mb_add_groupinfo().
 *
 * NOTE(review): @handle is not used and bg_flags is advanced but never read
 * in the body as it stands - confirm that the code filling in and dirtying
 * each descriptor has not been lost.
 */
static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
				struct ext4_new_flex_group_data *flex_gd)
{
	struct ext4_new_group_data *group_data = flex_gd->groups;
	struct ext4_group_desc *gdp;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct buffer_head *gdb_bh;
	ext4_group_t group;
	__u16 *bg_flags = flex_gd->bg_flags;
	int i, gdb_off, gdb_num, err = 0;

	for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) {
		group = group_data->group;

		gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
		gdb_num = group / EXT4_DESC_PER_BLOCK(sb);

		/*
		 * get_write_access() has been called on gdb_bh by ext4_add_new_desc().
		 */
		gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num);
		/* Update group descriptor block for new group */
		gdp = (struct ext4_group_desc *)(gdb_bh->b_data +
						 gdb_off * EXT4_DESC_SIZE(sb));

		/*
		 * We can allocate memory for mb_alloc based on the new group
		 * descriptor
		 */
		err = ext4_mb_add_groupinfo(sb, group, gdp);
		if (err)
			break;
	}
	return err;
}
/* * ext4_update_super() updates the super block so that the newly added * groups can be seen by the filesystem. * * @sb: super block * @flex_gd: new added groups
*/ staticvoid ext4_update_super(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd)
{
ext4_fsblk_t blocks_count = 0;
ext4_fsblk_t free_blocks = 0;
ext4_fsblk_t reserved_blocks = 0; struct ext4_new_group_data *group_data = flex_gd->groups; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; int i;
BUG_ON(flex_gd->count == 0 || group_data == NULL); /* * Make the new blocks and inodes valid next. We do this before * increasing the group count so that once the group is enabled, * all of its blocks and inodes are already valid. * * We always allocate group-by-group, then block-by-block or * inode-by-inode within a group, so enabling these * blocks/inodes before the group is live won't actually let us * allocate the new space yet.
*/ for (i = 0; i < flex_gd->count; i++) {
blocks_count += group_data[i].blocks_count;
free_blocks += EXT4_C2B(sbi, group_data[i].free_clusters_count);
}
ext4_debug("free blocks count %llu", ext4_free_blocks_count(es)); /* * We need to protect s_groups_count against other CPUs seeing * inconsistent state in the superblock. * * The precise rules we use are: * * * Writers must perform a smp_wmb() after updating all * dependent data and before modifying the groups count * * * Readers must perform an smp_rmb() after reading the groups * count and before reading any dependent data. * * NB. These rules can be relaxed when checking the group count * while freeing data, as we can only allocate from a block * group after serialising against the group count, and we can * only then free after serialising in turn against that * allocation.
*/
smp_wmb();
/* Update the global fs size fields */
sbi->s_groups_count += flex_gd->count;
sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
/* Update the reserved block counts only once the new group is
* active. */
ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
reserved_blocks);
/* Update the free space counts */
percpu_counter_add(&sbi->s_freeclusters_counter,
EXT4_NUM_B2C(sbi, free_blocks));
percpu_counter_add(&sbi->s_freeinodes_counter,
EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
/* * Update the fs overhead information. * * For bigalloc, if the superblock already has a properly calculated * overhead, update it with a value based on numbers already computed * above for the newly allocated capacity.
*/ if (ext4_has_feature_bigalloc(sb) && (sbi->s_overhead != 0))
ext4_add_overhead(sb,
EXT4_NUM_B2C(sbi, blocks_count - free_blocks)); else
ext4_calculate_overhead(sb);
es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead);
ext4_superblock_csum_set(sb);
unlock_buffer(sbi->s_sbh); if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: added group %u:" "%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
blocks_count, free_blocks, reserved_blocks);
}
/* Add a flex group to an fs. Ensure we handle all possible error conditions * _before_ we start modifying the filesystem, because we cannot abort the * transaction and not have it write the data to disk.
*/ staticint ext4_flex_group_add(struct super_block *sb, struct inode *resize_inode, struct ext4_new_flex_group_data *flex_gd)
{ struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es;
ext4_fsblk_t o_blocks_count;
ext4_grpblk_t last;
ext4_group_t group;
handle_t *handle; unsigned reserved_gdb; int err = 0, err2 = 0, credit;
err = setup_new_flex_group_blocks(sb, flex_gd); if (err) gotoexit; /* * We will always be modifying at least the superblock and GDT * blocks. If we are adding a group past the last current GDT block, * we will also modify the inode and the dindirect block. If we * are adding a group with superblock/GDT backups we will also * modify each of the reserved GDT dindirect blocks.
*/
credit = 3; /* sb, resize inode, resize inode dindirect */ /* GDT blocks */
credit += 1 + DIV_ROUND_UP(flex_gd->count, EXT4_DESC_PER_BLOCK(sb));
credit += reserved_gdb; /* Reserved GDT dindirect blocks */
handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit); if (IS_ERR(handle)) {
err = PTR_ERR(handle); gotoexit;
}
last_group = group | (flex_gd->resize_bg - 1); if (last_group > n_group)
last_group = n_group;
flex_gd->count = last_group - group + 1;
for (i = 0; i < flex_gd->count; i++) { int overhead;
group_data[i].group = group + i;
group_data[i].blocks_count = EXT4_BLOCKS_PER_GROUP(sb);
overhead = ext4_group_overhead_blocks(sb, group + i);
group_data[i].mdata_blocks = overhead;
group_data[i].free_clusters_count = EXT4_CLUSTERS_PER_GROUP(sb); if (ext4_has_group_desc_csum(sb)) {
flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
EXT4_BG_INODE_UNINIT; if (!test_opt(sb, INIT_INODE_TABLE))
flex_gd->bg_flags[i] |= EXT4_BG_INODE_ZEROED;
} else
flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
}
if (last_group == n_group && ext4_has_group_desc_csum(sb)) /* We need to initialize block bitmap of last group. */
flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;
if ((last_group == n_group) && (last != clusters_per_group - 1)) {
group_data[i - 1].blocks_count = EXT4_C2B(sbi, last + 1);
group_data[i - 1].free_clusters_count -= clusters_per_group -
last - 1;
}
return 1;
}
/* Add group descriptor data to an existing or new group descriptor block. * Ensure we handle all possible error conditions _before_ we start modifying * the filesystem, because we cannot abort the transaction and not have it * write the data to disk. * * If we are on a GDT block boundary, we need to get the reserved GDT block. * Otherwise, we may need to add backup GDT blocks for a sparse group. * * We only need to hold the superblock lock while we are actually adding * in the new group's counts to the superblock. Prior to that we have * not really "added" the group at all. We re-check that we are still * adding in the last group in case things have changed since verifying.
*/ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
{ struct ext4_new_flex_group_data flex_gd; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
le16_to_cpu(es->s_reserved_gdt_blocks) : 0; struct inode *inode = NULL; int gdb_off; int err;
__u16 bg_flags = 0;
/*
 * extend a group without checking assuming that checking has been done.
 *
 * @sb:             super block of the filesystem
 * @o_blocks_count: block count of the filesystem before the extension
 * @add:            number of blocks to add to the last group
 *
 * Starts its own journal handle, bumps the block and free-block counts in
 * the superblock, hands the new range to ext4_group_add_blocks(), and on
 * success refreshes the backup superblocks via update_backups().
 *
 * Returns 0 on success or a negative errno from the journal calls or from
 * ext4_group_add_blocks().
 */
static int ext4_group_extend_no_check(struct super_block *sb,
				      ext4_fsblk_t o_blocks_count, ext4_grpblk_t add)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	handle_t *handle;
	int err = 0, err2;

	/* We will update the superblock, one block bitmap, and
	 * one group descriptor via ext4_group_add_blocks().
	 */
	handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, 3);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		ext4_warning(sb, "error %d on journal start", err);
		return err;
	}

	/*
	 * The superblock buffer is modified and dirtied under this handle
	 * below, so the journal must be given write access to it first.
	 */
	err = ext4_journal_get_write_access(handle, sb, EXT4_SB(sb)->s_sbh,
					    EXT4_JTR_NONE);
	if (err) {
		ext4_warning(sb, "error %d on journal write access", err);
		goto errout;
	}

	lock_buffer(EXT4_SB(sb)->s_sbh);
	ext4_blocks_count_set(es, o_blocks_count + add);
	ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add);
	ext4_superblock_csum_set(sb);
	unlock_buffer(EXT4_SB(sb)->s_sbh);
	ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
		   o_blocks_count + add);
	/* We add the blocks to the bitmap and set the group need init bit */
	err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
	if (err)
		goto errout;
	ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
	ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
		   o_blocks_count + add);
errout:
	/* Always stop the handle; keep the first error seen. */
	err2 = ext4_journal_stop(handle);
	if (err2 && !err)
		err = err2;

	if (!err) {
		if (test_opt(sb, DEBUG))
			printk(KERN_DEBUG "EXT4-fs: extended group to %llu " "blocks\n", ext4_blocks_count(es));
		update_backups(sb, ext4_group_first_block_no(sb, 0),
			       (char *)es, sizeof(struct ext4_super_block), 0);
	}
	return err;
}
/* * Extend the filesystem to the new number of blocks specified. This entry * point is only used to extend the current filesystem to the end of the last * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" * for emergencies (because it has no dependencies on reserved blocks). * * If we _really_ wanted, we could use default values to call ext4_group_add() * allow the "remount" trick to work for arbitrary resizing, assuming enough * GDT blocks are reserved to grow to the desired size.
*/ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_fsblk_t n_blocks_count)
{
ext4_fsblk_t o_blocks_count;
ext4_grpblk_t last;
ext4_grpblk_t add; struct buffer_head *bh;
ext4_group_t group;
o_blocks_count = ext4_blocks_count(es);
if (test_opt(sb, DEBUG))
ext4_msg(sb, KERN_DEBUG, "extending last group from %llu to %llu blocks",
o_blocks_count, n_blocks_count);
if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) return 0;
if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
ext4_msg(sb, KERN_ERR, "filesystem too large to resize to %llu blocks safely",
n_blocks_count); return -EINVAL;
}
if (o_blocks_count + add < n_blocks_count)
ext4_warning(sb, "will only finish group (%llu blocks, %u new)",
o_blocks_count + add, add);
/* See if the device is actually as big as what was requested */
bh = ext4_sb_bread(sb, o_blocks_count + add - 1, 0); if (IS_ERR(bh)) {
ext4_warning(sb, "can't read last block, resize aborted"); return -ENOSPC;
}
brelse(bh);
/* * Release the resize inode and drop the resize_inode feature if there * are no more reserved gdt blocks, and then convert the file system * to enable meta_bg
*/ staticint ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
{
handle_t *handle; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct ext4_inode_info *ei = EXT4_I(inode);
ext4_fsblk_t nr; int i, ret, err = 0; int credits = 1;
ext4_msg(sb, KERN_INFO, "Converting file system to meta_bg"); if (inode) { if (es->s_reserved_gdt_blocks) {
ext4_error(sb, "Unexpected non-zero " "s_reserved_gdt_blocks"); return -EPERM;
}
/* Do a quick sanity check of the resize inode */ if (inode->i_blocks != 1 << (inode->i_blkbits -
(9 - sbi->s_cluster_bits))) goto invalid_resize_inode; for (i = 0; i < EXT4_N_BLOCKS; i++) { if (i == EXT4_DIND_BLOCK) { if (ei->i_data[i]) continue; else goto invalid_resize_inode;
} if (ei->i_data[i]) goto invalid_resize_inode;
}
credits += 3; /* block bitmap, bg descriptor, resize inode */
}
handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credits); if (IS_ERR(handle)) return PTR_ERR(handle);
/*
 * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
 *
 * @sb: super block of the fs to be resized
 * @n_blocks_count: the number of blocks in the resized fs
*/ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
{ struct ext4_new_flex_group_data *flex_gd = NULL; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct buffer_head *bh; struct inode *resize_inode = NULL;
ext4_grpblk_t add, offset; unsignedlong n_desc_blocks; unsignedlong o_desc_blocks;
ext4_group_t o_group;
ext4_group_t n_group;
ext4_fsblk_t o_blocks_count;
ext4_fsblk_t n_blocks_count_retry = 0; unsignedlong last_update_time = 0; int err = 0; int meta_bg; unsignedint flexbg_size = ext4_flex_bg_size(sbi);
/* See if the device is actually as big as what was requested */
bh = ext4_sb_bread(sb, n_blocks_count - 1, 0); if (IS_ERR(bh)) {
ext4_warning(sb, "can't read last block, resize aborted"); return -ENOSPC;
}
brelse(bh);
/* * For bigalloc, trim the requested size to the nearest cluster * boundary to avoid creating an unusable filesystem. We do this * silently, instead of returning an error, to avoid breaking * callers that blindly resize the filesystem to the full size of * the underlying block device.
*/ if (ext4_has_feature_bigalloc(sb))
n_blocks_count &= ~((1 << EXT4_CLUSTER_BITS(sb)) - 1);
/* * Make sure the last group has enough space so that it's * guaranteed to have enough space for all metadata blocks * that it might need to hold. (We might not need to store * the inode table blocks in the last block group, but there * will be cases where this might be needed.)
*/ if ((ext4_group_first_block_no(sb, n_group) +
ext4_group_overhead_blocks(sb, n_group) + 2 +
sbi->s_itb_per_group + sbi->s_cluster_ratio) >= n_blocks_count) {
n_blocks_count = ext4_group_first_block_no(sb, n_group);
n_group--;
n_blocks_count_retry = 0; if (resize_inode) {
iput(resize_inode);
resize_inode = NULL;
} goto retry;
}
/* extend the last group */ if (n_group == o_group)
add = n_blocks_count - o_blocks_count; else
add = EXT4_C2B(sbi, EXT4_CLUSTERS_PER_GROUP(sb) - (offset + 1)); if (add > 0) {
err = ext4_group_extend_no_check(sb, o_blocks_count, add); if (err) goto out;
}
if (ext4_blocks_count(es) == n_blocks_count && n_blocks_count_retry == 0) goto out;
err = ext4_alloc_flex_bg_array(sb, n_group + 1); if (err) goto out;
err = ext4_mb_alloc_groupinfo(sb, n_group + 1); if (err) goto out;
/* Add flex groups. Note that a regular group is a * flex group with 1 group.
*/ while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count)) { if (time_is_before_jiffies(last_update_time + HZ * 10)) { if (last_update_time)
ext4_msg(sb, KERN_INFO, "resized to %llu blocks",
ext4_blocks_count(es));
last_update_time = jiffies;
} if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0) break;
err = ext4_flex_group_add(sb, resize_inode, flex_gd); if (unlikely(err)) break;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.