/* * buffer's verified bit is no longer valid after reading from * disk again due to write out error, clear it to make sure we * recheck the buffer contents.
*/
clear_buffer_verified(bh);
/* * This works like __bread_gfp() except it uses ERR_PTR for error * returns. Currently with sb_bread it's impossible to distinguish * between ENOMEM and EIO situations (since both result in a NULL * return.
*/ staticstruct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
sector_t block,
blk_opf_t op_flags, gfp_t gfp)
{ struct buffer_head *bh; int ret;
bh = sb_getblk_gfp(sb, block, gfp); if (bh == NULL) return ERR_PTR(-ENOMEM); if (ext4_buffer_uptodate(bh)) return bh;
ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true); if (ret) {
put_bh(bh); return ERR_PTR(ret);
} return bh;
}
/*
 * ext4_maybe_update_superblock() checks whether the on-disk superblock
 * should be updated and, if so, schedules the update.
 *
 * The function is designed to update the on-disk superblock only under
 * certain conditions, to prevent excessive disk writes and unnecessary
 * waking of the disk from sleep.  The superblock will be updated if:
 * 1. More than sbi->s_sb_update_sec (def: 1 hour) has passed since the
 *    last superblock update.
 * 2. More than sbi->s_sb_update_kb (def: 16MB) kbs have been written since
 *    the last superblock update.
 *
 * @sb: The superblock
 *
 * NOTE(review): in the code visible here, lifetime_write_kbytes is read
 * without ever being assigned, and journal, now and last_update are
 * declared but never used; the time-based check described in (1) is not
 * present.  The statements that perform those steps appear to be missing
 * from this copy -- confirm against the complete function before relying
 * on this block.
 */
staticvoid ext4_maybe_update_superblock(struct super_block *sb)
{ struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es;
journal_t *journal = sbi->s_journal;
time64_t now;
__u64 last_update;
__u64 lifetime_write_kbytes;
__u64 diff_size;
/*
 * Get the number of kilobytes not yet written to disk (to account for
 * statistics) and compare it with a multiple of 16 MB.  This is used to
 * determine when the next superblock commit should occur (i.e. not more
 * often than once per 16MB if there was less written in an hour).
 */
diff_size = lifetime_write_kbytes - le64_to_cpu(es->s_kbytes_written);
/* Defer the actual superblock update to the s_sb_upd_work work item. */
if (diff_size > sbi->s_sb_update_kb)
schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
}
bh = head = folio_buffers(folio); do { /* * We have to redirty a page in these cases: * 1) If buffer is dirty, it means the page was dirty because it * contains a buffer that needs checkpointing. So the dirty bit * needs to be preserved so that checkpointing writes the buffer * properly. * 2) If buffer is not part of the committing transaction * (we may have just accidentally come across this buffer because * inode range tracking is not exact) or if the currently running * transaction already contains this buffer as well, dirty bit * needs to be preserved so that the buffer gets writeprotected * properly on running transaction's commit.
*/
jh = bh2jh(bh); if (buffer_dirty(bh) ||
(jh && (jh->b_transaction != jinode->i_transaction ||
jh->b_next_transaction))) returntrue;
} while ((bh = bh->b_this_page) != head);
/* * writeback_iter() already checks for dirty pages and calls * folio_clear_dirty_for_io(), which we want to write protect the * folios. * * However, we may have to redirty a folio sometimes.
*/ while ((folio = writeback_iter(mapping, &wbc, folio, &error))) { if (ext4_journalled_writepage_needs_redirty(jinode, folio))
folio_redirty_for_writepage(&wbc, folio);
folio_unlock(folio);
}
return error;
}
/*
 * Submit the data buffers of @jinode.
 *
 * Inodes whose data is journalled go through the journalled submission
 * helper; all others use the normal one.  The helper's return value is
 * passed back unchanged.
 */
static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
	if (ext4_should_journal_data(jinode->i_vfs_inode))
		return ext4_journalled_submit_inode_data_buffers(jinode);

	return ext4_normal_submit_inode_data_buffers(jinode);
}
staticint ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
{ int ret = 0;
if (!ext4_should_journal_data(jinode->i_vfs_inode))
ret = jbd2_journal_finish_inode_data_buffers(jinode);
/* Deal with the reporting of failure conditions on a filesystem such as * inconsistencies detected or read IO failures. * * On ext2, we can store the error state of the filesystem in the * superblock. That is not possible on ext4, because we may have other * write ordering constraints on the superblock which prevent us from * writing it out straight away; and given that the journal is about to * be aborted, we can't rely on the current, or future, transactions to * write out the superblock safely. * * We'll just use the jbd2_journal_abort() error code to record an error in * the journal instead. On recovery, the journal will complain about * that error until we've noted it down and cleared it. * * If force_ro is set, we unconditionally force the filesystem into an * ABORT|READONLY state, unless the error response on the fs has been set to * panic in which case we take the easy way out and panic immediately. This is * used to deal with unrecoverable failures such as journal IO errors or ENOMEM * at a critical moment in log management.
*/ staticvoid ext4_handle_error(struct super_block *sb, bool force_ro, int error,
__u32 ino, __u64 block, constchar *func, unsignedint line)
{
journal_t *journal = EXT4_SB(sb)->s_journal; bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);
if (!continue_fs && !ext4_emergency_ro(sb) && journal)
jbd2_journal_abort(journal, -EIO);
if (!bdev_read_only(sb->s_bdev)) {
save_error_info(sb, error, ino, block, func, line); /* * In case the fs should keep running, we need to writeout * superblock through the journal. Due to lock ordering * constraints, it may not be safe to do it right here so we * defer superblock flushing to a workqueue. We just need to be * careful when the journal is already shutting down. If we get * here in that case, just update the sb directly as the last * transaction won't commit anyway.
*/ if (continue_fs && journal &&
!ext4_test_mount_flag(sb, EXT4_MF_JOURNAL_DESTROY))
schedule_work(&EXT4_SB(sb)->s_sb_upd_work); else
ext4_commit_super(sb);
}
/* * We force ERRORS_RO behavior when system is rebooting. Otherwise we * could panic during 'reboot -f' as the underlying device got already * disabled.
*/ if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
}
if (ext4_emergency_ro(sb) || continue_fs) return;
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); /* * We don't set SB_RDONLY because that requires sb->s_umount * semaphore and setting it without proper remount procedure is * confusing code such as freeze_super() leading to deadlocks * and other problems.
*/
set_bit(EXT4_FLAGS_EMERGENCY_RO, &EXT4_SB(sb)->s_ext4_flags);
}
/* * If the journal is still running, we have to write out superblock * through the journal to avoid collisions of other journalled sb * updates. * * We use directly jbd2 functions here to avoid recursing back into * ext4 error handling code during handling of previous errors.
*/ if (!ext4_emergency_state(sbi->s_sb) &&
!sb_rdonly(sbi->s_sb) && journal) { struct buffer_head *sbh = sbi->s_sbh; bool call_notify_err = false;
handle = jbd2_journal_start(journal, 1); if (IS_ERR(handle)) goto write_directly; if (jbd2_journal_get_write_access(handle, sbh)) {
jbd2_journal_stop(handle); goto write_directly;
}
if (sbi->s_add_error_count > 0)
call_notify_err = true;
ext4_update_super(sbi->s_sb); if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to " "superblock detected");
clear_buffer_write_io_error(sbh);
set_buffer_uptodate(sbh);
}
if (jbd2_journal_dirty_metadata(handle, sbh)) {
jbd2_journal_stop(handle); goto write_directly;
}
jbd2_journal_stop(handle);
if (call_notify_err)
ext4_notify_error_sysfs(sbi);
return;
}
write_directly: /* * Write through journal failed. Write sb directly to get error info * out and hope for the best.
*/
ext4_commit_super(sbi->s_sb);
ext4_notify_error_sysfs(sbi);
}
/* Special case: if the error is EROFS, and we're not already * inside a transaction, then there's really no point in logging
* an error. */ if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb)) return;
if (test_opt(sb, ERRORS_CONT)) { if (test_opt(sb, WARN_ON_ERROR))
WARN_ON_ONCE(1);
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; if (!bdev_read_only(sb->s_bdev)) {
save_error_info(sb, EFSCORRUPTED, ino, block, function,
line);
schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
} return;
}
ext4_unlock_group(sb, grp);
ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line); /* * We only get here in the ERRORS_RO case; relocking the group * may be dangerous, but nothing bad will happen since the * filesystem will have already been marked read/only and the * journal has been aborted. We return 1 as a hint to callers * who might what to use the return value from * ext4_grp_locked_error() to distinguish between the * ERRORS_CONT and ERRORS_RO case, and perhaps return more * aggressively from the ext4 function in question, with a * more appropriate error code.
*/
ext4_lock_group(sb, grp); return;
}
if (!grp || !gdp) return; if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
&grp->bb_state); if (!ret)
percpu_counter_sub(&sbi->s_freeclusters_counter,
grp->bb_free);
}
if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
&grp->bb_state); if (!ret && gdp) { int count;
if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) return;
ext4_warning(sb, "updating to rev %d because of new feature flag, " "running e2fsck is recommended",
EXT4_DYNAMIC_REV);
es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); /* leave es->s_feature_*compat flags alone */ /* es->s_uuid will be set by e2fsck if empty */
/* * The rest of the superblock fields should be zero, and if not it * means they are likely already in use, so leave them alone. We * can leave it up to e2fsck to clean up any inconsistencies there.
*/
}
#ifdef CONFIG_QUOTA
static int ext4_quota_off(struct super_block *sb, int type);

/*
 * Turn off quota types @type - 1 down to 0.
 *
 * Our own ext4_quota_off() is used for each type so that inode flags etc.
 * get cleared.  @type must not exceed EXT4_MAXQUOTAS.
 */
static inline void ext4_quotas_off(struct super_block *sb, int type)
{
	BUG_ON(type > EXT4_MAXQUOTAS);

	while (--type >= 0)
		ext4_quota_off(sb, type);
}

/*
 * Helper for the mount/remount codepaths (which hold s_umount) to fetch the
 * quota file name of the given quota type.
 */
static inline char *get_qf_name(struct super_block *sb,
				struct ext4_sb_info *sbi,
				int type)
{
	return rcu_dereference_protected(sbi->s_qf_names[type],
					 lockdep_is_held(&sb->s_umount));
}
#else
/* Without CONFIG_QUOTA there is nothing to turn off. */
static inline void ext4_quotas_off(struct super_block *sb, int type)
{
}
#endif
staticint ext4_percpu_param_init(struct ext4_sb_info *sbi)
{
ext4_fsblk_t block; int err;
/*
 * Release the first sbi->s_gdb_count buffer heads of the s_group_desc
 * array and then free the array itself.  The array pointer is fetched and
 * used inside an RCU read-side section.
 */
static void ext4_group_desc_free(struct ext4_sb_info *sbi)
{
	struct buffer_head **gd_bhs;
	int idx = 0;

	rcu_read_lock();
	gd_bhs = rcu_dereference(sbi->s_group_desc);
	while (idx < sbi->s_gdb_count) {
		brelse(gd_bhs[idx]);
		idx++;
	}
	kvfree(gd_bhs);
	rcu_read_unlock();
}
/*
 * Free every allocated entry of the s_flex_groups array and then the array
 * itself.  Does nothing when the array pointer is NULL.  The array pointer
 * is fetched and used inside an RCU read-side section.
 */
static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
{
	struct flex_groups **fgs;
	int idx;

	rcu_read_lock();
	fgs = rcu_dereference(sbi->s_flex_groups);
	if (!fgs)
		goto out_unlock;

	for (idx = 0; idx < sbi->s_flex_groups_allocated; idx++)
		kvfree(fgs[idx]);
	kvfree(fgs);

out_unlock:
	rcu_read_unlock();
}
/*
 * Unregister sysfs before destroying the jbd2 journal, because the
 * attr_journal_task attribute could otherwise still be accessed through
 * sysfs while sbi->s_journal->j_task is NULL.
 *
 * Also unregister sysfs before flushing sbi->s_sb_upd_work: a user may
 * read /proc/fs/ext4/xx/mb_groups during umount, and if metadata
 * verification fails on that read, error work gets queued.
 * update_super_work would then call start_this_handle(), which may
 * trigger a BUG_ON.
 */
ext4_unregister_sysfs(sb);
if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount"))
ext4_msg(sb, KERN_INFO, "unmounting filesystem %pU.",
&sb->s_uuid);
WARN_ON_ONCE(!(sbi->s_mount_state & EXT4_ERROR_FS) &&
percpu_counter_sum(&sbi->s_dirtyclusters_counter));
ext4_percpu_param_destroy(sbi); #ifdef CONFIG_QUOTA for (int i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(get_qf_name(sb, sbi, i)); #endif
/* Debugging code just in case the in-memory inode orphan list * isn't empty. The on-disk one can be non-empty if we've * detected an error and taken the fs readonly, but the
* in-memory list had better be clean by this point. */ if (!list_empty(&sbi->s_orphan))
dump_orphan_list(sb, sbi);
ASSERT(list_empty(&sbi->s_orphan));
sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev); if (sbi->s_journal_bdev_file) { /* * Invalidate the journal device's buffers. We don't want them * floating about in memory - the physical journal device may * hotswapped, and it breaks the `ro-after' testing code.
*/
sync_blockdev(file_bdev(sbi->s_journal_bdev_file));
invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
}
brelse(sbi->s_sbh);
sb->s_fs_info = NULL; /* * Now that we are completely done shutting down the * superblock, we need to actually destroy the kobject.
*/
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
kfree(sbi->s_blockgroup_lock);
fs_put_dax(sbi->s_daxdev, NULL);
fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); #if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding); #endif
kfree(sbi);
}
/* Cache from which ext4 inodes are allocated; torn down in destroy_inodecache(). */
static struct kmem_cache *ext4_inode_cachep;
/* * Called inside transaction, so use GFP_NOFS
*/ staticstruct inode *ext4_alloc_inode(struct super_block *sb)
{ struct ext4_inode_info *ei;
ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS); if (!ei) return NULL;
/*
 * Destroy the ext4 inode cache.
 */
static void destroy_inodecache(void)
{
	/*
	 * All delayed RCU-freed inodes must have been flushed before the
	 * cache is destroyed, so wait for outstanding RCU callbacks first.
	 */
	rcu_barrier();

	kmem_cache_destroy(ext4_inode_cachep);
}
/* * Currently we don't know the generation for parent directory, so * a generation of 0 means "accept any"
*/
inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE); if (IS_ERR(inode)) return ERR_CAST(inode); if (generation && inode->i_generation != generation) {
iput(inode); return ERR_PTR(-ESTALE);
}
return inode;
}
staticstruct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type)
{ return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
ext4_nfs_get_inode);
}
staticstruct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type)
{ return generic_fh_to_parent(sb, fid, fh_len, fh_type,
ext4_nfs_get_inode);
}
/* * Clear the name of the specified quota file.
*/ staticint unnote_qf_name(struct fs_context *fc, int qtype)
{ struct ext4_fs_context *ctx = fc->fs_private;
if (m->flags & MOPT_NOSUPPORT) {
ext4_msg(NULL, KERN_ERR, "%s option not supported",
param->key); return 0;
}
switch (token) { #ifdef CONFIG_QUOTA case Opt_usrjquota: if (!*param->string) return unnote_qf_name(fc, USRQUOTA); else return note_qf_name(fc, USRQUOTA, param); case Opt_grpjquota: if (!*param->string) return unnote_qf_name(fc, GRPQUOTA); else return note_qf_name(fc, GRPQUOTA, param); #endif case Opt_sb: if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
ext4_msg(NULL, KERN_WARNING, "Ignoring %s option on remount", param->key);
} else {
ctx->s_sb_block = result.uint_32;
ctx->spec |= EXT4_SPEC_s_sb_block;
} return 0; case Opt_removed:
ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
param->key); return 0; case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
ctx_set_flags(ctx, SB_INLINECRYPT); #else
ext4_msg(NULL, KERN_ERR, "inline encryption not supported"); #endif return 0; case Opt_errors:
ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK);
ctx_set_mount_opt(ctx, result.uint_32); return 0; #ifdef CONFIG_QUOTA case Opt_jqfmt:
ctx->s_jquota_fmt = result.uint_32;
ctx->spec |= EXT4_SPEC_JQFMT; return 0; #endif case Opt_data:
ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
ctx_set_mount_opt(ctx, result.uint_32);
ctx->spec |= EXT4_SPEC_DATAJ; return 0; case Opt_commit: if (result.uint_32 == 0)
result.uint_32 = JBD2_DEFAULT_MAX_COMMIT_AGE; elseif (result.uint_32 > INT_MAX / HZ) {
ext4_msg(NULL, KERN_ERR, "Invalid commit interval %d, " "must be smaller than %d",
result.uint_32, INT_MAX / HZ); return -EINVAL;
}
ctx->s_commit_interval = HZ * result.uint_32;
ctx->spec |= EXT4_SPEC_s_commit_interval; return 0; case Opt_debug_want_extra_isize: if ((result.uint_32 & 1) || (result.uint_32 < 4)) {
ext4_msg(NULL, KERN_ERR, "Invalid want_extra_isize %d", result.uint_32); return -EINVAL;
}
ctx->s_want_extra_isize = result.uint_32;
ctx->spec |= EXT4_SPEC_s_want_extra_isize; return 0; case Opt_max_batch_time:
ctx->s_max_batch_time = result.uint_32;
ctx->spec |= EXT4_SPEC_s_max_batch_time; return 0; case Opt_min_batch_time:
ctx->s_min_batch_time = result.uint_32;
ctx->spec |= EXT4_SPEC_s_min_batch_time; return 0; case Opt_inode_readahead_blks: if (result.uint_32 &&
(result.uint_32 > (1 << 30) ||
!is_power_of_2(result.uint_32))) {
ext4_msg(NULL, KERN_ERR, "EXT4-fs: inode_readahead_blks must be " "0 or a power of 2 smaller than 2^31"); return -EINVAL;
}
ctx->s_inode_readahead_blks = result.uint_32;
ctx->spec |= EXT4_SPEC_s_inode_readahead_blks; return 0; case Opt_init_itable:
ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE);
ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; if (param->type == fs_value_is_string)
ctx->s_li_wait_mult = result.uint_32;
ctx->spec |= EXT4_SPEC_s_li_wait_mult; return 0; case Opt_max_dir_size_kb:
ctx->s_max_dir_size_kb = result.uint_32;
ctx->spec |= EXT4_SPEC_s_max_dir_size_kb; return 0; #ifdef CONFIG_EXT4_DEBUG case Opt_fc_debug_max_replay:
ctx->s_fc_debug_max_replay = result.uint_32;
ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay; return 0; #endif case Opt_stripe:
ctx->s_stripe = result.uint_32;
ctx->spec |= EXT4_SPEC_s_stripe; return 0; case Opt_resuid:
ctx->s_resuid = result.uid;
ctx->spec |= EXT4_SPEC_s_resuid; return 0; case Opt_resgid:
ctx->s_resgid = result.gid;
ctx->spec |= EXT4_SPEC_s_resgid; return 0; case Opt_journal_dev: if (is_remount) {
ext4_msg(NULL, KERN_ERR, "Cannot specify journal on remount"); return -EINVAL;
}
ctx->journal_devnum = result.uint_32;
ctx->spec |= EXT4_SPEC_JOURNAL_DEV; return 0; case Opt_journal_path:
{ struct inode *journal_inode; struct path path; int error;
if (is_remount) {
ext4_msg(NULL, KERN_ERR, "Cannot specify journal on remount"); return -EINVAL;
}
error = fs_lookup_param(fc, param, 1, LOOKUP_FOLLOW, &path); if (error) {
ext4_msg(NULL, KERN_ERR, "error: could not find " "journal device path"); return -EINVAL;
}
journal_inode = d_inode(path.dentry);
ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev);
ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
path_put(&path); return 0;
} case Opt_journal_ioprio: if (result.uint_32 > 7) {
ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority" " (must be 0-7)"); return -EINVAL;
}
ctx->journal_ioprio =
IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32);
ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO; return 0; case Opt_test_dummy_encryption: return ext4_parse_test_dummy_encryption(param, ctx); case Opt_dax: case Opt_dax_type: #ifdef CONFIG_FS_DAX
{ int type = (token == Opt_dax) ?
Opt_dax : result.uint_32;
switch (type) { case Opt_dax: case Opt_dax_always:
ctx_set_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER); break; case Opt_dax_never:
ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS); break; case Opt_dax_inode:
ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER); /* Strictly for printing options */
ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE); break;
} return 0;
} #else
ext4_msg(NULL, KERN_INFO, "dax option not supported"); return -EINVAL; #endif case Opt_data_err: if (result.uint_32 == Opt_data_err_abort)
ctx_set_mount_opt(ctx, m->mount_opt); elseif (result.uint_32 == Opt_data_err_ignore)
ctx_clear_mount_opt(ctx, m->mount_opt); return 0; case Opt_mb_optimize_scan: if (result.int_32 == 1) {
ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
ctx->spec |= EXT4_SPEC_mb_optimize_scan;
} elseif (result.int_32 == 0) {
ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
ctx->spec |= EXT4_SPEC_mb_optimize_scan;
} else {
ext4_msg(NULL, KERN_WARNING, "mb_optimize_scan should be set to 0 or 1."); return -EINVAL;
} return 0;
}
/* * At this point we should only be getting options requiring MOPT_SET, * or MOPT_CLEAR. Anything else is a bug
*/ if (m->token == Opt_err) {
ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s",
param->key);
WARN_ON(1); return -EINVAL;
}
else { unsignedint set = 0;
if ((param->type == fs_value_is_flag) ||
result.uint_32 > 0)
set = 1;
if (m->flags & MOPT_CLEAR)
set = !set; elseif (unlikely(!(m->flags & MOPT_SET))) {
ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s",
param->key);
WARN_ON(1); return -EINVAL;
} if (m->flags & MOPT_2) { if (set != 0)
ctx_set_mount_opt2(ctx, m->mount_opt); else
ctx_clear_mount_opt2(ctx, m->mount_opt);
} else { if (set != 0)
ctx_set_mount_opt(ctx, m->mount_opt); else
ctx_clear_mount_opt(ctx, m->mount_opt);
}
}
/* * We do the test below only for project quotas. 'usrquota' and * 'grpquota' mount options are allowed even without quota feature * to support legacy quotas in quota files.
*/ if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) &&
!ext4_has_feature_project(sb)) {
ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. " "Cannot enable project quota enforcement."); return -EINVAL;
}
if (quota_feature) {
ext4_msg(NULL, KERN_INFO, "Journaled quota options ignored when " "QUOTA feature is enabled"); return 0;
}
}
if (ctx->spec & EXT4_SPEC_JQFMT) { if (sbi->s_jquota_fmt != ctx->s_jquota_fmt && quota_loaded) goto err_jquota_change; if (quota_feature) {
ext4_msg(NULL, KERN_INFO, "Quota format mount options " "ignored when QUOTA feature is enabled"); return 0;
}
}
/* Make sure we don't mix old and new quota format */
usr_qf_name = (get_qf_name(sb, sbi, USRQUOTA) ||
ctx->s_qf_names[USRQUOTA]);
grp_qf_name = (get_qf_name(sb, sbi, GRPQUOTA) ||
ctx->s_qf_names[GRPQUOTA]);
if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy)) return 0;
if (!ext4_has_feature_encrypt(sb)) {
ext4_msg(NULL, KERN_WARNING, "test_dummy_encryption requires encrypt feature"); return -EINVAL;
} /* * This mount option is just for testing, and it's not worthwhile to * implement the extra complexity (e.g. RCU protection) that would be * needed to allow it to be set or changed during remount. We do allow * it to be specified during remount, but only if there is no change.
*/ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
&ctx->dummy_enc_policy)) return 0;
ext4_msg(NULL, KERN_WARNING, "Can't set or change test_dummy_encryption on remount"); return -EINVAL;
} /* Also make sure s_mount_opts didn't contain a conflicting value. */ if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) { if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
&ctx->dummy_enc_policy)) return 0;
ext4_msg(NULL, KERN_WARNING, "Conflicting test_dummy_encryption options"); return -EINVAL;
} return 0;
}
staticvoid ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx, struct super_block *sb)
{ if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) || /* if already set, it was already verified to be the same */
fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy)) return;
EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy;
memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy));
ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
}
for (spec = ext4_param_specs; spec->name != NULL; spec++) if (spec->opt == token && !spec->type) break; return spec->name;
}
/* * Show an option if * - it's set to a non-default value OR * - if the per-sb default is different from the global default
*/ staticint _ext4_show_options(struct seq_file *seq, struct super_block *sb, int nodefs)
{ struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; int def_errors; conststruct mount_opts *m; char sep = nodefs ? '\n' : ',';
block_bitmap = ext4_block_bitmap(sb, gdp); if (block_bitmap == sb_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u overlaps " "superblock", i); if (!sb_rdonly(sb)) return 0;
} if (block_bitmap >= sb_block + 1 &&
block_bitmap <= last_bg_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u overlaps " "block group descriptors", i); if (!sb_rdonly(sb)) return 0;
} if (block_bitmap < first_block || block_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u not in group " "(block %llu)!", i, block_bitmap); return 0;
}
inode_bitmap = ext4_inode_bitmap(sb, gdp); if (inode_bitmap == sb_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u overlaps " "superblock", i); if (!sb_rdonly(sb)) return 0;
} if (inode_bitmap >= sb_block + 1 &&
inode_bitmap <= last_bg_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u overlaps " "block group descriptors", i); if (!sb_rdonly(sb)) return 0;
} if (inode_bitmap < first_block || inode_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u not in group " "(block %llu)!", i, inode_bitmap); return 0;
}
inode_table = ext4_inode_table(sb, gdp); if (inode_table == sb_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode table for group %u overlaps " "superblock", i); if (!sb_rdonly(sb)) return 0;
} if (inode_table >= sb_block + 1 &&
inode_table <= last_bg_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode table for group %u overlaps " "block group descriptors", i); if (!sb_rdonly(sb)) return 0;
} if (inode_table < first_block ||
inode_table + sbi->s_itb_per_group - 1 > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode table for group %u not in group " "(block %llu)!", i, inode_table); return 0;
}
ext4_lock_group(sb, i); if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Checksum for group %u failed (%u!=%u)",
i, le16_to_cpu(ext4_group_desc_csum(sb, i,
gdp)), le16_to_cpu(gdp->bg_checksum)); if (!sb_rdonly(sb)) {
ext4_unlock_group(sb, i); return 0;
}
}
ext4_unlock_group(sb, i); if (!flexbg_flag)
first_block += EXT4_BLOCKS_PER_GROUP(sb);
} if (NULL != first_not_zeroed)
*first_not_zeroed = grp; return 1;
}
/*
 * Maximal extent format file size.
 *
 * The resulting logical block number at s_maxbytes must fit in our on-disk
 * extent format containers, within a sector_t, and within i_blocks in the
 * vfs.  The ext4 inode has 48 bits of i_block in fsblock units, so that
 * won't be a limiting factor.
 *
 * There is another limiting factor, though.  Extents are stored as a
 * starting block plus a length, so the length of an extent covering the
 * maximum file size must fit into the on-disk format containers as well.
 * Since that length is always one unit bigger than the max unit (0 is
 * counted too), s_maxbytes has to be lowered by one fs block.
 *
 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
 */
static loff_t ext4_max_size(int blkbits, int has_huge_files)
{
	const loff_t max_u32 = (1LL << 32) - 1;
	loff_t upper_limit = MAX_LFS_FILESIZE;
	loff_t extent_limit;

	BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));

	if (!has_huge_files) {
		/*
		 * Start from 2^32 - 1, convert to total blocks in file
		 * system block size (>> (blkbits - 9)), then to bytes.
		 */
		upper_limit = (max_u32 >> (blkbits - 9)) << blkbits;
	}

	/*
	 * 32-bit extent-start container, ee_block.  The maxbytes is lowered
	 * by one fs block, so ee_len can cover the extent of maximum file
	 * size.
	 */
	extent_limit = max_u32 << blkbits;

	/* Sanity check against vm- & vfs- imposed limits */
	return extent_limit > upper_limit ? upper_limit : extent_limit;
}
/* * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. * We need to be 1 filesystem block less than the 2^48 sector limit.
*/ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
{
loff_t upper_limit, res = EXT4_NDIR_BLOCKS; int meta_blocks; unsignedint ppb = 1 << (bits - 2);
/* * This is calculated to be the largest file size for a dense, block * mapped file such that the file's total number of 512-byte sectors, * including data and all indirect blocks, does not exceed (2^48 - 1). * * __u32 i_blocks_lo and _u16 i_blocks_high represent the total * number of 512-byte sectors of the file.
*/ if (!has_huge_files) { /* * !has_huge_files or implies that the inode i_block field * represents total file blocks in 2^32 512-byte sectors == * size of vfs inode i_blocks * 8
*/
upper_limit = (1LL << 32) - 1;
/* total blocks in file system block size */
upper_limit >>= (bits - 9);
} else { /* * We use 48 bit ext4_inode i_blocks * With EXT4_HUGE_FILE_FL set the i_blocks * represent total number of blocks in * file system block size
*/
upper_limit = (1LL << 48) - 1;
}
/* Compute how many blocks we can address by block tree */
res += ppb;
res += ppb * ppb;
res += ((loff_t)ppb) * ppb * ppb; /* Compute how many metadata blocks are needed */
meta_blocks = 1;
meta_blocks += 1 + ppb;
meta_blocks += 1 + ppb + ppb * ppb; /* Does block tree limit file size? */ if (res + meta_blocks <= upper_limit) goto check_lfs;
res = upper_limit; /* How many metadata blocks are needed for addressing upper_limit? */
upper_limit -= EXT4_NDIR_BLOCKS; /* indirect blocks */
meta_blocks = 1;
upper_limit -= ppb; /* double indirect blocks */ if (upper_limit < ppb * ppb) {
meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb);
res -= meta_blocks; goto check_lfs;
}
meta_blocks += 1 + ppb;
upper_limit -= ppb * ppb; /* tripple indirect blocks for the rest */
meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) +
DIV_ROUND_UP_ULL(upper_limit, ppb*ppb);
res -= meta_blocks;
check_lfs:
res <<= bits; if (res > MAX_LFS_FILESIZE)
res = MAX_LFS_FILESIZE;
if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg) return logical_sb_block + nr + 1;
bg = sbi->s_desc_per_block * nr; if (ext4_bg_has_super(sb, bg))
has_super = 1;
/* * If we have a meta_bg fs with 1k blocks, group 0's GDT is at * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled * on modern mke2fs or blksize > 1k on older mke2fs) then we must * compensate.
*/ if (sb->s_blocksize == 1024 && nr == 0 &&
le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
has_super++;
/**
 * ext4_get_stripe_size: Get the stripe size.
 * @sbi: In memory super block info
 *
 * Candidates are tried in this order: the mount option value
 * (sbi->s_stripe), the superblock's RAID stripe width, and the
 * superblock's RAID stride.  The first one that is non-zero and not
 * greater than the blocks per group is used; if none qualifies the result
 * is 0.  The allocator needs the value to be no larger than the blocks per
 * group.
 */
static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
{
	unsigned long sb_stride = le16_to_cpu(sbi->s_es->s_raid_stride);
	unsigned long sb_width = le32_to_cpu(sbi->s_es->s_raid_stripe_width);
	int chosen = 0;

	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
		chosen = sbi->s_stripe;
	else if (sb_width && sb_width <= sbi->s_blocks_per_group)
		chosen = sb_width;
	else if (sb_stride && sb_stride <= sbi->s_blocks_per_group)
		chosen = sb_stride;

	/*
	 * A stripe width of 1 makes no sense, so it is mapped to 0, which
	 * turns off the stripe handling code.
	 */
	return chosen > 1 ? chosen : 0;
}
/* * Check whether this filesystem can be mounted based on * the features present and the RDONLY/RDWR mount requested. * Returns 1 if this filesystem can be mounted as requested, * 0 if it cannot be.
*/ int ext4_feature_set_ok(struct super_block *sb, int readonly)
{ if (ext4_has_unknown_ext4_incompat_features(sb)) {
ext4_msg(sb, KERN_ERR, "Couldn't mount because of " "unsupported optional features (%x)",
(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
~EXT4_FEATURE_INCOMPAT_SUPP)); return 0;
}
if (!IS_ENABLED(CONFIG_UNICODE) && ext4_has_feature_casefold(sb)) {
ext4_msg(sb, KERN_ERR, "Filesystem with casefold feature cannot be " "mounted without CONFIG_UNICODE"); return 0;
}
if (readonly) return 1;
if (ext4_has_feature_readonly(sb)) {
ext4_msg(sb, KERN_INFO, "filesystem is read-only");
sb->s_flags |= SB_RDONLY; return 1;
}
/* Check that feature set is OK for a read-write mount */ if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " "unsupported optional features (%x)",
(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
~EXT4_FEATURE_RO_COMPAT_SUPP)); return 0;
} if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
ext4_msg(sb, KERN_ERR, "Can't support bigalloc feature without " "extents feature\n"); return 0;
}
#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2) if (!readonly && (ext4_has_feature_quota(sb) ||
ext4_has_feature_project(sb))) {
ext4_msg(sb, KERN_ERR, "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2"); return 0;
} #endif/* CONFIG_QUOTA */ return 1;
}
/*
 * This function is called once a day if we have errors logged
 * on the file system.
 *
 * It is the callback of the sbi->s_err_report timer.  It logs the error
 * count plus the first and last error records kept in the super block
 * and then re-arms the timer to fire again 24 hours from now.
 */
static void print_daily_error_info(struct timer_list *t)
{
	struct ext4_sb_info *sbi = timer_container_of(sbi, t, s_err_report);
	struct ext4_super_block *es = sbi->s_es;
	struct super_block *sb = sbi->s_sb;

	if (es->s_error_count) {
		/* fsck newer than v1.41.13 is needed to clean this condition. */
		ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
			 le32_to_cpu(es->s_error_count));
	}

	/* Oldest recorded error; inode and block are only shown when set. */
	if (es->s_first_error_time) {
		printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
		       sb->s_id,
		       ext4_get_tstamp(es, s_first_error_time),
		       (int) sizeof(es->s_first_error_func),
		       es->s_first_error_func,
		       le32_to_cpu(es->s_first_error_line));

		if (es->s_first_error_ino) {
			printk(KERN_CONT ": inode %u",
			       le32_to_cpu(es->s_first_error_ino));
		}
		if (es->s_first_error_block) {
			printk(KERN_CONT ": block %llu",
			       (unsigned long long)
			       le64_to_cpu(es->s_first_error_block));
		}
		printk(KERN_CONT "\n");
	}

	/* Most recent recorded error, reported in the same layout. */
	if (es->s_last_error_time) {
		printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
		       sb->s_id,
		       ext4_get_tstamp(es, s_last_error_time),
		       (int) sizeof(es->s_last_error_func),
		       es->s_last_error_func,
		       le32_to_cpu(es->s_last_error_line));

		if (es->s_last_error_ino) {
			printk(KERN_CONT ": inode %u",
			       le32_to_cpu(es->s_last_error_ino));
		}
		if (es->s_last_error_block) {
			printk(KERN_CONT ": block %llu",
			       (unsigned long long)
			       le64_to_cpu(es->s_last_error_block));
		}
		printk(KERN_CONT "\n");
	}

	/* Once a day */
	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
}
/* Find next suitable group and run ext4_init_inode_table */ staticint ext4_run_li_request(struct ext4_li_request *elr)
{ struct ext4_group_desc *gdp = NULL; struct super_block *sb = elr->lr_super;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
ext4_group_t group = elr->lr_next_group; unsignedint prefetch_ios = 0; int ret = 0; int nr = EXT4_SB(sb)->s_mb_prefetch;
u64 start_time;
/* * Remove lr_request from the list_request and free the * request structure. Should be called with li_list_mtx held
*/ staticvoid ext4_remove_li_request(struct ext4_li_request *elr)
{ if (!elr) return;
/* * This is the function where the ext4lazyinit thread lives. It walks * through the request list searching for the next scheduled filesystem. * When such a fs is found, run the lazy initialization request * (ext4_run_li_request) and keep track of the time spent in this * function. Based on that time we compute the next schedule time of * the request. When walking through the list is complete, compute * the next waking time and put itself to sleep.
*/ staticint ext4_lazyinit_thread(void *arg)
{ struct ext4_lazy_init *eli = arg; struct list_head *pos, *n; struct ext4_li_request *elr; unsignedlong next_wakeup, cur;
BUG_ON(NULL == eli);
set_freezable();
cont_thread: while (true) { bool next_wakeup_initialized = false;
next_wakeup = 0;
mutex_lock(&eli->li_list_mtx); if (list_empty(&eli->li_request_list)) {
mutex_unlock(&eli->li_list_mtx); goto exit_thread;
}
list_for_each_safe(pos, n, &eli->li_request_list) { int err = 0; int progress = 0;
elr = list_entry(pos, struct ext4_li_request,
lr_request);
if (time_before(jiffies, elr->lr_next_sched)) { if (!next_wakeup_initialized ||
time_before(elr->lr_next_sched, next_wakeup)) {
next_wakeup = elr->lr_next_sched;
next_wakeup_initialized = true;
} continue;
} if (down_read_trylock(&elr->lr_super->s_umount)) { if (sb_start_write_trylock(elr->lr_super)) {
progress = 1; /* * We hold sb->s_umount, sb can not * be removed from the list, it is * now safe to drop li_list_mtx
*/
mutex_unlock(&eli->li_list_mtx);
err = ext4_run_li_request(elr);
sb_end_write(elr->lr_super);
mutex_lock(&eli->li_list_mtx);
n = pos->next;
}
up_read((&elr->lr_super->s_umount));
} /* error, remove the lazy_init job */ if (err) {
ext4_remove_li_request(elr); continue;
} if (!progress) {
elr->lr_next_sched = jiffies +
get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
} if (!next_wakeup_initialized ||
time_before(elr->lr_next_sched, next_wakeup)) {
next_wakeup = elr->lr_next_sched;
next_wakeup_initialized = true;
}
}
mutex_unlock(&eli->li_list_mtx);
try_to_freeze();
cur = jiffies; if (!next_wakeup_initialized || time_after_eq(cur, next_wakeup)) {
cond_resched(); continue;
}
if (kthread_should_stop()) {
ext4_clear_request_list(); goto exit_thread;
}
}
exit_thread: /* * It looks like the request list is empty, but we need * to check it under the li_list_mtx lock, to prevent any * additions into it, and of course we should lock ext4_li_mtx * to atomically free the list and ext4_li_info, because at * this point another ext4 filesystem could be registering * new one.
*/
mutex_lock(&ext4_li_mtx);
mutex_lock(&eli->li_list_mtx); if (!list_empty(&eli->li_request_list)) {
mutex_unlock(&eli->li_list_mtx);
mutex_unlock(&ext4_li_mtx); goto cont_thread;
}
mutex_unlock(&eli->li_list_mtx);
kfree(ext4_li_info);
ext4_li_info = NULL;
mutex_unlock(&ext4_li_mtx);
/* * Check whether it makes sense to run the itable init thread or not. * If there is at least one uninitialized inode table, return the * corresponding group number, else the loop goes through all * groups and returns the total number of groups.
*/ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
{
ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; struct ext4_group_desc *gdp = NULL;
if (!ext4_has_group_desc_csum(sb)) return ngroups;
for (group = 0; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL); if (!gdp) continue;
if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) break;
}
/* * Randomize first schedule time of the request to * spread the inode table initialization requests * better.
*/
elr->lr_next_sched = jiffies + get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ); return elr;
}
/*
 * Attach a lazy-init request to @sb and make sure the lazyinit thread runs.
 *
 * @sb:               super block the request belongs to
 * @first_not_zeroed: not referenced in the visible body (see note below)
 *
 * Everything happens under ext4_li_mtx.  If the super block already has a
 * request (sbi->s_li_request), only its lr_timeout is reset to 0 and 0 is
 * returned.  Otherwise elr is stored in sbi->s_li_request and, unless
 * EXT4_LAZYINIT_RUNNING is already set in ext4_li_info->li_state,
 * ext4_run_lazyinit_thread() is called.
 *
 * Returns 0 on success, or the error returned by ext4_run_lazyinit_thread().
 *
 * NOTE(review): in the body as visible here elr is never assigned anything
 * but NULL, ngroups and first_not_zeroed are unused, and ext4_li_info is
 * dereferenced without a visible NULL check.  The inline comment below also
 * mentions an insertion into the request list that is not shown.  Confirm
 * whether the allocation/insertion code is missing from this view.
 */
int ext4_register_li_request(struct super_block *sb,
ext4_group_t first_not_zeroed)
{ struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_li_request *elr = NULL;
ext4_group_t ngroups = sbi->s_groups_count; int ret = 0;
/* Serialize against other registrations; released at the out label. */
mutex_lock(&ext4_li_mtx); if (sbi->s_li_request != NULL) { /* * Reset timeout so it can be computed again, because * s_li_wait_mult might have changed.
*/
sbi->s_li_request->lr_timeout = 0; goto out;
}
sbi->s_li_request = elr; /* * set elr to NULL here since it has been inserted to * the request_list and the removal and free of it is * handled by ext4_clear_request_list from now on.
*/
elr = NULL;
/* Start the lazyinit thread only if it is not flagged as running. */
if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
ret = ext4_run_lazyinit_thread(); if (ret) goto out;
}
out:
/* On failure release elr; it is NULL once ownership was handed over above. */
mutex_unlock(&ext4_li_mtx); if (ret)
kfree(elr); return ret;
}
/*
 * Stop the lazyinit thread.
 *
 * We do not need to lock anything since this is called on
 * module unload.
 */
static void ext4_destroy_lazyinit_thread(void)
{
	/*
	 * Only a thread that is still around has to be stopped; if it
	 * exited earlier there's nothing to be done.
	 */
	if (ext4_li_info && ext4_lazyinit_task)
		kthread_stop(ext4_lazyinit_task);
}
staticint set_journal_csum_feature_set(struct super_block *sb)
{ int ret = 1; int compat, incompat; struct ext4_sb_info *sbi = EXT4_SB(sb);
/*
 * Note: calculating the overhead so we can be compatible with
 * historical BSD practice is quite difficult in the face of
 * clusters/bigalloc.  This is because multiple metadata blocks from
 * different block group can end up in the same allocation cluster.
 * Calculating the exact overhead in the face of clustered allocation
 * requires either O(all block bitmaps) in memory or O(number of block
 * groups**2) in time.  We will still calculate the superblock for
 * older file systems --- and if we come across with a bigalloc file
 * system with zero in s_overhead_clusters the estimate will be close to
 * correct especially for very large cluster sizes --- but for newer
 * file systems, it's better to calculate this figure once at mkfs
 * time, and store it in the superblock.  If the superblock value is
 * present (even for non-bigalloc file systems), we will use it.
 *
 * count_overhead() computes the overhead attributed to block group @grp.
 *
 * @sb:  super block of the filesystem
 * @grp: block group whose block range is examined
 * @buf: scratch bitmap; one bit is set per overhead cluster found.  The
 *       visible caller passes a zeroed page and clears it again after a
 *       non-zero result.
 *
 * Returns 0 when nothing was counted, otherwise
 * EXT4_CLUSTERS_PER_GROUP(sb) minus ext4_count_free() of @buf
 * (presumably the number of bits set in the bitmap -- confirm against
 * ext4_count_free()).
 *
 * NOTE(review): has_super is assigned but never read in the body as
 * visible here, and gdp is used without a NULL check.  Confirm whether
 * part of this function is missing from this view.
 */
staticint count_overhead(struct super_block *sb, ext4_group_t grp, char *buf)
{ struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp;
ext4_fsblk_t first_block, last_block, b;
ext4_group_t i, ngroups = ext4_get_groups_count(sb); int s, j, count = 0; int has_super = ext4_bg_has_super(sb, grp);
/* [first_block, last_block] is the block range that belongs to group grp. */
first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
(grp * EXT4_BLOCKS_PER_GROUP(sb));
/*
 * Every group descriptor is inspected, because the bitmaps and inode
 * table of any group may be located inside grp's block range.
 */
last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; for (i = 0; i < ngroups; i++) {
gdp = ext4_get_group_desc(sb, i, NULL);
/* Block bitmap of group i, if it falls inside grp's range. */
b = ext4_block_bitmap(sb, gdp); if (b >= first_block && b <= last_block) {
ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
count++;
}
/* Inode bitmap of group i, same range test. */
b = ext4_inode_bitmap(sb, gdp); if (b >= first_block && b <= last_block) {
ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
count++;
}
/*
 * Inode table of group i: considered only when the bounds test below
 * holds; count is then bumped once per inode table block.
 */
b = ext4_inode_table(sb, gdp); if (b >= first_block && b + sbi->s_itb_per_group <= last_block) for (j = 0; j < sbi->s_itb_per_group; j++, b++) { int c = EXT4_B2C(sbi, b - first_block);
ext4_set_bit(c, buf);
count++;
} if (i != grp) continue;
/*
 * Only for grp itself: mark the blocks at the start of the group, i.e.
 * one for the super block when ext4_bg_has_super() says so, followed by
 * the number of blocks reported by ext4_bg_num_gdb().  s is the running
 * block index within the group.
 */
s = 0; if (ext4_bg_has_super(sb, grp)) {
ext4_set_bit(s++, buf);
count++;
}
/* Clamp a descriptor block count that would run past the end of the group. */
j = ext4_bg_num_gdb(sb, grp); if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
ext4_error(sb, "Invalid number of block group " "descriptor blocks: %d", j);
j = EXT4_BLOCKS_PER_GROUP(sb) - s;
}
count += j; for (; j > 0; j--)
ext4_set_bit(EXT4_B2C(sbi, s++), buf);
/*
 * count only decides whether anything was found; the value returned is
 * derived from the bitmap, not from count.
 */
} if (!count) return 0; return EXT4_CLUSTERS_PER_GROUP(sb) -
ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
}
/* * Compute the overhead and stash it in sbi->s_overhead
*/ int ext4_calculate_overhead(struct super_block *sb)
{ struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct inode *j_inode; unsignedint j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
ext4_fsblk_t overhead = 0; char *buf = (char *) get_zeroed_page(GFP_NOFS);
if (!buf) return -ENOMEM;
/* * Compute the overhead (FS structures). This is constant * for a given filesystem unless the number of block groups * changes so we cache the previous value until it does.
*/
/* * All of the blocks before first_data_block are overhead
*/
overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
/* * Add the overhead found in each block group
*/ for (i = 0; i < ngroups; i++) { int blks;
blks = count_overhead(sb, i, buf);
overhead += blks; if (blks)
memset(buf, 0, PAGE_SIZE);
cond_resched();
}
/* * Add the internal journal blocks whether the journal has been * loaded or not
*/ if (sbi->s_journal && !sbi->s_journal_bdev_file)
overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len); elseif (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) { /* j_inum for internal journal is non-zero */
j_inode = ext4_get_journal_inode(sb, j_inum); if (!IS_ERR(j_inode)) {
j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
overhead += EXT4_NUM_B2C(sbi, j_blocks);
iput(j_inode);
} else {
ext4_msg(sb, KERN_ERR, "can't get journal size");
}
}
sbi->s_overhead = overhead;
smp_wmb();
free_page((unsignedlong) buf); return 0;
}
/* * There's no need to reserve anything when we aren't using extents. * The space estimates are exact, there are no unwritten extents, * hole punching doesn't need new metadata... This is needed especially * to keep ext2/3 backward compatibility.
*/ if (!ext4_has_feature_extents(sb)) return; /* * By default we reserve 2% or 4096 clusters, whichever is smaller. * This should cover the situations where we can not afford to run * out of space like for example punch hole, or converting * unwritten extents in delalloc path. In most cases such * allocation would require 1, or 2 blocks, higher numbers are * very rare.
*/
resv_clusters = (ext4_blocks_count(sbi->s_es) >>
sbi->s_cluster_bits);
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
set_opt(sb, INIT_INODE_TABLE); if (def_mount_opts & EXT4_DEFM_DEBUG)
set_opt(sb, DEBUG); if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
set_opt(sb, GRPID); if (def_mount_opts & EXT4_DEFM_UID16)
set_opt(sb, NO_UID32); /* xattr user namespace & acls are now defaulted on */
set_opt(sb, XATTR_USER); #ifdef CONFIG_EXT4_FS_POSIX_ACL
set_opt(sb, POSIX_ACL); #endif if (ext4_has_feature_fast_commit(sb))
set_opt2(sb, JOURNAL_FAST_COMMIT); /* don't forget to enable journal_csum when metadata_csum is enabled. */ if (ext4_has_feature_metadata_csum(sb))
set_opt(sb, JOURNAL_CHECKSUM);
if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC)
set_opt(sb, ERRORS_PANIC); elseif (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE)
set_opt(sb, ERRORS_CONT); else
set_opt(sb, ERRORS_RO); /* block_validity enabled by default; disable with noblock_validity */
set_opt(sb, BLOCK_VALIDITY); if (def_mount_opts & EXT4_DEFM_DISCARD)
set_opt(sb, DISCARD);
if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
set_opt(sb, BARRIER);
/* * enable delayed allocation by default * Use -o nodelalloc to turn it off
*/ if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
set_opt(sb, DELALLOC);
if (sb->s_blocksize <= PAGE_SIZE)
set_opt(sb, DIOREAD_NOLOCK);
}
/* Handle clustersize */
clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); if (ext4_has_feature_bigalloc(sb)) { if (clustersize < sb->s_blocksize) {
ext4_msg(sb, KERN_ERR, "cluster size (%d) smaller than " "block size (%lu)", clustersize, sb->s_blocksize); return -EINVAL;
}
sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
le32_to_cpu(es->s_log_block_size);
} else { if (clustersize != sb->s_blocksize) {
ext4_msg(sb, KERN_ERR, "fragment/cluster size (%d) != " "block size (%lu)", clustersize, sb->s_blocksize); return -EINVAL;
} if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
ext4_msg(sb, KERN_ERR, "#blocks per group too big: %lu",
sbi->s_blocks_per_group); return -EINVAL;
}
sbi->s_cluster_bits = 0;
}
sbi->s_clusters_per_group = le32_to_cpu(es->s_clusters_per_group); if (sbi->s_clusters_per_group > sb->s_blocksize * 8) {
ext4_msg(sb, KERN_ERR, "#clusters per group too big: %lu",
sbi->s_clusters_per_group); return -EINVAL;
} if (sbi->s_blocks_per_group !=
(sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) {
ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and clusters per group (%lu) inconsistent",
sbi->s_blocks_per_group, sbi->s_clusters_per_group); return -EINVAL;
}
sbi->s_cluster_ratio = clustersize / sb->s_blocksize;
/* Do we have standard group size of clustersize * 8 blocks ? */ if (sbi->s_blocks_per_group == clustersize << 3)
set_opt2(sb, STD_GROUP_SIZE);
return 0;
}
/* * ext4_atomic_write_init: Initializes filesystem min & max atomic write units. * With non-bigalloc filesystem awu will be based upon filesystem blocksize * & bdev awu units. * With bigalloc it will be based upon bigalloc cluster size & bdev awu units. * @sb: super block
*/ staticvoid ext4_atomic_write_init(struct super_block *sb)
{ struct ext4_sb_info *sbi = EXT4_SB(sb); struct block_device *bdev = sb->s_bdev; unsignedint clustersize = EXT4_CLUSTER_SIZE(sb);
/* Warn if metadata_csum and gdt_csum are both set. */ if (ext4_has_feature_metadata_csum(sb) &&
ext4_has_feature_gdt_csum(sb))
ext4_warning(sb, "metadata_csum and uninit_bg are " "redundant flags; please run fsck.");
/* Check for a known checksum algorithm */ if (!ext4_verify_csum_type(sb, es)) {
ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " "unknown checksum algorithm."); return -EINVAL;
}
ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
ext4_orphan_file_block_trigger);
/* Check superblock checksum */ if (!ext4_superblock_csum_verify(sb, es)) {
ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " "invalid superblock checksum. Run e2fsck?"); return -EFSBADCRC;
}
/* Precompute checksum seed for all metadata */ if (ext4_has_feature_csum_seed(sb))
sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); elseif (ext4_has_feature_metadata_csum(sb) ||
ext4_has_feature_ea_inode(sb))
sbi->s_csum_seed = ext4_chksum(~0, es->s_uuid, sizeof(es->s_uuid)); return 0;
}
if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
(ext4_has_compat_features(sb) ||
ext4_has_ro_compat_features(sb) ||
ext4_has_incompat_features(sb)))
ext4_msg(sb, KERN_WARNING, "feature flags set on rev 0 fs, " "running e2fsck is recommended");
if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
set_opt2(sb, HURD_COMPAT); if (ext4_has_feature_64bit(sb)) {
ext4_msg(sb, KERN_ERR, "The Hurd can't support 64-bit file systems"); return -EINVAL;
}
/* * ea_inode feature uses l_i_version field which is not * available in HURD_COMPAT mode.
*/ if (ext4_has_feature_ea_inode(sb)) {
ext4_msg(sb, KERN_ERR, "ea_inode feature is not supported for Hurd"); return -EINVAL;
}
}
if (IS_EXT2_SB(sb)) { if (ext2_feature_set_ok(sb))
ext4_msg(sb, KERN_INFO, "mounting ext2 file system " "using the ext4 subsystem"); else { /* * If we're probing be silent, if this looks like * it's actually an ext[34] filesystem.
*/ if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb))) return -EINVAL;
ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " "to feature incompatibilities"); return -EINVAL;
}
}
if (IS_EXT3_SB(sb)) { if (ext3_feature_set_ok(sb))
ext4_msg(sb, KERN_INFO, "mounting ext3 file system " "using the ext4 subsystem"); else { /* * If we're probing be silent, if this looks like * it's actually an ext4 filesystem.
*/ if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb))) return -EINVAL;
ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " "to feature incompatibilities"); return -EINVAL;
}
}
/* * Check feature flags regardless of the revision level, since we * previously didn't change the revision level when setting the flags, * so there is a chance incompat flags are set on a rev 0 filesystem.
*/ if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) return -EINVAL;
if (sbi->s_daxdev) { if (sb->s_blocksize == PAGE_SIZE)
set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); else
ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
}
if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) { if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem" " that may contain inline data"); return -EINVAL;
} if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
ext4_msg(sb, KERN_ERR, "DAX unsupported by block device."); return -EINVAL;
}
}
if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) {
ext4_msg(sb, KERN_ERR, "Number of reserved GDT blocks insanely large: %d",
le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks)); return -EINVAL;
} /* * Test whether we have more sectors than will fit in sector_t, * and whether the max offset is addressable by the page cache.
*/
err = generic_check_addressable(sb->s_blocksize_bits,
ext4_blocks_count(es)); if (err) {
ext4_msg(sb, KERN_ERR, "filesystem" " too large to mount safely on this system"); return err;
}
/* * It makes no sense for the first data block to be beyond the end * of the filesystem.
*/ if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
ext4_msg(sb, KERN_WARNING, "bad geometry: first data " "block %u is beyond end of filesystem (%llu)",
le32_to_cpu(es->s_first_data_block),
ext4_blocks_count(es)); return -EINVAL;
} if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
(sbi->s_cluster_ratio == 1)) {
ext4_msg(sb, KERN_WARNING, "bad geometry: first data " "block is 0 with a 1k block and cluster size"); return -EINVAL;
}
blocks_count = (ext4_blocks_count(es) -
le32_to_cpu(es->s_first_data_block) +
EXT4_BLOCKS_PER_GROUP(sb) - 1);
do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
ext4_msg(sb, KERN_WARNING, "groups count too large: %llu " "(block count %llu, first data block %u, " "blocks per group %lu)", blocks_count,
ext4_blocks_count(es),
le32_to_cpu(es->s_first_data_block),
EXT4_BLOCKS_PER_GROUP(sb)); return -EINVAL;
}
sbi->s_groups_count = blocks_count;
sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
le32_to_cpu(es->s_inodes_count)) {
ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
le32_to_cpu(es->s_inodes_count),
((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); return -EINVAL;
}
db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb); if (ext4_has_feature_meta_bg(sb)) { if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
ext4_msg(sb, KERN_WARNING, "first meta block group too large: %u " "(group descriptor block count %u)",
le32_to_cpu(es->s_first_meta_bg), db_count); return -EINVAL;
}
}
rcu_assign_pointer(sbi->s_group_desc,
kvmalloc_array(db_count, sizeof(struct buffer_head *),
GFP_KERNEL)); if (sbi->s_group_desc == NULL) {
ext4_msg(sb, KERN_ERR, "not enough memory"); return -ENOMEM;
}
bgl_lock_init(sbi->s_blockgroup_lock);
/* Pre-read the descriptors into the buffer cache */ for (i = 0; i < db_count; i++) {
block = descriptor_loc(sb, logical_sb_block, i);
ext4_sb_breadahead_unmovable(sb, block);
}
for (i = 0; i < db_count; i++) { struct buffer_head *bh;
err = ext4_load_journal(sb, es, ctx->journal_devnum); if (err) return err;
if (ext4_has_feature_64bit(sb) &&
!jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_64BIT)) {
ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); goto out;
}
if (!set_journal_csum_feature_set(sb)) {
ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " "feature set"); goto out;
}
if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
!jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
ext4_msg(sb, KERN_ERR, "Failed to set fast commit journal feature"); goto out;
}
/* We have now updated the journal if required, so we can
* validate the data journaling mode. */ switch (test_opt(sb, DATA_FLAGS)) { case 0: /* No mode set, assume a default based on the journal * capabilities: ORDERED_DATA if the journal can * cope, else JOURNAL_DATA
*/ if (jbd2_journal_check_available_features
(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
set_opt(sb, ORDERED_DATA);
sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
} else {
set_opt(sb, JOURNAL_DATA);
sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
} break;
case EXT4_MOUNT_ORDERED_DATA: case EXT4_MOUNT_WRITEBACK_DATA: if (!jbd2_journal_check_available_features
(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
ext4_msg(sb, KERN_ERR, "Journal does not support " "requested data journaling mode"); goto out;
} break; default: break;
}
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ext4_msg(sb, KERN_ERR, "can't mount with " "journal_async_commit in data=ordered mode"); goto out;
}
staticint ext4_check_journal_data_mode(struct super_block *sb)
{ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with " "data=journal disables delayed allocation, " "dioread_nolock, O_DIRECT and fast_commit support!\n"); /* can't mount with both data=journal and dioread_nolock. */
clear_opt(sb, DIOREAD_NOLOCK);
clear_opt2(sb, JOURNAL_FAST_COMMIT); if (test_opt2(sb, EXPLICIT_DELALLOC)) {
ext4_msg(sb, KERN_ERR, "can't mount with " "both data=journal and delalloc"); return -EINVAL;
} if (test_opt(sb, DAX_ALWAYS)) {
ext4_msg(sb, KERN_ERR, "can't mount with " "both data=journal and dax"); return -EINVAL;
} if (ext4_has_feature_encrypt(sb)) {
ext4_msg(sb, KERN_WARNING, "encrypted files will use data=ordered " "instead of data journaling mode");
} if (test_opt(sb, DELALLOC))
clear_opt(sb, DELALLOC);
} else {
sb->s_iflags |= SB_I_CGROUPWB;
}
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) return"journal_async_commit"; if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) return"journal_checksum"; if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) return"commit="; if (EXT4_MOUNT_DATA_FLAGS &
(sbi->s_mount_opt ^ sbi->s_def_mount_opt)) return"data="; if (test_opt(sb, DATA_ERR_ABORT)) return"data_err=abort"; return NULL;
}
staticint ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb, int silent)
{ struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es;
ext4_fsblk_t logical_sb_block; unsignedlong offset = 0; struct buffer_head *bh; int ret = -EINVAL; int blocksize;
blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); if (!blocksize) {
ext4_msg(sb, KERN_ERR, "unable to set blocksize"); return -EINVAL;
}
/* * The ext4 superblock will not be buffer aligned for other than 1kB * block sizes. We need to calculate the offset from buffer start.
*/ if (blocksize != EXT4_MIN_BLOCK_SIZE) {
logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
offset = do_div(logical_sb_block, blocksize);
} else {
logical_sb_block = sbi->s_sb_block;
}
bh = ext4_sb_bread_unmovable(sb, logical_sb_block); if (IS_ERR(bh)) {
ext4_msg(sb, KERN_ERR, "unable to read superblock"); return PTR_ERR(bh);
} /* * Note: s_es must be initialized as soon as possible because * some ext4 macro-instructions depend on its value
*/
es = (struct ext4_super_block *) (bh->b_data + offset);
sbi->s_es = es;
sb->s_magic = le16_to_cpu(es->s_magic); if (sb->s_magic != EXT4_SUPER_MAGIC) { if (!silent)
ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); goto out;
}
/* * If the default block size is not the same as the real block size, * we need to reload it.
*/ if (sb->s_blocksize == blocksize) {
*lsb = logical_sb_block;
sbi->s_sbh = bh; return 0;
}
/* * bh must be released before kill_bdev(), otherwise * it won't be freed and its page also. kill_bdev() * is called by sb_set_blocksize().
*/
brelse(bh); /* Validate the filesystem blocksize */ if (!sb_set_blocksize(sb, blocksize)) {
ext4_msg(sb, KERN_ERR, "bad block size %d",
blocksize);
bh = NULL; goto out;
}
if (sbi->s_def_hash_version > DX_HASH_LAST) {
ext4_msg(sb, KERN_ERR, "Invalid default hash set in the superblock"); return -EINVAL;
} elseif (sbi->s_def_hash_version == DX_HASH_SIPHASH) {
ext4_msg(sb, KERN_ERR, "SIPHASH is not a valid default hash value"); return -EINVAL;
}
for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
if (ext4_has_feature_dir_index(sb)) {
i = le32_to_cpu(es->s_flags); if (i & EXT2_FLAGS_UNSIGNED_HASH)
sbi->s_hash_unsigned = 3; elseif ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { #ifdef __CHAR_UNSIGNED__ if (!sb_rdonly(sb))
es->s_flags |=
cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
sbi->s_hash_unsigned = 3; #else if (!sb_rdonly(sb))
es->s_flags |=
cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); #endif
}
} return 0;
}
staticint ext4_block_group_meta_init(struct super_block *sb, int silent)
{ struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; int has_huge_files;
/* * It's hard to get stripe aligned blocks if stripe is not aligned with * cluster, just disable stripe and alert user to simplify code and avoid * stripe aligned allocation which will rarely succeed.
*/ staticbool ext4_is_stripe_incompatible(struct super_block *sb, unsignedlong stripe)
{ struct ext4_sb_info *sbi = EXT4_SB(sb); return (stripe > 0 && sbi->s_cluster_ratio > 1 &&
stripe % sbi->s_cluster_ratio != 0);
}
/* Set defaults for the variables that will be set during parsing */ if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO))
ctx->journal_ioprio = EXT4_DEF_JOURNAL_IOPRIO;
if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) {
err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)); if (err) goto failed_mount3a;
}
err = -EINVAL; /* * The first inode we look at is the journal inode. Don't try * root first: it may be modified in the journal!
*/ if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
err = ext4_load_and_init_journal(sb, es, ctx); if (err) goto failed_mount3a; if (bdev_read_only(sb->s_bdev))
needs_recovery = 0;
} elseif (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
ext4_has_feature_journal_needs_recovery(sb)) {
ext4_msg(sb, KERN_ERR, "required journal recovery " "suppressed and not mounted read-only"); goto failed_mount3a;
} else { constchar *journal_option;
/* Nojournal mode, all journal mount options are illegal */
journal_option = ext4_has_journal_option(sb); if (journal_option != NULL) {
ext4_msg(sb, KERN_ERR, "can't mount with %s, fs mounted w/o journal",
journal_option); goto failed_mount3a;
}
if (!test_opt(sb, NO_MBCACHE)) {
sbi->s_ea_block_cache = ext4_xattr_create_cache(); if (!sbi->s_ea_block_cache) {
ext4_msg(sb, KERN_ERR, "Failed to create ea_block_cache");
err = -EINVAL; goto failed_mount_wq;
}
if (ext4_has_feature_ea_inode(sb)) {
sbi->s_ea_inode_cache = ext4_xattr_create_cache(); if (!sbi->s_ea_inode_cache) {
ext4_msg(sb, KERN_ERR, "Failed to create ea_inode_cache");
err = -EINVAL; goto failed_mount_wq;
}
}
}
/* * Get the # of file system overhead blocks from the * superblock if present.
*/
sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); /* ignore the precalculated value if it is ridiculous */ if (sbi->s_overhead > ext4_blocks_count(es))
sbi->s_overhead = 0; /* * If the bigalloc feature is not enabled recalculating the * overhead doesn't take long, so we might as well just redo * it to make sure we are using the correct value.
*/ if (!ext4_has_feature_bigalloc(sb))
sbi->s_overhead = 0; if (sbi->s_overhead == 0) {
err = ext4_calculate_overhead(sb); if (err) goto failed_mount_wq;
}
/* * The maximum number of concurrent works can be high and * concurrency isn't really necessary. Limit it to 1.
*/
EXT4_SB(sb)->rsv_conversion_wq =
alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); if (!EXT4_SB(sb)->rsv_conversion_wq) {
printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
err = -ENOMEM; goto failed_mount4;
}
/* * The jbd2_journal_load will have done any necessary log recovery, * so we can safely mount the rest of the filesystem now.
*/
if (test_opt(sb, BLOCK_VALIDITY)) {
err = ext4_setup_system_zone(sb); if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize system " "zone (%d)", err); goto failed_mount4a;
}
}
ext4_fc_replay_cleanup(sb);
ext4_ext_init(sb);
/* * Enable optimize_scan if number of groups is > threshold. This can be * turned off by passing "mb_optimize_scan=0". This can also be * turned on forcefully by passing "mb_optimize_scan=1".
*/ if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) { if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
set_opt2(sb, MB_OPTIMIZE_SCAN); else
clear_opt2(sb, MB_OPTIMIZE_SCAN);
}
err = ext4_mb_init(sb); if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
err); goto failed_mount5;
}
/* * We can only set up the journal commit callback once * mballoc is initialized
*/ if (sbi->s_journal)
sbi->s_journal->j_commit_callback =
ext4_journal_commit_callback;
err = ext4_percpu_param_init(sbi); if (err) goto failed_mount6;
if (ext4_has_feature_flex_bg(sb)) if (!ext4_fill_flex_info(sb)) {
ext4_msg(sb, KERN_ERR, "unable to initialize " "flex_bg meta info!");
err = -ENOMEM; goto failed_mount6;
}
err = ext4_register_li_request(sb, first_not_zeroed); if (err) goto failed_mount6;
err = ext4_init_orphan_info(sb); if (err) goto failed_mount7; #ifdef CONFIG_QUOTA /* Enable quota usage during mount. */ if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
err = ext4_enable_quotas(sb); if (err) goto failed_mount8;
} #endif/* CONFIG_QUOTA */
/* * Save the original bdev mapping's wb_err value which could be * used to detect the metadata async write error.
*/
errseq_check_and_advance(&sb->s_bdev->bd_mapping->wb_err,
&sbi->s_bdev_wb_err);
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; /* * Update the checksum after updating free space/inode counters and * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect * checksum in the buffer cache until it is written out and * e2fsprogs programs trying to open a file system immediately * after it is mounted can fail.
*/
ext4_superblock_csum_set(sb); if (needs_recovery) {
ext4_msg(sb, KERN_INFO, "recovery complete");
err = ext4_mark_recovery_complete(sb, es); if (err) goto failed_mount9;
}
if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev)) {
ext4_msg(sb, KERN_WARNING, "mounting with \"discard\" option, but the device does not support discard");
clear_opt(sb, DISCARD);
}
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
/* * Setup any per-fs journal parameters now. We'll do this both on * initial mount, once the journal has been initialised but before we've * done any recovery; and again on any subsequent remount.
*/ staticvoid ext4_init_journal_params(struct super_block *sb, journal_t *journal)
{ struct ext4_sb_info *sbi = EXT4_SB(sb);
/* * Test for the existence of a valid inode on disk. Bad things * happen if we iget() an unused inode, as the subsequent iput() * will try to delete it.
*/
journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL); if (IS_ERR(journal_inode)) {
ext4_msg(sb, KERN_ERR, "no journal found"); return ERR_CAST(journal_inode);
} if (!journal_inode->i_nlink) {
make_bad_inode(journal_inode);
iput(journal_inode);
ext4_msg(sb, KERN_ERR, "journal inode is deleted"); return ERR_PTR(-EFSCORRUPTED);
} if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
ext4_msg(sb, KERN_ERR, "invalid journal inode");
iput(journal_inode); return ERR_PTR(-EFSCORRUPTED);
}
ext4_debug("Journal inode found at %p: %lld bytes\n",
journal_inode, journal_inode->i_size); return journal_inode;
}
/* * Are we loading a blank journal or performing recovery after a * crash? For recovery, we need to check in advance whether we * can get read-write access to the device.
*/ if (ext4_has_feature_journal_needs_recovery(sb)) { if (sb_rdonly(sb)) {
ext4_msg(sb, KERN_INFO, "INFO: recovery " "required on readonly filesystem"); if (really_read_only) {
ext4_msg(sb, KERN_ERR, "write access " "unavailable, cannot proceed " "(try mounting with noload)");
err = -EROFS; goto err_out;
}
ext4_msg(sb, KERN_INFO, "write access will " "be enabled during recovery");
}
}
if (!(journal->j_flags & JBD2_BARRIER))
ext4_msg(sb, KERN_INFO, "barriers disabled");
if (!ext4_has_feature_journal_needs_recovery(sb))
err = jbd2_journal_wipe(journal, !really_read_only); if (!err) { char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
__le16 orig_state; bool changed = false;
if (save)
memcpy(save, ((char *) es) +
EXT4_S_ERR_START, EXT4_S_ERR_LEN);
err = jbd2_journal_load(journal); if (save && memcmp(((char *) es) + EXT4_S_ERR_START,
save, EXT4_S_ERR_LEN)) {
memcpy(((char *) es) + EXT4_S_ERR_START,
save, EXT4_S_ERR_LEN);
changed = true;
}
kfree(save);
orig_state = es->s_state;
es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
EXT4_ERROR_FS); if (orig_state != es->s_state)
changed = true; /* Write out restored error information to the superblock */ if (changed && !really_read_only) { int err2;
err2 = ext4_commit_super(sb);
err = err ? : err2;
}
}
/* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */ staticvoid ext4_update_super(struct super_block *sb)
{ struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct buffer_head *sbh = sbi->s_sbh;
lock_buffer(sbh); /* * If the file system is mounted read-only, don't update the * superblock write time. This avoids updating the superblock * write time when we are mounting the root file system * read/only but we need to replay the journal; at that point, * for people who are east of GMT and who make their clock * tick in localtime for Windows bug-for-bug compatibility, * the clock is set in the future, and this will cause e2fsck * to complain and force a full file system check.
*/ if (!sb_rdonly(sb))
ext4_update_tstamp(es, s_wtime);
es->s_kbytes_written =
cpu_to_le64(sbi->s_kbytes_written +
((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
sbi->s_sectors_written_start) >> 1)); if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
ext4_free_blocks_count_set(es,
EXT4_C2B(sbi, percpu_counter_sum_positive(
&sbi->s_freeclusters_counter))); if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
es->s_free_inodes_count =
cpu_to_le32(percpu_counter_sum_positive(
&sbi->s_freeinodes_counter)); /* Copy error information to the on-disk superblock */
spin_lock(&sbi->s_error_lock); if (sbi->s_add_error_count > 0) {
es->s_state |= cpu_to_le16(EXT4_ERROR_FS); if (!es->s_first_error_time && !es->s_first_error_time_hi) {
__ext4_update_tstamp(&es->s_first_error_time,
&es->s_first_error_time_hi,
sbi->s_first_error_time);
strtomem_pad(es->s_first_error_func,
sbi->s_first_error_func, 0);
es->s_first_error_line =
cpu_to_le32(sbi->s_first_error_line);
es->s_first_error_ino =
cpu_to_le32(sbi->s_first_error_ino);
es->s_first_error_block =
cpu_to_le64(sbi->s_first_error_block);
es->s_first_error_errcode =
ext4_errno_to_code(sbi->s_first_error_code);
}
__ext4_update_tstamp(&es->s_last_error_time,
&es->s_last_error_time_hi,
sbi->s_last_error_time);
strtomem_pad(es->s_last_error_func, sbi->s_last_error_func, 0);
es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
es->s_last_error_errcode =
ext4_errno_to_code(sbi->s_last_error_code); /* * Start the daily error reporting function if it hasn't been * started already
*/ if (!es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
sbi->s_add_error_count = 0;
}
spin_unlock(&sbi->s_error_lock);
lock_buffer(sbh); /* Buffer got discarded which means block device got invalidated */ if (!buffer_mapped(sbh)) {
unlock_buffer(sbh); return -EIO;
}
if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the * superblock failed. This could happen because the * USB device was yanked out. Or it could happen to * be a transient write error and maybe the block will * be remapped. Nothing we can do but to retry the * write and hope for the best.
*/
ext4_msg(sb, KERN_ERR, "previous I/O error to " "superblock detected");
clear_buffer_write_io_error(sbh);
set_buffer_uptodate(sbh);
}
get_bh(sbh); /* Clear potential dirty bit if it was journalled update */
clear_buffer_dirty(sbh);
sbh->b_end_io = end_buffer_write_sync;
submit_bh(REQ_OP_WRITE | REQ_SYNC |
(test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
wait_on_buffer(sbh); if (buffer_write_io_error(sbh)) {
ext4_msg(sb, KERN_ERR, "I/O error while writing " "superblock");
clear_buffer_write_io_error(sbh);
set_buffer_uptodate(sbh); return -EIO;
} return 0;
}
/*
 * Called after journal recovery.  If the filesystem is being mounted (or
 * remounted) read-only, the on-disk state is consistent once the journal has
 * been flushed, so record that by clearing the "needs recovery" and "orphan
 * present" feature flags and writing the superblock.
 *
 * @sb: superblock of the filesystem
 * @es: on-disk superblock (not referenced by this function)
 *
 * Returns 0 on success, -EFSCORRUPTED if the journal feature is gone while a
 * journal is still attached or if the orphan file is not empty on a
 * read-only filesystem, or the negative error from jbd2_journal_flush().
 */
static int ext4_mark_recovery_complete(struct super_block *sb,
				       struct ext4_super_block *es)
{
	journal_t *jnl = EXT4_SB(sb)->s_journal;
	int rc;

	if (!ext4_has_feature_journal(sb)) {
		/* No journal feature: a live journal pointer is an error. */
		if (jnl == NULL)
			return 0;
		ext4_error(sb, "Journal got removed while the fs was "
			   "mounted!");
		return -EFSCORRUPTED;
	}

	/* Updates stay locked out until the unlock label below. */
	jbd2_journal_lock_updates(jnl);

	rc = jbd2_journal_flush(jnl, 0);
	if (rc < 0)
		goto unlock;

	/* Only a read-only fs with at least one of the flags set needs work. */
	if (!sb_rdonly(sb))
		goto unlock;
	if (!ext4_has_feature_journal_needs_recovery(sb) &&
	    !ext4_has_feature_orphan_present(sb))
		goto unlock;

	if (!ext4_orphan_file_empty(sb)) {
		ext4_error(sb, "Orphan file not empty on read-only fs.");
		rc = -EFSCORRUPTED;
		goto unlock;
	}

	ext4_clear_feature_journal_needs_recovery(sb);
	ext4_clear_feature_orphan_present(sb);
	/* The result of the superblock write is not propagated. */
	ext4_commit_super(sb);

unlock:
	jbd2_journal_unlock_updates(jnl);
	return rc;
}
/* * If we are mounting (or read-write remounting) a filesystem whose journal * has recorded an error from a previous lifetime, move that error to the * main filesystem now.
*/ staticint ext4_clear_journal_err(struct super_block *sb, struct ext4_super_block *es)
{
journal_t *journal; int j_errno; constchar *errstr;
if (!ext4_has_feature_journal(sb)) {
ext4_error(sb, "Journal got removed while the fs was mounted!"); return -EFSCORRUPTED;
}
journal = EXT4_SB(sb)->s_journal;
/* * Now check for any error status which may have been recorded in the * journal by a prior ext4_error() or ext4_abort()
*/
j_errno = jbd2_journal_errno(journal); if (j_errno) { char nbuf[16];
/*
 * Force the running and committing transactions of @sb's journal to commit,
 * and wait on the commit.  Returns the result of
 * ext4_journal_force_commit().
 */
int ext4_force_commit(struct super_block *sb)
{
	int ret = ext4_journal_force_commit(EXT4_SB(sb)->s_journal);

	return ret;
}
/*
 * Sync the filesystem.
 *
 * @sb:   superblock of the filesystem
 * @wait: non-zero to wait for the journal commit and, where required, issue
 *        a cache flush to the block device
 *
 * Returns the non-zero result of ext4_emergency_state() if there is one,
 * otherwise the result of waiting for the commit, otherwise the result of
 * the device flush (0 when neither was needed).
 */
static int ext4_sync_fs(struct super_block *sb, int wait)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	bool flush_needed = false;
	tid_t tid;
	int ret;

	ret = ext4_emergency_state(sb);
	if (unlikely(ret))
		return ret;

	trace_ext4_sync_fs(sb, wait);
	flush_workqueue(sbi->rsv_conversion_wq);

	/*
	 * Write back quota for the non-journalled quota case; journalled
	 * quota has no dirty dquots.
	 */
	dquot_writeback_dquots(sb, -1);

	/*
	 * Data writeback can happen without a journal transaction, so the
	 * barrier has to be sent at the end of this function, unless the
	 * transaction commit is going to send one for us.
	 */
	if (!sbi->s_journal) {
		if (wait && test_opt(sb, BARRIER))
			flush_needed = true;
	} else {
		tid = jbd2_get_latest_transaction(sbi->s_journal);
		if (wait && (sbi->s_journal->j_flags & JBD2_BARRIER) &&
		    !jbd2_trans_will_send_data_barrier(sbi->s_journal, tid))
			flush_needed = true;

		if (jbd2_journal_start_commit(sbi->s_journal, &tid) && wait)
			ret = jbd2_log_wait_commit(sbi->s_journal, tid);
	}

	if (flush_needed) {
		int flush_err = blkdev_issue_flush(sb->s_bdev);

		/* An earlier commit error takes precedence. */
		if (!ret)
			ret = flush_err;
	}

	return ret;
}
/*
 * LVM calls this function before a (read-only) snapshot is created.  It
 * gives us a chance to flush the journal completely and mark the fs clean.
 *
 * This function alone cannot bring the filesystem into a clean state; it
 * relies on the upper layer to stop all data and metadata modifications.
 *
 * Returns 0 on success, the negative error from jbd2_journal_flush(), or the
 * result of ext4_commit_super().
 */
static int ext4_freeze(struct super_block *sb)
{
	journal_t *jnl = EXT4_SB(sb)->s_journal;
	int rc;

	/* Without a journal only the superblock has to be written. */
	if (!jnl)
		return ext4_commit_super(sb);

	/* Set up the journal barrier. */
	jbd2_journal_lock_updates(jnl);

	rc = jbd2_journal_flush(jnl, 0);
	if (rc >= 0) {
		/*
		 * Journal blocked and flushed: clear the needs_recovery flag.
		 * This is skipped when the flush failed.
		 */
		ext4_clear_feature_journal_needs_recovery(sb);
		if (ext4_orphan_file_empty(sb))
			ext4_clear_feature_orphan_present(sb);

		rc = ext4_commit_super(sb);
	}

	/* We rely on the upper layer to stop further updates. */
	jbd2_journal_unlock_updates(jnl);
	return rc;
}
/*
 * Called by LVM after the snapshot is done.  The RECOVER flag has to be set
 * again here, even though the filesystem is not technically dirty yet.
 *
 * Always returns 0; nothing is done when ext4_emergency_state() reports a
 * non-zero state.
 */
static int ext4_unfreeze(struct super_block *sb)
{
	if (!ext4_emergency_state(sb)) {
		if (EXT4_SB(sb)->s_journal) {
			/* Reset needs_recovery before the fs is unlocked. */
			ext4_set_feature_journal_needs_recovery(sb);
			if (ext4_has_feature_orphan_file(sb))
				ext4_set_feature_orphan_present(sb);
		}

		/* The result of the superblock write is not propagated. */
		ext4_commit_super(sb);
	}

	return 0;
}
/*
 * Snapshot of the mount options, saved for ext4_remount's benefit.  Each
 * field mirrors the identically named field of struct ext4_sb_info.
 */
struct ext4_mount_options {
	unsigned long s_mount_opt;
	unsigned long s_mount_opt2;
	kuid_t s_resuid;
	kgid_t s_resgid;
	unsigned long s_commit_interval;
	u32 s_min_batch_time;
	u32 s_max_batch_time;
#ifdef CONFIG_QUOTA
	int s_jquota_fmt;
	char *s_qf_names[EXT4_MAXQUOTAS];
#endif
};
staticint __ext4_remount(struct fs_context *fc, struct super_block *sb)
{ struct ext4_fs_context *ctx = fc->fs_private; struct ext4_super_block *es; struct ext4_sb_info *sbi = EXT4_SB(sb); unsignedlong old_sb_flags; struct ext4_mount_options old_opts;
ext4_group_t g; int err = 0; int alloc_ctx; #ifdef CONFIG_QUOTA int enable_quota = 0; int i, j; char *to_free[EXT4_MAXQUOTAS]; #endif
/* Store the original options */
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
old_opts.s_mount_opt2 = sbi->s_mount_opt2;
old_opts.s_resuid = sbi->s_resuid;
old_opts.s_resgid = sbi->s_resgid;
old_opts.s_commit_interval = sbi->s_commit_interval;
old_opts.s_min_batch_time = sbi->s_min_batch_time;
old_opts.s_max_batch_time = sbi->s_max_batch_time; #ifdef CONFIG_QUOTA
old_opts.s_jquota_fmt = sbi->s_jquota_fmt; for (i = 0; i < EXT4_MAXQUOTAS; i++) if (sbi->s_qf_names[i]) { char *qf_name = get_qf_name(sb, sbi, i);
if ((ctx->spec & EXT4_SPEC_s_stripe) &&
ext4_is_stripe_incompatible(sb, ctx->s_stripe)) {
ext4_msg(sb, KERN_WARNING, "stripe (%lu) is not aligned with cluster size (%u), " "stripe is disabled",
ctx->s_stripe, sbi->s_cluster_ratio);
ctx->s_stripe = 0;
}
/* * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause * two calls to ext4_should_dioread_nolock() to return inconsistent * values, triggering WARN_ON in ext4_add_complete_io(). we grab * here s_writepages_rwsem to avoid race between writepages ops and * remount.
*/
alloc_ctx = ext4_writepages_down_write(sb);
ext4_apply_options(fc, sb);
ext4_writepages_up_write(sb, alloc_ctx);
if (sbi->s_journal) {
ext4_init_journal_params(sb, sbi->s_journal);
set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
}
/* Flush outstanding errors before changing fs state */
flush_work(&sbi->s_sb_upd_work);
if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) { if (ext4_emergency_state(sb)) {
err = -EROFS; goto restore_opts;
}
if (fc->sb_flags & SB_RDONLY) {
err = sync_filesystem(sb); if (err < 0) goto restore_opts;
err = dquot_suspend(sb, -1); if (err < 0) goto restore_opts;
/* * First of all, the unconditional stuff we have to do * to disable replay of the journal when we next remount
*/
sb->s_flags |= SB_RDONLY;
/* * OK, test if we are remounting a valid rw partition * readonly, and if so set the rdonly flag and then * mark the partition as valid again.
*/ if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
(sbi->s_mount_state & EXT4_VALID_FS))
es->s_state = cpu_to_le16(sbi->s_mount_state);
if (sbi->s_journal) { /* * We let remount-ro finish even if marking fs * as clean failed...
*/
ext4_mark_recovery_complete(sb, es);
}
} else { /* Make sure we can mount this feature set readwrite */ if (ext4_has_feature_readonly(sb) ||
!ext4_feature_set_ok(sb, 0)) {
err = -EROFS; goto restore_opts;
} /* * Make sure the group descriptor checksums * are sane. If they aren't, refuse to remount r/w.
*/ for (g = 0; g < sbi->s_groups_count; g++) { struct ext4_group_desc *gdp =
ext4_get_group_desc(sb, g, NULL);
if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
ext4_msg(sb, KERN_ERR, "ext4_remount: Checksum for group %u failed (%u!=%u)",
g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
le16_to_cpu(gdp->bg_checksum));
err = -EFSBADCRC; goto restore_opts;
}
}
/* * If we have an unprocessed orphan list hanging * around from a previously readonly bdev mount, * require a full umount/remount for now.
*/ if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
ext4_msg(sb, KERN_WARNING, "Couldn't " "remount RDWR because of unprocessed " "orphan inode list. Please " "umount/remount instead");
err = -EINVAL; goto restore_opts;
}
/* * Mounting a RDONLY partition read-write, so reread * and store the current valid flag. (It may have * been changed by e2fsck since we originally mounted * the partition.)
*/ if (sbi->s_journal) {
err = ext4_clear_journal_err(sb, es); if (err) goto restore_opts;
}
sbi->s_mount_state = (le16_to_cpu(es->s_state) &
~EXT4_FC_REPLAY);
err = ext4_setup_super(sb, es, 0); if (err) goto restore_opts;
/* * Handle creation of system zone data early because it can fail. * Releasing of existing data is done when we are sure remount will * succeed.
*/ if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
err = ext4_setup_system_zone(sb); if (err) goto restore_opts;
}
if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
err = ext4_commit_super(sb); if (err) goto restore_opts;
}
#ifdef CONFIG_QUOTA if (enable_quota) { if (sb_any_quota_suspended(sb))
dquot_resume(sb, -1); elseif (ext4_has_feature_quota(sb)) {
err = ext4_enable_quotas(sb); if (err) goto restore_opts;
}
} /* Release old quota file names */ for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(old_opts.s_qf_names[i]); #endif if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
ext4_release_system_zone(sb);
/* * Reinitialize lazy itable initialization thread based on * current settings
*/ if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
ext4_unregister_li_request(sb); else {
ext4_group_t first_not_zeroed;
first_not_zeroed = ext4_has_uninit_itable(sb);
ext4_register_li_request(sb, first_not_zeroed);
}
if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
ext4_stop_mmpd(sbi);
/* * Handle aborting the filesystem as the last thing during remount to * avoid obsure errors during remount when some option changes fail to * apply due to shutdown filesystem.
*/ if (test_opt2(sb, ABORT))
ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
return 0;
restore_opts: /* * If there was a failing r/w to ro transition, we may need to * re-enable quota
*/ if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
sb_any_quota_suspended(sb))
dquot_resume(sb, -1);
/* * Helper functions so that transaction is started before we acquire dqio_sem * to keep correct lock ordering of transaction > dqio_sem
*/ staticinlinestruct inode *dquot_to_inode(struct dquot *dquot)
{ return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
}
/* * Trying to sb_start_intwrite() in a running transaction * can result in a deadlock. Further, running transactions * are already protected from freezing.
*/ if (!ext4_journal_current_handle()) {
sb_start_intwrite(dquot->dq_sb);
freeze_protected = true;
}
handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) { /* Release dquot anyway to avoid endless cycle in dqput() */
dquot_release(dquot); if (freeze_protected)
sb_end_intwrite(dquot->dq_sb); return PTR_ERR(handle);
}
ret = dquot_release(dquot); if (ret < 0)
ext4_error_err(dquot->dq_sb, -ret, "Failed to release dquot type %d",
dquot->dq_id.type);
err = ext4_journal_stop(handle); if (!ret)
ret = err;
if (freeze_protected)
sb_end_intwrite(dquot->dq_sb);
/* The first argument of lockdep_set_subclass has to be * *exactly* the same as the argument to init_rwsem() --- in * this case, in init_once() --- or lockdep gets unhappy * because the name of the lock is set using the * stringification of the argument to init_rwsem().
*/
(void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */
lockdep_set_subclass(&ei->i_data_sem, subclass);
}
/* * Standard function to be called on quota_on
*/ staticint ext4_quota_on(struct super_block *sb, int type, int format_id, conststruct path *path)
{ int err;
if (!test_opt(sb, QUOTA)) return -EINVAL;
/* Quotafile not on the same filesystem? */ if (path->dentry->d_sb != sb) return -EXDEV;
/* Quota already enabled for this file? */ if (IS_NOQUOTA(d_inode(path->dentry))) return -EBUSY;
/* Journaling quota? */ if (EXT4_SB(sb)->s_qf_names[type]) { /* Quotafile not in fs root? */ if (path->dentry->d_parent != sb->s_root)
ext4_msg(sb, KERN_WARNING, "Quota file not on filesystem root. " "Journaled quota will not work");
sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
} else { /* * Clear the flag just in case mount options changed since * last time.
*/
sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
}
/* * Set inode flags to prevent userspace from messing with quota * files. If this fails, we return success anyway since quotas * are already enabled and this is not a hard failure.
*/
inode_lock(inode);
handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1); if (IS_ERR(handle)) goto unlock_inode;
EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
S_NOATIME | S_IMMUTABLE);
err = ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
unlock_inode:
inode_unlock(inode); if (err)
dquot_quota_off(sb, type);
} if (err)
lockdep_set_quota_inode(path->dentry->d_inode,
I_DATA_SEM_NORMAL); return err;
}
/*
 * Turn quota of @type off on @sb and, where possible, remove the NOATIME and
 * IMMUTABLE flags from the quota file and refresh its timestamps.
 *
 * Returns the result of dquot_quota_off() when no inode reference could be
 * taken or when it fails; otherwise 0, or the error from starting the journal
 * handle or from marking the inode dirty.
 */
static int ext4_quota_off(struct super_block *sb, int type)
{
	struct inode *qf_inode = sb_dqopt(sb)->files[type];
	handle_t *h;
	int rc;

	/*
	 * Force all delayed allocation blocks to be allocated.
	 * Caller already holds s_umount sem.
	 */
	if (test_opt(sb, DELALLOC))
		sync_filesystem(sb);

	/* No quota inode to clean up: just switch quota off. */
	if (!qf_inode || !igrab(qf_inode))
		return dquot_quota_off(sb, type);

	rc = dquot_quota_off(sb, type);
	/*
	 * The inode flags are left alone on failure, with the quota feature,
	 * and when the filesystem was remounted read-only first (they cannot
	 * be cleaned up then).
	 */
	if (rc || ext4_has_feature_quota(sb) || sb_rdonly(sb))
		goto drop_inode;

	inode_lock(qf_inode);
	/*
	 * Update the modification times of the quota file now that userspace
	 * can start looking at it.  Quotas are already disabled at this
	 * point; a failure here is still reported to the caller.
	 */
	h = ext4_journal_start(qf_inode, EXT4_HT_QUOTA, 1);
	if (IS_ERR(h)) {
		rc = PTR_ERR(h);
	} else {
		EXT4_I(qf_inode)->i_flags &=
			~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
		inode_set_flags(qf_inode, 0, S_NOATIME | S_IMMUTABLE);
		inode_set_mtime_to_ts(qf_inode,
				      inode_set_ctime_current(qf_inode));
		rc = ext4_mark_inode_dirty(h, qf_inode);
		ext4_journal_stop(h);
	}
	inode_unlock(qf_inode);

drop_inode:
	lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
	iput(qf_inode);
	return rc;
}
/*
 * Read data from the quota file, avoiding the pagecache and such because we
 * cannot afford to acquire the locks.  Quota files are never truncated, the
 * quota code serializes the operations itself and no one else should touch
 * the files, so races are not a concern.
 *
 * @sb:   superblock of the filesystem
 * @type: quota type, used as the index into the quota file array
 * @data: destination buffer
 * @len:  number of bytes requested
 * @off:  byte offset in the quota file
 *
 * Returns the number of bytes read (@len clamped to the file size), 0 when
 * @off lies beyond the file size, or the negative error from ext4_bread().
 */
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
			       size_t len, loff_t off)
{
	struct inode *qf_inode = sb_dqopt(sb)->files[type];
	loff_t qf_size = i_size_read(qf_inode);
	ext4_lblk_t lblk = off >> EXT4_BLOCK_SIZE_BITS(sb);
	int in_blk = off & (sb->s_blocksize - 1);
	struct buffer_head *bh;
	size_t left;
	int chunk;

	if (off > qf_size)
		return 0;
	if (off+len > qf_size)
		len = qf_size-off;

	for (left = len; left > 0; left -= chunk) {
		/* Never read past the end of the current block. */
		chunk = min_t(unsigned long, sb->s_blocksize - in_blk, left);

		bh = ext4_bread(NULL, qf_inode, lblk++, 0);
		if (IS_ERR(bh))
			return PTR_ERR(bh);

		if (bh)
			memcpy(data, bh->b_data + in_blk, chunk);
		else	/* A hole? */
			memset(data, 0, chunk);
		brelse(bh);

		data += chunk;
		/* Every block after the first is read from its start. */
		in_blk = 0;
	}

	return len;
}
/* Write to quotafile (we know the transaction is already started and has
* enough credits) */ static ssize_t ext4_quota_write(struct super_block *sb, int type, constchar *data, size_t len, loff_t off)
{ struct inode *inode = sb_dqopt(sb)->files[type];
ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1); int retries = 0; struct buffer_head *bh;
handle_t *handle = journal_current_handle();
if (!handle) {
ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" " cancelled because transaction is not started",
(unsignedlonglong)off, (unsignedlonglong)len); return -EIO;
} /* * Since we account only one data block in transaction credits, * then it is impossible to cross a block boundary.
*/ if (sb->s_blocksize - offset < len) {
ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" " cancelled because not block aligned",
(unsignedlonglong)off, (unsignedlonglong)len); return -EIO;
}
do {
bh = ext4_bread(handle, inode, blk,
EXT4_GET_BLOCKS_CREATE |
EXT4_GET_BLOCKS_METADATA_NOFAIL);
} while (PTR_ERR(bh) == -ENOSPC &&
ext4_should_retry_alloc(inode->i_sb, &retries)); if (IS_ERR(bh)) return PTR_ERR(bh); if (!bh) goto out;
BUFFER_TRACE(bh, "get write access");
err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE); if (err) {
brelse(bh); return err;
}
lock_buffer(bh);
memcpy(bh->b_data+offset, data, len);
flush_dcache_folio(bh->b_folio);
unlock_buffer(bh);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
out: if (inode->i_size < off + len) {
i_size_write(inode, off + len);
EXT4_I(inode)->i_disksize = inode->i_size;
err2 = ext4_mark_inode_dirty(handle, inode); if (unlikely(err2 && !err))
err = err2;
} return err ? err : len;
} #endif
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
/* Register ext2_fs_type; a failure is only logged as a warning. */
static inline void register_as_ext2(void)
{
	int rc;

	rc = register_filesystem(&ext2_fs_type);
	if (!rc)
		return;

	printk(KERN_WARNING "EXT4-fs: Unable to register as ext2 (%d)\n", rc);
}
/* Register ext3_fs_type; a failure is only logged as a warning. */
static inline void register_as_ext3(void)
{
	int rc;

	rc = register_filesystem(&ext3_fs_type);
	if (!rc)
		return;

	printk(KERN_WARNING "EXT4-fs: Unable to register as ext3 (%d)\n", rc);
}
/* Module metadata: authors, description and license. */
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
/* ext4_init_fs and ext4_exit_fs are the module's init and exit routines. */
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)
Messung V0.5 in Prozent
¤ Diese beiden folgenden Angebotsgruppen bietet das Unternehmen0.162Angebot
(Wie Sie bei der Firma Beratungs- und Dienstleistungen beauftragen können 2026-04-25)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.