staticint btrfs_parse_compress(struct btrfs_fs_context *ctx, conststruct fs_parameter *param, int opt)
{ constchar *string = param->string; int ret;
/* * Provide the same semantics as older kernels that don't use fs * context, specifying the "compress" option clears "force-compress" * without the need to pass "compress-force=[no|none]" before * specifying "compress".
*/ if (opt != Opt_compress_force && opt != Opt_compress_force_type)
btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS);
switch (opt) { case Opt_degraded:
btrfs_set_opt(ctx->mount_opt, DEGRADED); break; case Opt_subvol_empty: /* * This exists because we used to allow it on accident, so we're * keeping it to maintain ABI. See 37becec95ac3 ("Btrfs: allow * empty subvol= again").
*/ break; case Opt_subvol:
kfree(ctx->subvol_name);
ctx->subvol_name = kstrdup(param->string, GFP_KERNEL); if (!ctx->subvol_name) return -ENOMEM; break; case Opt_subvolid:
ctx->subvol_objectid = result.uint_64;
/* subvolid=0 means give me the original fs_tree. */ if (!ctx->subvol_objectid)
ctx->subvol_objectid = BTRFS_FS_TREE_OBJECTID; break; case Opt_device: { struct btrfs_device *device;
mutex_lock(&uuid_mutex);
device = btrfs_scan_one_device(param->string, false);
mutex_unlock(&uuid_mutex); if (IS_ERR(device)) return PTR_ERR(device); break;
} case Opt_datasum: if (result.negated) {
btrfs_set_opt(ctx->mount_opt, NODATASUM);
} else {
btrfs_clear_opt(ctx->mount_opt, NODATACOW);
btrfs_clear_opt(ctx->mount_opt, NODATASUM);
} break; case Opt_datacow: if (result.negated) {
btrfs_clear_opt(ctx->mount_opt, COMPRESS);
btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS);
btrfs_set_opt(ctx->mount_opt, NODATACOW);
btrfs_set_opt(ctx->mount_opt, NODATASUM);
} else {
btrfs_clear_opt(ctx->mount_opt, NODATACOW);
} break; case Opt_compress_force: case Opt_compress_force_type:
btrfs_set_opt(ctx->mount_opt, FORCE_COMPRESS);
fallthrough; case Opt_compress: case Opt_compress_type: if (btrfs_parse_compress(ctx, param, opt)) return -EINVAL; break; case Opt_ssd: if (result.negated) {
btrfs_set_opt(ctx->mount_opt, NOSSD);
btrfs_clear_opt(ctx->mount_opt, SSD);
btrfs_clear_opt(ctx->mount_opt, SSD_SPREAD);
} else {
btrfs_set_opt(ctx->mount_opt, SSD);
btrfs_clear_opt(ctx->mount_opt, NOSSD);
} break; case Opt_ssd_spread: if (result.negated) {
btrfs_clear_opt(ctx->mount_opt, SSD_SPREAD);
} else {
btrfs_set_opt(ctx->mount_opt, SSD);
btrfs_set_opt(ctx->mount_opt, SSD_SPREAD);
btrfs_clear_opt(ctx->mount_opt, NOSSD);
} break; case Opt_barrier: if (result.negated)
btrfs_set_opt(ctx->mount_opt, NOBARRIER); else
btrfs_clear_opt(ctx->mount_opt, NOBARRIER); break; case Opt_thread_pool: if (result.uint_32 == 0) {
btrfs_err(NULL, "invalid value 0 for thread_pool"); return -EINVAL;
}
ctx->thread_pool_size = result.uint_32; break; case Opt_max_inline:
ctx->max_inline = memparse(param->string, NULL); break; case Opt_acl: if (result.negated) {
fc->sb_flags &= ~SB_POSIXACL;
} else { #ifdef CONFIG_BTRFS_FS_POSIX_ACL
fc->sb_flags |= SB_POSIXACL; #else
btrfs_err(NULL, "support for ACL not compiled in"); return -EINVAL; #endif
} /* * VFS limits the ability to toggle ACL on and off via remount, * despite every file system allowing this. This seems to be * an oversight since we all do, but it'll fail if we're * remounting. So don't set the mask here, we'll check it in * btrfs_reconfigure and do the toggling ourselves.
*/ if (fc->purpose != FS_CONTEXT_FOR_RECONFIGURE)
fc->sb_flags_mask |= SB_POSIXACL; break; case Opt_treelog: if (result.negated)
btrfs_set_opt(ctx->mount_opt, NOTREELOG); else
btrfs_clear_opt(ctx->mount_opt, NOTREELOG); break; case Opt_norecovery:
btrfs_info(NULL, "'norecovery' is for compatibility only, recommended to use 'rescue=nologreplay'");
btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY); break; case Opt_flushoncommit: if (result.negated)
btrfs_clear_opt(ctx->mount_opt, FLUSHONCOMMIT); else
btrfs_set_opt(ctx->mount_opt, FLUSHONCOMMIT); break; case Opt_ratio:
ctx->metadata_ratio = result.uint_32; break; case Opt_discard: if (result.negated) {
btrfs_clear_opt(ctx->mount_opt, DISCARD_SYNC);
btrfs_clear_opt(ctx->mount_opt, DISCARD_ASYNC);
btrfs_set_opt(ctx->mount_opt, NODISCARD);
} else {
btrfs_set_opt(ctx->mount_opt, DISCARD_SYNC);
btrfs_clear_opt(ctx->mount_opt, DISCARD_ASYNC);
} break; case Opt_discard_mode: switch (result.uint_32) { case Opt_discard_sync:
btrfs_clear_opt(ctx->mount_opt, DISCARD_ASYNC);
btrfs_set_opt(ctx->mount_opt, DISCARD_SYNC); break; case Opt_discard_async:
btrfs_clear_opt(ctx->mount_opt, DISCARD_SYNC);
btrfs_set_opt(ctx->mount_opt, DISCARD_ASYNC); break; default:
btrfs_err(NULL, "unrecognized discard mode value %s",
param->key); return -EINVAL;
}
btrfs_clear_opt(ctx->mount_opt, NODISCARD); break; case Opt_space_cache: if (result.negated) {
btrfs_set_opt(ctx->mount_opt, NOSPACECACHE);
btrfs_clear_opt(ctx->mount_opt, SPACE_CACHE);
btrfs_clear_opt(ctx->mount_opt, FREE_SPACE_TREE);
} else {
btrfs_clear_opt(ctx->mount_opt, FREE_SPACE_TREE);
btrfs_set_opt(ctx->mount_opt, SPACE_CACHE);
} break; case Opt_space_cache_version: switch (result.uint_32) { case Opt_space_cache_v1:
btrfs_set_opt(ctx->mount_opt, SPACE_CACHE);
btrfs_clear_opt(ctx->mount_opt, FREE_SPACE_TREE); break; case Opt_space_cache_v2:
btrfs_clear_opt(ctx->mount_opt, SPACE_CACHE);
btrfs_set_opt(ctx->mount_opt, FREE_SPACE_TREE); break; default:
btrfs_err(NULL, "unrecognized space_cache value %s",
param->key); return -EINVAL;
} break; case Opt_rescan_uuid_tree:
btrfs_set_opt(ctx->mount_opt, RESCAN_UUID_TREE); break; case Opt_clear_cache:
btrfs_set_opt(ctx->mount_opt, CLEAR_CACHE); break; case Opt_user_subvol_rm_allowed:
btrfs_set_opt(ctx->mount_opt, USER_SUBVOL_RM_ALLOWED); break; case Opt_enospc_debug: if (result.negated)
btrfs_clear_opt(ctx->mount_opt, ENOSPC_DEBUG); else
btrfs_set_opt(ctx->mount_opt, ENOSPC_DEBUG); break; case Opt_defrag: if (result.negated)
btrfs_clear_opt(ctx->mount_opt, AUTO_DEFRAG); else
btrfs_set_opt(ctx->mount_opt, AUTO_DEFRAG); break; case Opt_usebackuproot:
btrfs_warn(NULL, "'usebackuproot' is deprecated, use 'rescue=usebackuproot' instead");
btrfs_set_opt(ctx->mount_opt, USEBACKUPROOT);
/* If we're loading the backup roots we can't trust the space cache. */
btrfs_set_opt(ctx->mount_opt, CLEAR_CACHE); break; case Opt_skip_balance:
btrfs_set_opt(ctx->mount_opt, SKIP_BALANCE); break; case Opt_fatal_errors: switch (result.uint_32) { case Opt_fatal_errors_panic:
btrfs_set_opt(ctx->mount_opt, PANIC_ON_FATAL_ERROR); break; case Opt_fatal_errors_bug:
btrfs_clear_opt(ctx->mount_opt, PANIC_ON_FATAL_ERROR); break; default:
btrfs_err(NULL, "unrecognized fatal_errors value %s",
param->key); return -EINVAL;
} break; case Opt_commit_interval:
ctx->commit_interval = result.uint_32; if (ctx->commit_interval > BTRFS_WARNING_COMMIT_INTERVAL) {
btrfs_warn(NULL, "excessive commit interval %u, use with care",
ctx->commit_interval);
} if (ctx->commit_interval == 0)
ctx->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; break; case Opt_rescue: switch (result.uint_32) { case Opt_rescue_usebackuproot:
btrfs_set_opt(ctx->mount_opt, USEBACKUPROOT); break; case Opt_rescue_nologreplay:
btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY); break; case Opt_rescue_ignorebadroots:
btrfs_set_opt(ctx->mount_opt, IGNOREBADROOTS); break; case Opt_rescue_ignoredatacsums:
btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS); break; case Opt_rescue_ignoremetacsums:
btrfs_set_opt(ctx->mount_opt, IGNOREMETACSUMS); break; case Opt_rescue_ignoresuperflags:
btrfs_set_opt(ctx->mount_opt, IGNORESUPERFLAGS); break; case Opt_rescue_parameter_all:
btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS);
btrfs_set_opt(ctx->mount_opt, IGNOREMETACSUMS);
btrfs_set_opt(ctx->mount_opt, IGNORESUPERFLAGS);
btrfs_set_opt(ctx->mount_opt, IGNOREBADROOTS);
btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY); break; default:
btrfs_info(NULL, "unrecognized rescue option '%s'",
param->key); return -EINVAL;
} break; #ifdef CONFIG_BTRFS_DEBUG case Opt_fragment: switch (result.uint_32) { case Opt_fragment_parameter_all:
btrfs_set_opt(ctx->mount_opt, FRAGMENT_DATA);
btrfs_set_opt(ctx->mount_opt, FRAGMENT_METADATA); break; case Opt_fragment_parameter_metadata:
btrfs_set_opt(ctx->mount_opt, FRAGMENT_METADATA); break; case Opt_fragment_parameter_data:
btrfs_set_opt(ctx->mount_opt, FRAGMENT_DATA); break; default:
btrfs_info(NULL, "unrecognized fragment option '%s'",
param->key); return -EINVAL;
} break; #endif #ifdef CONFIG_BTRFS_FS_REF_VERIFY case Opt_ref_verify:
btrfs_set_opt(ctx->mount_opt, REF_VERIFY); break; #endif default:
btrfs_err(NULL, "unrecognized mount option '%s'", param->key); return -EINVAL;
}
return 0;
}
/*
 * Some options only have meaning at mount time and shouldn't persist across
 * remounts, or be displayed.  Clear these at the end of mount and remount
 * code paths.
 *
 * NOTE(review): fixed the extraction-fused "staticvoid" keyword.
 */
static void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info)
{
    btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT);
    btrfs_clear_opt(fs_info->mount_opt, CLEAR_CACHE);
    btrfs_clear_opt(fs_info->mount_opt, NOSPACECACHE);
}
staticbool check_ro_option(conststruct btrfs_fs_info *fs_info, unsignedlonglong mount_opt, unsignedlonglong opt, constchar *opt_name)
{ if (mount_opt & opt) {
btrfs_err(fs_info, "%s must be used with ro mount option",
opt_name); returntrue;
} returnfalse;
}
if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
!btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE) &&
!btrfs_raw_test_opt(*mount_opt, CLEAR_CACHE)) {
btrfs_err(info, "cannot disable free-space-tree");
ret = false;
} if (btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE) &&
!btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) {
btrfs_err(info, "cannot disable free-space-tree with block-group-tree feature");
ret = false;
}
if (btrfs_check_mountopts_zoned(info, mount_opt))
ret = false;
if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) { if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) {
btrfs_warn(info, "space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2");
}
}
return ret;
}
/*
 * This is subtle, we only call this during open_ctree().  We need to pre-load
 * the mount options with the on-disk settings.  Before the new mount API took
 * effect we would do this on mount and remount.  With the new mount API we'll
 * only do this on the initial mount.
 *
 * This isn't a change in behavior, because we're using the current state of
 * the file system to set the current mount options.  If you mounted with
 * special options to disable these features and then remounted we wouldn't
 * revert the settings, because mounting without these features cleared the
 * on-disk settings, so this being called on re-mount is not needed.
 *
 * NOTE(review): fixed the extraction-fused "elseif" keyword; logic unchanged.
 */
void btrfs_set_free_space_cache_settings(struct btrfs_fs_info *fs_info)
{
    /* Subpage setups cannot use space cache v1; force the free space tree. */
    if (fs_info->sectorsize < PAGE_SIZE) {
        btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
        if (!btrfs_test_opt(fs_info, FREE_SPACE_TREE)) {
            btrfs_info(fs_info,
                       "forcing free space tree for sector size %u with page size %lu",
                       fs_info->sectorsize, PAGE_SIZE);
            btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE);
        }
    }

    /*
     * At this point our mount options are populated, so we only mess with
     * these settings if we don't have any settings already.
     */
    if (btrfs_test_opt(fs_info, FREE_SPACE_TREE))
        return;

    /* Zoned mode cannot use the v1 cache; wipe any existing one on disk. */
    if (btrfs_is_zoned(fs_info) &&
        btrfs_free_space_cache_v1_active(fs_info)) {
        btrfs_info(fs_info, "zoned: clearing existing space cache");
        btrfs_set_super_cache_generation(fs_info->super_copy, 0);
        return;
    }

    if (btrfs_test_opt(fs_info, SPACE_CACHE))
        return;

    if (btrfs_test_opt(fs_info, NOSPACECACHE))
        return;

    /*
     * At this point we don't have explicit options set by the user, set
     * them ourselves based on the state of the file system.
     */
    if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
        btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE);
    else if (btrfs_free_space_cache_v1_active(fs_info))
        btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE);
}
/* * For devices supporting discard turn on discard=async automatically, * unless it's already set or disabled. This could be turned off by * nodiscard for the same mount. * * The zoned mode piggy backs on the discard functionality for * resetting a zone. There is no reason to delay the zone reset as it is * fast enough. So, do not enable async discard for zoned mode.
*/ if (!(btrfs_test_opt(fs_info, DISCARD_SYNC) ||
btrfs_test_opt(fs_info, DISCARD_ASYNC) ||
btrfs_test_opt(fs_info, NODISCARD)) &&
fs_info->fs_devices->discardable &&
!btrfs_is_zoned(fs_info))
btrfs_set_opt(fs_info->mount_opt, DISCARD_ASYNC);
}
path = btrfs_alloc_path(); if (!path) {
ret = -ENOMEM; goto err;
}
name = kmalloc(PATH_MAX, GFP_KERNEL); if (!name) {
ret = -ENOMEM; goto err;
}
ptr = name + PATH_MAX - 1;
ptr[0] = '\0';
/* * Walk up the subvolume trees in the tree of tree roots by root * backrefs until we hit the top-level subvolume.
*/ while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
key.objectid = subvol_objectid;
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = (u64)-1;
ret = btrfs_search_backwards(root, &key, path); if (ret < 0) { goto err;
} elseif (ret > 0) {
ret = -ENOENT; goto err;
}
fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true); if (IS_ERR(fs_root)) {
ret = PTR_ERR(fs_root);
fs_root = NULL; goto err;
}
/* * Walk up the filesystem tree by inode refs until we hit the * root directory.
*/ while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
key.objectid = dirid;
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
ret = btrfs_search_backwards(fs_root, &key, path); if (ret < 0) { goto err;
} elseif (ret > 0) {
ret = -ENOENT; goto err;
}
path = btrfs_alloc_path(); if (!path) return -ENOMEM;
/* * Find the "default" dir item which points to the root item that we * will mount by default if we haven't been given a specific subvolume * to mount.
*/
dir_id = btrfs_super_root_dir(fs_info->super_copy);
di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0); if (IS_ERR(di)) {
btrfs_free_path(path); return PTR_ERR(di);
} if (!di) { /* * Ok the default dir item isn't there. This is weird since * it's always been there, but don't freak out, just try and * mount the top-level subvolume.
*/
btrfs_free_path(path);
*objectid = BTRFS_FS_TREE_OBJECTID; return 0;
}
ret = super_setup_bdi(sb); if (ret) {
btrfs_err(fs_info, "super_setup_bdi failed"); return ret;
}
ret = open_ctree(sb, fs_devices); if (ret) {
btrfs_err(fs_info, "open_ctree failed: %d", ret); return ret;
}
btrfs_emit_options(fs_info, NULL);
inode = btrfs_iget(BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root); if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
btrfs_handle_fs_error(fs_info, ret, NULL); goto fail_close;
}
sb->s_root = d_make_root(&inode->vfs_inode); if (!sb->s_root) {
ret = -ENOMEM; goto fail_close;
}
sb->s_flags |= SB_ACTIVE; return 0;
fail_close:
close_ctree(fs_info); return ret;
}
/*
 * ->sync_fs superblock callback.
 *
 * When @wait is zero, only kick off asynchronous writeback of the btree
 * pages.  When @wait is non-zero, wait for ordered extents and commit the
 * current transaction (or start one if there are pending changes that must
 * go through commit).
 *
 * Returns 0 on success or a negative errno from transaction start/commit.
 */
int btrfs_sync_fs(struct super_block *sb, int wait)
{
    struct btrfs_trans_handle *trans;
    struct btrfs_fs_info *fs_info = btrfs_sb(sb);
    struct btrfs_root *root = fs_info->tree_root;

    trace_btrfs_sync_fs(fs_info, wait);

    if (!wait) {
        /* Async path: just start writeback of dirty btree pages. */
        filemap_flush(fs_info->btree_inode->i_mapping);
        return 0;
    }

    /* Wait for all ordered extents on all roots before committing. */
    btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);

    trans = btrfs_attach_transaction_barrier(root);
    if (IS_ERR(trans)) {
        /* no transaction, don't bother */
        if (PTR_ERR(trans) == -ENOENT) {
            /*
             * Exit unless we have some pending changes
             * that need to go through commit
             */
            if (!test_bit(BTRFS_FS_NEED_TRANS_COMMIT,
                          &fs_info->flags))
                return 0;
            /*
             * A non-blocking test if the fs is frozen. We must not
             * start a new transaction here otherwise a deadlock
             * happens. The pending operations are delayed to the
             * next commit after thawing.
             */
            if (sb_start_write_trylock(sb))
                sb_end_write(sb);
            else
                return 0;
            trans = btrfs_start_transaction(root, 0);
        }
        /* Covers both the attach and the start-transaction error cases. */
        if (IS_ERR(trans))
            return PTR_ERR(trans);
    }
    return btrfs_commit_transaction(trans);
}
ret = 0; if (!is_subvolume_inode(root_inode)) {
btrfs_err(fs_info, "'%s' is not a valid subvolume",
subvol_name);
ret = -EINVAL;
} if (subvol_objectid && root_objectid != subvol_objectid) { /* * This will also catch a race condition where a * subvolume which was passed by ID is renamed and * another subvolume is renamed over the old location.
*/
btrfs_err(fs_info, "subvol '%s' does not match subvolid %llu",
subvol_name, subvol_objectid);
ret = -EINVAL;
} if (ret) {
dput(root);
root = ERR_PTR(ret);
deactivate_locked_super(s);
}
}
/* * We need to cleanup all defragable inodes if the autodefragment is * close or the filesystem is read only.
*/ if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
(!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || sb_rdonly(fs_info->sb))) {
btrfs_cleanup_defrag_inodes(fs_info);
}
/* If we toggled discard async */ if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
btrfs_test_opt(fs_info, DISCARD_ASYNC))
btrfs_discard_resume(fs_info); elseif (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
!btrfs_test_opt(fs_info, DISCARD_ASYNC))
btrfs_discard_cleanup(fs_info);
/* If we toggled space cache */ if (cache_opt != btrfs_free_space_cache_v1_active(fs_info))
btrfs_set_free_space_cache_v1_active(fs_info, cache_opt);
}
/*
 * Transition a mounted filesystem from read-only to read-write.
 *
 * Performs the safety checks that must pass before any write is allowed
 * (no prior fatal error, enough rw devices, no unreplayed tree-log), then
 * runs the pre-rw mount work, clears the RDONLY state and resumes discard.
 *
 * Returns 0 on success or a negative errno on failure.
 *
 * NOTE(review): fixed the extraction-fused "staticint" keyword.
 */
static int btrfs_remount_rw(struct btrfs_fs_info *fs_info)
{
    int ret;

    if (BTRFS_FS_ERROR(fs_info)) {
        btrfs_err(fs_info,
                  "remounting read-write after error is not allowed");
        return -EINVAL;
    }

    if (fs_info->fs_devices->rw_devices == 0)
        return -EACCES;

    if (!btrfs_check_rw_degradable(fs_info, NULL)) {
        btrfs_warn(fs_info,
                   "too many missing devices, writable remount is not allowed");
        return -EACCES;
    }

    if (btrfs_super_log_root(fs_info->super_copy) != 0) {
        btrfs_warn(fs_info,
                   "mount required to replay tree-log, cannot remount read-write");
        return -EINVAL;
    }

    /*
     * NOTE: when remounting with a change that does writes, don't put it
     * anywhere above this point, as we are not sure to be safe to write
     * until we pass the above checks.
     */
    ret = btrfs_start_pre_rw_mount(fs_info);
    if (ret)
        return ret;

    btrfs_clear_sb_rdonly(fs_info->sb);

    set_bit(BTRFS_FS_OPEN, &fs_info->flags);

    /*
     * If we've gone from readonly -> read-write, we need to get our
     * sync/async discard lists in the right state.
     */
    btrfs_discard_resume(fs_info);

    return 0;
}
staticint btrfs_remount_ro(struct btrfs_fs_info *fs_info)
{ /* * This also happens on 'umount -rf' or on shutdown, when the * filesystem is busy.
*/
cancel_work_sync(&fs_info->async_reclaim_work);
cancel_work_sync(&fs_info->async_data_reclaim_work);
btrfs_discard_cleanup(fs_info);
/* Wait for the uuid_scan task to finish */
down(&fs_info->uuid_tree_rescan_sem); /* Avoid complains from lockdep et al. */
up(&fs_info->uuid_tree_rescan_sem);
btrfs_set_sb_rdonly(fs_info->sb);
/* * Setting SB_RDONLY will put the cleaner thread to sleep at the next * loop if it's already active. If it's already asleep, we'll leave * unused block groups on disk until we're mounted read-write again * unless we clean them up here.
*/
btrfs_delete_unused_bgs(fs_info);
/* * The cleaner task could be already running before we set the flag * BTRFS_FS_STATE_RO (and SB_RDONLY in the superblock). We must make * sure that after we finish the remount, i.e. after we call * btrfs_commit_super(), the cleaner can no longer start a transaction * - either because it was dropping a dead root, running delayed iputs * or deleting an unused block group (the cleaner picked a block * group from the list of unused block groups before we were able to * in the previous call to btrfs_delete_unused_bgs()).
*/
wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING, TASK_UNINTERRUPTIBLE);
/* * We've set the superblock to RO mode, so we might have made the * cleaner task sleep without running all pending delayed iputs. Go * through all the delayed iputs here, so that if an unmount happens * without remounting RW we don't end up at finishing close_ctree() * with a non-empty list of delayed iputs.
*/
btrfs_run_delayed_iputs(fs_info);
/* * Pause the qgroup rescan worker if it is running. We don't want it to * be still running after we are in RO mode, as after that, by the time * we unmount, it might have left a transaction open, so we would leak * the transaction and/or crash.
*/
btrfs_qgroup_wait_for_completion(fs_info, false);
/* * This is our "bind mount" trick, we don't want to allow the user to do * anything other than mount a different ro/rw and a different subvol, * all of the mount options should be maintained.
*/ if (mount_reconfigure)
ctx->mount_opt = old_ctx.mount_opt;
if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
(bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
(!sb_rdonly(sb) || (fc->sb_flags & SB_RDONLY))) {
btrfs_warn(fs_info, "remount supports changing free space tree only from RO to RW"); /* Make sure free space cache options match the state on disk. */ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE);
btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
} if (btrfs_free_space_cache_v1_active(fs_info)) {
btrfs_clear_opt(fs_info->mount_opt, FREE_SPACE_TREE);
btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE);
}
}
ret = 0; if (!sb_rdonly(sb) && (fc->sb_flags & SB_RDONLY))
ret = btrfs_remount_ro(fs_info); elseif (sb_rdonly(sb) && !(fc->sb_flags & SB_RDONLY))
ret = btrfs_remount_rw(fs_info); if (ret) goto restore;
/* * If we set the mask during the parameter parsing VFS would reject the * remount. Here we can set the mask and the value will be updated * appropriately.
*/ if ((fc->sb_flags & SB_POSIXACL) != (sb->s_flags & SB_POSIXACL))
fc->sb_flags_mask |= SB_POSIXACL;
/*
 * Sort the devices by max_avail, in which the max free extent size of each
 * device is stored (descending order).
 *
 * NOTE(review): fixed the extraction-fused "staticinlinevoid" keyword.
 */
static inline void btrfs_descending_sort_devices(
                                        struct btrfs_device_info *devices,
                                        size_t nr_devices)
{
    sort(devices, nr_devices, sizeof(struct btrfs_device_info),
         btrfs_cmp_device_free_bytes, NULL);
}
/* * The helper to calc the free space on the devices that can be used to store * file data.
*/ staticinlineint btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
u64 *free_bytes)
{ struct btrfs_device_info *devices_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device;
u64 type;
u64 avail_space;
u64 min_stripe_size; int num_stripes = 1; int i = 0, nr_devices; conststruct btrfs_raid_attr *rattr;
/* * We aren't under the device list lock, so this is racy-ish, but good * enough for our purposes.
*/
nr_devices = fs_info->fs_devices->open_devices; if (!nr_devices) {
smp_mb();
nr_devices = fs_info->fs_devices->open_devices;
ASSERT(nr_devices); if (!nr_devices) {
*free_bytes = 0; return 0;
}
}
devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
GFP_KERNEL); if (!devices_info) return -ENOMEM;
/* calc min stripe number for data space allocation */
type = btrfs_data_alloc_profile(fs_info);
rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)];
/* align with stripe_len */
avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN);
/* * Ensure we have at least min_stripe_size on top of the * reserved space on the device.
*/ if (avail_space <= BTRFS_DEVICE_RANGE_RESERVED + min_stripe_size) continue;
/* * Calculate numbers for 'df', pessimistic in case of mixed raid profiles. * * If there's a redundant raid level at DATA block groups, use the respective * multiplier to scale the sizes. * * Unused device space usage is based on simulating the chunk allocator * algorithm that respects the device sizes and order of allocations. This is * a close approximation of the actual use but there are other factors that may * change the result (like a new metadata chunk). * * If metadata is exhausted, f_bavail will be 0.
*/ staticint btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{ struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); struct btrfs_super_block *disk_super = fs_info->super_copy; struct btrfs_space_info *found;
u64 total_used = 0;
u64 total_free_data = 0;
u64 total_free_meta = 0;
u32 bits = fs_info->sectorsize_bits;
__be32 *fsid = (__be32 *)fs_info->fs_devices->fsid; unsigned factor = 1; struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; int ret;
u64 thresh = 0; int mixed = 0;
list_for_each_entry(found, &fs_info->space_info, list) { if (found->flags & BTRFS_BLOCK_GROUP_DATA) { int i;
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { if (!list_empty(&found->block_groups[i]))
factor = btrfs_bg_type_to_factor(
btrfs_raid_array[i].bg_flag);
}
}
/* * Metadata in mixed block group profiles are accounted in data
*/ if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) { if (found->flags & BTRFS_BLOCK_GROUP_DATA)
mixed = 1; else
total_free_meta += found->disk_total -
found->disk_used;
}
/* * We calculate the remaining metadata space minus global reserve. If * this is (supposedly) smaller than zero, there's no space. But this * does not hold in practice, the exhausted state happens where's still * some positive delta. So we apply some guesswork and compare the * delta to a 4M threshold. (Practically observed delta was ~2M.) * * We probably cannot calculate the exact threshold value because this * depends on the internal reservations requested by various * operations, so some operations that consume a few metadata will * succeed even if the Avail is zero. But this is better than the other * way around.
*/
thresh = SZ_4M;
/* * We only want to claim there's no available space if we can no longer * allocate chunks for our metadata profile and our global reserve will * not fit in the free metadata space. If we aren't ->full then we * still can allocate chunks and thus are fine using the currently * calculated f_bavail.
*/ if (!mixed && block_rsv->space_info->full &&
(total_free_meta < thresh || total_free_meta - thresh < block_rsv->size))
buf->f_bavail = 0;
/* We treat it as constant endianness (it doesn't matter _which_) because we want the fsid to come out the same whether mounted
on a big-endian or little-endian host */
buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); /* Mask in the root object ID too, to disambiguate subvols */
buf->f_fsid.val[0] ^= btrfs_root_id(BTRFS_I(d_inode(dentry))->root) >> 32;
buf->f_fsid.val[1] ^= btrfs_root_id(BTRFS_I(d_inode(dentry))->root);
/* * With 'true' passed to btrfs_scan_one_device() (mount time) we expect * either a valid device or an error.
*/
device = btrfs_scan_one_device(fc->source, true);
ASSERT(device != NULL); if (IS_ERR(device)) {
mutex_unlock(&uuid_mutex); return PTR_ERR(device);
}
fs_devices = device->fs_devices; /* * We cannot hold uuid_mutex calling sget_fc(), it will lead to a * locking order reversal with s_umount. * * So here we increase the holding number of fs_devices, this will ensure * the fs_devices itself won't be freed.
*/
btrfs_fs_devices_inc_holding(fs_devices);
fs_info->fs_devices = fs_devices;
mutex_unlock(&uuid_mutex);
sb = sget_fc(fc, btrfs_fc_test_super, set_anon_super_fc); if (IS_ERR(sb)) {
mutex_lock(&uuid_mutex);
btrfs_fs_devices_dec_holding(fs_devices); /* * Since the fs_devices is not opened, it can be freed at any * time after unlocking uuid_mutex. We need to avoid double * free through put_fs_context()->btrfs_free_fs_info(). * So here we reset fs_info->fs_devices to NULL, and let the * regular fs_devices reclaim path to handle it. * * This applies to all later branches where no fs_devices is * opened.
*/
fs_info->fs_devices = NULL;
mutex_unlock(&uuid_mutex); return PTR_ERR(sb);
}
if (sb->s_root) { /* * Not the first mount of the fs thus got an existing super block. * Will reuse the returned super block, fs_info and fs_devices. * * fc->s_fs_info is not touched and will be later freed by * put_fs_context() through btrfs_free_fs_context().
*/
ASSERT(fc->s_fs_info == fs_info);
mutex_lock(&uuid_mutex);
btrfs_fs_devices_dec_holding(fs_devices);
fs_info->fs_devices = NULL;
mutex_unlock(&uuid_mutex); /* * At this stage we may have RO flag mismatch between * fc->sb_flags and sb->s_flags. Caller should detect such * mismatch and reconfigure with sb->s_umount rwsem held if * needed.
*/
} else { struct block_device *bdev;
/* * The first mount of the fs thus a new superblock, fc->s_fs_info * must be NULL, and the ownership of our fs_info and fs_devices is * transferred to the super block.
*/
ASSERT(fc->s_fs_info == NULL);
mutex_lock(&uuid_mutex);
btrfs_fs_devices_dec_holding(fs_devices);
ret = btrfs_open_devices(fs_devices, mode, sb); if (ret < 0)
fs_info->fs_devices = NULL;
mutex_unlock(&uuid_mutex); if (ret < 0) {
deactivate_locked_super(sb); return ret;
} if (!(fc->sb_flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
deactivate_locked_super(sb); return -EACCES;
}
set_device_specific_options(fs_info);
bdev = fs_devices->latest_dev->bdev;
snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
shrinker_debugfs_rename(sb->s_shrink, "sb-btrfs:%s", sb->s_id);
ret = btrfs_fill_super(sb, fs_devices); if (ret) {
deactivate_locked_super(sb); return ret;
}
}
btrfs_clear_oneshot_options(fs_info);
fc->root = dget(sb->s_root); return 0;
}
/* * Ever since commit 0723a0473fb4 ("btrfs: allow mounting btrfs subvolumes * with different ro/rw options") the following works: * * (i) mount /dev/sda3 -o subvol=foo,ro /mnt/foo * (ii) mount /dev/sda3 -o subvol=bar,rw /mnt/bar * * which looks nice and innocent but is actually pretty intricate and deserves * a long comment. * * On another filesystem a subvolume mount is close to something like: * * (iii) # create rw superblock + initial mount * mount -t xfs /dev/sdb /opt/ * * # create ro bind mount * mount --bind -o ro /opt/foo /mnt/foo * * # unmount initial mount * umount /opt * * Of course, there's some special subvolume sauce and there's the fact that the * sb->s_root dentry is really swapped after mount_subtree(). But conceptually * it's very close and will help us understand the issue. * * The old mount API didn't cleanly distinguish between a mount being made ro * and a superblock being made ro. The only way to change the ro state of * either object was by passing ms_rdonly. If a new mount was created via * mount(2) such as: * * mount("/dev/sdb", "/mnt", "xfs", ms_rdonly, null); * * the MS_RDONLY flag being specified had two effects: * * (1) MNT_READONLY was raised -> the resulting mount got * @mnt->mnt_flags |= MNT_READONLY raised. * * (2) MS_RDONLY was passed to the filesystem's mount method and the filesystems * made the superblock ro. Note, how SB_RDONLY has the same value as * ms_rdonly and is raised whenever MS_RDONLY is passed through mount(2). * * Creating a subtree mount via (iii) ends up leaving a rw superblock with a * subtree mounted ro. * * But consider the effect on the old mount API on btrfs subvolume mounting * which combines the distinct step in (iii) into a single step. * * By issuing (i) both the mount and the superblock are turned ro. Now when (ii) * is issued the superblock is ro and thus even if the mount created for (ii) is * rw it wouldn't help. 
Hence, btrfs needed to transition the superblock from ro * to rw for (ii) which it did using an internal remount call. * * IOW, subvolume mounting was inherently complicated due to the ambiguity of * MS_RDONLY in mount(2). Note, this ambiguity has mount(8) always translate * "ro" to MS_RDONLY. IOW, in both (i) and (ii) "ro" becomes MS_RDONLY when * passed by mount(8) to mount(2). * * Enter the new mount API. The new mount API disambiguates making a mount ro * and making a superblock ro. * * (3) To turn a mount ro the MOUNT_ATTR_ONLY flag can be used with either * fsmount() or mount_setattr() this is a pure VFS level change for a * specific mount or mount tree that is never seen by the filesystem itself. * * (4) To turn a superblock ro the "ro" flag must be used with * fsconfig(FSCONFIG_SET_FLAG, "ro"). This option is seen by the filesystem * in fc->sb_flags. * * But, currently the util-linux mount command already utilizes the new mount * API and is still setting fsconfig(FSCONFIG_SET_FLAG, "ro") no matter if it's * btrfs or not, setting the whole super block RO. To make per-subvolume mounting * work with different options work we need to keep backward compatibility.
*/ staticint btrfs_reconfigure_for_mount(struct fs_context *fc)
{ int ret = 0;
if (!(fc->sb_flags & SB_RDONLY) && (fc->root->d_sb->s_flags & SB_RDONLY))
ret = btrfs_reconfigure(fc);
/* * Setup a dummy root and fs_info for test/set super. This is because * we don't actually fill this stuff out until open_ctree, but we need * then open_ctree will properly initialize the file system specific * settings later. btrfs_init_fs_info initializes the static elements * of the fs_info (locks and such) to make cleanup easier if we find a * superblock with our given fs_devices later on at sget() time.
*/
fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL); if (!fs_info) return -ENOMEM;
fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); if (!fs_info->super_copy || !fs_info->super_for_commit) { /* * Dont call btrfs_free_fs_info() to free it as it's still * initialized partially.
*/
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
kvfree(fs_info); return -ENOMEM;
}
btrfs_init_fs_info(fs_info);
dup_fc = vfs_dup_fs_context(fc); if (IS_ERR(dup_fc)) {
btrfs_free_fs_info(fs_info); return PTR_ERR(dup_fc);
}
/* * When we do the sget_fc this gets transferred to the sb, so we only * need to set it on the dup_fc as this is what creates the super block.
*/
dup_fc->s_fs_info = fs_info;
ret = btrfs_get_tree_super(dup_fc); if (ret) goto error;
ret = btrfs_reconfigure_for_mount(dup_fc);
up_write(&dup_fc->root->d_sb->s_umount); if (ret) goto error;
mnt = vfs_create_mount(dup_fc);
put_fs_context(dup_fc); if (IS_ERR(mnt)) return PTR_ERR(mnt);
/* * This free's ->subvol_name, because if it isn't set we have to * allocate a buffer to hold the subvol_name, so we just drop our * reference to it here.
*/
dentry = mount_subvol(ctx->subvol_name, ctx->subvol_objectid, mnt);
ctx->subvol_name = NULL; if (IS_ERR(dentry)) return PTR_ERR(dentry);
/*
 * NOTE(review): the text below is website boilerplate (a German disclaimer
 * about the hosting page's content and its experimental syntax highlighting)
 * that was captured along with the source.  It is not part of the kernel
 * code and should be removed; it is preserved here, commented out, for
 * reference.  English translation: "The information on this website was
 * carefully compiled to the best of our knowledge.  However, neither
 * completeness, nor correctness, nor quality of the provided information is
 * guaranteed.  Note: the colored syntax rendering and the measurement are
 * still experimental."
 *
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen
 * sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch
 * Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.
 * Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind noch
 * experimentell.
 */