/* Upper bound on pages per request; may be raised up to this limit at init */
unsigned int fuse_max_pages_limit = 256;
/* default is no timeout */
unsigned int fuse_default_req_timeout;
unsigned int fuse_max_req_timeout;

/*
 * Global limit on the number of backgrounded requests an unprivileged
 * user may configure.  0 means "derive a default from available memory"
 * (see sanitize_global_limit()).  Exposed as a writable module parameter;
 * writes go through set_global_limit().
 */
unsigned int max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
		  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

/*
 * Global limit on the congestion threshold an unprivileged user may
 * configure; same sanitization path as max_user_bgreq.
 */
unsigned int max_user_congthresh;
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
		  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");
/* Default block size for "fuseblk" mounts (see OPT_BLKSIZE handling) */
#define FUSE_DEFAULT_BLKSIZE 512

/** Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)
/*
 * NOTE(review): fragment — the enclosing function's opening is not visible
 * in this chunk.  The code tears down per-inode FUSE state (DAX mappings,
 * pending FORGETs, submount lookup references), presumably during inode
 * eviction — confirm against the full file.
 */
if (FUSE_IS_DAX(inode))
fuse_dax_inode_cleanup(inode); if (fi->nlookup) {
/* Flush the accumulated lookup count to the server as a FORGET */
fuse_queue_forget(fc, fi->forget, fi->nodeid,
fi->nlookup);
fi->forget = NULL;
}
if (fi->submount_lookup) {
fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
fi->submount_lookup = NULL;
} /* * Evict of non-deleted inode may race with outstanding * LOOKUP/READDIRPLUS requests and result in inconsistency when * the request finishes. Deal with that here by bumping a * counter that can be compared to the starting value.
*/ if (inode->i_nlink > 0)
atomic64_inc(&fc->evict_ctr);
} if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
/* Regular files should have no writeback/io-cache state left here */
WARN_ON(fi->iocachectr != 0);
WARN_ON(!list_empty(&fi->write_files));
WARN_ON(!list_empty(&fi->queued_writes));
}
}
/*
 * NOTE(review): fragment — tail of a remount/reconfigure handler.
 * Mandatory locking (SB_MANDLOCK) is rejected with -EINVAL.
 */
sync_filesystem(sb); if (fsc->sb_flags & SB_MANDLOCK) return -EINVAL;
return 0;
}
/*
 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
 * so that it will fit.
 *
 * When ino_t is narrower than u64, fold the truncated high half into the
 * low half with XOR so that distinct 64-bit inode numbers are less likely
 * to collide after the narrowing conversion.
 */
static ino_t fuse_squash_ino(u64 ino64)
{
	ino_t squashed = (ino_t) ino64;

	if (sizeof(ino_t) < sizeof(u64)) {
		unsigned int high_shift = (sizeof(u64) - sizeof(ino_t)) * 8;

		squashed ^= ino64 >> high_shift;
	}
	return squashed;
}
/*
 * NOTE(review): fragment — interior of an attribute-refresh helper whose
 * head is not visible in this chunk.  It copies server-supplied attributes
 * (atime/mtime/ctime, btime, blksize, mode, ino) into the in-core inode,
 * honouring the local cache mask.
 */
/* * Clear basic stats from invalid mask. * * Don't do this if this is coming from a fuse_iget() call and there * might have been a racing evict which would've invalidated the result * if the attr_version would've been preserved. * * !evict_ctr -> this is create * fi->attr_version != 0 -> this is not a new inode * evict_ctr == fuse_get_evict_ctr() -> no evicts while during request
*/ if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc))
set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
inode_set_atime(inode, attr->atime, attr->atimensec); /* mtime from server may be stale due to local buffered write */ if (!(cache_mask & STATX_MTIME)) {
inode_set_mtime(inode, attr->mtime, attr->mtimensec);
} if (!(cache_mask & STATX_CTIME)) {
inode_set_ctime(inode, attr->ctime, attr->ctimensec);
} if (sx) { /* Sanitize nsecs */
/* Clamp nanoseconds reported by the server into the valid range */
sx->btime.tv_nsec =
min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
/* * Btime has been queried, cache is valid (whether or not btime * is available or not) so clear STATX_BTIME from inval_mask. * * Availability of the btime attribute is indicated in * FUSE_I_BTIME
*/
set_mask_bits(&fi->inval_mask, STATX_BTIME, 0); if (sx->mask & STATX_BTIME) {
set_bit(FUSE_I_BTIME, &fi->state);
fi->i_btime.tv_sec = sx->btime.tv_sec;
fi->i_btime.tv_nsec = sx->btime.tv_nsec;
}
}
/* Fall back to the connection-wide blkbits when the server sends none */
if (attr->blksize)
fi->cached_i_blkbits = ilog2(attr->blksize); else
fi->cached_i_blkbits = fc->blkbits;
/* * Don't set the sticky bit in i_mode, unless we want the VFS * to check permissions. This prevents failures due to the * check in may_delete().
*/
fi->orig_i_mode = inode->i_mode; if (!fc->default_permissions)
inode->i_mode &= ~S_ISVTX;
fi->orig_ino = attr->ino;
/* * We are refreshing inode data and it is possible that another * client set suid/sgid or security.capability xattr. So clear * S_NOSEC. Ideally, we could have cleared it only if suid/sgid * was set or if security.capability xattr was set. But we don't * know if security.capability has been set or not. So clear it * anyway. Its less efficient but should be safe.
*/
inode->i_flags &= ~S_NOSEC;
}
/*
 * NOTE(review): fragment — head and tail of the enclosing function are
 * not visible.  Under fi->lock, reconcile the server-reported size with
 * the locally cached i_size depending on the writeback cache mask.
 */
spin_lock(&fi->lock); /* * In case of writeback_cache enabled, writes update mtime, ctime and * may update i_size. In these cases trust the cached value in the * inode.
*/
cache_mask = fuse_get_cache_mask(inode); if (cache_mask & STATX_SIZE)
attr->size = i_size_read(inode);
oldsize = inode->i_size; /* * In case of writeback_cache enabled, the cached writes beyond EOF * extend local i_size without keeping userspace server in sync. So, * attr->size coming from server can be stale. We cannot trust it.
*/ if (!(cache_mask & STATX_SIZE))
i_size_write(inode, attr->size);
spin_unlock(&fi->lock);
if (!cache_mask && S_ISREG(inode->i_mode)) { bool inval = false;
/*
 * NOTE(review): fragment — appears to belong to inode lookup/creation;
 * the surrounding function is not visible in this chunk.
 */
/* * Auto mount points get their node id from the submount root, which is * not a unique identifier within this filesystem. * * To avoid conflicts, do not place submount points into the inode hash * table.
*/ if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
S_ISDIR(attr->mode)) { struct fuse_inode *fi;
/*
 * NOTE(review): fragment — interior of a sync-bucket rotation under
 * fc->lock; the enclosing function's head is not visible here.
 */
/* * Completion of new bucket depends on completion of this bucket, so add * one more count.
*/
atomic_inc(&new_bucket->count);
rcu_assign_pointer(fc->curr_bucket, new_bucket);
spin_unlock(&fc->lock); /* * Drop initial active count. At this point if all writes in this and * ancestor buckets complete, the count will go to zero and this task * will be woken up.
*/
atomic_dec(&bucket->count);
/*
 * NOTE(review): fragment — interior of a mount-option parser; the switch
 * dispatches on the fs_parameter result.  Each case validates its value
 * and records it in the parse context (ctx) or fs_context (fsc).
 */
switch (opt) { case OPT_SOURCE: if (fsc->source) return invalfc(fsc, "Multiple sources specified");
fsc->source = param->string;
param->string = NULL; break;
case OPT_SUBTYPE: if (ctx->subtype) return invalfc(fsc, "Multiple subtypes specified");
ctx->subtype = param->string;
param->string = NULL; return 0;
case OPT_FD:
ctx->fd = result.uint_32;
ctx->fd_present = true; break;
case OPT_ROOTMODE: if (!fuse_valid_type(result.uint_32)) return invalfc(fsc, "Invalid rootmode");
ctx->rootmode = result.uint_32;
ctx->rootmode_present = true; break;
case OPT_USER_ID:
kuid = result.uid; /* * The requested uid must be representable in the * filesystem's idmapping.
*/ if (!kuid_has_mapping(fsc->user_ns, kuid)) return invalfc(fsc, "Invalid user_id");
ctx->user_id = kuid;
ctx->user_id_present = true; break;
case OPT_GROUP_ID:
kgid = result.gid; /* * The requested gid must be representable in the * filesystem's idmapping.
*/ if (!kgid_has_mapping(fsc->user_ns, kgid)) return invalfc(fsc, "Invalid group_id");
ctx->group_id = kgid;
ctx->group_id_present = true; break;
case OPT_DEFAULT_PERMISSIONS:
ctx->default_permissions = true; break;
case OPT_ALLOW_OTHER:
ctx->allow_other = true; break;
case OPT_MAX_READ:
ctx->max_read = result.uint_32; break;
/* blksize is only meaningful for block-device-backed ("fuseblk") mounts */
case OPT_BLKSIZE: if (!ctx->is_bdev) return invalfc(fsc, "blksize only supported for fuseblk");
ctx->blksize = result.uint_32; break;
/*
 * NOTE(review): fragment — tail of a processing-queue initializer:
 * hashed processing lists, the io list, and the connected flag.
 */
spin_lock_init(&fpq->lock); for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
INIT_LIST_HEAD(&fpq->processing[i]);
INIT_LIST_HEAD(&fpq->io);
fpq->connected = 1;
}
/*
 * NOTE(review): fragment — tail of an export-ops parent lookup: resolve
 * ".." for child_inode via the server, map -ENOENT to -ESTALE, and return
 * a dentry alias for the parent.
 */
err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
&dotdot_name, &outarg, &inode); if (err) { if (err == -ENOENT) return ERR_PTR(-ESTALE); return ERR_PTR(err);
}
/* Non-root parents may have stale dentries; invalidate the entry cache */
parent = d_obtain_alias(inode); if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
fuse_invalidate_entry_cache(parent);
return parent;
}
/* only for fid encoding; no support for file handle */ staticconststruct export_operations fuse_export_fid_operations = {
.encode_fh = fuse_encode_fh,
};
staticvoid sanitize_global_limit(unsignedint *limit)
{ /* * The default maximum number of async requests is calculated to consume * 1/2^13 of the total memory, assuming 392 bytes per request.
*/ if (*limit == 0)
*limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;
if (*limit >= 1 << 16)
*limit = (1 << 16) - 1;
}
/*
 * NOTE(review): fragment — only the opening of this module-parameter
 * setter is visible; its body is truncated in this chunk.  Registered as
 * the 'set' hook in the module_param_call() declarations above.
 */
staticint set_global_limit(constchar *val, conststruct kernel_param *kp)
{ int rv;
/*
 * NOTE(review): fragment — interior of the FUSE_INIT reply processing
 * (feature-flag negotiation into struct fuse_conn) followed by the
 * FUSE_INIT request argument setup.  The enclosing function heads/tails
 * are not visible in this chunk; statement order is significant, so the
 * code is left untouched and only annotated.
 */
if (error || arg->major != FUSE_KERNEL_VERSION)
ok = false; else { unsignedlong ra_pages; unsignedint timeout = 0;
process_init_limits(fc, arg);
/* Protocol >= 7.6: negotiate feature flags advertised by the server */
if (arg->minor >= 6) {
u64 flags = arg->flags;
/* FUSE_INIT_EXT: upper 32 feature bits live in arg->flags2 */
if (flags & FUSE_INIT_EXT)
flags |= (u64) arg->flags2 << 32;
ra_pages = arg->max_readahead / PAGE_SIZE; if (flags & FUSE_ASYNC_READ)
fc->async_read = 1; if (!(flags & FUSE_POSIX_LOCKS))
fc->no_lock = 1; if (arg->minor >= 17) { if (!(flags & FUSE_FLOCK_LOCKS))
fc->no_flock = 1;
} else { if (!(flags & FUSE_POSIX_LOCKS))
fc->no_flock = 1;
} if (flags & FUSE_ATOMIC_O_TRUNC)
fc->atomic_o_trunc = 1; if (arg->minor >= 9) { /* LOOKUP has dependency on proto version */ if (flags & FUSE_EXPORT_SUPPORT)
fc->export_support = 1;
} if (flags & FUSE_BIG_WRITES)
fc->big_writes = 1; if (flags & FUSE_DONT_MASK)
fc->dont_mask = 1; if (flags & FUSE_AUTO_INVAL_DATA)
fc->auto_inval_data = 1; elseif (flags & FUSE_EXPLICIT_INVAL_DATA)
fc->explicit_inval_data = 1; if (flags & FUSE_DO_READDIRPLUS) {
fc->do_readdirplus = 1; if (flags & FUSE_READDIRPLUS_AUTO)
fc->readdirplus_auto = 1;
} if (flags & FUSE_ASYNC_DIO)
fc->async_dio = 1; if (flags & FUSE_WRITEBACK_CACHE)
fc->writeback_cache = 1; if (flags & FUSE_PARALLEL_DIROPS)
fc->parallel_dirops = 1; if (flags & FUSE_HANDLE_KILLPRIV)
fc->handle_killpriv = 1; if (arg->time_gran && arg->time_gran <= 1000000000)
fm->sb->s_time_gran = arg->time_gran; if ((flags & FUSE_POSIX_ACL)) {
/* POSIX ACLs require kernel-side permission checking */
fc->default_permissions = 1;
fc->posix_acl = 1;
} if (flags & FUSE_CACHE_SYMLINKS)
fc->cache_symlinks = 1; if (flags & FUSE_ABORT_ERROR)
fc->abort_err = 1; if (flags & FUSE_MAX_PAGES) {
fc->max_pages =
min_t(unsignedint, fc->max_pages_limit,
max_t(unsignedint, arg->max_pages, 1));
/* * PATH_MAX file names might need two pages for * ops like rename
*/ if (fc->max_pages > 1)
fc->name_max = FUSE_NAME_MAX;
} if (IS_ENABLED(CONFIG_FUSE_DAX)) { if (flags & FUSE_MAP_ALIGNMENT &&
!fuse_dax_check_alignment(fc, arg->map_alignment)) {
ok = false;
} if (flags & FUSE_HAS_INODE_DAX)
fc->inode_dax = 1;
} if (flags & FUSE_HANDLE_KILLPRIV_V2) {
fc->handle_killpriv_v2 = 1;
fm->sb->s_flags |= SB_NOSEC;
} if (flags & FUSE_SETXATTR_EXT)
fc->setxattr_ext = 1; if (flags & FUSE_SECURITY_CTX)
fc->init_security = 1; if (flags & FUSE_CREATE_SUPP_GROUP)
fc->create_supp_group = 1; if (flags & FUSE_DIRECT_IO_ALLOW_MMAP)
fc->direct_io_allow_mmap = 1; /* * max_stack_depth is the max stack depth of FUSE fs, * so it has to be at least 1 to support passthrough * to backing files. * * with max_stack_depth > 1, the backing files can be * on a stacked fs (e.g. overlayfs) themselves and with * max_stack_depth == 1, FUSE fs can be stacked as the * underlying fs of a stacked fs (e.g. overlayfs). * * Also don't allow the combination of FUSE_PASSTHROUGH * and FUSE_WRITEBACK_CACHE, current design doesn't handle * them together.
*/ if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) &&
(flags & FUSE_PASSTHROUGH) &&
arg->max_stack_depth > 0 &&
arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH &&
!(flags & FUSE_WRITEBACK_CACHE)) {
fc->passthrough = 1;
fc->max_stack_depth = arg->max_stack_depth;
fm->sb->s_stack_depth = arg->max_stack_depth;
} if (flags & FUSE_NO_EXPORT_SUPPORT)
fm->sb->s_export_op = &fuse_export_fid_operations; if (flags & FUSE_ALLOW_IDMAP) { if (fc->default_permissions)
fm->sb->s_iflags &= ~SB_I_NOIDMAP; else
ok = false;
} if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled())
fc->io_uring = 1;
/* * This is just an information flag for fuse server. No need to check * the reply - server is either sending IORING_OP_URING_CMD or not.
*/ if (fuse_uring_enabled())
flags |= FUSE_OVER_IO_URING;
/*
 * NOTE(review): the lines below appear to belong to a different function
 * (FUSE_INIT request argument setup) — boundary lost in extraction.
 */
ia->args.opcode = FUSE_INIT;
ia->args.in_numargs = 1;
ia->args.in_args[0].size = sizeof(ia->in);
ia->args.in_args[0].value = &ia->in;
ia->args.out_numargs = 1; /* Variable length argument used for backward compatibility with interface version < 7.5. Rest of init_out is zeroed
by do_get_request(), so a short reply is not a problem */
ia->args.out_argvar = true;
ia->args.out_args[0].size = sizeof(ia->out);
ia->args.out_args[0].value = &ia->out;
ia->args.force = true;
ia->args.nocreds = true;
ia->args.end = process_init_reply;
/*
 * NOTE(review): fragment — backing-dev-info setup for a fuse superblock;
 * the enclosing function's head is not visible in this chunk.
 */
if (sb->s_bdev) {
suffix = "-fuseblk"; /* * sb->s_bdi points to blkdev's bdi however we want to redirect * it to our private bdi...
*/
bdi_put(sb->s_bdi);
sb->s_bdi = &noop_backing_dev_info;
}
err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
MINOR(fc->dev), suffix); if (err) return err;
/* fuse does it's own writeback accounting */
sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;
/* * For a single fuse filesystem use max 1% of dirty + * writeback threshold. * * This gives about 1M of write buffer for memory maps on a * machine with 1G and 10% dirty_ratio, which should be more * than enough. * * Privileged users can raise it by writing to * * /sys/class/bdi/<bdi>/max_ratio
*/
bdi_set_max_ratio(sb->s_bdi, 1);
/*
 * NOTE(review): fragment — tail of submount superblock setup: create a
 * duplicate root inode from the parent's attributes and share the
 * parent's submount_lookup reference.
 */
fuse_fill_attr_from_inode(&root_attr, parent_fi);
root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0,
fuse_get_evict_ctr(fm->fc)); /* * This inode is just a duplicate, so it is not looked up and * its nlookup should not be incremented. fuse_iget() does * that, though, so undo it here.
*/
fi = get_fuse_inode(root);
fi->nlookup--;
set_default_d_op(sb, &fuse_dentry_operations);
sb->s_root = d_make_root(root); if (!sb->s_root) return -ENOMEM;
/* * Grab the parent's submount_lookup pointer and take a * reference on the shared nlookup from the parent. This is to * prevent the last forget for this nodeid from getting * triggered until all users have finished with it.
*/
sl = parent_fi->submount_lookup;
WARN_ON(!sl); if (sl) {
refcount_inc(&sl->count);
fi->submount_lookup = sl;
}
return 0;
}
/*
 * NOTE(review): fragment — opening of the submount get_tree handler; the
 * function continues beyond what is visible in this chunk.  Note the
 * mangled 'staticint' token from extraction is preserved as-is.
 */
/* Filesystem context private data holds the FUSE inode of the mount point */ staticint fuse_get_tree_submount(struct fs_context *fsc)
{ struct fuse_mount *fm; struct fuse_inode *mp_fi = fsc->fs_private; struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode); struct super_block *sb; int err;
fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); if (!fm) return -ENOMEM;
fm->fc = fuse_conn_get(fc);
fsc->s_fs_info = fm;
/* sget_fc() consumes s_fs_info on success; destroy fm only if left over */
sb = sget_fc(fsc, NULL, set_anon_super_fc); if (fsc->s_fs_info)
fuse_mount_destroy(fm); if (IS_ERR(sb)) return PTR_ERR(sb);
/* Initialize superblock, making @mp_fi its root */
err = fuse_fill_super_submount(sb, mp_fi); if (err) {
deactivate_locked_super(sb); return err;
}
/*
 * NOTE(review): fragment — interior of common superblock fill logic
 * (blocksize setup, DAX, device install) followed by what appears to be a
 * separate fill_super path validating the fuse device fd.  Function
 * boundaries were lost in extraction; code preserved verbatim.
 */
if (ctx->is_bdev) { #ifdef CONFIG_BLOCK
err = -EINVAL; if (!sb_set_blocksize(sb, ctx->blksize)) goto err; /* * This is a workaround until fuse hooks into iomap for reads. * Use PAGE_SIZE for the blocksize else if the writeback cache * is enabled, buffered writes go through iomap and a read may * overwrite partially written data if blocksize < PAGE_SIZE
*/
fc->blkbits = sb->s_blocksize_bits; if (ctx->blksize != PAGE_SIZE &&
!sb_set_blocksize(sb, PAGE_SIZE)) goto err; #endif
} else {
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
fc->blkbits = sb->s_blocksize_bits;
}
/* Ownership of the subtype string moves from the context to the sb */
sb->s_subtype = ctx->subtype;
ctx->subtype = NULL; if (IS_ENABLED(CONFIG_FUSE_DAX)) {
err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev); if (err) goto err;
}
if (ctx->fudptr) {
err = -ENOMEM;
fud = fuse_dev_alloc_install(fc); if (!fud) goto err_free_dax;
}
if (!ctx->file || !ctx->rootmode_present ||
!ctx->user_id_present || !ctx->group_id_present) return -EINVAL;
/* * Require mount to happen from the same user namespace which * opened /dev/fuse to prevent potential attacks.
*/ if ((ctx->file->f_op != &fuse_dev_operations) ||
(ctx->file->f_cred->user_ns != sb->s_user_ns)) return -EINVAL;
ctx->fudptr = &ctx->file->private_data;
err = fuse_fill_super_common(sb, ctx); if (err) return err; /* file->private_data shall be visible on all CPUs after this */
smp_mb();
fuse_send_init(get_fuse_mount_super(sb)); return 0;
}
/*
 * This is the path where user supplied an already initialized fuse dev. In
 * this case never create a new super if the old one is gone.
 *
 * Used as the 'set' callback for sget_fc(); returning -ENOTCONN makes
 * superblock creation fail instead of instantiating a fresh super.
 */
static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc)
{
	return -ENOTCONN;
}
/*
 * NOTE(review): fragment — tail of the get_tree handler: block-device
 * mounts go through get_tree_bdev(); otherwise either attach to an
 * existing connection via its device fd or create a new nodev super.
 */
if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
err = get_tree_bdev(fsc, fuse_fill_super); goto out;
} /* * While block dev mount can be initialized with a dummy device fd * (found by device name), normal fuse mounts can't
*/
err = -EINVAL; if (!ctx->file) goto out;
/* * Allow creating a fuse mount with an already initialized fuse * connection
*/
fud = READ_ONCE(ctx->file->private_data); if (ctx->file->f_op == &fuse_dev_operations && fud) {
fsc->sget_key = fud->fc;
sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
err = PTR_ERR_OR_ZERO(sb); if (!IS_ERR(sb))
fsc->root = dget(sb->s_root);
} else {
err = get_tree_nodev(fsc, fuse_fill_super);
}
/* s_fs_info still set means sget/fill did not consume the fuse_mount */
out: if (fsc->s_fs_info)
fuse_mount_destroy(fm); if (ctx->file)
fput(ctx->file); return err;
}
/*
 * Extraction residue (German website disclaimer), retained as a comment so
 * the file remains well-formed C: "Die Informationen auf dieser Webseite
 * wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch
 * weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit
 * gestellten Informationen zugesichert." — Bemerkung: (vorverarbeitet)
 */