if (!inode) {
spin_unlock(&object->lock); return;
}
/* * Protects against concurrent use by hook_sb_delete() of the reference * to the underlying inode.
*/
object->underobj = NULL; /* * Makes sure that if the filesystem is concurrently unmounted, * hook_sb_delete() will wait for us to finish iput().
*/
sb = inode->i_sb;
atomic_long_inc(&landlock_superblock(sb)->inode_refs);
spin_unlock(&object->lock); /* * Because object->underobj was not NULL, hook_sb_delete() and * get_inode_object() guarantee that it is safe to reset * landlock_inode(inode)->object while it is not NULL. It is therefore * not necessary to lock inode->i_lock.
*/
rcu_assign_pointer(landlock_inode(inode)->object, NULL); /* * Now, new rules can safely be tied to @inode with get_inode_object().
*/
iput(inode); if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs))
wake_up_var(&landlock_superblock(sb)->inode_refs);
}
/** * is_masked_device_ioctl - Determine whether an IOCTL command is always * permitted with Landlock for device files. These commands can not be * restricted on device files by enforcing a Landlock policy. * * @cmd: The IOCTL command that is supposed to be run. * * By default, any IOCTL on a device file requires the * LANDLOCK_ACCESS_FS_IOCTL_DEV right. However, we blanket-permit some * commands, if: * * 1. The command is implemented in fs/ioctl.c's do_vfs_ioctl(), * not in f_ops->unlocked_ioctl() or f_ops->compat_ioctl(). * * 2. The command is harmless when invoked on devices. * * We also permit commands that do not make sense for devices, but where the * do_vfs_ioctl() implementation returns a more conventional error code. * * Any new IOCTL commands that are implemented in fs/ioctl.c's do_vfs_ioctl() * should be considered for inclusion here. * * Returns: true if the IOCTL @cmd can not be restricted with Landlock for * device files.
*/ static __attribute_const__ bool is_masked_device_ioctl(constunsignedint cmd)
{ switch (cmd) { /* * FIOCLEX, FIONCLEX, FIONBIO and FIOASYNC manipulate the FD's * close-on-exec and the file's buffered-IO and async flags. These * operations are also available through fcntl(2), and are * unconditionally permitted in Landlock.
*/ case FIOCLEX: case FIONCLEX: case FIONBIO: case FIOASYNC: /* * FIOQSIZE queries the size of a regular file, directory, or link. * * We still permit it, because it always returns -ENOTTY for * other file types.
*/ case FIOQSIZE: /* * FIFREEZE and FITHAW freeze and thaw the file system which the * given file belongs to. Requires CAP_SYS_ADMIN. * * These commands operate on the file system's superblock rather * than on the file itself. The same operations can also be * done through any other file or directory on the same file * system, so it is safe to permit these.
*/ case FIFREEZE: case FITHAW: /* * FS_IOC_FIEMAP queries information about the allocation of * blocks within a file. * * This IOCTL command only makes sense for regular files and is * not implemented by devices. It is harmless to permit.
*/ case FS_IOC_FIEMAP: /* * FIGETBSZ queries the file system's block size for a file or * directory. * * This command operates on the file system's superblock rather * than on the file itself. The same operation can also be done * through any other file or directory on the same file system, * so it is safe to permit it.
*/ case FIGETBSZ: /* * FICLONE, FICLONERANGE and FIDEDUPERANGE make files share * their underlying storage ("reflink") between source and * destination FDs, on file systems which support that. * * These IOCTL commands only apply to regular files * and are harmless to permit for device files.
*/ case FICLONE: case FICLONERANGE: case FIDEDUPERANGE: /* * FS_IOC_GETFSUUID and FS_IOC_GETFSSYSFSPATH both operate on * the file system superblock, not on the specific file, so * these operations are available through any other file on the * same file system as well.
*/ case FS_IOC_GETFSUUID: case FS_IOC_GETFSSYSFSPATH: returntrue;
/* * FIONREAD, FS_IOC_GETFLAGS, FS_IOC_SETFLAGS, FS_IOC_FSGETXATTR and * FS_IOC_FSSETXATTR are forwarded to device implementations.
*/
/* * file_ioctl() commands (FIBMAP, FS_IOC_RESVSP, FS_IOC_RESVSP64, * FS_IOC_UNRESVSP, FS_IOC_UNRESVSP64 and FS_IOC_ZERO_RANGE) are * forwarded to device implementations, so not permitted.
*/
/* Other commands are guarded by the access right. */ default: returnfalse;
}
}
/* * is_masked_device_ioctl_compat - same as the helper above, but checking the * "compat" IOCTL commands. * * The IOCTL commands with special handling in compat-mode should behave the * same as their non-compat counterparts.
*/ static __attribute_const__ bool
is_masked_device_ioctl_compat(constunsignedint cmd)
{ switch (cmd) { /* FICLONE is permitted, same as in the non-compat variant. */ case FICLONE: returntrue;
#ifdefined(CONFIG_X86_64) /* * FS_IOC_RESVSP_32, FS_IOC_RESVSP64_32, FS_IOC_UNRESVSP_32, * FS_IOC_UNRESVSP64_32, FS_IOC_ZERO_RANGE_32: not blanket-permitted, * for consistency with their non-compat variants.
*/ case FS_IOC_RESVSP_32: case FS_IOC_RESVSP64_32: case FS_IOC_UNRESVSP_32: case FS_IOC_UNRESVSP64_32: case FS_IOC_ZERO_RANGE_32: #endif
/* * FS_IOC32_GETFLAGS, FS_IOC32_SETFLAGS are forwarded to their device * implementations.
*/ case FS_IOC32_GETFLAGS: case FS_IOC32_SETFLAGS: returnfalse; default: return is_masked_device_ioctl(cmd);
}
}
rcu_read_lock();
retry:
object = rcu_dereference(inode_sec->object); if (object) { if (likely(refcount_inc_not_zero(&object->usage))) {
rcu_read_unlock(); return object;
} /* * We are racing with release_inode(), the object is going * away. Wait for release_inode(), then retry.
*/
spin_lock(&object->lock);
spin_unlock(&object->lock); goto retry;
}
rcu_read_unlock();
/* * If there is no object tied to @inode, then create a new one (without * holding any locks).
*/
new_object = landlock_create_object(&landlock_fs_underops, inode); if (IS_ERR(new_object)) return new_object;
/* * Protects against concurrent calls to get_inode_object() or * hook_sb_delete().
*/
spin_lock(&inode->i_lock); if (unlikely(rcu_access_pointer(inode_sec->object))) { /* Someone else just created the object, bail out and retry. */
spin_unlock(&inode->i_lock);
kfree(new_object);
rcu_read_lock(); goto retry;
}
/* * @inode will be released by hook_sb_delete() on its superblock * shutdown, or by release_inode() when no more ruleset references the * related object.
*/
ihold(inode);
rcu_assign_pointer(inode_sec->object, new_object);
spin_unlock(&inode->i_lock); return new_object;
}
/* All access rights that can be tied to files. */ /* clang-format off */ #define ACCESS_FILE ( \
LANDLOCK_ACCESS_FS_EXECUTE | \
LANDLOCK_ACCESS_FS_WRITE_FILE | \
LANDLOCK_ACCESS_FS_READ_FILE | \
LANDLOCK_ACCESS_FS_TRUNCATE | \
LANDLOCK_ACCESS_FS_IOCTL_DEV) /* clang-format on */
/* * @path: Should have been checked by get_path_from_fd().
*/ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, conststruct path *const path,
access_mask_t access_rights)
{ int err; struct landlock_id id = {
.type = LANDLOCK_KEY_INODE,
};
/* Files only get access rights that make sense. */ if (!d_is_dir(path->dentry) &&
(access_rights | ACCESS_FILE) != ACCESS_FILE) return -EINVAL; if (WARN_ON_ONCE(ruleset->num_layers != 1)) return -EINVAL;
/* Transforms relative access rights to absolute ones. */
access_rights |= LANDLOCK_MASK_ACCESS_FS &
~landlock_get_fs_access_mask(ruleset, 0);
id.key.object = get_inode_object(d_backing_inode(path->dentry)); if (IS_ERR(id.key.object)) return PTR_ERR(id.key.object);
mutex_lock(&ruleset->lock);
err = landlock_insert_rule(ruleset, id, access_rights);
mutex_unlock(&ruleset->lock); /* * No need to check for an error because landlock_insert_rule() * increments the refcount for the new object if needed.
*/
landlock_put_object(id.key.object); return err;
}
/* Access-control management */
/* * The lifetime of the returned rule is tied to @domain. * * Returns NULL if no rule is found or if @dentry is negative.
*/ staticconststruct landlock_rule *
find_rule(conststruct landlock_ruleset *const domain, conststruct dentry *const dentry)
{ conststruct landlock_rule *rule; conststruct inode *inode; struct landlock_id id = {
.type = LANDLOCK_KEY_INODE,
};
/* Ignores nonexistent leafs. */ if (d_is_negative(dentry)) return NULL;
/* * Allows access to pseudo filesystems that will never be mountable (e.g. * sockfs, pipefs), but can still be reachable through * /proc/<pid>/fd/<file-descriptor>
*/ staticbool is_nouser_or_private(conststruct dentry *dentry)
{ return (dentry->d_sb->s_flags & SB_NOUSER) ||
(d_is_positive(dentry) &&
unlikely(IS_PRIVATE(d_backing_inode(dentry))));
}
/* * Check that a destination file hierarchy has more restrictions than a source * file hierarchy. This is only used for link and rename actions. * * @layer_masks_child2: Optional child masks.
*/ staticbool no_more_access( const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS], constbool child1_is_directory, const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS], constbool child2_is_directory)
{ unsignedlong access_bit;
for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2);
access_bit++) { /* Ignores accesses that only make sense for directories. */ constbool is_file_access =
!!(BIT_ULL(access_bit) & ACCESS_FILE);
if (child1_is_directory || is_file_access) { /* * Checks if the destination restrictions are a * superset of the source ones (i.e. inherited access * rights without child exceptions): * restrictions(parent2) >= restrictions(child1)
*/ if ((((*layer_masks_parent1)[access_bit] &
(*layer_masks_child1)[access_bit]) |
(*layer_masks_parent2)[access_bit]) !=
(*layer_masks_parent2)[access_bit]) returnfalse;
}
if (!layer_masks_child2) continue; if (child2_is_directory || is_file_access) { /* * Checks inverted restrictions for RENAME_EXCHANGE: * restrictions(parent1) >= restrictions(child2)
*/ if ((((*layer_masks_parent2)[access_bit] &
(*layer_masks_child2)[access_bit]) |
(*layer_masks_parent1)[access_bit]) !=
(*layer_masks_parent1)[access_bit]) returnfalse;
}
} returntrue;
}
/* * Checks that we can only refer a file if no more access could be * inherited.
*/
NMA_TRUE(&x0, &x0, false, &rx0, NULL, false);
NMA_TRUE(&rx0, &rx0, false, &rx0, NULL, false);
NMA_FALSE(&rx0, &rx0, false, &x0, NULL, false);
NMA_FALSE(&rx0, &rx0, false, &x1, NULL, false);
/* * Allowing the following requests should not be a security risk * because domain 0 denies execute access, and domain 1 is always * nested with domain 0. However, adding an exception for this case * would mean to check all nested domains to make sure none can get * more privileges (e.g. processes only sandboxed by domain 0). * Moreover, this behavior (i.e. composition of N domains) could then * be inconsistent compared to domain 1's ruleset alone (e.g. it might * be denied to link/rename with domain 1's ruleset, whereas it would * be allowed if nested on top of domain 0). Another drawback would be * to create a cover channel that could enable sandboxed processes to * infer most of the filesystem restrictions from their domain. To * make it simple, efficient, safe, and more consistent, this case is * always denied.
*/
NMA_FALSE(&x1, &x1, false, &x0, NULL, false);
NMA_FALSE(&x1, &x1, false, &rx0, NULL, false);
NMA_FALSE(&x1, &x1, true, &x0, NULL, false);
NMA_FALSE(&x1, &x1, true, &rx0, NULL, false);
/* Checks the same case of exclusive domains with a file... */
NMA_TRUE(&x1, &x1, false, &x01, NULL, false);
NMA_FALSE(&x1, &x1, false, &x01, &x0, false);
NMA_FALSE(&x1, &x1, false, &x01, &x01, false);
NMA_FALSE(&x1, &x1, false, &x0, &x0, false); /* ...and with a directory. */
NMA_FALSE(&x1, &x1, false, &x0, &x0, true);
NMA_FALSE(&x1, &x1, true, &x0, &x0, false);
NMA_FALSE(&x1, &x1, true, &x0, &x0, true);
}
/* Checks and scopes without access request. */
KUNIT_EXPECT_TRUE(test, scope_to_request(0, &layer_masks));
KUNIT_EXPECT_EQ(test, 0,
layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]);
KUNIT_EXPECT_EQ(test, 0,
layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]);
}
#endif/* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */
/* * Returns true if there is at least one access right different than * LANDLOCK_ACCESS_FS_REFER.
*/ staticbool
is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS], const access_mask_t access_request)
{ unsignedlong access_bit; /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */ constunsignedlong access_check = access_request &
~LANDLOCK_ACCESS_FS_REFER;
if (!layer_masks) returnfalse;
for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) { if ((*layer_masks)[access_bit]) returntrue;
} returnfalse;
}
/** * is_access_to_paths_allowed - Check accesses for requests with a common path * * @domain: Domain to check against. * @path: File hierarchy to walk through. * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is * equal to @layer_masks_parent2 (if any). This is tied to the unique * requested path for most actions, or the source in case of a refer action * (i.e. rename or link), or the source and destination in case of * RENAME_EXCHANGE. * @layer_masks_parent1: Pointer to a matrix of layer masks per access * masks, identifying the layers that forbid a specific access. Bits from * this matrix can be unset according to the @path walk. An empty matrix * means that @domain allows all possible Landlock accesses (i.e. not only * those identified by @access_request_parent1). This matrix can * initially refer to domain layer masks and, when the accesses for the * destination and source are the same, to requested layer masks. * @log_request_parent1: Audit request to fill if the related access is denied. * @dentry_child1: Dentry to the initial child of the parent1 path. This * pointer must be NULL for non-refer actions (i.e. not link nor rename). * @access_request_parent2: Similar to @access_request_parent1 but for a * request involving a source and a destination. This refers to the * destination, except in case of RENAME_EXCHANGE where it also refers to * the source. Must be set to 0 when using a simple path request. * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer * action. This must be NULL otherwise. * @log_request_parent2: Audit request to fill if the related access is denied. * @dentry_child2: Dentry to the initial child of the parent2 path. This * pointer is only set for RENAME_EXCHANGE actions and must be NULL * otherwise. * * This helper first checks that the destination has a superset of restrictions * compared to the source (if any) for a common path. Because of * RENAME_EXCHANGE actions, source and destinations may be swapped. It then * checks that the collected accesses and the remaining ones are enough to * allow the request. * * Returns: * - true if the access request is granted; * - false otherwise.
*/ staticbool is_access_to_paths_allowed( conststruct landlock_ruleset *const domain, conststruct path *const path, const access_mask_t access_request_parent1,
layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], struct landlock_request *const log_request_parent1, struct dentry *const dentry_child1, const access_mask_t access_request_parent2,
layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], struct landlock_request *const log_request_parent2, struct dentry *const dentry_child2)
{ bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check,
child1_is_directory = true, child2_is_directory = true; struct path walker_path;
access_mask_t access_masked_parent1, access_masked_parent2;
layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS],
_layer_masks_child2[LANDLOCK_NUM_ACCESS_FS];
layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL,
(*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL;
if (!access_request_parent1 && !access_request_parent2) returntrue;
if (WARN_ON_ONCE(!path)) returntrue;
if (is_nouser_or_private(path->dentry)) returntrue;
if (WARN_ON_ONCE(!layer_masks_parent1)) returnfalse;
/* * For a double request, first check for potential privilege * escalation by looking at domain handled accesses (which are * a superset of the meaningful requested accesses).
*/
access_masked_parent1 = access_masked_parent2 =
landlock_union_access_masks(domain).fs;
is_dom_check = true;
} else { if (WARN_ON_ONCE(dentry_child1 || dentry_child2)) returnfalse; /* For a simple request, only check for requested accesses. */
access_masked_parent1 = access_request_parent1;
access_masked_parent2 = access_request_parent2;
is_dom_check = false;
}
walker_path = *path;
path_get(&walker_path); /* * We need to walk through all the hierarchy to not miss any relevant * restriction.
*/ while (true) { struct dentry *parent_dentry; conststruct landlock_rule *rule;
/* * If at least all accesses allowed on the destination are * already allowed on the source, respectively if there is at * least as much as restrictions on the destination than on the * source, then we can safely refer files from the source to * the destination without risking a privilege escalation. * This also applies in the case of RENAME_EXCHANGE, which * implies checks on both direction. This is crucial for * standalone multilayered security policies. Furthermore, * this helps avoid policy writers to shoot themselves in the * foot.
*/ if (unlikely(is_dom_check &&
no_more_access(
layer_masks_parent1, layer_masks_child1,
child1_is_directory, layer_masks_parent2,
layer_masks_child2,
child2_is_directory))) { /* * Now, downgrades the remaining checks from domain * handled accesses to requested accesses.
*/
is_dom_check = false;
access_masked_parent1 = access_request_parent1;
access_masked_parent2 = access_request_parent2;
/* Stops when a rule from each layer grants access. */ if (allowed_parent1 && allowed_parent2) break;
jump_up: if (walker_path.dentry == walker_path.mnt->mnt_root) { if (follow_up(&walker_path)) { /* Ignores hidden mount points. */ goto jump_up;
} else { /* * Stops at the real root. Denies access * because not all layers have granted access.
*/ break;
}
} if (unlikely(IS_ROOT(walker_path.dentry))) { /* * Stops at disconnected root directories. Only allows * access to internal filesystems (e.g. nsfs, which is * reachable through /proc/<pid>/ns/<namespace>).
*/ if (walker_path.mnt->mnt_flags & MNT_INTERNAL) {
allowed_parent1 = true;
allowed_parent2 = true;
} break;
}
parent_dentry = dget_parent(walker_path.dentry);
dput(walker_path.dentry);
walker_path.dentry = parent_dentry;
}
path_put(&walker_path);
/** * collect_domain_accesses - Walk through a file path and collect accesses * * @domain: Domain to check against. * @mnt_root: Last directory to check. * @dir: Directory to start the walk from. * @layer_masks_dom: Where to store the collected accesses. * * This helper is useful to begin a path walk from the @dir directory to a * @mnt_root directory used as a mount point. This mount point is the common * ancestor between the source and the destination of a renamed and linked * file. While walking from @dir to @mnt_root, we record all the domain's * allowed accesses in @layer_masks_dom. * * This is similar to is_access_to_paths_allowed() but much simpler because it * only handles walking on the same mount point and only checks one set of * accesses. * * Returns: * - true if all the domain access rights are allowed for @dir; * - false if the walk reached @mnt_root.
*/ staticbool collect_domain_accesses( conststruct landlock_ruleset *const domain, conststruct dentry *const mnt_root, struct dentry *dir,
layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS])
{ unsignedlong access_dom; bool ret = false;
if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom)) returntrue; if (is_nouser_or_private(dir)) returntrue;
dget(dir); while (true) { struct dentry *parent_dentry;
/* Gets all layers allowing all domain accesses. */ if (landlock_unmask_layers(find_rule(domain, dir), access_dom,
layer_masks_dom,
ARRAY_SIZE(*layer_masks_dom))) { /* * Stops when all handled accesses are allowed by at * least one rule in each layer.
*/
ret = true; break;
}
/* We should not reach a root other than @mnt_root. */ if (dir == mnt_root || WARN_ON_ONCE(IS_ROOT(dir))) break;
/** * current_check_refer_path - Check if a rename or link action is allowed * * @old_dentry: File or directory requested to be moved or linked. * @new_dir: Destination parent directory. * @new_dentry: Destination file or directory. * @removable: Sets to true if it is a rename operation. * @exchange: Sets to true if it is a rename operation with RENAME_EXCHANGE. * * Because of its unprivileged constraints, Landlock relies on file hierarchies * (and not only inodes) to tie access rights to files. Being able to link or * rename a file hierarchy brings some challenges. Indeed, moving or linking a * file (i.e. creating a new reference to an inode) can have an impact on the * actions allowed for a set of files if it would change its parent directory * (i.e. reparenting). * * To avoid trivial access right bypasses, Landlock first checks if the file or * directory requested to be moved would gain new access rights inherited from * its new hierarchy. Before returning any error, Landlock then checks that * the parent source hierarchy and the destination hierarchy would allow the * link or rename action. If it is not the case, an error with EACCES is * returned to inform user space that there is no way to remove or create the * requested source file type. If it should be allowed but the new inherited * access rights would be greater than the source access rights, then the * kernel returns an error with EXDEV. Prioritizing EACCES over EXDEV enables * user space to abort the whole operation if there is no way to do it, or to * manually copy the source to the destination if this remains allowed, e.g. * because file creation is allowed on the destination directory but not direct * linking. * * To achieve this goal, the kernel needs to compare two file hierarchies: the * one identifying the source file or directory (including itself), and the * destination one. This can be seen as a multilayer partial ordering problem. * The kernel walks through these paths and collects in a matrix the access * rights that are denied per layer. These matrices are then compared to see * if the destination one has more (or the same) restrictions as the source * one. If this is the case, the requested action will not return EXDEV, which * doesn't mean the action is allowed. The parent hierarchy of the source * (i.e. parent directory), and the destination hierarchy must also be checked * to verify that they explicitly allow such action (i.e. referencing, * creation and potentially removal rights). The kernel implementation is then * required to rely on potentially four matrices of access rights: one for the * source file or directory (i.e. the child), a potentially other one for the * other source/destination (in case of RENAME_EXCHANGE), one for the source * parent hierarchy and a last one for the destination hierarchy. These * ephemeral matrices take some space on the stack, which limits the number of * layers to a deemed reasonable number: 16. * * Returns: * - 0 if access is allowed; * - -EXDEV if @old_dentry would inherit new access rights from @new_dir; * - -EACCES if file removal or creation is denied.
*/ staticint current_check_refer_path(struct dentry *const old_dentry, conststruct path *const new_dir, struct dentry *const new_dentry, constbool removable, constbool exchange)
{ conststruct landlock_cred_security *const subject =
landlock_get_applicable_subject(current_cred(), any_fs, NULL); bool allow_parent1, allow_parent2;
access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; struct dentry *old_parent;
layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {},
layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; struct landlock_request request1 = {}, request2 = {};
if (!subject) return 0;
if (unlikely(d_is_negative(old_dentry))) return -ENOENT; if (exchange) { if (unlikely(d_is_negative(new_dentry))) return -ENOENT;
access_request_parent1 =
get_mode_access(d_backing_inode(new_dentry)->i_mode);
} else {
access_request_parent1 = 0;
}
access_request_parent2 =
get_mode_access(d_backing_inode(old_dentry)->i_mode); if (removable) {
access_request_parent1 |= maybe_remove(old_dentry);
access_request_parent2 |= maybe_remove(new_dentry);
}
/* The mount points are the same for old and new paths, cf. EXDEV. */ if (old_dentry->d_parent == new_dir->dentry) { /* * The LANDLOCK_ACCESS_FS_REFER access right is not required * for same-directory referer (i.e. no reparenting).
*/
access_request_parent1 = landlock_init_layer_masks(
subject->domain,
access_request_parent1 | access_request_parent2,
&layer_masks_parent1, LANDLOCK_KEY_INODE); if (is_access_to_paths_allowed(subject->domain, new_dir,
access_request_parent1,
&layer_masks_parent1, &request1,
NULL, 0, NULL, NULL, NULL)) return 0;
/* Saves the common mount point. */
mnt_dir.mnt = new_dir->mnt;
mnt_dir.dentry = new_dir->mnt->mnt_root;
/* * old_dentry may be the root of the common mount point and * !IS_ROOT(old_dentry) at the same time (e.g. with open_tree() and * OPEN_TREE_CLONE). We do not need to call dget(old_parent) because * we keep a reference to old_dentry.
*/
old_parent = (old_dentry == mnt_dir.dentry) ? old_dentry :
old_dentry->d_parent;
/* new_dir->dentry is equal to new_dentry->d_parent */
allow_parent1 = collect_domain_accesses(subject->domain, mnt_dir.dentry,
old_parent,
&layer_masks_parent1);
allow_parent2 = collect_domain_accesses(subject->domain, mnt_dir.dentry,
new_dir->dentry,
&layer_masks_parent2);
if (allow_parent1 && allow_parent2) return 0;
/* * To be able to compare source and destination domain access rights, * take into account the @old_dentry access rights aggregated with its * parent access rights. This will be useful to compare with the * destination parent access rights.
*/ if (is_access_to_paths_allowed(
subject->domain, &mnt_dir, access_request_parent1,
&layer_masks_parent1, &request1, old_dentry,
access_request_parent2, &layer_masks_parent2, &request2,
exchange ? new_dentry : NULL)) return 0;
if (request1.access) {
request1.audit.u.path.dentry = old_parent;
landlock_log_denial(subject, &request1);
} if (request2.access) {
request2.audit.u.path.dentry = new_dir->dentry;
landlock_log_denial(subject, &request2);
}
/* * This prioritizes EACCES over EXDEV for all actions, including * renames with RENAME_EXCHANGE.
*/ if (likely(is_eacces(&layer_masks_parent1, access_request_parent1) ||
is_eacces(&layer_masks_parent2, access_request_parent2))) return -EACCES;
/* * Gracefully forbids reparenting if the destination directory * hierarchy is not a superset of restrictions of the source directory * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the * source or the destination.
*/ return -EXDEV;
}
/* * All inodes must already have been untied from their object by * release_inode() or hook_sb_delete().
*/
inode_sec = inode_security + landlock_blob_sizes.lbs_inode;
WARN_ON_ONCE(inode_sec->object);
}
/* Super-block hooks */
/* * Release the inodes used in a security policy. * * Cf. fsnotify_unmount_inodes() and evict_inodes()
*/ staticvoid hook_sb_delete(struct super_block *const sb)
{ struct inode *inode, *prev_inode = NULL;
/* Only handles referenced inodes. */ if (!atomic_read(&inode->i_count)) continue;
/* * Protects against concurrent modification of inode (e.g. * from get_inode_object()).
*/
spin_lock(&inode->i_lock); /* * Checks I_FREEING and I_WILL_FREE to protect against a race * condition when release_inode() just called iput(), which * could lead to a NULL dereference of inode->security or a * second call to iput() for the same Landlock object. Also * checks I_NEW because such inode cannot be tied to an object.
*/ if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
spin_unlock(&inode->i_lock); continue;
}
rcu_read_lock();
object = rcu_dereference(landlock_inode(inode)->object); if (!object) {
rcu_read_unlock();
spin_unlock(&inode->i_lock); continue;
} /* Keeps a reference to this inode until the next loop walk. */
__iget(inode);
spin_unlock(&inode->i_lock);
/* * If there is no concurrent release_inode() ongoing, then we * are in charge of calling iput() on this inode, otherwise we * will just wait for it to finish.
*/
spin_lock(&object->lock); if (object->underobj == inode) {
object->underobj = NULL;
spin_unlock(&object->lock);
rcu_read_unlock();
/* * Because object->underobj was not NULL, * release_inode() and get_inode_object() guarantee * that it is safe to reset * landlock_inode(inode)->object while it is not NULL. * It is therefore not necessary to lock inode->i_lock.
*/
rcu_assign_pointer(landlock_inode(inode)->object, NULL); /* * At this point, we own the ihold() reference that was * originally set up by get_inode_object() and the * __iget() reference that we just set in this loop * walk. Therefore the following call to iput() will * not sleep nor drop the inode because there is now at * least two references to it.
*/
iput(inode);
} else {
spin_unlock(&object->lock);
rcu_read_unlock();
}
if (prev_inode) { /* * At this point, we still own the __iget() reference * that we just set in this loop walk. Therefore we * can drop the list lock and know that the inode won't * disappear from under us until the next loop walk.
*/
spin_unlock(&sb->s_inode_list_lock); /* * We can now actually put the inode reference from the * previous loop walk, which is not needed anymore.
*/
iput(prev_inode);
cond_resched();
spin_lock(&sb->s_inode_list_lock);
}
prev_inode = inode;
}
spin_unlock(&sb->s_inode_list_lock);
/* Puts the inode reference from the last loop walk, if any. */ if (prev_inode)
iput(prev_inode); /* Waits for pending iput() in release_inode(). */
wait_var_event(&landlock_superblock(sb)->inode_refs,
!atomic_long_read(&landlock_superblock(sb)->inode_refs));
}
/* * Because a Landlock security policy is defined according to the filesystem * topology (i.e. the mount namespace), changing it may grant access to files * not previously allowed. * * To make it simple, deny any filesystem topology modification by landlocked * processes. Non-landlocked processes may still change the namespace of a * landlocked process, but this kind of threat must be handled by a system-wide * access-control security policy. * * This could be lifted in the future if Landlock can safely handle mount * namespace updates requested by a landlocked process. Indeed, we could * update the current domain (which is currently read-only) by taking into * account the accesses of the source and the destination of a new mount point. * However, it would also require to make all the child domains dynamically * inherit these new constraints. Anyway, for backward compatibility reasons, * a dedicated user space option would be required (e.g. as a ruleset flag).
*/ staticint hook_sb_mount(constchar *const dev_name, conststruct path *const path, constchar *const type, constunsignedlong flags, void *const data)
{
size_t handle_layer; conststruct landlock_cred_security *const subject =
landlock_get_applicable_subject(current_cred(), any_fs,
&handle_layer);
/* * Removing a mount point may reveal a previously hidden file hierarchy, which * may then grant access to files, which may have previously been forbidden.
*/ staticint hook_sb_umount(struct vfsmount *const mnt, constint flags)
{
size_t handle_layer; conststruct landlock_cred_security *const subject =
landlock_get_applicable_subject(current_cred(), any_fs,
&handle_layer);
/* * pivot_root(2), like mount(2), changes the current mount namespace. It must * then be forbidden for a landlocked process. * * However, chroot(2) may be allowed because it only changes the relative root * directory of the current process. Moreover, it can be used to restrict the * view of the filesystem.
*/ staticint hook_sb_pivotroot(conststruct path *const old_path, conststruct path *const new_path)
{
size_t handle_layer; conststruct landlock_cred_security *const subject =
landlock_get_applicable_subject(current_cred(), any_fs,
&handle_layer);
/** * get_required_file_open_access - Get access needed to open a file * * @file: File being opened. * * Returns the access rights that are required for opening the given file, * depending on the file type and open mode.
*/ static access_mask_t
get_required_file_open_access(conststruct file *const file)
{
access_mask_t access = 0;
if (file->f_mode & FMODE_READ) { /* A directory can only be opened in read mode. */ if (S_ISDIR(file_inode(file)->i_mode)) return LANDLOCK_ACCESS_FS_READ_DIR;
access = LANDLOCK_ACCESS_FS_READ_FILE;
} if (file->f_mode & FMODE_WRITE)
access |= LANDLOCK_ACCESS_FS_WRITE_FILE; /* __FMODE_EXEC is indeed part of f_flags, not f_mode. */ if (file->f_flags & __FMODE_EXEC)
access |= LANDLOCK_ACCESS_FS_EXECUTE; return access;
}
staticint hook_file_alloc_security(struct file *const file)
{ /* * Grants all access rights, even if most of them are not checked later * on. It is more consistent. * * Notably, file descriptors for regular files can also be acquired * without going through the file_open hook, for example when using * memfd_create(2).
*/
landlock_file(file)->allowed_access = LANDLOCK_MASK_ACCESS_FS; return 0;
}
/* * Because a file may be opened with O_PATH, get_required_file_open_access() * may return 0. This case will be handled with a future Landlock * evolution.
*/
open_access_request = get_required_file_open_access(file);
/* * We look up more access than what we immediately need for open(), so * that we can later authorize operations on opened files.
*/
optional_access = LANDLOCK_ACCESS_FS_TRUNCATE; if (is_device(file))
optional_access |= LANDLOCK_ACCESS_FS_IOCTL_DEV;
/* * Calculate the actual allowed access rights from layer_masks. * Add each access right to allowed_access which has not been * vetoed by any layer.
*/
allowed_access = 0;
for_each_set_bit(access_bit, &access_req,
ARRAY_SIZE(layer_masks)) { if (!layer_masks[access_bit])
allowed_access |= BIT_ULL(access_bit);
}
}
/* * For operations on already opened files (i.e. ftruncate()), it is the * access rights at the time of open() which decide whether the * operation is permitted. Therefore, we record the relevant subset of * file access rights in the opened struct file.
*/
landlock_file(file)->allowed_access = allowed_access; #ifdef CONFIG_AUDIT
landlock_file(file)->deny_masks = landlock_get_deny_masks(
_LANDLOCK_ACCESS_FS_OPTIONAL, optional_access, &layer_masks,
ARRAY_SIZE(layer_masks)); #endif/* CONFIG_AUDIT */
if ((open_access_request & allowed_access) == open_access_request) return 0;
/* Sets access to reflect the actual request. */
request.access = open_access_request;
landlock_log_denial(subject, &request); return -EACCES;
}
staticint hook_file_truncate(struct file *const file)
{ /* * Allows truncation if the truncate right was available at the time of * opening the file, to get a consistent access check as for read, write * and execute operations. * * Note: For checks done based on the file's Landlock allowed access, we * enforce them independently of whether the current thread is in a * Landlock domain, so that open files passed between independent * processes retain their behaviour.
*/ if (landlock_file(file)->allowed_access & LANDLOCK_ACCESS_FS_TRUNCATE) return 0;
/* * It is the access rights at the time of opening the file which * determine whether IOCTL can be used on the opened file later. * * The access right is attached to the opened file in hook_file_open().
*/ if (allowed_access & LANDLOCK_ACCESS_FS_IOCTL_DEV) return 0;
if (!is_device(file)) return 0;
if (unlikely(is_compat) ? is_masked_device_ioctl_compat(cmd) :
is_masked_device_ioctl(cmd)) return 0;
/* * Always allow sending signals between threads of the same process. This * ensures consistency with hook_task_kill().
*/ staticbool control_current_fowner(struct fown_struct *const fown)
{ struct task_struct *p;
/* * Lock already held by __f_setown(), see commit 26f204380a3c ("fs: Fix * file_set_fowner LSM hook inconsistencies").
*/
lockdep_assert_held(&fown->lock);
/* * Some callers (e.g. fcntl_dirnotify) may not be in an RCU read-side * critical section.
*/
guard(rcu)();
p = pid_task(fown->pid, fown->pid_type); if (!p) returntrue;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.