/* * If a non-root user executes a setuid-root binary in * !secure(SECURE_NOROOT) mode, then we raise capabilities. * However if fE is also set, then the intent is for only * the file capabilities to be applied, and the setuid-root * bit is left on either to change the uid (plausible) or * to get full privilege on a kernel without file capabilities * support. So in that case we do not raise capabilities. * * Warn if that happens, once per boot.
*/ staticvoid warn_setuid_and_fcaps_mixed(constchar *fname)
{ staticint warned; if (!warned) {
printk(KERN_INFO "warning: `%s' has both setuid-root and" " effective capabilities. Therefore not raising all" " capabilities.\n", fname);
warned = 1;
}
}
/** * cap_capable_helper - Determine whether a task has a particular effective * capability. * @cred: The credentials to use * @target_ns: The user namespace of the resource being accessed * @cred_ns: The user namespace of the credentials * @cap: The capability to check for * * Determine whether the nominated task has the specified capability amongst * its effective set, returning 0 if it does, -ve if it does not. * * See cap_capable for more details.
*/ staticinlineint cap_capable_helper(conststruct cred *cred, struct user_namespace *target_ns, conststruct user_namespace *cred_ns, int cap)
{ struct user_namespace *ns = target_ns;
/* See if cred has the capability in the target user namespace * by examining the target user namespace and all of the target * user namespace's parents.
*/ for (;;) { /* Do we have the necessary capabilities? */ if (likely(ns == cred_ns)) return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
/* * If we're already at a lower level than we're looking for, * we're done searching.
*/ if (ns->level <= cred_ns->level) return -EPERM;
/* * The owner of the user namespace in the parent of the * user namespace has all caps.
*/ if ((ns->parent == cred_ns) && uid_eq(ns->owner, cred->euid)) return 0;
/* * If you have a capability in a parent user ns, then you have * it over all children user namespaces as well.
*/
ns = ns->parent;
}
/* We never get here */
}
/** * cap_capable - Determine whether a task has a particular effective capability * @cred: The credentials to use * @target_ns: The user namespace of the resource being accessed * @cap: The capability to check for * @opts: Bitmask of options defined in include/linux/security.h (unused) * * Determine whether the nominated task has the specified capability amongst * its effective set, returning 0 if it does, -ve if it does not. * * NOTE WELL: cap_capable() has reverse semantics to the capable() call * and friends. That is cap_capable() returns an int 0 when a task has * a capability, while the kernel's capable(), has_ns_capability(), * has_ns_capability_noaudit(), and has_capability_noaudit() return a * bool true (1) for this case.
*/ int cap_capable(conststruct cred *cred, struct user_namespace *target_ns, int cap, unsignedint opts)
{ conststruct user_namespace *cred_ns = cred->user_ns; int ret = cap_capable_helper(cred, target_ns, cred_ns, cap);
/** * cap_settime - Determine whether the current process may set the system clock * @ts: The time to set * @tz: The timezone to set * * Determine whether the current process may set the system clock and timezone * information, returning 0 if permission granted, -ve if denied.
*/ int cap_settime(conststruct timespec64 *ts, conststruct timezone *tz)
{ if (!capable(CAP_SYS_TIME)) return -EPERM; return 0;
}
/** * cap_ptrace_access_check - Determine whether the current process may access * another * @child: The process to be accessed * @mode: The mode of attachment. * * If we are in the same or an ancestor user_ns and have all the target * task's capabilities, then ptrace access is allowed. * If we have the ptrace capability to the target user_ns, then ptrace * access is allowed. * Else denied. * * Determine whether a process may access another, returning 0 if permission * granted, -ve if denied.
*/ int cap_ptrace_access_check(struct task_struct *child, unsignedint mode)
{ int ret = 0; conststruct cred *cred, *child_cred; const kernel_cap_t *caller_caps;
rcu_read_lock();
cred = current_cred();
child_cred = __task_cred(child); if (mode & PTRACE_MODE_FSCREDS)
caller_caps = &cred->cap_effective; else
caller_caps = &cred->cap_permitted; if (cred->user_ns == child_cred->user_ns &&
cap_issubset(child_cred->cap_permitted, *caller_caps)) goto out; if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE)) goto out;
ret = -EPERM;
out:
rcu_read_unlock(); return ret;
}
/** * cap_ptrace_traceme - Determine whether another process may trace the current * @parent: The task proposed to be the tracer * * If parent is in the same or an ancestor user_ns and has all current's * capabilities, then ptrace access is allowed. * If parent has the ptrace capability to current's user_ns, then ptrace * access is allowed. * Else denied. * * Determine whether the nominated task is permitted to trace the current * process, returning 0 if permission is granted, -ve if denied.
*/ int cap_ptrace_traceme(struct task_struct *parent)
{ int ret = 0; conststruct cred *cred, *child_cred;
rcu_read_lock();
cred = __task_cred(parent);
child_cred = current_cred(); if (cred->user_ns == child_cred->user_ns &&
cap_issubset(child_cred->cap_permitted, cred->cap_permitted)) goto out; if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE)) goto out;
ret = -EPERM;
out:
rcu_read_unlock(); return ret;
}
/** * cap_capget - Retrieve a task's capability sets * @target: The task from which to retrieve the capability sets * @effective: The place to record the effective set * @inheritable: The place to record the inheritable set * @permitted: The place to record the permitted set * * This function retrieves the capabilities of the nominated task and returns * them to the caller.
*/ int cap_capget(conststruct task_struct *target, kernel_cap_t *effective,
kernel_cap_t *inheritable, kernel_cap_t *permitted)
{ conststruct cred *cred;
/*
 * Determine whether the inheritable capabilities are limited to the old
 * permitted set.  Returns 1 if they are limited, 0 if they are not.
 */
static inline int cap_inh_is_capped(void)
{
	/*
	 * they are so limited unless the current task has the CAP_SETPCAP
	 * capability
	 */
	if (cap_capable(current_cred(), current_cred()->user_ns,
			CAP_SETPCAP, CAP_OPT_NONE) == 0)
		return 0;
	return 1;
}
/** * cap_capset - Validate and apply proposed changes to current's capabilities * @new: The proposed new credentials; alterations should be made here * @old: The current task's current credentials * @effective: A pointer to the proposed new effective capabilities set * @inheritable: A pointer to the proposed new inheritable capabilities set * @permitted: A pointer to the proposed new permitted capabilities set * * This function validates and applies a proposed mass change to the current * process's capability sets. The changes are made to the proposed new * credentials, and assuming no error, will be committed by the caller of LSM.
*/ int cap_capset(struct cred *new, conststruct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted)
{ if (cap_inh_is_capped() &&
!cap_issubset(*inheritable,
cap_combine(old->cap_inheritable,
old->cap_permitted))) /* incapable of using this inheritable set */ return -EPERM;
if (!cap_issubset(*inheritable,
cap_combine(old->cap_inheritable,
old->cap_bset))) /* no new pI capabilities outside bounding set */ return -EPERM;
/* verify restrictions on target's new Permitted set */ if (!cap_issubset(*permitted, old->cap_permitted)) return -EPERM;
/* verify the _new_Effective_ is a subset of the _new_Permitted_ */ if (!cap_issubset(*effective, *permitted)) return -EPERM;
/* * Mask off ambient bits that are no longer both permitted and * inheritable.
*/
new->cap_ambient = cap_intersect(new->cap_ambient,
cap_intersect(*permitted,
*inheritable)); if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EINVAL; return 0;
}
/** * cap_inode_need_killpriv - Determine if inode change affects privileges * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV * * Determine if an inode having a change applied that's marked ATTR_KILL_PRIV * affects the security markings on that inode, and if it is, should * inode_killpriv() be invoked or the change rejected. * * Return: 1 if security.capability has a value, meaning inode_killpriv() * is required, 0 otherwise, meaning inode_killpriv() is not required.
*/ int cap_inode_need_killpriv(struct dentry *dentry)
{ struct inode *inode = d_backing_inode(dentry); int error;
/** * cap_inode_killpriv - Erase the security markings on an inode * * @idmap: idmap of the mount the inode was found from * @dentry: The inode/dentry to alter * * Erase the privilege-enhancing security markings on an inode. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * Return: 0 if successful, -ve on error.
*/ int cap_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry)
{ int error;
/* * getsecurity: We are called for security.* before any attempt to read the * xattr from the inode itself. * * This gives us a chance to read the on-disk value and convert it. If we * return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler. * * Note we are not called by vfs_getxattr_alloc(), but that is only called * by the integrity subsystem, which really wants the unconverted values - * so that's good.
*/ int cap_inode_getsecurity(struct mnt_idmap *idmap, struct inode *inode, constchar *name, void **buffer, bool alloc)
{ int size;
kuid_t kroot;
vfsuid_t vfsroot;
u32 nsmagic, magic;
uid_t root, mappedroot; char *tmpbuf = NULL; struct vfs_cap_data *cap; struct vfs_ns_cap_data *nscap = NULL; struct dentry *dentry; struct user_namespace *fs_ns;
if (strcmp(name, "capability") != 0) return -EOPNOTSUPP;
dentry = d_find_any_alias(inode); if (!dentry) return -EINVAL;
size = vfs_getxattr_alloc(idmap, dentry, XATTR_NAME_CAPS, &tmpbuf, sizeof(struct vfs_ns_cap_data), GFP_NOFS);
dput(dentry); /* gcc11 complains if we don't check for !tmpbuf */ if (size < 0 || !tmpbuf) goto out_free;
/* If this is an idmapped mount shift the kuid. */
vfsroot = make_vfsuid(idmap, fs_ns, kroot);
/* If the root kuid maps to a valid uid in current ns, then return
* this as a nscap. */
mappedroot = from_kuid(current_user_ns(), vfsuid_into_kuid(vfsroot)); if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
size = sizeof(struct vfs_ns_cap_data); if (alloc) { if (!nscap) { /* v2 -> v3 conversion */
nscap = kzalloc(size, GFP_ATOMIC); if (!nscap) {
size = -ENOMEM; goto out_free;
}
nsmagic = VFS_CAP_REVISION_3;
magic = le32_to_cpu(cap->magic_etc); if (magic & VFS_CAP_FLAGS_EFFECTIVE)
nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
nscap->magic_etc = cpu_to_le32(nsmagic);
} else { /* use allocated v3 buffer */
tmpbuf = NULL;
}
nscap->rootid = cpu_to_le32(mappedroot);
*buffer = nscap;
} goto out_free;
}
if (!rootid_owns_currentns(vfsroot)) {
size = -EOVERFLOW; goto out_free;
}
/* This comes from a parent namespace. Return as a v2 capability */
size = sizeof(struct vfs_cap_data); if (alloc) { if (nscap) { /* v3 -> v2 conversion */
cap = kzalloc(size, GFP_ATOMIC); if (!cap) {
size = -ENOMEM; goto out_free;
}
magic = VFS_CAP_REVISION_2;
nsmagic = le32_to_cpu(nscap->magic_etc); if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
magic |= VFS_CAP_FLAGS_EFFECTIVE;
memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
cap->magic_etc = cpu_to_le32(magic);
} else { /* use unconverted v2 */
tmpbuf = NULL;
}
*buffer = cap;
}
out_free:
kfree(tmpbuf); return size;
}
/** * rootid_from_xattr - translate root uid of vfs caps * * @value: vfs caps value which may be modified by this function * @size: size of @ivalue * @task_ns: user namespace of the caller
*/ static vfsuid_t rootid_from_xattr(constvoid *value, size_t size, struct user_namespace *task_ns)
{ conststruct vfs_ns_cap_data *nscap = value;
uid_t rootid = 0;
if (size == XATTR_CAPS_SZ_3)
rootid = le32_to_cpu(nscap->rootid);
/** * cap_convert_nscap - check vfs caps * * @idmap: idmap of the mount the inode was found from * @dentry: used to retrieve inode to check permissions on * @ivalue: vfs caps value which may be modified by this function * @size: size of @ivalue * * User requested a write of security.capability. If needed, update the * xattr to change from v2 to v3, or to fixup the v3 rootid. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * Return: On success, return the new size; on error, return < 0.
*/ int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry, constvoid **ivalue, size_t size)
{ struct vfs_ns_cap_data *nscap;
uid_t nsrootid; conststruct vfs_cap_data *cap = *ivalue;
__u32 magic, nsmagic; struct inode *inode = d_backing_inode(dentry); struct user_namespace *task_ns = current_user_ns(),
*fs_ns = inode->i_sb->s_user_ns;
kuid_t rootid;
vfsuid_t vfsrootid;
size_t newsize;
if (!*ivalue) return -EINVAL; if (!validheader(size, cap)) return -EINVAL; if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP)) return -EPERM; if (size == XATTR_CAPS_SZ_2 && (idmap == &nop_mnt_idmap)) if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) /* user is privileged, just write the v2 */ return size;
vfsrootid = rootid_from_xattr(*ivalue, size, task_ns); if (!vfsuid_valid(vfsrootid)) return -EINVAL;
rootid = from_vfsuid(idmap, fs_ns, vfsrootid); if (!uid_valid(rootid)) return -EINVAL;
nsrootid = from_kuid(fs_ns, rootid); if (nsrootid == -1) return -EINVAL;
/*
 * Calculate the new process capability sets from the capability sets attached
 * to a file.
 *
 * @caps: file capability data already converted to CPU form
 * @bprm: exec parameters; bprm->cred->cap_permitted is overwritten
 * @effective: set to true when VFS_CAP_FLAGS_EFFECTIVE is present in @caps
 * @has_fcap: set to true when a revision is present in @caps->magic_etc
 *
 * Returns -EPERM only when *effective is true and some file-permitted
 * capability did not make it into the new permitted set; otherwise 0.
 */
static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
					  struct linux_binprm *bprm,
					  bool *effective,
					  bool *has_fcap)
{
	struct cred *new = bprm->cred;
	int ret = 0;

	if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
		*effective = true;

	if (caps->magic_etc & VFS_CAP_REVISION_MASK)
		*has_fcap = true;

	/*
	 * pP' = (X & fP) | (pI & fI)
	 * The addition of pA' is handled later.
	 */
	new->cap_permitted.val =
		(new->cap_bset.val & caps->permitted.val) |
		(new->cap_inheritable.val & caps->inheritable.val);

	if (caps->permitted.val & ~new->cap_permitted.val)
		/* insufficient to execute correctly */
		ret = -EPERM;

	/*
	 * For legacy apps, with no internal support for recognizing they
	 * do not have enough capabilities, we return an error if they are
	 * missing some "forced" (aka file-permitted) capabilities.
	 */
	return *effective ? ret : 0;
}
/** * get_vfs_caps_from_disk - retrieve vfs caps from disk * * @idmap: idmap of the mount the inode was found from * @dentry: dentry from which @inode is retrieved * @cpu_caps: vfs capabilities * * Extract the on-exec-apply capability sets for an executable file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap.
*/ int get_vfs_caps_from_disk(struct mnt_idmap *idmap, conststruct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps)
{ struct inode *inode = d_backing_inode(dentry);
__u32 magic_etc; int size; struct vfs_ns_cap_data data, *nscaps = &data; struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
kuid_t rootkuid;
vfsuid_t rootvfsuid; struct user_namespace *fs_ns;
rootkuid = make_kuid(fs_ns, 0); switch (magic_etc & VFS_CAP_REVISION_MASK) { case VFS_CAP_REVISION_1: if (size != XATTR_CAPS_SZ_1) return -EINVAL; break; case VFS_CAP_REVISION_2: if (size != XATTR_CAPS_SZ_2) return -EINVAL; break; case VFS_CAP_REVISION_3: if (size != XATTR_CAPS_SZ_3) return -EINVAL;
rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid)); break;
default: return -EINVAL;
}
rootvfsuid = make_vfsuid(idmap, fs_ns, rootkuid); if (!vfsuid_valid(rootvfsuid)) return -ENODATA;
/* Limit the caps to the mounter of the filesystem * or the more limited uid specified in the xattr.
*/ if (!rootid_owns_currentns(rootvfsuid)) return -ENODATA;
/* * Rev1 had just a single 32-bit word, later expanded * to a second one for the high bits
*/ if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) {
cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32;
cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32;
}
/* * Attempt to get the on-exec apply capability sets for an executable file from * its xattrs and, if present, apply them to the proposed credentials being * constructed by execve().
*/ staticint get_file_caps(struct linux_binprm *bprm, conststruct file *file, bool *effective, bool *has_fcap)
{ int rc = 0; struct cpu_vfs_cap_data vcaps;
cap_clear(bprm->cred->cap_permitted);
if (!file_caps_enabled) return 0;
if (!mnt_may_suid(file->f_path.mnt)) return 0;
/* * This check is redundant with mnt_may_suid() but is kept to make * explicit that capability bits are limited to s_user_ns and its * descendants.
*/ if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns)) return 0;
/*
 * handle_privileged_root - Handle case of privileged root
 * @bprm: The execution parameters, including the proposed creds
 * @has_fcap: Are any file capabilities set?
 * @effective: Do we have effective root privilege?
 * @root_uid: This namespace' root UID WRT initial USER namespace
 *
 * Handle the case where root is privileged and hasn't been neutered by
 * SECURE_NOROOT.  If file capabilities are set, they won't be combined with
 * set UID root and nothing is changed.  If we are root, cap_permitted is
 * updated.  If we have become set UID root, the effective bit is set.
 */
static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap,
				   bool *effective, kuid_t root_uid)
{
	const struct cred *old = current_cred();
	struct cred *new = bprm->cred;

	if (!root_privileged())
		return;

	/*
	 * If the legacy file capability is set, then don't set privs
	 * for a setuid root binary run by a non-root user.  Do set it
	 * for a root user just to cause least surprise to an admin.
	 */
	if (has_fcap && __is_suid(root_uid, new)) {
		warn_setuid_and_fcaps_mixed(bprm->filename);
		return;
	}

	/*
	 * To support inheritance of root-permissions and suid-root
	 * executables under compatibility mode, we override the
	 * capability sets for the file.
	 */
	if (__is_eff(root_uid, new) || __is_real(root_uid, new)) {
		/* pP' = (cap_bset & ~0) | (pI & ~0) */
		new->cap_permitted = cap_combine(old->cap_bset,
						 old->cap_inheritable);
	}

	/*
	 * If only the real uid is 0, we do not set the effective bit.
	 */
	if (__is_eff(root_uid, new))
		*effective = true;
}
/* * 1) Audit candidate if current->cap_effective is set * * We do not bother to audit if 3 things are true: * 1) cap_effective has all caps * 2) we became root *OR* are were already root * 3) root is supposed to have all caps (SECURE_NOROOT) * Since this is just a normal root execing a process. * * Number 1 above might fail if you don't have a full bset, but I think * that is interesting information to audit. * * A number of other conditions require logging: * 2) something prevented setuid root getting all caps * 3) non-setuid root gets fcaps * 4) non-setuid root gets ambient
*/ staticinlinebool nonroot_raised_pE(struct cred *new, conststruct cred *old,
kuid_t root, bool has_fcap)
{ bool ret = false;
/** * cap_bprm_creds_from_file - Set up the proposed credentials for execve(). * @bprm: The execution parameters, including the proposed creds * @file: The file to pull the credentials from * * Set up the proposed credentials for a new execution context being * constructed by execve(). The proposed creds in @bprm->cred is altered, * which won't take effect immediately. * * Return: 0 if successful, -ve on error.
*/ int cap_bprm_creds_from_file(struct linux_binprm *bprm, conststruct file *file)
{ /* Process setpcap binaries and capabilities for uid 0 */ conststruct cred *old = current_cred(); struct cred *new = bprm->cred; bool effective = false, has_fcap = false, id_changed; int ret;
kuid_t root_uid;
if (WARN_ON(!cap_ambient_invariant_ok(old))) return -EPERM;
ret = get_file_caps(bprm, file, &effective, &has_fcap); if (ret < 0) return ret;
/* if we have fs caps, clear dangerous personality flags */ if (__cap_gained(permitted, new, old))
bprm->per_clear |= PER_CLEAR_ON_SETID;
/* Don't let someone trace a set[ug]id/setpcap binary with the revised * credentials unless they have the appropriate permit. * * In addition, if NO_NEW_PRIVS, then ensure we get no new privs.
*/
id_changed = !uid_eq(new->euid, old->euid) || !in_group_p(new->egid);
if ((id_changed || __cap_gained(permitted, new, old)) &&
((bprm->unsafe & ~LSM_UNSAFE_PTRACE) ||
!ptracer_capable(current, new->user_ns))) { /* downgrade; they get no more than they had, and maybe less */ if (!ns_capable(new->user_ns, CAP_SETUID) ||
(bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {
new->euid = new->uid;
new->egid = new->gid;
}
new->cap_permitted = cap_intersect(new->cap_permitted,
old->cap_permitted);
}
/* File caps or setid cancels ambient. */ if (has_fcap || id_changed)
cap_clear(new->cap_ambient);
/* * Now that we've computed pA', update pP' to give: * pP' = (X & fP) | (pI & fI) | pA'
*/
new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient);
/* * Set pE' = (fE ? pP' : pA'). Because pA' is zero if fE is set, * this is the same as pE' = (fE ? pP' : 0) | pA'.
*/ if (effective)
new->cap_effective = new->cap_permitted; else
new->cap_effective = new->cap_ambient;
if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EPERM;
if (nonroot_raised_pE(new, old, root_uid, has_fcap)) {
ret = audit_log_bprm_fcaps(bprm, new, old); if (ret < 0) return ret;
}
/**
 * cap_inode_setxattr - Determine whether an xattr may be altered
 * @dentry: The inode/dentry being altered
 * @name: The name of the xattr to be changed
 * @value: The value that the xattr will be changed to
 * @size: The size of value
 * @flags: The replacement flag
 *
 * Determine whether an xattr may be altered or set on an inode, returning 0 if
 * permission is granted, -ve if denied.
 *
 * This is used to make sure security xattrs don't get updated or set by those
 * who aren't privileged to do so.
 */
int cap_inode_setxattr(struct dentry *dentry, const char *name,
		       const void *value, size_t size, int flags)
{
	struct user_namespace *user_ns = dentry->d_sb->s_user_ns;

	/*
	 * Ignore non-security xattrs: only the "security." namespace is
	 * policed here.  Without this guard every xattr write (user.*,
	 * trusted.*, ...) would demand CAP_SYS_ADMIN.
	 */
	if (strncmp(name, XATTR_SECURITY_PREFIX,
		    XATTR_SECURITY_PREFIX_LEN) != 0)
		return 0;

	/*
	 * For XATTR_NAME_CAPS the check will be done in
	 * cap_convert_nscap(), called by setxattr()
	 */
	if (strcmp(name, XATTR_NAME_CAPS) == 0)
		return 0;

	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	return 0;
}
/** * cap_inode_removexattr - Determine whether an xattr may be removed * * @idmap: idmap of the mount the inode was found from * @dentry: The inode/dentry being altered * @name: The name of the xattr to be changed * * Determine whether an xattr may be removed from an inode, returning 0 if * permission is granted, -ve if denied. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before checking * permissions. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. * * This is used to make sure security xattrs don't get removed by those who * aren't privileged to remove them.
*/ int cap_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, constchar *name)
{ struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
if (strcmp(name, XATTR_NAME_CAPS) == 0) { /* security.capability gets namespaced */ struct inode *inode = d_backing_inode(dentry); if (!inode) return -EINVAL; if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP)) return -EPERM; return 0;
}
if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; return 0;
}
/* * cap_emulate_setxuid() fixes the effective / permitted capabilities of * a process after a call to setuid, setreuid, or setresuid. * * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of * {r,e,s}uid != 0, the permitted and effective capabilities are * cleared. * * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective * capabilities of the process are cleared. * * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective * capabilities are set to the permitted capabilities. * * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should * never happen. * * -astor * * cevans - New behaviour, Oct '99 * A process may, via prctl(), elect to keep its capabilities when it * calls setuid() and switches away from uid==0. Both permitted and * effective sets will be retained. * Without this change, it was impossible for a daemon to drop only some * of its privilege. The call to setuid(!=0) would drop all privileges! * Keeping uid 0 is not an option because uid 0 owns too many vital * files.. * Thanks to Olaf Kirch and Peter Benie for spotting this.
*/ staticinlinevoid cap_emulate_setxuid(struct cred *new, conststruct cred *old)
{
kuid_t root_uid = make_kuid(old->user_ns, 0);
/* * Pre-ambient programs expect setresuid to nonroot followed * by exec to drop capabilities. We should make sure that * this remains the case.
*/
cap_clear(new->cap_ambient);
} if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))
cap_clear(new->cap_effective); if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))
new->cap_effective = new->cap_permitted;
}
/** * cap_task_fix_setuid - Fix up the results of setuid() call * @new: The proposed credentials * @old: The current task's current credentials * @flags: Indications of what has changed * * Fix up the results of setuid() call before the credential changes are * actually applied. * * Return: 0 to grant the changes, -ve to deny them.
*/ int cap_task_fix_setuid(struct cred *new, conststruct cred *old, int flags)
{ switch (flags) { case LSM_SETID_RE: case LSM_SETID_ID: case LSM_SETID_RES: /* juggle the capabilities to follow [RES]UID changes unless
* otherwise suppressed */ if (!issecure(SECURE_NO_SETUID_FIXUP))
cap_emulate_setxuid(new, old); break;
case LSM_SETID_FS: /* juggle the capabilities to follow FSUID changes, unless * otherwise suppressed * * FIXME - is fsuser used for all CAP_FS_MASK capabilities? * if not, we might be a bit too harsh here.
*/ if (!issecure(SECURE_NO_SETUID_FIXUP)) {
kuid_t root_uid = make_kuid(old->user_ns, 0); if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))
new->cap_effective =
cap_drop_fs_set(new->cap_effective);
/*
 * Rationale: code calling task_setscheduler, task_setioprio, and
 * task_setnice, assumes that
 *   . if capable(cap_sys_nice), then those actions should be allowed
 *   . if not capable(cap_sys_nice), but acting on your own processes,
 *   	then those actions should be allowed
 * This is insufficient now since you can call code without suid, but
 * yet with increased caps.
 * So we check for increased caps on the target process.
 *
 * Returns 0 when @p's permitted set is a subset of current's permitted set
 * or when current has CAP_SYS_NICE over @p's user namespace; -EPERM
 * otherwise.
 */
static int cap_safe_nice(struct task_struct *p)
{
	int is_subset, ret = 0;

	/* __task_cred() is used below, so stay inside an RCU read section. */
	rcu_read_lock();
	is_subset = cap_issubset(__task_cred(p)->cap_permitted,
				 current_cred()->cap_permitted);
	if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
		ret = -EPERM;
	rcu_read_unlock();

	return ret;
}
/**
 * cap_task_setscheduler - Decide whether a scheduler policy change is allowed
 * @p: The task whose scheduling policy would be changed
 *
 * The decision is delegated unchanged to cap_safe_nice().
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setscheduler(struct task_struct *p)
{
	int rc = cap_safe_nice(p);

	return rc;
}
/**
 * cap_task_setioprio - Decide whether an I/O priority change is allowed
 * @p: The task whose I/O priority would be changed
 * @ioprio: The requested I/O priority (not consulted by this function)
 *
 * The decision is delegated unchanged to cap_safe_nice().
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setioprio(struct task_struct *p, int ioprio)
{
	int rc = cap_safe_nice(p);

	return rc;
}
/**
 * cap_task_setnice - Decide whether a task priority change is allowed
 * @p: The task whose nice value would be changed
 * @nice: The requested nice value (not consulted by this function)
 *
 * The decision is delegated unchanged to cap_safe_nice().
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setnice(struct task_struct *p, int nice)
{
	int rc = cap_safe_nice(p);

	return rc;
}
/* * Implement PR_CAPBSET_DROP. Attempt to remove the specified capability from * the current task's bounding set. Returns 0 on success, -ve on error.
*/ staticint cap_prctl_drop(unsignedlong cap)
{ struct cred *new;
if (!ns_capable(current_user_ns(), CAP_SETPCAP)) return -EPERM; if (!cap_valid(cap)) return -EINVAL;
new = prepare_creds(); if (!new) return -ENOMEM;
cap_lower(new->cap_bset, cap); return commit_creds(new);
}
/** * cap_task_prctl - Implement process control functions for this security module * @option: The process control function requested * @arg2: The argument data for this function * @arg3: The argument data for this function * @arg4: The argument data for this function * @arg5: The argument data for this function * * Allow process control functions (sys_prctl()) to alter capabilities; may * also deny access to other functions not otherwise implemented here. * * Return: 0 or +ve on success, -ENOSYS if this function is not implemented * here, other -ve on error. If -ENOSYS is returned, sys_prctl() and other LSM * modules will consider performing the function.
*/ int cap_task_prctl(int option, unsignedlong arg2, unsignedlong arg3, unsignedlong arg4, unsignedlong arg5)
{ conststruct cred *old = current_cred(); struct cred *new;
switch (option) { case PR_CAPBSET_READ: if (!cap_valid(arg2)) return -EINVAL; return !!cap_raised(old->cap_bset, arg2);
case PR_CAPBSET_DROP: return cap_prctl_drop(arg2);
/* * The next four prctl's remain to assist with transitioning a * system from legacy UID=0 based privilege (when filesystem * capabilities are not in use) to a system using filesystem * capabilities only - as the POSIX.1e draft intended. * * Note: * * PR_SET_SECUREBITS = * issecure_mask(SECURE_KEEP_CAPS_LOCKED) * | issecure_mask(SECURE_NOROOT) * | issecure_mask(SECURE_NOROOT_LOCKED) * | issecure_mask(SECURE_NO_SETUID_FIXUP) * | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED) * * will ensure that the current process and all of its * children will be locked into a pure * capability-based-privilege environment.
*/ case PR_SET_SECUREBITS: if ((((old->securebits & SECURE_ALL_LOCKS) >> 1)
& (old->securebits ^ arg2)) /*[1]*/
|| ((old->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/
|| (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ /* * [1] no changing of bits that are locked * [2] no unlocking of locks * [3] no setting of unsupported bits
*/
) /* cannot change a locked bit */ return -EPERM;
/* * Doing anything requires privilege (go read about the * "sendmail capabilities bug"), except for unprivileged bits. * Indeed, the SECURE_ALL_UNPRIVILEGED bits are not * restrictions enforced by the kernel but by user space on * itself.
*/ if (cap_capable(current_cred(), current_cred()->user_ns,
CAP_SETPCAP, CAP_OPT_NONE) != 0) { constunsignedlong unpriv_and_locks =
SECURE_ALL_UNPRIVILEGED |
SECURE_ALL_UNPRIVILEGED << 1; constunsignedlong changed = old->securebits ^ arg2;
/* For legacy reason, denies non-change. */ if (!changed) return -EPERM;
new = prepare_creds(); if (!new) return -ENOMEM;
new->securebits = arg2; return commit_creds(new);
case PR_GET_SECUREBITS: return old->securebits;
case PR_GET_KEEPCAPS: return !!issecure(SECURE_KEEP_CAPS);
case PR_SET_KEEPCAPS: if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */ return -EINVAL; if (issecure(SECURE_KEEP_CAPS_LOCKED)) return -EPERM;
new = prepare_creds(); if (!new) return -ENOMEM; if (arg2)
new->securebits |= issecure_mask(SECURE_KEEP_CAPS); else
new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); return commit_creds(new);
case PR_CAP_AMBIENT: if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) { if (arg3 | arg4 | arg5) return -EINVAL;
new = prepare_creds(); if (!new) return -ENOMEM;
cap_clear(new->cap_ambient); return commit_creds(new);
}
if (((!cap_valid(arg3)) | arg4 | arg5)) return -EINVAL;
new = prepare_creds(); if (!new) return -ENOMEM; if (arg2 == PR_CAP_AMBIENT_RAISE)
cap_raise(new->cap_ambient, arg3); else
cap_lower(new->cap_ambient, arg3); return commit_creds(new);
}
default: /* No functionality available - continue with default */ return -ENOSYS;
}
}
/**
 * cap_vm_enough_memory - Decide whether a new virtual mapping is permitted
 * @mm: The VM space in which the new mapping is to be made (not consulted)
 * @pages: The size of the mapping (not consulted)
 *
 * Checks whether the current task's credentials carry CAP_SYS_ADMIN with
 * respect to the initial user namespace, with auditing suppressed
 * (CAP_OPT_NOAUDIT).
 *
 * Return: 0 if permission granted, negative error code if not.
 */
int cap_vm_enough_memory(struct mm_struct *mm, long pages)
{
	const struct cred *cred = current_cred();

	return cap_capable(cred, &init_user_ns, CAP_SYS_ADMIN, CAP_OPT_NOAUDIT);
}
/** * cap_mmap_addr - check if able to map given addr * @addr: address attempting to be mapped * * If the process is attempting to map memory below dac_mmap_min_addr they need * CAP_SYS_RAWIO. The other parameters to this function are unused by the * capability security module. * * Return: 0 if this mapping should be allowed or -EPERM if not.
*/ int cap_mmap_addr(unsignedlong addr)
{ int ret = 0;
if (addr < dac_mmap_min_addr) {
ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
CAP_OPT_NONE); /* set PF_SUPERPRIV if it turns out we allow the low mmap */ if (ret == 0)
current->flags |= PF_SUPERPRIV;
} return ret;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.