/* Remove suid, sgid, and file capabilities on truncate too */
ret = dentry_needs_remove_privs(idmap, dentry); if (ret < 0) return ret; if (ret)
newattrs.ia_valid |= ret | ATTR_FORCE;
ret = inode_lock_killable(dentry->d_inode); if (ret) return ret;
/* Note any delegations or leases have already been broken: */
ret = notify_change(idmap, dentry, &newattrs, NULL);
inode_unlock(dentry->d_inode); return ret;
}
int vfs_truncate(conststruct path *path, loff_t length)
{ struct mnt_idmap *idmap; struct inode *inode; int error;
inode = path->dentry->d_inode;
/* For directories it's -EISDIR, for other non-regulars - -EINVAL */ if (S_ISDIR(inode->i_mode)) return -EISDIR; if (!S_ISREG(inode->i_mode)) return -EINVAL;
error = fsnotify_truncate_perm(path, length); if (error) return error;
error = mnt_want_write(path->mnt); if (error) return error;
error = -EPERM; if (IS_APPEND(inode)) goto mnt_drop_write_and_out;
error = get_write_access(inode); if (error) goto mnt_drop_write_and_out;
/* * Make sure that there are no leases. get_write_access() protects * against the truncate racing with a lease-granting setlease().
*/
error = break_lease(inode, O_WRONLY); if (error) goto put_write_and_out;
int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{ struct inode *inode = file_inode(file); int ret;
loff_t sum;
if (offset < 0 || len <= 0) return -EINVAL;
if (mode & ~(FALLOC_FL_MODE_MASK | FALLOC_FL_KEEP_SIZE)) return -EOPNOTSUPP;
/* * Modes are exclusive, even if that is not obvious from the encoding * as bit masks and the mix with the flag in the same namespace. * * To make things even more complicated, FALLOC_FL_ALLOCATE_RANGE is * encoded as no bit set.
*/ switch (mode & FALLOC_FL_MODE_MASK) { case FALLOC_FL_ALLOCATE_RANGE: case FALLOC_FL_UNSHARE_RANGE: case FALLOC_FL_ZERO_RANGE: break; case FALLOC_FL_PUNCH_HOLE: if (!(mode & FALLOC_FL_KEEP_SIZE)) return -EOPNOTSUPP; break; case FALLOC_FL_COLLAPSE_RANGE: case FALLOC_FL_INSERT_RANGE: case FALLOC_FL_WRITE_ZEROES: if (mode & FALLOC_FL_KEEP_SIZE) return -EOPNOTSUPP; break; default: return -EOPNOTSUPP;
}
if (!(file->f_mode & FMODE_WRITE)) return -EBADF;
/* * On append-only files only space preallocation is supported.
*/ if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode)) return -EPERM;
if (IS_IMMUTABLE(inode)) return -EPERM;
/* * We cannot allow any fallocate operation on an active swapfile
*/ if (IS_SWAPFILE(inode)) return -ETXTBSY;
/* * Revalidate the write permissions, in case security policy has * changed since the files were opened.
*/
ret = security_file_permission(file, MAY_WRITE); if (ret) return ret;
ret = fsnotify_file_area_perm(file, MAY_WRITE, &offset, len); if (ret) return ret;
if (S_ISFIFO(inode->i_mode)) return -ESPIPE;
if (S_ISDIR(inode->i_mode)) return -EISDIR;
if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) return -ENODEV;
/* Check for wraparound */ if (check_add_overflow(offset, len, &sum)) return -EFBIG;
if (sum > inode->i_sb->s_maxbytes) return -EFBIG;
if (!file->f_op->fallocate) return -EOPNOTSUPP;
file_start_write(file);
ret = file->f_op->fallocate(file, mode, offset, len);
/* * Create inotify and fanotify events. * * To keep the logic simple always create events if fallocate succeeds. * This implies that events are even created if the file size remains * unchanged, e.g. when using flag FALLOC_FL_KEEP_SIZE.
*/ if (ret == 0)
fsnotify_modify(file);
/* * access() needs to use the real uid/gid, not the effective uid/gid. * We do this by temporarily clearing all FS-related capabilities and * switching the fsuid/fsgid around to the real ones. * * Creating new credentials is expensive, so we try to skip doing it, * which we can if the result would match what we already got.
*/ staticbool access_need_override_creds(int flags)
{ conststruct cred *cred;
if (flags & AT_EACCESS) returnfalse;
cred = current_cred(); if (!uid_eq(cred->fsuid, cred->uid) ||
!gid_eq(cred->fsgid, cred->gid)) returntrue;
if (!issecure(SECURE_NO_SETUID_FIXUP)) {
kuid_t root_uid = make_kuid(cred->user_ns, 0); if (!uid_eq(cred->uid, root_uid)) { if (!cap_isclear(cred->cap_effective)) returntrue;
} else { if (!cap_isidentical(cred->cap_effective,
cred->cap_permitted)) returntrue;
}
}
override_cred = prepare_creds(); if (!override_cred) return NULL;
/* * XXX access_need_override_creds performs checks in hopes of skipping * this work. Make sure it stays in sync if making any changes in this * routine.
*/
if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */
kuid_t root_uid = make_kuid(override_cred->user_ns, 0); if (!uid_eq(override_cred->uid, root_uid))
cap_clear(override_cred->cap_effective); else
override_cred->cap_effective =
override_cred->cap_permitted;
}
/* * The new set of credentials can *only* be used in * task-synchronous circumstances, and does not need * RCU freeing, unless somebody then takes a separate * reference to it. * * NOTE! This is _only_ true because this credential * is used purely for override_creds() that installs * it as the subjective cred. Other threads will be * accessing ->real_cred, not the subjective cred. * * If somebody _does_ make a copy of this (using the * 'get_current_cred()' function), that will clear the * non_rcu field, because now that other user may be * expecting RCU freeing. But normal thread-synchronous * cred accesses will keep things non-racy to avoid RCU * freeing.
*/
override_cred->non_rcu = 1; return override_creds(override_cred);
}
staticint do_faccessat(int dfd, constchar __user *filename, int mode, int flags)
{ struct path path; struct inode *inode; int res; unsignedint lookup_flags = LOOKUP_FOLLOW; conststruct cred *old_cred = NULL;
if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) return -EINVAL;
if (flags & AT_SYMLINK_NOFOLLOW)
lookup_flags &= ~LOOKUP_FOLLOW; if (flags & AT_EMPTY_PATH)
lookup_flags |= LOOKUP_EMPTY;
if (access_need_override_creds(flags)) {
old_cred = access_override_creds(); if (!old_cred) return -ENOMEM;
}
retry:
res = user_path_at(dfd, filename, lookup_flags, &path); if (res) goto out;
inode = d_backing_inode(path.dentry);
if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { /* * MAY_EXEC on regular files is denied if the fs is mounted * with the "noexec" flag.
*/
res = -EACCES; if (path_noexec(&path)) goto out_path_release;
}
res = inode_permission(mnt_idmap(path.mnt), inode, mode | MAY_ACCESS); /* SuS v2 requires we report a read only fs too */ if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) goto out_path_release; /* * This is a rare case where using __mnt_is_readonly() * is OK without a mnt_want/drop_write() pair. Since * no actual write to the fs is performed here, we do * not need to telegraph to that to anyone. * * By doing this, we accept that this access is * inherently racy and know that the fs may change * state before we even see this result.
*/ if (__mnt_is_readonly(path.mnt))
res = -EROFS;
out_path_release:
path_put(&path); if (retry_estale(res, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL; goto retry;
}
out: if (old_cred)
put_cred(revert_creds(old_cred));
/*
 * Check whether @kuid is valid and if so generate and set vfsuid_t in
 * ia_vfsuid.
 *
 * When @kuid is valid, ATTR_UID is or'ed into @attr->ia_valid and
 * @attr->ia_vfsuid is initialised from @kuid with VFSUIDT_INIT().
 * When it is not valid, @attr is left untouched.
 *
 * Return: true if @kuid is valid, false if not.
 */
static inline bool setattr_vfsuid(struct iattr *attr, kuid_t kuid)
{
	if (!uid_valid(kuid))
		return false;

	attr->ia_valid |= ATTR_UID;
	attr->ia_vfsuid = VFSUIDT_INIT(kuid);
	return true;
}
/*
 * Check whether @kgid is valid and if so generate and set vfsgid_t in
 * ia_vfsgid.
 *
 * When @kgid is valid, ATTR_GID is or'ed into @attr->ia_valid and
 * @attr->ia_vfsgid is initialised from @kgid with VFSGIDT_INIT().
 * When it is not valid, @attr is left untouched.
 *
 * Return: true if @kgid is valid, false if not.
 */
static inline bool setattr_vfsgid(struct iattr *attr, kgid_t kgid)
{
	if (!gid_valid(kgid))
		return false;

	attr->ia_valid |= ATTR_GID;
	attr->ia_vfsgid = VFSGIDT_INIT(kgid);
	return true;
}
error = security_file_open(f); if (error) goto cleanup_all;
/* * Call fsnotify open permission hook and set FMODE_NONOTIFY_* bits * according to existing permission watches. * If FMODE_NONOTIFY mode was already set for an fanotify fd or for a * pseudo file, this call will not change the mode.
*/
error = fsnotify_open_perm_and_set_mode(f); if (error) goto cleanup_all;
error = break_lease(file_inode(f), f->f_flags); if (error) goto cleanup_all;
/* normally all 3 are set; ->open() can clear them if needed */
f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; if (!open)
open = f->f_op->open; if (open) {
error = open(inode, f); if (error) goto cleanup_all;
}
f->f_mode |= FMODE_OPENED; if ((f->f_mode & FMODE_READ) &&
likely(f->f_op->read || f->f_op->read_iter))
f->f_mode |= FMODE_CAN_READ; if ((f->f_mode & FMODE_WRITE) &&
likely(f->f_op->write || f->f_op->write_iter))
f->f_mode |= FMODE_CAN_WRITE; if ((f->f_mode & FMODE_LSEEK) && !f->f_op->llseek)
f->f_mode &= ~FMODE_LSEEK; if (f->f_mapping->a_ops && f->f_mapping->a_ops->direct_IO)
f->f_mode |= FMODE_CAN_ODIRECT;
if ((f->f_flags & O_DIRECT) && !(f->f_mode & FMODE_CAN_ODIRECT)) return -EINVAL;
/* * XXX: Huge page cache doesn't support writing yet. Drop all page * cache for this file before processing writes.
*/ if (f->f_mode & FMODE_WRITE) { /* * Depends on full fence from get_write_access() to synchronize * against collapse_file() regarding i_writecount and nr_thps * updates. Ensures subsequent insertion of THPs into the page * cache will fail.
*/ if (filemap_nr_thps(inode->i_mapping)) { struct address_space *mapping = inode->i_mapping;
filemap_invalidate_lock(inode->i_mapping); /* * unmap_mapping_range just need to be called once * here, because the private pages is not need to be * unmapped mapping (e.g. data segment of dynamic * shared libraries here).
*/
unmap_mapping_range(mapping, 0, 0, 0);
truncate_inode_pages(mapping, 0);
filemap_invalidate_unlock(inode->i_mapping);
}
}
/** * finish_open - finish opening a file * @file: file pointer * @dentry: pointer to dentry * @open: open callback * * This can be used to finish opening a file passed to i_op->atomic_open(). * * If the open callback is set to NULL, then the standard f_op->open() * filesystem callback is substituted. * * NB: the dentry reference is _not_ consumed. If, for example, the dentry is * the return value of d_splice_alias(), then the caller needs to perform dput() * on it after finish_open(). * * Returns zero on success or -errno if the open failed.
*/ int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *))
{
BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
/**
 * finish_no_open - finish ->atomic_open() without opening the file
 *
 * @file: file pointer
 * @dentry: dentry, ERR_PTR(-E...) or NULL (as returned from ->lookup())
 *
 * This can be used to set the result of a lookup in ->atomic_open(): a
 * non-error @dentry is stored in @file->f_path.dentry, while an error
 * pointer is converted to its errno and returned.
 *
 * NB: unlike finish_open() this function does consume the dentry reference and
 * the caller need not dput() it.
 *
 * Returns 0 or -E..., which must be the return value of ->atomic_open() after
 * having called this function.
 */
int finish_no_open(struct file *file, struct dentry *dentry)
{
	if (!IS_ERR(dentry)) {
		file->f_path.dentry = dentry;
		return 0;
	}

	return PTR_ERR(dentry);
}
EXPORT_SYMBOL(finish_no_open);
/** * vfs_open - open the file at the given path * @path: path to open * @file: newly allocated file with f_flag initialized
*/ int vfs_open(conststruct path *path, struct file *file)
{ int ret;
file->f_path = *path;
ret = do_dentry_open(file, NULL); if (!ret) { /* * Once we return a file with FMODE_OPENED, __fput() will call * fsnotify_close(), so we need fsnotify_open() here for * symmetry.
*/
fsnotify_open(file);
} return ret;
}
struct file *dentry_open(conststruct path *path, int flags, conststruct cred *cred)
{ int error; struct file *f;
/* We must always pass in a valid mount pointer. */
BUG_ON(!path->mnt);
f = alloc_empty_file(flags, cred); if (!IS_ERR(f)) {
error = vfs_open(path, f); if (error) {
fput(f);
f = ERR_PTR(error);
}
} return f;
}
EXPORT_SYMBOL(dentry_open);
struct file *dentry_open_nonotify(conststruct path *path, int flags, conststruct cred *cred)
{ struct file *f = alloc_empty_file(flags, cred); if (!IS_ERR(f)) { int error;
file_set_fsnotify_mode(f, FMODE_NONOTIFY);
error = vfs_open(path, f); if (error) {
fput(f);
f = ERR_PTR(error);
}
} return f;
}
/** * dentry_create - Create and open a file * @path: path to create * @flags: O_ flags * @mode: mode bits for new file * @cred: credentials to use * * Caller must hold the parent directory's lock, and have prepared * a negative dentry, placed in @path->dentry, for the new file. * * Caller sets @path->mnt to the vfsmount of the filesystem where * the new file is to be created. The parent directory and the * negative dentry must reside on the same filesystem instance. * * On success, returns a "struct file *". Otherwise a ERR_PTR * is returned.
*/ struct file *dentry_create(conststruct path *path, int flags, umode_t mode, conststruct cred *cred)
{ struct file *f; int error;
f = alloc_empty_file(flags, cred); if (IS_ERR(f)) return f;
/** * kernel_file_open - open a file for kernel internal use * @path: path of the file to open * @flags: open flags * @cred: credentials for open * * Open a file for use by in-kernel consumers. The file is not accounted * against nr_files and must not be installed into the file descriptor * table. * * Return: Opened file on success, an error pointer on failure.
*/ struct file *kernel_file_open(conststruct path *path, int flags, conststruct cred *cred)
{ struct file *f; int error;
f = alloc_empty_file_noaccount(flags, cred); if (IS_ERR(f)) return f;
/* O_PATH beats everything else. */ if (how.flags & O_PATH)
how.flags &= O_PATH_FLAGS; /* Modes should only be set for create-like flags. */ if (!WILL_CREATE(how.flags))
how.mode = 0; return how;
}
/* * Strip flags that aren't relevant in determining struct open_flags.
*/
flags &= ~strip;
/* * Older syscalls implicitly clear all of the invalid flags or argument * values before calling build_open_flags(), but openat2(2) checks all * of its arguments.
*/ if (flags & ~VALID_OPEN_FLAGS) return -EINVAL; if (how->resolve & ~VALID_RESOLVE_FLAGS) return -EINVAL;
/* Scoping flags are mutually exclusive. */ if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT)) return -EINVAL;
/* Deal with the mode. */ if (WILL_CREATE(flags)) { if (how->mode & ~S_IALLUGO) return -EINVAL;
op->mode = how->mode | S_IFREG;
} else { if (how->mode != 0) return -EINVAL;
op->mode = 0;
}
/* * Block bugs where O_DIRECTORY | O_CREAT created regular files. * Note, that blocking O_DIRECTORY | O_CREAT here also protects * O_TMPFILE below which requires O_DIRECTORY being raised.
*/ if ((flags & (O_DIRECTORY | O_CREAT)) == (O_DIRECTORY | O_CREAT)) return -EINVAL;
/* Now handle the creative implementation of O_TMPFILE. */ if (flags & __O_TMPFILE) { /* * In order to ensure programs get explicit errors when trying * to use O_TMPFILE on old kernels we enforce that O_DIRECTORY * is raised alongside __O_TMPFILE.
*/ if (!(flags & O_DIRECTORY)) return -EINVAL; if (!(acc_mode & MAY_WRITE)) return -EINVAL;
} if (flags & O_PATH) { /* O_PATH only permits certain other flags to be set. */ if (flags & ~O_PATH_FLAGS) return -EINVAL;
acc_mode = 0;
}
/*
 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
 * check for O_DSYNC if they need any syncing at all we enforce it's
 * always set instead of having to deal with possibly weird behaviour
 * for malicious applications setting only __O_SYNC.
*/ if (flags & __O_SYNC)
flags |= O_DSYNC;
op->open_flag = flags;
/* O_TRUNC implies we need access checks for write permissions */ if (flags & O_TRUNC)
acc_mode |= MAY_WRITE;
/* Allow the LSM permission hook to distinguish append
access from general write access. */ if (flags & O_APPEND)
acc_mode |= MAY_APPEND;
if (flags & O_DIRECTORY)
lookup_flags |= LOOKUP_DIRECTORY; if (!(flags & O_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
if (how->resolve & RESOLVE_NO_XDEV)
lookup_flags |= LOOKUP_NO_XDEV; if (how->resolve & RESOLVE_NO_MAGICLINKS)
lookup_flags |= LOOKUP_NO_MAGICLINKS; if (how->resolve & RESOLVE_NO_SYMLINKS)
lookup_flags |= LOOKUP_NO_SYMLINKS; if (how->resolve & RESOLVE_BENEATH)
lookup_flags |= LOOKUP_BENEATH; if (how->resolve & RESOLVE_IN_ROOT)
lookup_flags |= LOOKUP_IN_ROOT; if (how->resolve & RESOLVE_CACHED) { /* Don't bother even trying for create/truncate/tmpfile open */ if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE)) return -EAGAIN;
lookup_flags |= LOOKUP_CACHED;
}
op->lookup_flags = lookup_flags; return 0;
}
/**
 * file_open_name - open file and return file pointer
 *
 * @name:	struct filename containing path to open
 * @flags:	open flags as per the open(2) second argument
 * @mode:	mode for the new file if O_CREAT is set, else ignored
 *
 * This is the helper to open a file from kernelspace if you really
 * have to.  But in general you should not do this, so please move
 * along, nothing to see here.
 *
 * The open_how built from @flags and @mode is turned into open_flags;
 * if that conversion fails its error is returned as an ERR_PTR(),
 * otherwise the result of do_filp_open() relative to AT_FDCWD is returned.
 */
struct file *file_open_name(struct filename *name, int flags, umode_t mode)
{
	struct open_how how;
	struct open_flags op;
	int err;

	how = build_open_how(flags, mode);
	err = build_open_flags(&how, &op);
	if (!err)
		return do_filp_open(AT_FDCWD, name, &op);

	return ERR_PTR(err);
}
/**
 * filp_open - open file and return file pointer
 *
 * @filename:	path to open
 * @flags:	open flags as per the open(2) second argument
 * @mode:	mode for the new file if O_CREAT is set, else ignored
 *
 * This is the helper to open a file from kernelspace if you really
 * have to.  But in general you should not do this, so please move
 * along, nothing to see here.
*/ struct file *filp_open(constchar *filename, int flags, umode_t mode)
{ struct filename *name = getname_kernel(filename); struct file *file = ERR_CAST(name);
if (unlikely(usize < OPEN_HOW_SIZE_VER0)) return -EINVAL; if (unlikely(usize > PAGE_SIZE)) return -E2BIG;
err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize); if (err) return err;
audit_openat2_how(&tmp);
/* O_LARGEFILE is only allowed for non-O_PATH. */ if (!(tmp.flags & O_PATH) && force_o_largefile())
tmp.flags |= O_LARGEFILE;
return do_sys_openat2(dfd, filename, &tmp);
}
#ifdef CONFIG_COMPAT
/*
 * Exactly like sys_open(), except that it doesn't set the
 * O_LARGEFILE flag.
 */
COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
{
	return do_sys_open(AT_FDCWD, filename, flags, mode);
}

/*
 * Exactly like sys_openat(), except that it doesn't set the
 * O_LARGEFILE flag.
 */
COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
{
	return do_sys_open(dfd, filename, flags, mode);
}
#endif
#ifndef __alpha__
/* * For backward compatibility? Maybe this should be moved * into arch/i386 instead?
*/
SYSCALL_DEFINE2(creat, constchar __user *, pathname, umode_t, mode)
{ int flags = O_CREAT | O_WRONLY | O_TRUNC;
/*
 * Flush @filp on behalf of owner @id via filp_flush(), then hand the file
 * to fput_close().  fput_close() is called whatever the flush returned.
 *
 * Return: the value returned by filp_flush().
 */
int filp_close(struct file *filp, fl_owner_t id)
{
	const int flush_status = filp_flush(filp, id);

	fput_close(filp);

	return flush_status;
}
EXPORT_SYMBOL(filp_close);
/* * Careful here! We test whether the file pointer is NULL before * releasing the fd. This ensures that one clone task can't release * an fd while another clone is opening it.
*/
SYSCALL_DEFINE1(close, unsignedint, fd)
{ int retval; struct file *file;
file = file_close_fd(fd); if (!file) return -EBADF;
retval = filp_flush(file, current->files);
/* * We're returning to user space. Don't bother * with any delayed fput() cases.
*/
fput_close_sync(file);
if (likely(retval == 0)) return 0;
/* can't restart close syscall because file table entry was cleared */ if (retval == -ERESTARTSYS ||
retval == -ERESTARTNOINTR ||
retval == -ERESTARTNOHAND ||
retval == -ERESTART_RESTARTBLOCK)
retval = -EINTR;
return retval;
}
/*
 * This routine simulates a hangup on the tty, to arrange that users
 * are given clean terminals at login time.
 *
 * Callers lacking CAP_SYS_TTY_CONFIG get -EPERM; otherwise
 * tty_vhangup_self() is invoked and 0 is returned.
 */
SYSCALL_DEFINE0(vhangup)
{
	if (!capable(CAP_SYS_TTY_CONFIG))
		return -EPERM;

	tty_vhangup_self();
	return 0;
}
/*
 * Called when an inode is about to be open.
 * We use this to disallow opening large files on 32bit systems if
 * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
 * on this flag in sys_open.
 *
 * Returns -EOVERFLOW when O_LARGEFILE is absent from @filp->f_flags and
 * the inode size exceeds MAX_NON_LFS, 0 otherwise.
 */
int generic_file_open(struct inode *inode, struct file *filp)
{
	/* With O_LARGEFILE any size is acceptable; the size is not read. */
	if (filp->f_flags & O_LARGEFILE)
		return 0;

	if (i_size_read(inode) > MAX_NON_LFS)
		return -EOVERFLOW;

	return 0;
}
EXPORT_SYMBOL(generic_file_open);
/*
 * This is used by subsystems that don't want seekable
 * file descriptors. The function is not supposed to ever fail, the only
 * reason it returns an 'int' and not 'void' is so that it can be plugged
 * directly into file_operations structure.
 *
 * Clears FMODE_LSEEK, FMODE_PREAD and FMODE_PWRITE in @filp->f_mode;
 * @inode is not used.  Always returns 0.
 */
int nonseekable_open(struct inode *inode, struct file *filp)
{
	filp->f_mode &= ~FMODE_LSEEK;
	filp->f_mode &= ~FMODE_PREAD;
	filp->f_mode &= ~FMODE_PWRITE;

	return 0;
}
EXPORT_SYMBOL(nonseekable_open);
/*
 * stream_open is used by subsystems that want stream-like file descriptors.
 * Such file descriptors are not seekable and don't have notion of position
 * (file.f_pos is always 0 and ppos passed to .read()/.write() is always NULL).
 * Contrary to file descriptors of other regular files, .read() and .write()
 * can run simultaneously.
 *
 * stream_open never fails and is marked to return int so that it could be
 * directly used as file_operations.open .
 *
 * Clears FMODE_LSEEK, FMODE_PREAD, FMODE_PWRITE and FMODE_ATOMIC_POS in
 * @filp->f_mode and sets FMODE_STREAM; @inode is not used.
 */
int stream_open(struct inode *inode, struct file *filp)
{
	filp->f_mode = (filp->f_mode & ~(FMODE_LSEEK | FMODE_PREAD |
					 FMODE_PWRITE | FMODE_ATOMIC_POS)) |
		       FMODE_STREAM;

	return 0;
}
EXPORT_SYMBOL(stream_open);
¤ Dauer der Verarbeitung: 0.51 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.