/* * O_APPEND cannot be cleared if the file is marked as append-only * and the file is open for write.
*/ if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode)) return -EPERM;
/* O_NOATIME can only be set by the owner or superuser */ if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) if (!inode_owner_or_capable(file_mnt_idmap(filp), inode)) return -EPERM;
/* required for strict SunOS emulation */ if (O_NONBLOCK != O_NDELAY) if (arg & O_NDELAY)
arg |= O_NONBLOCK;
/* Pipe packetized mode is controlled by O_DIRECT flag */ if (!S_ISFIFO(inode->i_mode) &&
(arg & O_DIRECT) &&
!(filp->f_mode & FMODE_CAN_ODIRECT)) return -EINVAL;
if (filp->f_op->check_flags)
error = filp->f_op->check_flags(arg); if (error) return error;
/* * ->fasync() is responsible for setting the FASYNC bit.
*/ if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); if (error < 0) goto out; if (error > 0)
error = 0;
}
spin_lock(&filp->f_lock);
filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
filp->f_iocb_flags = iocb_flags(filp);
spin_unlock(&filp->f_lock);
out: return error;
}
/*
 * Lazily allocate file->f_owner, coping with racing allocators:
 * whichever task wins the cmpxchg() installs its copy, the loser
 * frees its own. Returns 0 on success, -ENOMEM on allocation failure.
 */
int file_f_owner_allocate(struct file *file)
{
	struct fown_struct *owner = file_f_owner(file);

	/* Fast path: somebody already set it up. */
	if (owner)
		return 0;

	owner = kzalloc(sizeof(*owner), GFP_KERNEL);
	if (!owner)
		return -ENOMEM;

	rwlock_init(&owner->lock);
	owner->file = file;
	/* Publish; if somebody else raced us, drop our allocation. */
	if (unlikely(cmpxchg(&file->f_owner, NULL, owner)))
		kfree(owner);
	return 0;
}
EXPORT_SYMBOL(file_f_owner_allocate);
switch (hint) { case RWH_WRITE_LIFE_NOT_SET: case RWH_WRITE_LIFE_NONE: case RWH_WRITE_LIFE_SHORT: case RWH_WRITE_LIFE_MEDIUM: case RWH_WRITE_LIFE_LONG: case RWH_WRITE_LIFE_EXTREME: returntrue; default: returnfalse;
}
}
if (!inode_owner_or_capable(file_mnt_idmap(file), inode)) return -EPERM;
if (copy_from_user(&hint, argp, sizeof(hint))) return -EFAULT; if (!rw_hint_valid(hint)) return -EINVAL;
WRITE_ONCE(inode->i_write_hint, hint);
/* * file->f_mapping->host may differ from inode. As an example, * blkdev_open() modifies file->f_mapping.
*/ if (file->f_mapping->host != inode)
WRITE_ONCE(file->f_mapping->host->i_write_hint, hint);
return 0;
}
/*
 * Is the file descriptor a dup of the file?
 * Returns 1 if fd refers to the same struct file as @filp, 0 if not,
 * -EBADF if fd is not a valid descriptor.
 */
static long f_dupfd_query(int fd, struct file *filp)
{
	CLASS(fd_raw, f)(fd);

	if (fd_empty(f))
		return -EBADF;

	/*
	 * We can do the 'fdput()' immediately, as the only thing that
	 * matters is the pointer value which isn't changed by the fdput.
	 *
	 * Technically we didn't need a ref at all, and 'fdget()' was
	 * overkill, but given our lockless file pointer lookup, the
	 * alternatives are complicated.
	 */
	return fd_file(f) == filp;
}
/* Let the caller figure out whether a given file was just created. */ staticlong f_created_query(conststruct file *filp)
{ return !!(filp->f_mode & FMODE_CREATED);
}
/*
 * Get (setsig == false) or set (setsig == true) the signal number sent
 * on async I/O notification. Setting allocates file->f_owner if needed;
 * getting returns 0 when no f_owner was ever allocated.
 */
static int f_owner_sig(struct file *filp, int signum, bool setsig)
{
	int ret = 0;
	struct fown_struct *f_owner;

	might_sleep();

	if (setsig) {
		if (!valid_signal(signum))
			return -EINVAL;

		/* F_SETSIG needs an f_owner to store the signal number in. */
		ret = file_f_owner_allocate(filp);
		if (ret)
			return ret;
	}

	f_owner = file_f_owner(filp);
	if (setsig)
		f_owner->signum = signum;
	else if (f_owner)
		ret = f_owner->signum;
	return ret;
}
staticlong do_fcntl(int fd, unsignedint cmd, unsignedlong arg, struct file *filp)
{ void __user *argp = (void __user *)arg; int argi = (int)arg; struct flock flock; long err = -EINVAL;
switch (cmd) { case F_CREATED_QUERY:
err = f_created_query(filp); break; case F_DUPFD:
err = f_dupfd(argi, filp, 0); break; case F_DUPFD_CLOEXEC:
err = f_dupfd(argi, filp, O_CLOEXEC); break; case F_DUPFD_QUERY:
err = f_dupfd_query(argi, filp); break; case F_GETFD:
err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; break; case F_SETFD:
err = 0;
set_close_on_exec(fd, argi & FD_CLOEXEC); break; case F_GETFL:
err = filp->f_flags; break; case F_SETFL:
err = setfl(fd, filp, argi); break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ case F_OFD_GETLK: #endif case F_GETLK: if (copy_from_user(&flock, argp, sizeof(flock))) return -EFAULT;
err = fcntl_getlk(filp, cmd, &flock); if (!err && copy_to_user(argp, &flock, sizeof(flock))) return -EFAULT; break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ case F_OFD_SETLK: case F_OFD_SETLKW:
fallthrough; #endif case F_SETLK: case F_SETLKW: if (copy_from_user(&flock, argp, sizeof(flock))) return -EFAULT;
err = fcntl_setlk(fd, filp, cmd, &flock); break; case F_GETOWN: /* * XXX If f_owner is a process group, the * negative return value will get converted * into an error. Oops. If we keep the * current syscall conventions, the only way * to fix this will be in libc.
*/
err = f_getown(filp);
force_successful_syscall_return(); break; case F_SETOWN:
err = f_setown(filp, argi, 1); break; case F_GETOWN_EX:
err = f_getown_ex(filp, arg); break; case F_SETOWN_EX:
err = f_setown_ex(filp, arg); break; case F_GETOWNER_UIDS:
err = f_getowner_uids(filp, arg); break; case F_GETSIG:
err = f_owner_sig(filp, 0, false); break; case F_SETSIG:
err = f_owner_sig(filp, argi, true); break; case F_GETLEASE:
err = fcntl_getlease(filp); break; case F_SETLEASE:
err = fcntl_setlease(fd, filp, argi); break; case F_NOTIFY:
err = fcntl_dirnotify(fd, filp, argi); break; case F_SETPIPE_SZ: case F_GETPIPE_SZ:
err = pipe_fcntl(filp, cmd, argi); break; case F_ADD_SEALS: case F_GET_SEALS:
err = memfd_fcntl(filp, cmd, argi); break; case F_GET_RW_HINT:
err = fcntl_get_rw_hint(filp, cmd, arg); break; case F_SET_RW_HINT:
err = fcntl_set_rw_hint(filp, cmd, arg); break; default: break;
} return err;
}
/*
 * Commands that are safe on any file, including O_PATH descriptors:
 * returns 1 for those, 0 for everything else.
 */
static int check_fcntl_cmd(unsigned cmd)
{
	switch (cmd) {
	case F_CREATED_QUERY:
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
	case F_DUPFD_QUERY:
	case F_GETFD:
	case F_SETFD:
	case F_GETFL:
		return 1;
	}
	return 0;
}
staticunsignedint
convert_fcntl_cmd(unsignedint cmd)
{ switch (cmd) { case F_GETLK64: return F_GETLK; case F_SETLK64: return F_SETLK; case F_SETLKW64: return F_SETLKW;
}
return cmd;
}
/*
 * GETLK was successful and we need to return the data, but it needs to fit in
 * the compat structure.
 * l_start shouldn't be too big, unless the original start + end is greater than
 * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
 * -EOVERFLOW in that case.  l_len could be too big, in which case we just
 * truncate it, and only allow the app to see that part of the conflicting lock
 * that might make sense to it anyway
 */
static int fixup_compat_flock(struct flock *flock)
{
	if (flock->l_start > COMPAT_OFF_T_MAX)
		return -EOVERFLOW;
	if (flock->l_len > COMPAT_OFF_T_MAX)
		flock->l_len = COMPAT_OFF_T_MAX;
	return 0;
}
/*
 * Deliver the async I/O notification signal to a single task, after a
 * permission check. A non-default signum is queued as an rt signal with
 * the fd in si_fd; on queueing failure (or signum == 0) we fall back to
 * plain SIGIO.
 */
static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd, int reason, enum pid_type type)
{
	/*
	 * F_SETSIG can change ->signum lockless in parallel, make
	 * sure we read it once and use the same value throughout.
	 */
	int signum = READ_ONCE(fown->signum);

	if (!sigio_perm(p, fown, signum))
		return;

	switch (signum) {
	default: {
		kernel_siginfo_t si;

		/* Queue a rt signal with the appropriate fd as its
		   value.  We use SI_SIGIO as the source, not
		   SI_KERNEL, since kernel signals always get
		   delivered even if we can't queue.  Failure to
		   queue in this case _should_ be reported; we fall
		   back to SIGIO in that case. --sct */
		clear_siginfo(&si);
		si.si_signo = signum;
		si.si_errno = 0;
		si.si_code  = reason;
		/*
		 * Posix definies POLL_IN and friends to be signal
		 * specific si_codes for SIG_POLL.  Linux extended
		 * these si_codes to other signals in a way that is
		 * ambiguous if other signals also have signal
		 * specific si_codes.  In that case use SI_SIGIO instead
		 * to remove the ambiguity.
		 */
		if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
			si.si_code = SI_SIGIO;

		/* Make sure we are called with one of the POLL_*
		   reasons, otherwise we could leak kernel stack into
		   userspace.  */
		BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
		if (reason - POLL_IN >= NSIGPOLL)
			si.si_band  = ~0L;
		else
			si.si_band = mangle_poll(band_table[reason - POLL_IN]);
		si.si_fd    = fd;
		if (!do_send_sig_info(signum, &si, p, type))
			break;
	}
		fallthrough;	/* fall back on the old plain SIGIO signal */
	case 0:
		do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
	}
}
void send_sigio(struct fown_struct *fown, int fd, int band)
{ struct task_struct *p; enum pid_type type; unsignedlong flags; struct pid *pid;
read_lock_irqsave(&fown->lock, flags);
type = fown->pid_type;
pid = fown->pid; if (!pid) goto out_unlock_fown;
/* * Remove a fasync entry. If successfully removed, return * positive and clear the FASYNC flag. If no entry exists, * do nothing and return 0. * * NOTE! It is very important that the FASYNC flag always * match the state "is the filp on a fasync list". *
*/ int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{ struct fasync_struct *fa, **fp; int result = 0;
spin_lock(&filp->f_lock);
spin_lock(&fasync_lock); for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { if (fa->fa_file != filp) continue;
/*
 * Release a never-used fasync entry straight back to the slab cache.
 *
 * NOTE! This can be used only for unused fasync entries:
 * entries that actually got inserted on the fasync list
 * need to be released by rcu - see fasync_remove_entry.
 */
void fasync_free(struct fasync_struct *new)
{
	kmem_cache_free(fasync_cache, new);
}
/* * Insert a new entry into the fasync list. Return the pointer to the * old one if we didn't use the new one. * * NOTE! It is very important that the FASYNC flag always * match the state "is the filp on a fasync list".
*/ struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
{ struct fasync_struct *fa, **fp;
spin_lock(&filp->f_lock);
spin_lock(&fasync_lock); for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { if (fa->fa_file != filp) continue;
/*
 * Add a fasync entry. Return negative on error, positive if
 * added, and zero if did nothing but change an existing one.
 */
static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *new;

	new = fasync_alloc();
	if (!new)
		return -ENOMEM;

	/*
	 * fasync_insert_entry() returns the old (update) entry if
	 * it existed.
	 *
	 * So free the (unused) new entry and return 0 to let the
	 * caller know that we didn't add any new fasync entries.
	 */
	if (fasync_insert_entry(fd, filp, fapp, new)) {
		fasync_free(new);
		return 0;
	}

	return 1;
}
/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code. It returns negative on error, 0 if it did no changes
 * and positive if it added/deleted the entry.
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	/* @on selects between adding and removing the entry for @filp. */
	return on ? fasync_add_entry(fd, filp, fapp)
		  : fasync_remove_entry(filp, fapp);
}
EXPORT_SYMBOL(fasync_helper);
/*
 * Walk the fasync list and send @sig to every owner on it.
 * rcu_read_lock() is held by the caller.
 */
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct *fown;
		unsigned long flags;

		/* Corrupted entry: bail out rather than chase wild pointers. */
		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		read_lock_irqsave(&fa->fa_lock, flags);
		if (fa->fa_file) {
			fown = file_f_owner(fa->fa_file);
			if (!fown)
				goto next;
			/* Don't send SIGURG to processes which have not set a
			   queued signum: SIGURG has its own default signalling
			   mechanism. */
			if (!(sig == SIGURG && fown->signum == 0))
				send_sigio(fown, fa->fa_fd, band);
		}
next:
		read_unlock_irqrestore(&fa->fa_lock, flags);
		fa = rcu_dereference(fa->fa_next);
	}
}
/*
 * Notify everybody on the fasync list @fp that @sig/@band fired.
 * Takes the RCU read lock only when there is actually work to do.
 */
void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* Quick unlocked peek first: usually the list is empty. */
	if (!*fp)
		return;

	rcu_read_lock();
	kill_fasync_rcu(rcu_dereference(*fp), sig, band);
	rcu_read_unlock();
}
EXPORT_SYMBOL(kill_fasync);
staticint __init fcntl_init(void)
{ /* * Please add new bits here to ensure allocation uniqueness. * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others.
*/
BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ !=
HWEIGHT32(
(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
__FMODE_EXEC));
/*
 * NOTE(review): the German text below is extraneous website boilerplate
 * picked up when this file was scraped — it is not kernel source and
 * should be removed. Translation: "The information on this web page was
 * compiled carefully and to the best of our knowledge. However, neither
 * completeness, nor correctness, nor quality of the provided information
 * is guaranteed. Note: the colored syntax display is still experimental."
 */