// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/locks.c
 *
 * We implement four types of file locks: BSD locks, posix locks, open
 * file description locks, and leases. For details about BSD locks,
 * see the flock(2) man page; for details about the other three, see
 * fcntl(2).
 *
 *
 * Locking conflicts and dependencies:
 * If multiple threads attempt to lock the same byte (or flock the same file)
 * only one can be granted the lock, and the others must wait their turn.
 * The first lock has been "applied" or "granted", the others are "waiting"
 * and are "blocked" by the "applied" lock.
 *
 * Waiting and applied locks are all kept in trees whose properties are:
 *
 *	- the root of a tree may be an applied or waiting lock.
 *	- every other node in the tree is a waiting lock that
 *	  conflicts with every ancestor of that node.
 *
 * Every such tree begins life as a waiting singleton which obviously
 * satisfies the above properties.
 *
 * The only ways we modify trees preserve these properties:
 *
 *	1. We may add a new leaf node, but only after first verifying that it
 *	   conflicts with all of its ancestors.
 *	2. We may remove the root of a tree, creating a new singleton
 *	   tree from the root and N new trees rooted in the immediate
 *	   children.
 *	3. If the root of a tree is not currently an applied lock, we may
 *	   apply it (if possible).
 *	4. We may upgrade the root of the tree (either extend its range,
 *	   or upgrade its entire range from read to write).
 *
 * When an applied lock is modified in a way that reduces or downgrades any
 * part of its range, we remove all its children (2 above). This particularly
 * happens when a lock is unlocked.
 *
 * For each of those child trees we "wake up" the thread which is
 * waiting for the lock so it can continue handling as follows: if the
 * root of the tree applies, we do so (3). If it doesn't, it must
 * conflict with some applied lock. We remove (wake up) all of its children
 * (2), and add it as a new leaf to the tree rooted in the applied
 * lock (1).
We then repeat the process recursively with those * children. *
*/ #include <linux/capability.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/filelock.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/security.h> #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/time.h> #include <linux/rcupdate.h> #include <linux/pid_namespace.h> #include <linux/hashtable.h> #include <linux/percpu.h> #include <linux/sysctl.h>
/* * The global file_lock_list is only used for displaying /proc/locks, so we * keep a list on each CPU, with each list protected by its own spinlock. * Global serialization is done using file_rwsem. * * Note that alterations to the list also require that the relevant flc_lock is * held.
*/ struct file_lock_list_struct {
spinlock_t lock; struct hlist_head hlist;
}; static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
/* * The blocked_hash is used to find POSIX lock loops for deadlock detection. * It is protected by blocked_lock_lock. * * We hash locks by lockowner in order to optimize searching for the lock a * particular lockowner is waiting on. * * FIXME: make this value scale via some heuristic? We generally will want more * buckets when we have more lockowners holding locks, but that's a little * difficult to determine without knowing what the workload will look like.
*/ #define BLOCKED_HASH_BITS 7 static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
/* * This lock protects the blocked_hash. Generally, if you're accessing it, you * want to be holding this lock. * * In addition, it also protects the fl->fl_blocked_requests list, and the * fl->fl_blocker pointer for file_lock structures that are acting as lock * requests (in contrast to those that are acting as records of acquired locks). * * Note that when we acquire this lock in order to change the above fields, * we often hold the flc_lock as well. In certain cases, when reading the fields * protected by this lock, we can skip acquiring it iff we already hold the * flc_lock.
*/ static DEFINE_SPINLOCK(blocked_lock_lock);
/* * Assign the pointer if it's not already assigned. If it is, then * free the context we just allocated.
*/ if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
kmem_cache_free(flctx_cache, ctx);
ctx = locks_inode_context(inode);
}
out:
trace_locks_get_lock_context(inode, type, ctx); return ctx;
}
/* Free a lock which is not in use. */
void locks_free_lock(struct file_lock *fl)
{
	/* Drop any lmops/fs-private state before returning it to the cache. */
	locks_release_private(fl);
	kmem_cache_free(filelock_cache, fl);
}
EXPORT_SYMBOL(locks_free_lock);
/* Free a lease which is not in use. */
void locks_free_lease(struct file_lease *fl)
{
	kmem_cache_free(filelease_cache, fl);
}
EXPORT_SYMBOL(locks_free_lease);
/* * As ctx->flc_lock is held, new requests cannot be added to * ->flc_blocked_requests, so we don't need a lock to check if it * is empty.
*/ if (list_empty(&fl->c.flc_blocked_requests)) return;
spin_lock(&blocked_lock_lock);
list_splice_init(&fl->c.flc_blocked_requests,
&new->c.flc_blocked_requests);
list_for_each_entry(f, &new->c.flc_blocked_requests,
c.flc_blocked_member)
f->c.flc_blocker = &new->c;
spin_unlock(&blocked_lock_lock);
}
/*
 * Translate a flock(2) operation (LOCK_SH/LOCK_EX/LOCK_UN) into the
 * corresponding fcntl-style lock type, or -EINVAL for anything else.
 */
static inline int flock_translate_cmd(int cmd)
{
	if (cmd == LOCK_SH)
		return F_RDLCK;
	if (cmd == LOCK_EX)
		return F_WRLCK;
	if (cmd == LOCK_UN)
		return F_UNLCK;
	return -EINVAL;
}
/* Fill in a file_lock structure with an appropriate FLOCK lock. */ staticvoid flock_make_lock(struct file *filp, struct file_lock *fl, int type)
{
locks_init_lock(fl);
/* * fasync_insert_entry() returns the old entry if any. If there was no * old entry, then it used "priv" and inserted it into the fasync list. * Clear the pointer to indicate that it shouldn't be freed.
*/ if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa))
*priv = NULL;
/* Allocate a file_lock initialised to this type of lease */ staticstruct file_lease *lease_alloc(struct file *filp, int type)
{ struct file_lease *fl = locks_alloc_lease(); int error = -ENOMEM;
/*
 * Check if two locks overlap each other. Byte ranges are inclusive, so
 * the ranges intersect exactly when neither lock ends before the other
 * one starts.
 */
static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
{
	if (fl1->fl_end < fl2->fl_start)
		return 0;
	return fl2->fl_end >= fl1->fl_start;
}
/*
 * Check whether two locks have the same owner.
 */
static int posix_same_owner(struct file_lock_core *fl1,
			    struct file_lock_core *fl2)
{
	return fl1->flc_owner == fl2->flc_owner;
}
/* Must be called with the flc_lock held! */ staticvoid locks_insert_global_locks(struct file_lock_core *flc)
{ struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list);
/* Must be called with the flc_lock held! */ staticvoid locks_delete_global_locks(struct file_lock_core *flc)
{ struct file_lock_list_struct *fll;
percpu_rwsem_assert_held(&file_rwsem);
/* * Avoid taking lock if already unhashed. This is safe since this check * is done while holding the flc_lock, and new insertions into the list * also require that it be held.
*/ if (hlist_unhashed(&flc->flc_link)) return;
/* Remove waiter from blocker's block list.
 * When blocker ends up pointing to itself then the list is empty.
 *
 * Must be called with blocked_lock_lock held.
 */
static void __locks_unlink_block(struct file_lock_core *waiter)
{
	/* Drop it from the global blocked_hash first, then off the list. */
	locks_delete_global_blocked(waiter);
	list_del_init(&waiter->flc_blocked_member);
}
/* * The setting of flc_blocker to NULL marks the "done" * point in deleting a block. Paired with acquire at the top * of locks_delete_block().
*/
smp_store_release(&waiter->flc_blocker, NULL);
}
}
/*
 * Detach @waiter from its blocker (if any) and wake up everything that
 * was blocked on @waiter. Returns 0 if a blocker was actually present,
 * -ENOENT otherwise.
 */
static int __locks_delete_block(struct file_lock_core *waiter)
{
	int status = -ENOENT;

	/*
	 * If flc_blocker is NULL, it won't be set again as this thread "owns"
	 * the lock and is the only one that might try to claim the lock.
	 *
	 * We use acquire/release to manage flc_blocker so that we can
	 * optimize away taking the blocked_lock_lock in many cases.
	 *
	 * The smp_load_acquire guarantees two things:
	 *
	 * 1/ that flc_blocked_requests can be tested locklessly. If something
	 * was recently added to that list it must have been in a locked region
	 * *before* the locked region when flc_blocker was set to NULL.
	 *
	 * 2/ that no other thread is accessing 'waiter', so it is safe to free
	 * it. __locks_wake_up_blocks is careful not to touch waiter after
	 * flc_blocker is released.
	 *
	 * If a lockless check of flc_blocker shows it to be NULL, we know that
	 * no new locks can be inserted into its flc_blocked_requests list, and
	 * can avoid doing anything further if the list is empty.
	 */
	if (!smp_load_acquire(&waiter->flc_blocker) &&
	    list_empty(&waiter->flc_blocked_requests))
		return status;

	spin_lock(&blocked_lock_lock);
	if (waiter->flc_blocker)
		status = 0;
	__locks_wake_up_blocks(waiter);
	__locks_unlink_block(waiter);

	/*
	 * The setting of flc_blocker to NULL marks the "done" point in
	 * deleting a block. Paired with acquire at the top of this function.
	 */
	smp_store_release(&waiter->flc_blocker, NULL);
	spin_unlock(&blocked_lock_lock);
	return status;
}
/**
 * locks_delete_block - stop waiting for a file lock
 * @waiter: the lock which was waiting
 *
 * lockd/nfsd need to disconnect the lock while working on it.
 */
int locks_delete_block(struct file_lock *waiter)
{
	return __locks_delete_block(&waiter->c);
}
EXPORT_SYMBOL(locks_delete_block);
/* Insert waiter into blocker's block list. * We use a circular list so that processes can be easily woken up in * the order they blocked. The documentation doesn't require this but * it seems like the reasonable thing to do. * * Must be called with both the flc_lock and blocked_lock_lock held. The * fl_blocked_requests list itself is protected by the blocked_lock_lock, * but by ensuring that the flc_lock is also held on insertions we can avoid * taking the blocked_lock_lock in some cases when we see that the * fl_blocked_requests list is empty. * * Rather than just adding to the list, we check for conflicts with any existing * waiters, and add beneath any waiter that blocks the new waiter. * Thus wakeups don't happen until needed.
*/ staticvoid __locks_insert_block(struct file_lock_core *blocker, struct file_lock_core *waiter, bool conflict(struct file_lock_core *, struct file_lock_core *))
{ struct file_lock_core *flc;
if ((blocker->flc_flags & (FL_POSIX|FL_OFDLCK)) == FL_POSIX)
locks_insert_global_blocked(waiter);
/* The requests in waiter->flc_blocked are known to conflict with * waiter, but might not conflict with blocker, or the requests * and lock which block it. So they all need to be woken.
*/
__locks_wake_up_blocks(waiter);
}
/* Must be called with flc_lock held. */
static void locks_insert_block(struct file_lock_core *blocker,
			       struct file_lock_core *waiter,
			       bool conflict(struct file_lock_core *,
					     struct file_lock_core *))
{
	/* Take the global lock; the real work happens in the __ helper. */
	spin_lock(&blocked_lock_lock);
	__locks_insert_block(blocker, waiter, conflict);
	spin_unlock(&blocked_lock_lock);
}
/* * Wake up processes blocked waiting for blocker. * * Must be called with the inode->flc_lock held!
*/ staticvoid locks_wake_up_blocks(struct file_lock_core *blocker)
{ /* * Avoid taking global lock if list is empty. This is safe since new * blocked requests are only added to the list under the flc_lock, and * the flc_lock is always held here. Note that removal from the * fl_blocked_requests list does not require the flc_lock, so we must * recheck list_empty() after acquiring the blocked_lock_lock.
*/ if (list_empty(&blocker->flc_blocked_requests)) return;
/* Determine if lock sys_fl blocks lock caller_fl. Common functionality * checks for shared/exclusive status of overlapping locks.
*/ staticbool locks_conflict(struct file_lock_core *caller_flc, struct file_lock_core *sys_flc)
{ if (sys_flc->flc_type == F_WRLCK) returntrue; if (caller_flc->flc_type == F_WRLCK) returntrue; returnfalse;
}
/* Determine if lock sys_fl blocks lock caller_fl. POSIX specific * checking before calling the locks_conflict().
*/ staticbool posix_locks_conflict(struct file_lock_core *caller_flc, struct file_lock_core *sys_flc)
{ struct file_lock *caller_fl = file_lock(caller_flc); struct file_lock *sys_fl = file_lock(sys_flc);
/* POSIX locks owned by the same process do not conflict with * each other.
*/ if (posix_same_owner(caller_flc, sys_flc)) returnfalse;
/* Check whether they overlap */ if (!locks_overlap(caller_fl, sys_fl)) returnfalse;
return locks_conflict(caller_flc, sys_flc);
}
/* Determine if lock sys_fl blocks lock caller_fl. Used on xx_GETLK * path so checks for additional GETLK-specific things like F_UNLCK.
*/ staticbool posix_test_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
{ struct file_lock_core *caller = &caller_fl->c; struct file_lock_core *sys = &sys_fl->c;
/* F_UNLCK checks any locks on the same fd. */ if (lock_is_unlock(caller_fl)) { if (!posix_same_owner(caller, sys)) returnfalse; return locks_overlap(caller_fl, sys_fl);
} return posix_locks_conflict(caller, sys);
}
/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific * checking before calling the locks_conflict().
*/ staticbool flock_locks_conflict(struct file_lock_core *caller_flc, struct file_lock_core *sys_flc)
{ /* FLOCK locks referring to the same filp do not conflict with * each other.
*/ if (caller_flc->flc_file == sys_flc->flc_file) returnfalse;
/* * Deadlock detection: * * We attempt to detect deadlocks that are due purely to posix file * locks. * * We assume that a task can be waiting for at most one lock at a time. * So for any acquired lock, the process holding that lock may be * waiting on at most one other lock. That lock in turns may be held by * someone waiting for at most one other lock. Given a requested lock * caller_fl which is about to wait for a conflicting lock block_fl, we * follow this chain of waiters to ensure we are not about to create a * cycle. * * Since we do this before we ever put a process to sleep on a lock, we * are ensured that there is never a cycle; that is what guarantees that * the while() loop in posix_locks_deadlock() eventually completes. * * Note: the above assumption may not be true when handling lock * requests from a broken NFS client. It may also fail in the presence * of tasks (such as posix threads) sharing the same open file table. * To handle those cases, we just bail out after a few iterations. * * For FL_OFDLCK locks, the owner is the filp, not the files_struct. * Because the owner is not even nominally tied to a thread of * execution, the deadlock detection below can't reasonably work well. Just * skip it for those. * * In principle, we could do a more limited deadlock detection on FL_OFDLCK * locks that just checks for the case where two tasks are attempting to * upgrade from read to write locks on the same inode.
*/
#define MAX_DEADLK_ITERATIONS 10
/* Find a lock that the owner of the given @blocker is blocking on. */ staticstruct file_lock_core *what_owner_is_waiting_for(struct file_lock_core *blocker)
{ struct file_lock_core *flc;
/* Must be called with the blocked_lock_lock held! */ staticbool posix_locks_deadlock(struct file_lock *caller_fl, struct file_lock *block_fl)
{ struct file_lock_core *caller = &caller_fl->c; struct file_lock_core *blocker = &block_fl->c; int i = 0;
lockdep_assert_held(&blocked_lock_lock);
/* * This deadlock detector can't reasonably detect deadlocks with * FL_OFDLCK locks, since they aren't owned by a process, per-se.
*/ if (caller->flc_flags & FL_OFDLCK) returnfalse;
while ((blocker = what_owner_is_waiting_for(blocker))) { if (i++ > MAX_DEADLK_ITERATIONS) returnfalse; if (posix_same_owner(caller, blocker)) returntrue;
} returnfalse;
}
/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks * after any leases, but before any posix locks. * * Note that if called with an FL_EXISTS argument, the caller may determine * whether or not a lock was successfully freed by testing the return * value for -ENOENT.
*/ staticint flock_lock_inode(struct inode *inode, struct file_lock *request)
{ struct file_lock *new_fl = NULL; struct file_lock *fl; struct file_lock_context *ctx; int error = 0; bool found = false;
LIST_HEAD(dispose);
/* * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. * * In some cases we can be sure, that no new locks will be needed
*/ if (!(request->c.flc_flags & FL_ACCESS) &&
(request->c.flc_type != F_UNLCK ||
request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
new_fl = locks_alloc_lock();
new_fl2 = locks_alloc_lock();
}
retry:
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock); /* * New lock request. Walk all POSIX locks and look for conflicts. If * there are any, either return error or put the request on the * blocker's list of waiters and the global blocked_hash.
*/ if (request->c.flc_type != F_UNLCK) {
list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) { if (!posix_locks_conflict(&request->c, &fl->c)) continue; if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable
&& (*fl->fl_lmops->lm_lock_expirable)(fl)) {
owner = fl->fl_lmops->lm_mod_owner;
func = fl->fl_lmops->lm_expire_lock;
__module_get(owner);
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
(*func)();
module_put(owner); goto retry;
} if (conflock)
locks_copy_conflock(conflock, fl);
error = -EAGAIN; if (!(request->c.flc_flags & FL_SLEEP)) goto out; /* * Deadlock detection and insertion into the blocked * locks list must be done while holding the same lock!
*/
error = -EDEADLK;
spin_lock(&blocked_lock_lock); /* * Ensure that we don't find any locks blocked on this * request during deadlock detection.
*/
__locks_wake_up_blocks(&request->c); if (likely(!posix_locks_deadlock(request, fl))) {
error = FILE_LOCK_DEFERRED;
__locks_insert_block(&fl->c, &request->c,
posix_locks_conflict);
}
spin_unlock(&blocked_lock_lock); goto out;
}
}
/* If we're just looking for a conflict, we're done. */
error = 0; if (request->c.flc_flags & FL_ACCESS) goto out;
/* Find the first old lock with the same owner as the new lock */
list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) { if (posix_same_owner(&request->c, &fl->c)) break;
}
/* Process locks with this owner. */
list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, c.flc_list) { if (!posix_same_owner(&request->c, &fl->c)) break;
/* Detect adjacent or overlapping regions (if same lock type) */ if (request->c.flc_type == fl->c.flc_type) { /* In all comparisons of start vs end, use * "start - 1" rather than "end + 1". If end * is OFFSET_MAX, end + 1 will become negative.
*/ if (fl->fl_end < request->fl_start - 1) continue; /* If the next lock in the list has entirely bigger * addresses than the new one, insert the lock here.
*/ if (fl->fl_start - 1 > request->fl_end) break;
/* If we come here, the new and old lock are of the * same type and adjacent or overlapping. Make one * lock yielding from the lower start address of both * locks to the higher end address.
*/ if (fl->fl_start > request->fl_start)
fl->fl_start = request->fl_start; else
request->fl_start = fl->fl_start; if (fl->fl_end < request->fl_end)
fl->fl_end = request->fl_end; else
request->fl_end = fl->fl_end; if (added) {
locks_delete_lock_ctx(&fl->c, &dispose); continue;
}
request = fl;
added = true;
} else { /* Processing for different lock types is a bit * more complex.
*/ if (fl->fl_end < request->fl_start) continue; if (fl->fl_start > request->fl_end) break; if (lock_is_unlock(request))
added = true; if (fl->fl_start < request->fl_start)
left = fl; /* If the next lock in the list has a higher end * address than the new one, insert the new one here.
*/ if (fl->fl_end > request->fl_end) {
right = fl; break;
} if (fl->fl_start >= request->fl_start) { /* The new lock completely replaces an old * one (This may happen several times).
*/ if (added) {
locks_delete_lock_ctx(&fl->c, &dispose); continue;
} /* * Replace the old lock with new_fl, and * remove the old one. It's safe to do the * insert here since we know that we won't be * using new_fl later, and that the lock is * just replacing an existing lock.
*/
error = -ENOLCK; if (!new_fl) goto out;
locks_copy_lock(new_fl, request);
locks_move_blocks(new_fl, request);
request = new_fl;
new_fl = NULL;
locks_insert_lock_ctx(&request->c,
&fl->c.flc_list);
locks_delete_lock_ctx(&fl->c, &dispose);
added = true;
}
}
}
/* * The above code only modifies existing locks in case of merging or * replacing. If new lock(s) need to be inserted all modifications are * done below this, so it's safe yet to bail out.
*/
error = -ENOLCK; /* "no luck" */ if (right && left == right && !new_fl2) goto out;
error = 0; if (!added) { if (lock_is_unlock(request)) { if (request->c.flc_flags & FL_EXISTS)
error = -ENOENT; goto out;
}
if (!new_fl) {
error = -ENOLCK; goto out;
}
locks_copy_lock(new_fl, request);
locks_move_blocks(new_fl, request);
locks_insert_lock_ctx(&new_fl->c, &fl->c.flc_list);
fl = new_fl;
new_fl = NULL;
} if (right) { if (left == right) { /* The new lock breaks the old one in two pieces, * so we have to use the second new lock.
*/
left = new_fl2;
new_fl2 = NULL;
locks_copy_lock(left, right);
locks_insert_lock_ctx(&left->c, &fl->c.flc_list);
}
right->fl_start = request->fl_end + 1;
locks_wake_up_blocks(&right->c);
} if (left) {
left->fl_end = request->fl_start - 1;
locks_wake_up_blocks(&left->c);
}
out:
trace_posix_lock_inode(inode, request, error);
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem); /* * Free any unused locks.
*/ if (new_fl)
locks_free_lock(new_fl); if (new_fl2)
locks_free_lock(new_fl2);
locks_dispose_list(&dispose);
return error;
}
/**
 * posix_lock_file - Apply a POSIX-style lock to a file
 * @filp: The file to apply the lock to
 * @fl: The lock to be applied
 * @conflock: Place to return a copy of the conflicting lock, if found.
 *
 * Add a POSIX style lock to a file.
 * We merge adjacent & overlapping locks whenever possible.
 * POSIX locks are sorted by owner task, then by starting address
 *
 * Note that if called with an FL_EXISTS argument, the caller may determine
 * whether or not a lock was successfully freed by testing the return
 * value for -ENOENT.
 */
int posix_lock_file(struct file *filp, struct file_lock *fl,
		    struct file_lock *conflock)
{
	return posix_lock_inode(file_inode(filp), fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
/** * posix_lock_inode_wait - Apply a POSIX-style lock to a file * @inode: inode of file to which lock request should be applied * @fl: The lock to be applied * * Apply a POSIX style lock request to an inode.
*/ staticint posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
{ int error;
might_sleep (); for (;;) {
error = posix_lock_inode(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break;
error = wait_event_interruptible(fl->c.flc_wait,
list_empty(&fl->c.flc_blocked_member)); if (error) break;
}
locks_delete_block(fl); return error;
}
/*
 * Clear the pending-break flags that the new lease type @arg resolves:
 * F_UNLCK clears both pending flags, F_RDLCK only the downgrade flag.
 */
static void lease_clear_pending(struct file_lease *fl, int arg)
{
	switch (arg) {
	case F_UNLCK:
		fl->c.flc_flags &= ~FL_UNLOCK_PENDING;
		fallthrough;
	case F_RDLCK:
		fl->c.flc_flags &= ~FL_DOWNGRADE_PENDING;
	}
}
/* We already had a lease on this file; just change its type */ int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose)
{ int error = assign_type(&fl->c, arg);
if (error) return error;
lease_clear_pending(fl, arg);
locks_wake_up_blocks(&fl->c); if (arg == F_UNLCK) { struct file *filp = fl->c.flc_file;
staticbool past_time(unsignedlong then)
{ if (!then) /* 0 is a special value meaning "this never expires": */ returnfalse; return time_after(jiffies, then);
}
/** * __break_lease - revoke all outstanding leases on file * @inode: the inode of the file to return * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR: * break all leases * @type: FL_LEASE: break leases and delegations; FL_DELEG: break * only delegations * * break_lease (inlined for speed) has checked there already is at least * some kind of lock (maybe a lease) on this file. Leases are broken on * a call to open() or truncate(). This function can sleep unless you * specified %O_NONBLOCK to your open().
*/ int __break_lease(struct inode *inode, unsignedint mode, unsignedint type)
{ int error = 0; struct file_lock_context *ctx; struct file_lease *new_fl, *fl, *tmp; unsignedlong break_time; int want_write = (mode & O_ACCMODE) != O_RDONLY;
LIST_HEAD(dispose);
if (!any_leases_conflict(inode, new_fl)) goto out;
break_time = 0; if (lease_break_time > 0) {
break_time = jiffies + lease_break_time * HZ; if (break_time == 0)
break_time++; /* so that 0 means no break time */
}
list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) { if (!leases_conflict(&fl->c, &new_fl->c)) continue; if (want_write) { if (fl->c.flc_flags & FL_UNLOCK_PENDING) continue;
fl->c.flc_flags |= FL_UNLOCK_PENDING;
fl->fl_break_time = break_time;
} else { if (lease_breaking(fl)) continue;
fl->c.flc_flags |= FL_DOWNGRADE_PENDING;
fl->fl_downgrade_time = break_time;
} if (fl->fl_lmops->lm_break(fl))
locks_delete_lock_ctx(&fl->c, &dispose);
}
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
trace_break_lease_unblock(inode, new_fl);
__locks_delete_block(&new_fl->c); if (error >= 0) { /* * Wait for the next conflicting lease that has not been * broken yet
*/ if (error == 0)
time_out_leases(inode, &dispose); if (any_leases_conflict(inode, new_fl)) goto restart;
error = 0;
}
out:
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
locks_dispose_list(&dispose);
free_lock:
locks_free_lease(new_fl); return error;
}
EXPORT_SYMBOL(__break_lease);
/** * lease_get_mtime - update modified time of an inode with exclusive lease * @inode: the inode * @time: pointer to a timespec which contains the last modified time * * This is to force NFS clients to flush their caches for files with * exclusive leases. The justification is that if someone has an * exclusive lease, then they could be modifying it.
*/ void lease_get_mtime(struct inode *inode, struct timespec64 *time)
{ bool has_lease = false; struct file_lock_context *ctx; struct file_lock_core *flc;
if (has_lease)
*time = current_time(inode);
}
EXPORT_SYMBOL(lease_get_mtime);
/**
 * fcntl_getlease - Enquire what lease is currently active
 * @filp: the file
 *
 * The value returned by this function will be one of
 * (if no lease break is pending):
 *
 * %F_RDLCK to indicate a shared lease is held.
 *
 * %F_WRLCK to indicate an exclusive lease is held.
 *
 * %F_UNLCK to indicate no lease is held.
 *
 * (if a lease break is pending):
 *
 * %F_RDLCK to indicate an exclusive lease needs to be
 * changed to a shared lease (or removed).
 *
 * %F_UNLCK to indicate the lease needs to be removed.
 *
 * XXX: sfr & willy disagree over whether F_INPROGRESS
 * should be returned to userspace.
 */
int fcntl_getlease(struct file *filp)
{
	struct file_lease *fl;
	struct inode *inode = file_inode(filp);
	struct file_lock_context *ctx;
	int type = F_UNLCK;
	LIST_HEAD(dispose);

	ctx = locks_inode_context(inode);
	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
		percpu_down_read(&file_rwsem);
		spin_lock(&ctx->flc_lock);
		/* Expire stale leases before reporting. */
		time_out_leases(inode, &dispose);
		list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) {
			if (fl->c.flc_file == filp) {
				type = target_leasetype(fl);
				break;
			}
		}
		spin_unlock(&ctx->flc_lock);
		percpu_up_read(&file_rwsem);

		locks_dispose_list(&dispose);
	}
	return type;
}
/** * check_conflicting_open - see if the given file points to an inode that has * an existing open that would conflict with the * desired lease. * @filp: file to check * @arg: type of lease that we're trying to acquire * @flags: current lock flags * * Check to see if there's an existing open fd on this file that would * conflict with the lease we're trying to set.
*/ staticint
check_conflicting_open(struct file *filp, constint arg, int flags)
{ struct inode *inode = file_inode(filp); int self_wcount = 0, self_rcount = 0;
if (flags & FL_LAYOUT) return 0; if (flags & FL_DELEG) /* We leave these checks to the caller */ return 0;
/* * Make sure that only read/write count is from lease requestor. * Note that this will result in denying write leases when i_writecount * is negative, which is what we want. (We shouldn't grant write leases * on files open for execution.)
*/ if (filp->f_mode & FMODE_WRITE)
self_wcount = 1; elseif (filp->f_mode & FMODE_READ)
self_rcount = 1;
if (atomic_read(&inode->i_writecount) != self_wcount ||
atomic_read(&inode->i_readcount) != self_rcount) return -EAGAIN;
error = file_f_owner_allocate(filp); if (error) return error;
/* Note that arg is never F_UNLCK here */
ctx = locks_get_lock_context(inode, arg); if (!ctx) return -ENOMEM;
/* * In the delegation case we need mutual exclusion with * a number of operations that take the i_rwsem. We trylock * because delegations are an optional optimization, and if * there's some chance of a conflict--we'd rather not * bother, maybe that's a sign this just isn't a good file to * hand out a delegation on.
*/ if (is_deleg && !inode_trylock(inode)) return -EAGAIN;
/* * At this point, we know that if there is an exclusive * lease on this file, then we hold it on this filp * (otherwise our open of this file would have blocked). * And if we are trying to acquire an exclusive lease, * then the file is not open by anyone (including us) * except for this filp.
*/
error = -EAGAIN;
list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) { if (fl->c.flc_file == filp &&
fl->c.flc_owner == lease->c.flc_owner) {
my_fl = fl; continue;
}
/* * No exclusive leases if someone else has a lease on * this file:
*/ if (arg == F_WRLCK) goto out; /* * Modifying our existing lease is OK, but no getting a * new lease if someone else is opening for write:
*/ if (fl->c.flc_flags & FL_UNLOCK_PENDING) goto out;
}
if (my_fl != NULL) {
lease = my_fl;
error = lease->fl_lmops->lm_change(lease, arg, &dispose); if (error) goto out; goto out_setup;
}
error = -EINVAL; if (!leases_enable) goto out;
locks_insert_lock_ctx(&lease->c, &ctx->flc_lease); /* * The check in break_lease() is lockless. It's possible for another * open to race in after we did the earlier check for a conflicting * open but before the lease was inserted. Check again for a * conflicting open and cancel the lease if there is one. * * We also add a barrier here to ensure that the insertion of the lock * precedes these checks.
*/
smp_mb();
error = check_conflicting_open(filp, arg, lease->c.flc_flags); if (error) {
locks_unlink_lock_ctx(&lease->c); goto out;
}
out_setup: if (lease->fl_lmops->lm_setup)
lease->fl_lmops->lm_setup(lease, priv);
out:
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
locks_dispose_list(&dispose); if (is_deleg)
inode_unlock(inode); if (!error && !my_fl)
*flp = NULL; return error;
}
/** * generic_setlease - sets a lease on an open file * @filp: file pointer * @arg: type of lease to obtain * @flp: input - file_lock to use, output - file_lock inserted * @priv: private data for lm_setup (may be NULL if lm_setup * doesn't require it) * * The (input) flp->fl_lmops->lm_break function is required * by break_lease().
*/ int generic_setlease(struct file *filp, int arg, struct file_lease **flp, void **priv)
{ switch (arg) { case F_UNLCK: return generic_delete_lease(filp, *priv); case F_RDLCK: case F_WRLCK: if (!(*flp)->fl_lmops->lm_break) {
WARN_ON_ONCE(1); return -ENOLCK;
}
/* * Kernel subsystems can register to be notified on any attempt to set * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd * to close files that it may have cached when there is an attempt to set a * conflicting lease.
*/ staticstruct srcu_notifier_head lease_notifier_chain;
/*
 * Notify interested subsystems of the attempt, then hand the request to
 * the filesystem's ->setlease() if it has one, or the generic
 * implementation otherwise.
 */
int
kernel_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv)
{
	if (lease)
		setlease_notifier(arg, *lease);
	if (filp->f_op->setlease)
		return filp->f_op->setlease(filp, arg, lease, priv);
	return generic_setlease(filp, arg, lease, priv);
}
EXPORT_SYMBOL_GPL(kernel_setlease);
/** * vfs_setlease - sets a lease on an open file * @filp: file pointer * @arg: type of lease to obtain * @lease: file_lock to use when adding a lease * @priv: private info for lm_setup when adding a lease (may be * NULL if lm_setup doesn't require it) * * Call this to establish a lease on the file. The "lease" argument is not * used for F_UNLCK requests and may be NULL. For commands that set or alter * an existing lease, the ``(*lease)->fl_lmops->lm_break`` operation must be * set; if not, this function will return -ENOLCK (and generate a scary-looking * stack trace). * * The "priv" pointer is passed directly to the lm_setup function as-is. It * may be NULL if the lm_setup operation doesn't require it.
*/ int
vfs_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv)
{ struct inode *inode = file_inode(filp);
vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(filp), inode); int error;
if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE)) return -EACCES; if (!S_ISREG(inode->i_mode)) return -EINVAL;
error = security_file_lock(filp, arg); if (error) return error; return kernel_setlease(filp, arg, lease, priv);
}
EXPORT_SYMBOL_GPL(vfs_setlease);
staticint do_fcntl_add_lease(unsignedint fd, struct file *filp, int arg)
{ struct file_lease *fl; struct fasync_struct *new; int error;
fl = lease_alloc(filp, arg); if (IS_ERR(fl)) return PTR_ERR(fl);
new = fasync_alloc(); if (!new) {
locks_free_lease(fl); return -ENOMEM;
}
new->fa_fd = fd;
error = vfs_setlease(filp, arg, &fl, (void **)&new); if (fl)
locks_free_lease(fl); if (new)
fasync_free(new); return error;
}
/** * fcntl_setlease - sets a lease on an open file * @fd: open file descriptor * @filp: file pointer * @arg: type of lease to obtain * * Call this fcntl to establish a lease on the file. * Note that you also need to call %F_SETSIG to * receive a signal when the lease is broken.
*/ int fcntl_setlease(unsignedint fd, struct file *filp, int arg)
{ if (arg == F_UNLCK) return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp); return do_fcntl_add_lease(fd, filp, arg);
}
/** * flock_lock_inode_wait - Apply a FLOCK-style lock to a file * @inode: inode of the file to apply to * @fl: The lock to be applied * * Apply a FLOCK style lock request to an inode.
*/ staticint flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
{ int error;
might_sleep(); for (;;) {
error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break;
error = wait_event_interruptible(fl->c.flc_wait,
list_empty(&fl->c.flc_blocked_member)); if (error) break;
}
locks_delete_block(fl); return error;
}
/**
 * locks_lock_inode_wait - Apply a lock to an inode
 * @inode: inode of the file to apply to
 * @fl: The lock to be applied
 *
 * Apply a POSIX or FLOCK style lock request to an inode.
 */
int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
{
	unsigned int flags = fl->c.flc_flags & (FL_POSIX | FL_FLOCK);
	int res = 0;

	if (flags == FL_POSIX)
		res = posix_lock_inode_wait(inode, fl);
	else if (flags == FL_FLOCK)
		res = flock_lock_inode_wait(inode, fl);
	else
		BUG();

	return res;
}
EXPORT_SYMBOL(locks_lock_inode_wait);
/** * sys_flock: - flock() system call. * @fd: the file descriptor to lock. * @cmd: the type of lock to apply. * * Apply a %FL_FLOCK style lock to an open file descriptor. * The @cmd can be one of: * * - %LOCK_SH -- a shared lock. * - %LOCK_EX -- an exclusive lock. * - %LOCK_UN -- remove an existing lock. * - %LOCK_MAND -- a 'mandatory' flock. (DEPRECATED) * * %LOCK_MAND support has been removed from the kernel.
*/
SYSCALL_DEFINE2(flock, unsignedint, fd, unsignedint, cmd)
{ int can_sleep, error, type; struct file_lock fl;
/* * LOCK_MAND locks were broken for a long time in that they never * conflicted with one another and didn't prevent any sort of open, * read or write activity. * * Just ignore these requests now, to preserve legacy behavior, but * throw a warning to let people know that they don't actually work.
*/ if (cmd & LOCK_MAND) {
pr_warn_once("%s(%d): Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n", current->comm, current->pid); return 0;
}
type = flock_translate_cmd(cmd & ~LOCK_NB); if (type < 0) return type;
/** * vfs_test_lock - test file byte range lock * @filp: The file to test lock for * @fl: The lock to test; also used to hold result * * Returns -ERRNO on failure. Indicates presence of conflicting lock by * setting conf->fl_type to something other than F_UNLCK.
*/ int vfs_test_lock(struct file *filp, struct file_lock *fl)
{
WARN_ON_ONCE(filp != fl->c.flc_file); if (filp->f_op->lock) return filp->f_op->lock(filp, F_GETLK, fl);
posix_test_lock(filp, fl); return 0;
}
EXPORT_SYMBOL_GPL(vfs_test_lock);
/** * locks_translate_pid - translate a file_lock's fl_pid number into a namespace * @fl: The file_lock who's fl_pid should be translated * @ns: The namespace into which the pid should be translated * * Used to translate a fl_pid into a namespace virtual pid number
*/ static pid_t locks_translate_pid(struct file_lock_core *fl, struct pid_namespace *ns)
{
pid_t vnr; struct pid *pid;
if (fl->flc_flags & FL_OFDLCK) return -1;
/* Remote locks report a negative pid value */ if (fl->flc_pid <= 0) return fl->flc_pid;
/* * If the flock owner process is dead and its pid has been already * freed, the translation below won't work, but we still want to show * flock owner pid number in init pidns.
*/ if (ns == &init_pid_ns) return (pid_t) fl->flc_pid;
/* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl().
*/ int fcntl_getlk(struct file *filp, unsignedint cmd, struct flock *flock)
{ struct file_lock *fl; int error;
/** * vfs_lock_file - file byte range lock * @filp: The file to apply the lock to * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.) * @fl: The lock to be applied * @conf: Place to return a copy of the conflicting lock, if found. * * A caller that doesn't care about the conflicting lock may pass NULL * as the final argument. * * If the filesystem defines a private ->lock() method, then @conf will * be left unchanged; so a caller that cares should initialize it to * some acceptable default. * * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX * locks, the ->lock() interface may return asynchronously, before the lock has * been granted or denied by the underlying filesystem, if (and only if) * lm_grant is set. Additionally EXPORT_OP_ASYNC_LOCK in export_operations * flags need to be set. * * Callers expecting ->lock() to return asynchronously will only use F_SETLK, * not F_SETLKW; they will set FL_SLEEP if (and only if) the request is for a * blocking lock. When ->lock() does return asynchronously, it must return * FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock request completes. * If the request is for non-blocking lock the file system should return * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine * with the result. If the request timed out the callback routine will return a * nonzero return code and the file system should release the lock. The file * system is also responsible to keep a corresponding posix lock when it * grants a lock so the VFS can find out which locks are locally held and do * the correct lock cleanup when required. * The underlying filesystem must not drop the kernel lock or call * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED * return code.
*/ int vfs_lock_file(struct file *filp, unsignedint cmd, struct file_lock *fl, struct file_lock *conf)
{
WARN_ON_ONCE(filp != fl->c.flc_file); if (filp->f_op->lock) return filp->f_op->lock(filp, cmd, fl); else return posix_lock_file(filp, fl, conf);
}
EXPORT_SYMBOL_GPL(vfs_lock_file);
/*
 * NOTE(review): the following text is extraction residue — a German
 * website disclaimer that does not belong in this source file. Preserved
 * here (translated to English) and wrapped as a comment pending removal:
 * "The information on this website has been compiled carefully and to the
 * best of our knowledge. However, no guarantee is given as to the
 * completeness, correctness, or quality of the information provided.
 * Note: the colored syntax highlighting and the measurement are still
 * experimental."
 */