/* * Copyright (c) 2001 The Regents of the University of Michigan. * All rights reserved. * * Kendrick Smith <kmsmith@umich.edu> * Andy Adamson <kandros@umich.edu> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
*/
/*
 * Currently used for the del_recall_lru and file hash table.  In an
 * effort to decrease the scope of the client_mutex, this spinlock may
 * eventually cover more:
 *
 * In this file it is held around calls to unhash_delegation_locked().
 */
static DEFINE_SPINLOCK(state_lock);

/*
 * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for
 * the refcount on the open stateid to drop.
 *
 * put_ol_stateid_locked() wakes every waiter each time it drops a
 * reference that is not the last one.
 */
static DECLARE_WAIT_QUEUE_HEAD(close_wq);

/*
 * A waitqueue where a writer to clients/#/ctl destroying a client can
 * wait for cl_rpc_users to drop to 0 and then for the client to be
 * unhashed.
 *
 * All waiters are woken once a client has been freed.
 */
static DECLARE_WAIT_QUEUE_HEAD(expiry_wq);
/* must be called under the client_lock */ staticinlinevoid
renew_client_locked(struct nfs4_client *clp)
{ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
if (is_session_dead(ses)) return nfserr_badsession;
status = get_client_locked(ses->se_client); if (status) return status;
atomic_inc(&ses->se_ref); return nfs_ok;
}
/* * Since this is just an optimization, we don't try very hard if it * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and * just quit trying on anything else.
*/ switch (task->tk_status) { case -NFS4ERR_DELAY:
rpc_delay(task, 1 * HZ); return 0; default: return 1;
}
}
/*
 * We store the NONE, READ, WRITE, and BOTH bits separately in the
 * st_{access,deny}_bmap field of the stateid, in order to track not
 * only what share bits are currently in force, but also what
 * combinations of share bits previous opens have used.  This allows us
 * to enforce the recommendation in
 * https://datatracker.ietf.org/doc/html/rfc7530#section-16.19.4 that
 * the server return an error if the client attempts to downgrade to a
 * combination of share bits not explicable by closing some of its
 * previous opens.
 *
 * This enforcement is arguably incomplete, since we don't keep
 * track of access/deny bit combinations; so, e.g., we allow:
 *
 *	OPEN allow read, deny write
 *	OPEN allow both, deny none
 *	DOWNGRADE allow read, deny none
 *
 * which we should reject.
 *
 * But you could also argue that our current code is already overkill,
 * since it only exists to return NFS4ERR_INVAL on incorrect client
 * behavior.
 */

/*
 * Collapse a share bitmap into a share mode.
 *
 * Bit i (for i in 1..3) of @bmap records that share value i was used;
 * the result is the bitwise OR of every such i.  Bit 0 and any bits
 * above bit 3 are ignored.
 */
static unsigned int
bmap_to_share_mode(unsigned long bmap)
{
	unsigned int access = 0;
	unsigned int i;

	for (i = 1; i < 4; i++) {
		if (bmap & (1UL << i))
			access |= i;
	}
	return access;
}
/* set share access for a given stateid */ staticinlinevoid
set_access(u32 access, struct nfs4_ol_stateid *stp)
{ unsignedchar mask = 1 << access;
/*
 * test whether a given stateid is denying specific access
 *
 * @deny is used as a bit index into stp->st_deny_bmap; returns true
 * when that bit is set.
 */
static inline bool
test_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
	unsigned char mask = 1 << deny;

	return (bool)(stp->st_deny_bmap & mask);
}
/*
 * Map the NFS4_SHARE_ACCESS_* bits of @access to an open(2)-style
 * access mode (O_RDONLY, O_WRONLY or O_RDWR).
 *
 * If neither access bit is set, warn once and fall back to O_RDONLY.
 */
static int nfs4_access_to_omode(u32 access)
{
	switch (access & NFS4_SHARE_ACCESS_BOTH) {
	case NFS4_SHARE_ACCESS_READ:
		return O_RDONLY;
	case NFS4_SHARE_ACCESS_WRITE:
		return O_WRONLY;
	case NFS4_SHARE_ACCESS_BOTH:
		return O_RDWR;
	}
	WARN_ON_ONCE(1);
	return O_RDONLY;
}
if (!f) return NULL;
spin_lock(&f->fi_lock);
ret = nfsd_file_get(f->fi_fds[O_RDWR]); if (!ret) {
ret = nfsd_file_get(f->fi_fds[O_WRONLY]); if (!ret)
ret = nfsd_file_get(f->fi_fds[O_RDONLY]);
}
spin_unlock(&f->fi_lock); return ret;
}
if (f->fi_fds[O_RDWR]) return f->fi_fds[O_RDWR]; if (f->fi_fds[O_WRONLY]) return f->fi_fds[O_WRONLY]; if (f->fi_fds[O_RDONLY]) return f->fi_fds[O_RDONLY]; return NULL;
}
/* * Start with a single page hash table to reduce resizing churn * on light workloads.
*/
.min_size = 256,
.automatic_shrinking = true,
};
/*
 * Check if courtesy clients have conflicting access and resolve it if possible
 *
 * access:  is op_share_access if share_access is true.
 *	    Check if access mode, op_share_access, would conflict with
 *	    the current deny mode of the file 'fp'.
 * access:  is op_share_deny if share_access is false.
 *	    Check if the deny mode, op_share_deny, would conflict with
 *	    current access of the file 'fp'.
 * stp:     skip checking this entry.
 * new_stp: normal open, not open upgrade.
 *
 * Must be called with fp->fi_lock held.
 *
 * Function returns:
 *	false - access/deny mode conflict with normal client.
 *	true  - no conflict or conflict with courtesy client(s) is resolved.
 */
static bool
nfs4_resolve_deny_conflicts_locked(struct nfs4_file *fp, bool new_stp,
		struct nfs4_ol_stateid *stp, u32 access, bool share_access)
{
	struct nfs4_ol_stateid *st;
	bool resolvable = true;
	unsigned char bmap;
	struct nfsd_net *nn;
	struct nfs4_client *clp;

	lockdep_assert_held(&fp->fi_lock);
	list_for_each_entry(st, &fp->fi_stateids, st_perfile) {
		/* ignore lock stateid */
		if (st->st_openstp)
			continue;
		if (st == stp && new_stp)
			continue;
		/* check file access against deny mode or vice versa */
		bmap = share_access ? st->st_deny_bmap : st->st_access_bmap;
		if (!(access & bmap_to_share_mode(bmap)))
			continue;
		/* A conflicting client that can be expired does not block us. */
		clp = st->st_stid.sc_client;
		if (try_to_expire_client(clp))
			continue;
		resolvable = false;
		break;
	}
	if (resolvable) {
		/* Run the laundromat work immediately. */
		clp = stp->st_stid.sc_client;
		nn = net_generic(clp->net, nfsd_net_id);
		mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
	}
	return resolvable;
}
static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny)
{ /* Common case is that there is no deny mode. */ if (deny) { /* Does this deny mode make sense? */ if (deny & ~NFS4_SHARE_DENY_BOTH) return nfserr_inval;
if ((deny & NFS4_SHARE_DENY_READ) &&
atomic_read(&fp->fi_access[O_RDONLY])) return nfserr_share_denied;
if (access & NFS4_SHARE_ACCESS_WRITE)
__nfs4_file_put_access(fp, O_WRONLY); if (access & NFS4_SHARE_ACCESS_READ)
__nfs4_file_put_access(fp, O_RDONLY);
}
/* * Allocate a new open/delegation state counter. This is needed for * pNFS for proper return on close semantics. * * Note that we only allocate it for pNFS-enabled exports, otherwise * all pointers to struct nfs4_clnt_odstate are always NULL.
*/ staticstruct nfs4_clnt_odstate *
alloc_clnt_odstate(struct nfs4_client *clp)
{ struct nfs4_clnt_odstate *co;
co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL); if (co) {
co->co_client = clp;
refcount_set(&co->co_odcount, 1);
} return co;
}
stid = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_ol_stateid); if (!stid) return NULL;
return openlockstateid(stid);
}
/* * As the sc_free callback of deleg, this may be called by nfs4_put_stid * in nfsd_break_one_deleg. * Considering nfsd_break_one_deleg is called with the flc->flc_lock held, * this function mustn't ever sleep.
*/ staticvoid nfs4_free_deleg(struct nfs4_stid *stid)
{ struct nfs4_delegation *dp = delegstateid(stid);
/* * When we recall a delegation, we should be careful not to hand it * out again straight away. * To ensure this we keep a pair of bloom filters ('new' and 'old') * in which the filehandles of recalled delegations are "stored". * If a filehandle appear in either filter, a delegation is blocked. * When a delegation is recalled, the filehandle is stored in the "new" * filter. * Every 30 seconds we swap the filters and clear the "new" one, * unless both are empty of course. This results in delegations for a * given filehandle being blocked for between 30 and 60 seconds. * * Each filter is 256 bits. We hash the filehandle to 32bit and use the * low 3 bytes as hash-table indices. * * 'blocked_delegations_lock', which is always taken in block_delegations(), * is used to manage concurrent access. Testing does not need the lock * except when swapping the two filters.
*/ static DEFINE_SPINLOCK(blocked_delegations_lock); staticstruct bloom_pair { int entries, old_entries;
time64_t swap_time; intnew; /* index into 'set' */
DECLARE_BITMAP(set[2], 256);
} blocked_delegations;
/** * nfs4_delegation_exists - Discover if this delegation already exists * @clp: a pointer to the nfs4_client we're granting a delegation to * @fp: a pointer to the nfs4_file we're granting a delegation on * * Return: * On success: true iff an existing delegation is found
*/
/**
 * hash_delegation_locked - Add a delegation to the appropriate lists
 * @dp:     a pointer to the nfs4_delegation we are adding.
 * @fp:     a pointer to the nfs4_file we're granting a delegation on
 *
 * Return:
 *      On success: 0 if the delegation was successfully hashed.
 *
 *      On error: -EAGAIN if one was previously granted to this
 *                 nfs4_client for this nfs4_file. Delegation is not hashed.
 *
*/
spin_lock(&state_lock);
unhashed = unhash_delegation_locked(dp, SC_STATUS_CLOSED);
spin_unlock(&state_lock); if (unhashed)
destroy_unhashed_deleg(dp);
}
/**
 * revoke_delegation - perform nfs4 delegation structure cleanup
 * @dp: pointer to the delegation
 *
 * This function assumes that it's called either from the administrative
 * interface (nfsd4_revoke_states()) that's revoking a specific delegation
 * stateid or it's called from a laundromat thread (nfsd4_laundromat()) that
 * determined that this specific state has expired and needs to be revoked
 * (both mark state with the appropriate stid sc_status mode). It is also
 * assumed that a reference was taken on the @dp state.
 *
 * If this function finds that the @dp state is SC_STATUS_FREED it means
 * that a FREE_STATEID operation for this stateid has been processed and
 * we can proceed to removing it from recalled list. However, if @dp state
 * isn't marked SC_STATUS_FREED, it means we need to place it on the
 * cl_revoked list and wait for the FREE_STATEID to arrive from the client.
 * At the same time, we need to mark it as SC_STATUS_FREEABLE to indicate to
 * the nfsd4_free_stateid() function that this stateid has already been added
 * to the cl_revoked list and that nfsd4_free_stateid() is now responsible
 * for removing it from the list. Inspection of where the delegation state
 * is in the revocation process is protected by the clp->cl_lock.
*/ staticvoid revoke_delegation(struct nfs4_delegation *dp)
{ struct nfs4_client *clp = dp->dl_stid.sc_client;
/* * A stateid that had a deny mode associated with it is being released * or downgraded. Recalculate the deny mode on the file.
*/ staticvoid
recalculate_deny_mode(struct nfs4_file *fp)
{ struct nfs4_ol_stateid *stp;
u32 old_deny;
for (i = 1; i < 4; i++) { if ((i & deny) != i) {
change = true;
clear_deny(i, stp);
}
}
/* Recalculate per-file deny mode if there was a change */ if (change)
recalculate_deny_mode(stp->st_stid.sc_file);
}
/*
 * release all access and file references for a given stateid
 *
 * If the stateid has a file and a non-empty deny bitmap, the file's
 * deny mode is recalculated first.  Then, for each share value 1..3,
 * the file access reference is dropped if the stateid held it, and the
 * corresponding access bit is cleared on the stateid.
 */
static void
release_all_access(struct nfs4_ol_stateid *stp)
{
	int i;
	struct nfs4_file *fp = stp->st_stid.sc_file;

	if (fp && stp->st_deny_bmap != 0)
		recalculate_deny_mode(fp);

	for (i = 1; i < 4; i++) {
		if (test_access(i, stp))
			nfs4_file_put_access(fp, i);
		clear_access(i, stp);
	}
}
/*
 * Put the persistent reference to an already unhashed generic stateid, while
 * holding the cl_lock. If it's the last reference, then put it onto the
 * reaplist for later destruction.
 *
 * When other references remain, waiters on close_wq are woken instead
 * and the stateid is left alone.
 */
static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
				       struct list_head *reaplist)
{
	struct nfs4_stid *s = &stp->st_stid;
	struct nfs4_client *clp = s->sc_client;

	lockdep_assert_held(&clp->cl_lock);

	WARN_ON_ONCE(!list_empty(&stp->st_locks));

	if (!refcount_dec_and_test(&s->sc_count)) {
		wake_up_all(&close_wq);
		return;
	}

	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
	if (s->sc_status & SC_STATUS_ADMIN_REVOKED)
		atomic_dec(&clp->cl_admin_revoked);
	/* st_locks is empty here (checked above), so reuse it as the link. */
	list_add(&stp->st_locks, reaplist);
}
/*
 * Free a list of generic stateids that were collected earlier after being
 * fully unhashed.
 *
 * Entries are linked through st_locks.  Each one is unlinked, freed via
 * its sc_free callback, and then the nfs4_file it referenced (if any)
 * is put.  May sleep.
 */
static void
free_ol_stateid_reaplist(struct list_head *reaplist)
{
	struct nfs4_ol_stateid *stp;
	struct nfs4_file *fp;

	might_sleep();

	while (!list_empty(reaplist)) {
		stp = list_first_entry(reaplist, struct nfs4_ol_stateid,
				       st_locks);
		list_del(&stp->st_locks);
		/* Save the file pointer: stp is gone after sc_free(). */
		fp = stp->st_stid.sc_file;
		stp->st_stid.sc_free(&stp->st_stid);
		if (fp)
			put_nfs4_file(fp);
	}
}
spin_lock(&nn->client_lock);
s = oo->oo_last_closed_stid; if (s) {
list_del_init(&oo->oo_close_lru);
oo->oo_last_closed_stid = NULL;
}
spin_unlock(&nn->client_lock); if (s)
nfs4_put_stid(&s->st_stid);
}
/**
 * nfsd4_revoke_states - revoke all nfsv4 states associated with given filesystem
 * @net: used to identify instance of nfsd (there is one per net namespace)
 * @sb: super_block used to identify target filesystem
 *
 * All nfs4 states (open, lock, delegation, layout) held by the server instance
 * and associated with a file on the given filesystem will be revoked resulting
 * in any files being closed and so all references from nfsd to the filesystem
 * being released.  Thus nfsd will no longer prevent the filesystem from being
 * unmounted.
 *
 * The clients which own the states will subsequently be notified that the
 * states have been "admin-revoked".
*/ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
{ struct nfsd_net *nn = net_generic(net, nfsd_net_id); unsignedint idhashval; unsignedint sc_types;
/*
 * Advance the seqid of the compound's replay owner after an operation
 * whose result mutates the seqid.  When the result does not mutate the
 * seqid, the replay owner is cleared from the compound state instead.
 * A result of nfserr_replay_me leaves everything untouched.
 */
void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr)
{
	struct nfs4_stateowner *owner = cstate->replay_owner;

	if (nfserr == nfserr_replay_me)
		return;

	if (seqid_mutating_err(ntohl(nfserr))) {
		if (owner) {
			/* Open owners first drop their last-closed stateid. */
			if (owner->so_is_open_owner)
				release_last_closed_stateid(openowner(owner));
			owner->so_seqid++;
		}
	} else {
		nfsd4_cstate_clear_replay(cstate);
	}
}
/*
 * The protocol defines ca_maxresponsesize_cached to include the size of
 * the rpc header, but all we need to cache is the data starting after
 * the end of the initial SEQUENCE operation--the rest we regenerate
 * each time.  Therefore we can advertise a ca_maxresponsesize_cached
 * value that is the number of bytes in our cache plus a few additional
 * bytes.  In order to stay on the safe side, and not promise more than
 * we can cache, those additional bytes must be the minimum possible: 24
 * bytes of rpc header (xid through accept state, with AUTH_NULL
 * verifier), 12 for the compound header (with zero-length tag), and 44
 * for the SEQUENCE op response:
 */
#define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
static struct shrinker *nfsd_slot_shrinker;
static DEFINE_SPINLOCK(nfsd_session_list_lock);
static LIST_HEAD(nfsd_session_list);
/*
 * The sum of "target_slots-1" on every session.  The shrinker can push this
 * down, though it can take a little while for the memory to actually
 * be freed.  The "-1" is because we can never free slot 0 while the
 * session is active.
 */
static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0);
/*
 * Free the slots of @ses with index @from and above.
 *
 * Each freed slot is replaced in se_slots by a value entry holding its
 * seqid.  se_fchannel.maxreqs becomes @from, and if the target slot
 * count was above @from it is lowered to @from (but never below 1),
 * with nfsd_total_target_slots reduced by the same amount.
 *
 * Does nothing when @from is not below the current maxreqs.
 */
static void
free_session_slots(struct nfsd4_session *ses, int from)
{
	int i;

	if (from >= ses->se_fchannel.maxreqs)
		return;

	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);

		/*
		 * Save the seqid in case we reactivate this slot.
		 * This will never require a memory allocation so GFP
		 * flag is irrelevant
		 */
		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
		free_svc_cred(&slot->sl_cred);
		kfree(slot);
	}
	ses->se_fchannel.maxreqs = from;
	if (ses->se_target_maxslots > from) {
		int new_target = from ?: 1;

		atomic_sub(ses->se_target_maxslots - new_target,
			   &nfsd_total_target_slots);
		ses->se_target_maxslots = new_target;
	}
}
/**
 * reduce_session_slots - reduce the target max-slots of a session if possible
 * @ses:  The session to affect
 * @dec:  how much to decrease the target by
 *
 * This interface can be used by a shrinker to reduce the target max-slots
 * for a session so that some slots can eventually be freed.
 * It uses spin_trylock() as it may be called in a context where another
 * spinlock is held that has a dependency on client_lock.  As shrinkers are
 * best-effort, skipping a session if client_lock is already held has no
 * great cost.
 *
 * Return value:
 *   The number of slots that the target was reduced by.
 */
static int
reduce_session_slots(struct nfsd4_session *ses, int dec)
{
	struct nfsd_net *nn = net_generic(ses->se_client->net,
					  nfsd_net_id);
	int ret = 0;

	if (ses->se_target_maxslots <= 1)
		return ret;
	if (!spin_trylock(&nn->client_lock))
		return ret;
	/* Never take the target below one slot. */
	ret = min(dec, ses->se_target_maxslots-1);
	ses->se_target_maxslots -= ret;
	atomic_sub(ret, &nfsd_total_target_slots);
	ses->se_slot_gen += 1;
	if (ses->se_slot_gen == 0) {
		int i;

		/* The generation wrapped: restart at 1 and reset every slot. */
		ses->se_slot_gen = 1;
		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);

			slot->sl_generation = 0;
		}
	}
	spin_unlock(&nn->client_lock);
	return ret;
}
/* * The RPC and NFS session headers are never saved in * the slot reply cache buffer.
*/
size = fattrs->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ ?
0 : fattrs->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
nfsd4_hash_conn(conn, ses);
ret = nfsd4_register_conn(conn); if (ret) /* oops; xprt is already down: */
nfsd4_conn_lost(&conn->cn_xpt_user); /* We may have gained or lost a callback channel: */
nfsd4_probe_callback_sync(ses->se_client);
}
if (cses->flags & SESSION4_BACK_CHAN)
dir |= NFS4_CDFC4_BACK; return alloc_conn(rqstp, dir);
}
/* must be called under client_lock */ staticvoid nfsd4_del_conns(struct nfsd4_session *s)
{ struct nfs4_client *clp = s->se_client; struct nfsd4_conn *c;
spin_lock(&clp->cl_lock); while (!list_empty(&s->se_conns)) {
c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession);
list_del_init(&c->cn_persession);
spin_unlock(&clp->cl_lock);
{ struct sockaddr *sa = svc_addr(rqstp); /* * This is a little silly; with sessions there's no real * use for the callback address. Use the peer address * as a reasonable default for now, but consider fixing * the rpc client not to require an address in the * future:
*/
rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
}
}
/* caller must hold client_lock */ staticstruct nfsd4_session *
__find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
{ struct nfsd4_session *elem; int idx; struct nfsd_net *nn = net_generic(net, nfsd_net_id);
lockdep_assert_held(&nn->client_lock);
dump_sessionid(__func__, sessionid);
idx = hash_sessionid(sessionid); /* Search in the appropriate list */
list_for_each_entry(elem, &nn->sessionid_hashtbl[idx], se_hash) { if (!memcmp(elem->se_sessionid.data, sessionid->data,
NFS4_MAX_SESSIONID_LEN)) { return elem;
}
}
dprintk("%s: session not found\n", __func__); return NULL;
}
staticstruct nfsd4_session *
find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net,
__be32 *ret)
{ struct nfsd4_session *session;
__be32 status = nfserr_badsession;
session = __find_in_sessionid_hashtbl(sessionid, net); if (!session) goto out;
status = nfsd4_get_session_locked(session); if (status)
session = NULL;
out:
*ret = status; return session;
}
/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */

/*
 * Returns 0 when @clid carries this server instance's boot time, 1 (after
 * emitting a tracepoint) when it does not.
 */
static int
STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
{
	/*
	 * We're assuming the clid was not given out from a boot
	 * precisely 2^32 (about 136 years) before this one.  That seems
	 * a safe assumption:
	 */
	if (clid->cl_boot == (u32)nn->boot_time)
		return 0;
	trace_nfsd_clid_stale(clid);
	return 1;
}
/*
 * must be called under the client_lock
 *
 * Marks @clp expired and removes it from the id hash, the confirmed or
 * unconfirmed name tree, and the LRU list.  Every session of the client
 * is also removed from the session hash and from the all-sessions list.
 */
static void
unhash_client_locked(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
	struct nfsd4_session *ses;

	lockdep_assert_held(&nn->client_lock);

	/* Mark the client as expired! */
	clp->cl_time = 0;
	/* Make it invisible */
	if (!list_empty(&clp->cl_idhash)) {
		list_del_init(&clp->cl_idhash);
		if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
			rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
		else
			rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
	}
	list_del_init(&clp->cl_lru);
	spin_lock(&clp->cl_lock);
	spin_lock(&nfsd_session_list_lock);
	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) {
		list_del_init(&ses->se_hash);
		list_del_init(&ses->se_all_sessions);
	}
	spin_unlock(&nfsd_session_list_lock);
	spin_unlock(&clp->cl_lock);
}
spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
unhash_delegation_locked(dp, SC_STATUS_CLOSED);
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock); while (!list_empty(&reaplist)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
destroy_unhashed_deleg(dp);
} while (!list_empty(&clp->cl_revoked)) {
dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
nfs4_put_stid(&dp->dl_stid);
} while (!list_empty(&clp->cl_openowners)) {
oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient);
nfs4_get_stateowner(&oo->oo_owner);
release_openowner(oo);
} for (i = 0; i < OWNER_HASH_SIZE; i++) { struct nfs4_stateowner *so, *tmp;
list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i],
so_strhash) { /* Should be no openowners at this point */
WARN_ON_ONCE(so->so_is_open_owner);
remove_blocked_locks(lockowner(so));
}
}
nfsd4_return_all_client_layouts(clp);
nfsd4_shutdown_copy(clp);
nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt)
svc_xprt_put(clp->cl_cb_conn.cb_xprt);
atomic_add_unless(&nn->nfs4_client_count, -1, 0);
nfsd4_dec_courtesy_client_count(nn, clp);
free_client(clp);
wake_up_all(&expiry_wq);
}
/*
 * Returns true when @g1 and @g2 hold the same number of groups and the
 * gids match pairwise, position by position.
 */
static bool groups_equal(struct group_info *g1, struct group_info *g2)
{
	int i;

	if (g1->ngroups != g2->ngroups)
		return false;
	for (i = 0; i < g1->ngroups; i++)
		if (!gid_eq(g1->gid[i], g2->gid[i]))
			return false;
	return true;
}
/*
 * RFC 3530 language requires clid_inuse be returned when the
 * "principal" associated with a requests differs from that previously
 * used.  We use uid, gid's, and gss principal string as our best
 * approximation.  We also don't want to allow non-gss use of a client
 * established using gss: in theory cr_principal should catch that
 * change, but in practice cr_principal can be null even in the gss case
 * since gssd doesn't always pass down a principal string.
 */
static bool is_gss_cred(struct svc_cred *cr)
{
	/* Is cr_flavor one of the gss "pseudoflavors"?: */
	return (cr->cr_flavor > RPC_AUTH_MAXFLAVOR);
}
/*
 * Compare two credentials.
 *
 * They match when both are (or both are not) gss credentials, the uid,
 * gid and supplementary groups are equal, and the principals are either
 * the same pointer (including both NULL) or equal strings.  A principal
 * present on only one side is a mismatch.
 */
static bool
same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
{
	if ((is_gss_cred(cr1) != is_gss_cred(cr2))
		|| (!uid_eq(cr1->cr_uid, cr2->cr_uid))
		|| (!gid_eq(cr1->cr_gid, cr2->cr_gid))
		|| !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
		return false;
	/* XXX: check that cr_targ_princ fields match ? */
	if (cr1->cr_principal == cr2->cr_principal)
		return true;
	if (!cr1->cr_principal || !cr2->cr_principal)
		return false;
	return 0 == strcmp(cr1->cr_principal, cr2->cr_principal);
}
if (!cr->cr_gss_mech) returnfalse;
service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor); return service == RPC_GSS_SVC_INTEGRITY ||
service == RPC_GSS_SVC_PRIVACY;
}
/* * This is opaque to client, so no need to byte-swap. Use * __force to keep sparse happy
*/
verf[0] = (__force __be32)(u32)ktime_get_real_seconds();
verf[1] = (__force __be32)nn->clverifier_counter++;
memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
}
spin_lock(&nf->fi_lock);
file = find_any_file_locked(nf); if (file) { /* * Note: a lock stateid isn't really the same thing as a lock, * it's the locking state held by one owner on a file, and there * may be multiple (or no) lock ranges associated with it. * (Same for the matter is true of open stateids.)
*/
/* * Normally we refuse to destroy clients that are in use, but here the * administrator is telling us to just do it. We also want to wait * so the caller has a guarantee that the client's locks are gone by * the time the write returns:
*/ staticvoid force_expire_client(struct nfs4_client *clp)
{ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); bool already_expired;
base = resp->cstate.data_offset;
slot->sl_datalen = buf->len - base; if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
WARN(1, "%s: sessions DRC could not cache compound\n",
__func__); return;
}
/*
 * Re-encode the SEQUENCE operation of a replayed compound from the slot
 * values.
 *
 * When the slot holds a cached reply, only SEQUENCE is encoded and its
 * status is returned.  Otherwise the uncached-reply error is encoded on
 * the following operation (advancing resp->opcnt), or, for a compound
 * consisting of SEQUENCE alone, the SEQUENCE op is marked as a false
 * retry.
 */
static __be32
nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
			  struct nfsd4_compoundres *resp)
{
	struct nfsd4_slot *cached_slot = resp->cstate.slot;
	struct nfsd4_op *cur = &args->ops[resp->opcnt - 1];

	/* Encode the replayed sequence operation */
	nfsd4_encode_operation(resp, cur);

	if (cached_slot->sl_flags & NFSD4_SLOT_CACHED)
		return cur->status;

	if (args->opcnt != 1) {
		/* Fail the operation that follows SEQUENCE. */
		cur = &args->ops[resp->opcnt++];
		cur->status = nfserr_retry_uncached_rep;
		nfsd4_encode_operation(resp, cur);
		return cur->status;
	}

	/*
	 * The original operation wasn't a solo sequence--we
	 * always cache those--so this retry must not match the
	 * original:
	 */
	cur->status = nfserr_seq_false_retry;
	return cur->status;
}
/* * The sequence operation is not cached because we can use the slot and * session values.
*/ static __be32
nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq)
{ struct nfsd4_slot *slot = resp->cstate.slot; struct xdr_stream *xdr = resp->xdr;
__be32 *p;
__be32 status;
dprintk("--> %s slot %p\n", __func__, slot);
status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); if (status) return status;
p = xdr_reserve_space(xdr, slot->sl_datalen); if (!p) {
WARN_ON_ONCE(1); return nfserr_serverfault;
}
xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen);
xdr_commit_encode(xdr);
if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval;
new = create_client(exid->clname, rqstp, &verf); if (new == NULL) return nfserr_jukebox;
status = copy_impl_id(new, exid); if (status) goto out_nolock;
exid->spo_must_allow[1] &= (
1 << (OP_TEST_STATEID - 32) |
1 << (OP_FREE_STATEID - 32)); if (!svc_rqst_integrity_protected(rqstp)) {
status = nfserr_inval; goto out_nolock;
} /* * Sometimes userspace doesn't give us a principal. * Which is a bug, really. Anyway, we can't enforce * MACH_CRED in that case, better to give up now:
*/ if (!new->cl_cred.cr_principal &&
!new->cl_cred.cr_raw_principal) {
status = nfserr_serverfault; goto out_nolock;
}
new->cl_mach_cred = true; break; case SP4_NONE: break; default: /* checked by xdr code */
WARN_ON_ONCE(1);
fallthrough; case SP4_SSV:
status = nfserr_encr_alg_unsupp; goto out_nolock;
}
/* * Note that RFC 8881 places no length limit on * nii_name, but this implementation permits no * more than NFS4_OPAQUE_LIMIT bytes.
*/
exid->nii_name.len = strlen(exid->server_impl_name); if (exid->nii_name.len > NFS4_OPAQUE_LIMIT)
exid->nii_name.len = NFS4_OPAQUE_LIMIT;
exid->nii_name.data = exid->server_impl_name;
/* just send zeros - the date is in nii_name */
exid->nii_time.tv_sec = 0;
exid->nii_time.tv_nsec = 0;
dprintk("nfsd4_exchange_id seqid %d flags %x\n",
conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags);
status = nfs_ok;
out:
spin_unlock(&nn->client_lock);
out_nolock: if (new)
expire_client(new); if (unconf) {
trace_nfsd_clid_expire_unconf(&unconf->cl_clientid);
expire_client(unconf);
} return status;
}
/*
 * Classify an incoming @seqid against the slot's current @slot_seqid.
 *
 * Returns nfs_ok for the next expected seqid (or for seqid 1 on a slot
 * flagged NFSD4_SLOT_REUSED), nfserr_replay_cache for a repeat of the
 * current seqid, nfserr_jukebox for a repeat while the slot is still in
 * use, and nfserr_seq_misordered for anything else.
 */
static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
{
	bool same_seqid = (seqid == slot_seqid);

	/* The slot is in use, and no response has been sent. */
	if (flags & NFSD4_SLOT_INUSE)
		return same_seqid ? nfserr_jukebox : nfserr_seq_misordered;

	/* Note unsigned 32-bit arithmetic handles wraparound: */
	if (likely(seqid == slot_seqid + 1))
		return nfs_ok;
	if (seqid == 1 && (flags & NFSD4_SLOT_REUSED))
		return nfs_ok;

	return same_seqid ? nfserr_replay_cache : nfserr_seq_misordered;
}
/*
 * Cache the create session result into the create session single DRC
 * slot cache by saving the xdr structure. sl_seqid has been set.
 * Do this for solo or embedded create session operations.
 *
 * @nfserr is recorded as the slot's status and *@cr_ses is copied whole
 * into the slot.
 */
static void
nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses,
			   struct nfsd4_clid_slot *slot, __be32 nfserr)
{
	slot->sl_status = nfserr;
	memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses));
}
/*
 * Server's NFSv4.1 backchannel support is AUTH_SYS-only for now.
 * These are based on similar macros in linux/sunrpc/msg_prot.h .
 *
 * NOTE(review): the 2 * (2 + UNX_CALLSLACK) term presumably accounts for
 * two auth fields (credential and verifier), each with a two-word
 * prefix -- confirm against msg_prot.h.
 */
#define RPC_MAX_HEADER_WITH_AUTH_SYS \
	(RPC_CALLHDRSIZE + 2 * (2 + UNX_CALLSLACK))
if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ) return nfserr_toosmall; if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ) return nfserr_toosmall;
ca->maxresp_cached = 0; if (ca->maxops < 2) return nfserr_toosmall;
return nfs_ok;
}
/*
 * Validate the security flavor requested for the callback channel.
 * Only RPC_AUTH_NULL and RPC_AUTH_UNIX are accepted.
 */
static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs)
{
	if (cbs->flavor == RPC_AUTH_NULL || cbs->flavor == RPC_AUTH_UNIX)
		return nfs_ok;

	/*
	 * GSS case: the spec doesn't allow us to return this
	 * error.  But it also doesn't allow us not to support
	 * GSS.
	 * I'd rather this fail hard than return some error the
	 * client might think it can already handle:
	 */
	return nfserr_encr_alg_unsupp;
}
if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) return nfserr_inval;
status = nfsd4_check_cb_sec(&cr_ses->cb_sec); if (status) return status;
status = check_forechannel_attrs(&cr_ses->fore_channel, nn); if (status) return status;
status = check_backchannel_attrs(&cr_ses->back_channel); if (status) goto out_err;
status = nfserr_jukebox; new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel); if (!new) goto out_err;
conn = alloc_conn_from_crses(rqstp, cr_ses); if (!conn) goto out_free_session;
/* RFC 8881 Section 18.36.4 Phase 2: Sequence ID processing. */ if (conf) {
cs_slot = &conf->cl_cs_slot;
trace_nfsd_slot_seqid_conf(conf, cr_ses);
} else {
cs_slot = &unconf->cl_cs_slot;
trace_nfsd_slot_seqid_unconf(unconf, cr_ses);
}
status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); switch (status) { case nfs_ok:
cs_slot->sl_seqid++;
cr_ses->seqid = cs_slot->sl_seqid; break; case nfserr_replay_cache:
status = nfsd4_replay_create_session(cr_ses, cs_slot);
fallthrough; case nfserr_jukebox: /* The server MUST NOT cache NFS4ERR_DELAY */ goto out_free_conn; default: goto out_cache_error;
}
/* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */ if (conf) {
status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(conf, rqstp)) goto out_cache_error;
} else {
status = nfserr_clid_inuse; if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
!rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
trace_nfsd_clid_cred_mismatch(unconf, rqstp); goto out_cache_error;
}
status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(unconf, rqstp)) goto out_cache_error;
old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) {
status = mark_client_expired_locked(old); if (status) goto out_expired_error;
trace_nfsd_clid_replaced(&old->cl_clientid);
}
move_to_confirmed(unconf);
conf = unconf;
}
/* RFC 8881 Section 18.36.4 Phase 4: Session creation. */
status = nfs_ok; /* Persistent sessions are not supported */
cr_ses->flags &= ~SESSION4_PERSIST; /* Upshifting from TCP to RDMA is not supported */
cr_ses->flags &= ~SESSION4_RDMA; /* Report the correct number of backchannel slots */
cr_ses->back_channel.maxreqs = new->se_cb_highest_slot + 1;
/* cache solo and embedded create sessions under the client_lock */
nfsd4_cache_create_session(cr_ses, cs_slot, status);
spin_unlock(&nn->client_lock); if (conf == unconf)
fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY); /* init connection and backchannel */
nfsd4_init_conn(rqstp, conn, new);
nfsd4_put_session(new); if (old)
expire_client(old); return status;
out_expired_error: /* * Revert the slot seq_nr change so the server will process * the client's resend instead of returning a cached response.
*/ if (status == nfserr_jukebox) {
cs_slot->sl_seqid--;
cr_ses->seqid = cs_slot->sl_seqid; goto out_free_conn;
}
out_cache_error:
nfsd4_cache_create_session(cr_ses, cs_slot, status);
out_free_conn:
spin_unlock(&nn->client_lock);
free_conn(conn);
out_free_session:
__free_session(new);
out_err: return status;
}
/*
 * Validate the direction value at @dir and collapse the "or both"
 * variants into a single value.
 *
 * NFS4_CDFC4_FORE and NFS4_CDFC4_BACK are accepted as they are.
 * NFS4_CDFC4_FORE_OR_BOTH and NFS4_CDFC4_BACK_OR_BOTH are rewritten in
 * place to NFS4_CDFC4_BOTH.
 *
 * Returns nfs_ok for those four values.  Anything else yields
 * nfserr_inval and *dir is left untouched.
 */
static __be32 nfsd4_map_bcts_dir(u32 *dir)
{
	u32 requested = *dir;

	if (requested == NFS4_CDFC4_FORE || requested == NFS4_CDFC4_BACK)
		return nfs_ok;

	if (requested == NFS4_CDFC4_FORE_OR_BOTH ||
	    requested == NFS4_CDFC4_BACK_OR_BOTH) {
		*dir = NFS4_CDFC4_BOTH;
		return nfs_ok;
	}

	return nfserr_inval;
}
status = nfserr_not_only_op; if (nfsd4_compound_in_session(cstate, sessionid)) { if (!nfsd4_last_compound_op(r)) goto out;
ref_held_by_me++;
}
dump_sessionid(__func__, sessionid);
spin_lock(&nn->client_lock);
ses = find_in_sessionid_hashtbl(sessionid, net, &status); if (!ses) goto out_client_lock;
status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(ses->se_client, r)) goto out_put_session;
status = mark_session_dead_locked(ses, 1 + ref_held_by_me); if (status) goto out_put_session;
unhash_session(ses);
spin_unlock(&nn->client_lock);
if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) !=
(bool)seq->cachethis) returnfalse; /* * If there's an error then the reply can have fewer ops than * the call.
*/ if (slot->sl_opcnt < argp->opcnt && !slot->sl_status) returnfalse; /* * But if we cached a reply with *more* ops than the call you're * sending us now, then this new call is clearly not really a * replay of the old one:
*/ if (slot->sl_opcnt > argp->opcnt) returnfalse; /* This is the only check explicitly called by spec: */ if (!same_creds(&rqstp->rq_cred, &slot->sl_cred)) returnfalse; /* * There may be more comparisons we could actually do, but the * spec doesn't require us to catch every case where the calls * don't match (that would require caching the call as well as * the reply), so we don't bother.
*/ returntrue;
}
/* * Will be either used or freed by nfsd4_sequence_check_conn * below.
*/
conn = alloc_conn(rqstp, NFS4_CDFC4_FORE); if (!conn) return nfserr_jukebox;
/* * If the client ever uses the highest available slot, * gently try to allocate another 20%. This allows * fairly quick growth without grossly over-shooting what * the client might use.
*/ if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
session->se_target_maxslots >= session->se_fchannel.maxreqs &&
session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) { int s = session->se_fchannel.maxreqs; int cnt = DIV_ROUND_UP(s, 5); void *prev_slot;
do { /* * GFP_NOWAIT both allows allocation under a * spinlock, and only succeeds if there is * plenty of memory.
*/
slot = nfsd4_alloc_slot(&session->se_fchannel, s,
GFP_NOWAIT);
prev_slot = xa_load(&session->se_slots, s); if (xa_is_value(prev_slot) && slot) {
slot->sl_seqid = xa_to_value(prev_slot);
slot->sl_flags |= NFSD4_SLOT_REUSED;
} if (slot &&
!xa_is_err(xa_store(&session->se_slots, s, slot,
GFP_NOWAIT))) {
s += 1;
session->se_fchannel.maxreqs = s;
atomic_add(s - session->se_target_maxslots,
&nfsd_total_target_slots);
session->se_target_maxslots = s;
} else {
kfree(slot);
slot = NULL;
}
} while (slot && --cnt > 0);
}
if (nfsd4_has_session(cs)) { if (cs->status != nfserr_replay_cache) {
nfsd4_store_cache_entry(resp);
cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
} /* Drop session reference that was taken in nfsd4_sequence() */
nfsd4_put_session(cs->session);
} elseif (cs->clp)
put_client_renew(cs->clp);
}
if (rc->rca_one_fs) { if (!cstate->current_fh.fh_dentry) return nfserr_nofilehandle; /* * We don't take advantage of the rca_one_fs case. * That's OK, it's optional, we can safely ignore it.
*/ return nfs_ok;
}
status = nfserr_complete_already; if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags)) goto out;
status = nfserr_stale_clientid; if (is_client_expired(clp)) /* * The following error isn't really legal. * But we only get here if the client just explicitly * destroyed the client. Surely it no longer cares what * error it gets back on an operation for the dead * client.
*/ goto out;
status = nfs_ok;
trace_nfsd_clid_reclaim_complete(&clp->cl_clientid);
nfsd4_client_record_create(clp);
inc_reclaim_complete(clp);
out: return status;
}
if (STALE_CLIENTID(clid, nn)) return nfserr_stale_clientid;
spin_lock(&nn->client_lock);
conf = find_confirmed_client(clid, false, nn);
unconf = find_unconfirmed_client(clid, false, nn); /* * We try hard to give out unique clientid's, so if we get an * attempt to confirm the same clientid with a different cred, * the client may be buggy; this should never happen. * * Nevertheless, RFC 7530 recommends INUSE for this case:
*/
status = nfserr_clid_inuse; if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
trace_nfsd_clid_cred_mismatch(unconf, rqstp); goto out;
} if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
trace_nfsd_clid_cred_mismatch(conf, rqstp); goto out;
} if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) { if (conf && same_verf(&confirm, &conf->cl_confirm)) {
status = nfs_ok;
} else
status = nfserr_stale_clientid; goto out;
}
status = nfs_ok; if (conf) { if (get_client_locked(conf) == nfs_ok) {
old = unconf;
unhash_client_locked(old);
nfsd4_change_callback(conf, &unconf->cl_cb_conn);
} else {
conf = NULL;
}
}
if (!conf) {
old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) {
status = nfserr_clid_inuse; if (client_has_state(old)
&& !same_creds(&unconf->cl_cred,
&old->cl_cred)) {
old = NULL; goto out;
}
status = mark_client_expired_locked(old); if (status) {
old = NULL; goto out;
}
trace_nfsd_clid_replaced(&old->cl_clientid);
}
status = get_client_locked(unconf); if (status != nfs_ok) {
old = NULL; goto out;
}
move_to_confirmed(unconf);
conf = unconf;
}
spin_unlock(&nn->client_lock); if (conf == unconf)
fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY);
nfsd4_probe_callback(conf);
spin_lock(&nn->client_lock);
put_client_renew_locked(conf);
out:
spin_unlock(&nn->client_lock); if (old)
expire_client(old); return status;
}
switch (s->sc_type) { case SC_TYPE_OPEN:
stp = openlockstateid(s); if (unhash_open_stateid(stp, &reaplist))
put_ol_stateid_locked(stp, &reaplist);
spin_unlock(&cl->cl_lock);
free_ol_stateid_reaplist(&reaplist); break; case SC_TYPE_LOCK:
stp = openlockstateid(s);
unhashed = unhash_lock_stateid(stp);
spin_unlock(&cl->cl_lock); if (unhashed)
nfs4_put_stid(s); break; case SC_TYPE_DELEG:
dp = delegstateid(s);
list_del_init(&dp->dl_recall_lru);
spin_unlock(&cl->cl_lock);
nfs4_put_stid(s); break; default:
spin_unlock(&cl->cl_lock);
}
}
/*
 * NFSv4.0 has no way for the client to tell the server
 * that it can forget an admin-revoked stateid.
 * So we keep it around until the first time that the
 * client uses it, and drop it the first time
 * nfserr_admin_revoked is returned.
 * For v4.1 and later we wait until explicitly told
 * to free the stateid.
 *
 * @cl:   client owning the stateid; nothing is done unless its
 *        cl_minorversion is 0
 * @stid: stateid to look up under cl->cl_lock
 *
 * Locking: cl->cl_lock is acquired here.  If the lookup fails it is
 * released here.  If the lookup succeeds the lock is still held when
 * nfsd4_drop_revoked_stid() is called and is not released by this
 * function afterwards (NOTE(review): that helper is expected to drop
 * it - confirm when changing either side).
 */
static void nfsd40_drop_revoked_stid(struct nfs4_client *cl,
				    stateid_t *stid)
{
	struct nfs4_stid *st;

	if (cl->cl_minorversion != 0)
		return;

	spin_lock(&cl->cl_lock);
	st = find_stateid_locked(cl, stid);
	if (!st) {
		spin_unlock(&cl->cl_lock);
		return;
	}

	/* Called with cl_lock held; see the locking note above. */
	nfsd4_drop_revoked_stid(st);
}
static __be32
nfsd4_verify_open_stid(struct nfs4_stid *s)
{
__be32 ret = nfs_ok;
if (s->sc_status & SC_STATUS_ADMIN_REVOKED)
ret = nfserr_admin_revoked; elseif (s->sc_status & SC_STATUS_REVOKED)
ret = nfserr_deleg_revoked; elseif (s->sc_status & SC_STATUS_CLOSED)
ret = nfserr_bad_stateid; return ret;
}
/*
 * Lock the stateid st_mutex, and deal with races with CLOSE.
 *
 * The stateid is re-checked with nfsd4_verify_open_stid() once the
 * mutex is held.
 *
 * Returns nfs_ok with stp->st_mutex held by the caller.  On any other
 * status the mutex has already been released before returning; for
 * nfserr_admin_revoked the stateid is additionally handed to
 * nfsd40_drop_revoked_stid() while the mutex is still held.
 */
static __be32
nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)
{
	__be32 status;

	mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX);

	status = nfsd4_verify_open_stid(&stp->st_stid);
	if (status == nfs_ok)
		return nfs_ok;

	if (status == nfserr_admin_revoked)
		nfsd40_drop_revoked_stid(stp->st_stid.sc_client,
					 &stp->st_stid.sc_stateid);

	mutex_unlock(&stp->st_mutex);
	return status;
}
stp = open->op_stp; /* We are moving these outside of the spinlocks to avoid the warnings */
mutex_init(&stp->st_mutex);
mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX);
out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&oo->oo_owner.so_client->cl_lock); if (retstp) { /* Handle races with CLOSE */ if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) {
nfs4_put_stid(&retstp->st_stid); goto retry;
} /* To keep mutex tracking happy */
mutex_unlock(&stp->st_mutex);
stp = retstp;
} return stp;
}
/*
 * In the 4.0 case we need to keep the owners around a little while to handle
 * CLOSE replay. We still do need to release any file access that is held by
 * them before returning however.
 *
 * @s:   open stateid being closed; it becomes the owner's
 *       oo_last_closed_stid
 * @net: not consulted by this function - the nfsd_net is derived from
 *       the stateid's client instead
 *
 * The previously remembered last-closed stateid, if any, is put after
 * nn->client_lock has been dropped.
 */
static void
move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
{
	struct nfs4_openowner *oo = openowner(s->st_stateowner);
	struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net,
					  nfsd_net_id);
	struct nfs4_ol_stateid *last;

	/*
	 * We know that we hold one reference via nfsd4_close, and another
	 * "persistent" reference for the client. If the refcount is higher
	 * than 2, then there are still calls in progress that are using this
	 * stateid. We can't put the sc_file reference until they are finished.
	 * Wait for the refcount to drop to 2. Since it has been unhashed,
	 * there should be no danger of the refcount going back up again at
	 * this point.
	 * Some threads with a reference might be waiting for rp_locked,
	 * so tell them to stop waiting.
	 */
	store_release_wake_up(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
	wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);

	/* No other users remain: give back file access and the file itself. */
	release_all_access(s);
	if (s->st_stid.sc_file) {
		put_nfs4_file(s->st_stid.sc_file);
		s->st_stid.sc_file = NULL;
	}

	/* Swap in @s as the last-closed stateid and refresh the LRU slot. */
	spin_lock(&nn->client_lock);
	last = oo->oo_last_closed_stid;
	oo->oo_last_closed_stid = s;
	list_move_tail(&oo->oo_close_lru, &nn->close_lru);
	oo->oo_time = ktime_get_boottime_seconds();
	spin_unlock(&nn->client_lock);

	if (last)
		nfs4_put_stid(&last->st_stid);
}
rcu_read_lock();
list = rhltable_lookup(&nfs4_file_rhltable, &inode,
nfs4_file_rhash_params);
rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { if (refcount_inc_not_zero(&fi->fi_ref)) {
rcu_read_unlock(); return fi;
}
}
}
rcu_read_unlock(); return NULL;
}
/* * On hash insertion, identify entries with the same inode but * distinct filehandles. They will all be on the list returned * by rhltable_lookup(). * * inode->i_lock prevents racing insertions from adding an entry * for the same inode/fhp pair twice.
*/ static noinline_for_stack struct nfs4_file *
nfsd4_file_hash_insert(struct nfs4_file *new, conststruct svc_fh *fhp)
{ struct inode *inode = d_inode(fhp->fh_dentry); struct rhlist_head *tmp, *list; struct nfs4_file *ret = NULL; bool alias_found = false; struct nfs4_file *fi; int err;
rcu_read_lock();
spin_lock(&inode->i_lock);
list = rhltable_lookup(&nfs4_file_rhltable, &inode,
nfs4_file_rhash_params);
rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { if (refcount_inc_not_zero(&fi->fi_ref))
ret = fi;
} else
fi->fi_aliased = alias_found = true;
} if (ret) goto out_unlock;
/* * Called to check deny when READ with all zero stateid or * WRITE with all zero or all one stateid
*/ static __be32
nfs4_share_conflict(struct svc_fh *current_fh, unsignedint deny_type)
{ struct nfs4_file *fp;
__be32 ret = nfs_ok;
fp = nfsd4_file_hash_lookup(current_fh); if (!fp) return ret;
/* Check for conflicting share reservations */
spin_lock(&fp->fi_lock); if (fp->fi_share_deny & deny_type)
ret = nfserr_locked;
spin_unlock(&fp->fi_lock);
put_nfs4_file(fp); return ret;
}
/** * nfsd_wait_for_delegreturn - wait for delegations to be returned * @rqstp: the RPC transaction being executed * @inode: in-core inode of the file being waited for * * The timeout prevents deadlock if all nfsd threads happen to be * tied up waiting for returning delegations. * * Return values: * %true: delegation was returned * %false: timed out waiting for delegreturn
*/ bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode)
{ long __maybe_unused timeo;
/* * We can't do this in nfsd_break_deleg_cb because it is * already holding inode->i_lock. * * If the dl_time != 0, then we know that it has already been * queued for a lease break. Don't queue it again.
*/
spin_lock(&state_lock); if (delegation_hashed(dp) && dp->dl_time == 0) {
dp->dl_time = ktime_get_boottime_seconds();
list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
}
spin_unlock(&state_lock);
}
if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &dp->dl_recall.cb_flags)) return;
/* * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the * flc_lock) we know the server hasn't removed the lease yet, and * we know it's safe to take a reference.
*/
refcount_inc(&dp->dl_stid.sc_count);
queued = nfsd4_run_cb(&dp->dl_recall);
WARN_ON_ONCE(!queued); if (!queued)
refcount_dec(&dp->dl_stid.sc_count);
}
dp->dl_recalled = true;
atomic_inc(&clp->cl_delegs_in_recall); if (try_to_expire_client(clp)) {
nn = net_generic(clp->net, nfsd_net_id);
mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
}
/* * We don't want the locks code to timeout the lease for us; * we'll remove it ourself if a delegation isn't returned * in time:
*/
fl->fl_break_time = 0;
spin_lock(&nn->client_lock);
found = find_confirmed_client(clid, sessions, nn); if (found)
atomic_inc(&found->cl_rpc_users);
spin_unlock(&nn->client_lock); return found;
}
/*
 * Make sure cstate->clp refers to the client identified by @clid.
 *
 * If the compound state already carries a client, it only has to match
 * @clid.  Otherwise the client is looked up and stored in cstate->clp.
 *
 * Returns:
 *   nfs_ok                 - cstate->clp is set and matches @clid
 *   nfserr_stale_clientid  - the cached client has a different id, or
 *                            STALE_CLIENTID() rejects @clid
 *   nfserr_expired         - the lookup found no client
 */
static __be32 set_client(clientid_t *clid,
			 struct nfsd4_compound_state *cstate,
			 struct nfsd_net *nn)
{
	if (cstate->clp)
		return same_clid(&cstate->clp->cl_clientid, clid) ?
			nfs_ok : nfserr_stale_clientid;

	if (STALE_CLIENTID(clid, nn))
		return nfserr_stale_clientid;

	/*
	 * Reaching this point means the 4.0 case: with sessions the
	 * SEQUENCE op would already have filled in cstate->clp.  Hence
	 * the lookup is done with session = false.
	 */
	cstate->clp = lookup_clientid(clid, false, nn);

	return cstate->clp ? nfs_ok : nfserr_expired;
}
/* * In case we need it later, after we've already created the * file and don't want to risk a further failure:
*/
open->op_file = nfsd4_alloc_file(); if (open->op_file == NULL) return nfserr_jukebox;
status = set_client(clientid, cstate, nn); if (status) return status;
clp = cstate->clp;
strhashval = ownerstr_hashval(&open->op_owner);
retry:
oo = find_or_alloc_open_stateowner(strhashval, open, cstate);
open->op_openowner = oo; if (!oo) return nfserr_jukebox; if (nfsd4_cstate_assign_replay(cstate, &oo->oo_owner) == -EAGAIN) {
nfs4_put_stateowner(&oo->oo_owner); goto retry;
}
status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); if (status) return status;
open->op_stp = nfs4_alloc_open_stateid(clp); if (!open->op_stp) return nfserr_jukebox;
if (nfsd4_has_session(cstate) &&
(cstate->current_fh.fh_export->ex_flags & NFSEXP_PNFS)) {
open->op_odstate = alloc_clnt_odstate(clp); if (!open->op_odstate) return nfserr_jukebox;
}
return nfs_ok;
}
staticinline __be32
nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
{ if (!(flags & RD_STATE) && deleg_is_read(dp->dl_type)) return nfserr_openmode; else return nfs_ok;
}
/* * Are we trying to set a deny mode that would conflict with * current access?
*/
status = nfs4_file_check_deny(fp, open->op_share_deny); if (status != nfs_ok) { if (status != nfserr_share_denied) {
spin_unlock(&fp->fi_lock); goto out;
} if (nfs4_resolve_deny_conflicts_locked(fp, new_stp,
stp, open->op_share_deny, false))
status = nfserr_jukebox;
spin_unlock(&fp->fi_lock); goto out;
}
/* set access to the file */
status = nfs4_file_get_access(fp, open->op_share_access); if (status != nfs_ok) { if (status != nfserr_share_denied) {
spin_unlock(&fp->fi_lock); goto out;
} if (nfs4_resolve_deny_conflicts_locked(fp, new_stp,
stp, open->op_share_access, true))
status = nfserr_jukebox;
spin_unlock(&fp->fi_lock); goto out;
}
/* Set access bits in stateid */
old_access_bmap = stp->st_access_bmap;
set_access(open->op_share_access, stp);
/* Set new deny mask */
old_deny_bmap = stp->st_deny_bmap;
set_deny(open->op_share_deny, stp);
fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
if (!fp->fi_fds[oflag]) {
spin_unlock(&fp->fi_lock);
status = nfsd_file_acquire_opened(rqstp, cur_fh, access,
open->op_filp, &nf); if (status != nfs_ok) goto out_put_access;
spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) {
fp->fi_fds[oflag] = nf;
nf = NULL;
}
}
spin_unlock(&fp->fi_lock); if (nf)
nfsd_file_put(nf);
status = nfserrno(nfsd_open_break_lease(cur_fh->fh_dentry->d_inode,
access)); if (status) goto out_put_access;
if (!test_access(open->op_share_access, stp)) return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open, false);
/* test and set deny mode */
spin_lock(&fp->fi_lock);
status = nfs4_file_check_deny(fp, open->op_share_deny); switch (status) { case nfs_ok:
set_deny(open->op_share_deny, stp);
fp->fi_share_deny |=
(open->op_share_deny & NFS4_SHARE_DENY_BOTH); break; case nfserr_share_denied: if (nfs4_resolve_deny_conflicts_locked(fp, false,
stp, open->op_share_deny, false))
status = nfserr_jukebox; break;
}
spin_unlock(&fp->fi_lock);
if (status != nfs_ok) return status;
status = nfsd4_truncate(rqstp, cur_fh, open); if (status != nfs_ok)
reset_union_bmap_deny(old_deny_bmap, stp); return status;
}
/* Should we give out recallable state?: */ staticbool nfsd4_cb_channel_good(struct nfs4_client *clp)
{ if (clp->cl_cb_state == NFSD4_CB_UP) returntrue; /* * In the sessions case, since we don't have to establish a * separate connection for callbacks, we assume it's OK * until we hear otherwise:
*/ return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
}
writes = atomic_read(&ino->i_writecount); if (!writes) return 0; /* * There could be multiple filehandles (hence multiple * nfs4_files) referencing this file, but that's not too * common; let's just give up in that case rather than * trying to go look up all the clients using that other * nfs4_file as well:
*/ if (fp->fi_aliased) return -EAGAIN; /* * If there's a close in progress, make sure that we see it * clear any fi_fds[] entries before we see it decrement * i_writecount:
*/
smp_mb__after_atomic();
if (fp->fi_fds[O_WRONLY])
writes--; if (fp->fi_fds[O_RDWR])
writes--; if (writes > 0) return -EAGAIN; /* There may be non-NFSv4 writers */ /* * It's possible there are non-NFSv4 write opens in progress, * but if they haven't incremented i_writecount yet then they * also haven't called break lease yet; so, they'll break this * lease soon enough. So, all that's left to check for is NFSv4 * opens:
*/
spin_lock(&fp->fi_lock);
list_for_each_entry(st, &fp->fi_stateids, st_perfile) { if (st->st_openstp == NULL /* it's an open */ &&
access_permit_write(st) &&
st->st_stid.sc_client != clp) {
spin_unlock(&fp->fi_lock); return -EAGAIN;
}
}
spin_unlock(&fp->fi_lock); /* * There's a small chance that we could be racing with another * NFSv4 open. However, any open that hasn't added itself to * the fi_stateids list also hasn't called break_lease yet; so, * they'll break this lease soon enough.
*/ return 0;
}
/* * It's possible that between opening the dentry and setting the delegation, * that it has been renamed or unlinked. Redo the lookup to verify that this * hasn't happened.
*/ staticint
nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp, struct svc_fh *parent)
{ struct svc_export *exp; struct dentry *child;
__be32 err;
exp_put(exp);
dput(child); if (child != file_dentry(fp->fi_deleg_file->nf_file)) return -EAGAIN;
return 0;
}
/* * We avoid breaking delegations held by a client due to its own activity, but * clearing setuid/setgid bits on a write is an implicit activity and the client * may not notice and continue using the old mode. Avoid giving out a delegation * on setuid/setgid files when the client is requesting an open for write.
*/ staticint
nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf)
{ struct inode *inode = file_inode(nf->nf_file);
/* * The fi_had_conflict and nfs_get_existing_delegation checks * here are just optimizations; we'll need to recheck them at * the end:
*/ if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN);
/* * Try for a write delegation first. RFC8881 section 10.4 says: * * "An OPEN_DELEGATE_WRITE delegation allows the client to handle, * on its own, all opens." * * Furthermore, section 9.1.2 says: * * "In the case of READ, the server may perform the corresponding * check on the access mode, or it may choose to allow READ for * OPEN4_SHARE_ACCESS_WRITE, to accommodate clients whose WRITE * implementation may unavoidably do reads (e.g., due to buffer * cache constraints)." * * We choose to offer a write delegation for OPEN with the * OPEN4_SHARE_ACCESS_WRITE access mode to accommodate such clients.
*/ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
nf = find_writeable_file(fp);
dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE;
}
/* * If the file is being opened O_RDONLY or we couldn't get a O_RDWR * file for some reason, then try for a read delegation instead.
*/ if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) {
nf = find_readable_file(fp);
dl_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ;
}
if (!nf) return ERR_PTR(-EAGAIN);
/* * File delegations and associated locks cannot be recovered if the * export is from an NFS proxy server.
*/ if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) {
nfsd_file_put(nf); return ERR_PTR(-EOPNOTSUPP);
}
spin_lock(&state_lock);
spin_lock(&fp->fi_lock); if (nfs4_delegation_exists(clp, fp))
status = -EAGAIN; elseif (nfsd4_verify_setuid_write(open, nf))
status = -EAGAIN; elseif (!fp->fi_deleg_file) {
fp->fi_deleg_file = nf; /* increment early to prevent fi_deleg_file from being
* cleared */
fp->fi_delegees = 1;
nf = NULL;
} else
fp->fi_delegees++;
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock); if (nf)
nfsd_file_put(nf); if (status) return ERR_PTR(status);
status = -ENOMEM;
dp = alloc_init_deleg(clp, fp, odstate, dl_type); if (!dp) goto out_delegees;
fl = nfs4_alloc_init_lease(dp); if (!fl) goto out_clnt_odstate;
status = kernel_setlease(fp->fi_deleg_file->nf_file,
fl->c.flc_type, &fl, NULL); if (fl)
locks_free_lease(fl); if (status) goto out_clnt_odstate;
if (parent) {
status = nfsd4_verify_deleg_dentry(open, fp, parent); if (status) goto out_unlock;
}
status = nfsd4_check_conflicting_opens(clp, fp); if (status) goto out_unlock;
/* * Now that the deleg is set, check again to ensure that nothing * raced in and changed the mode while we weren't looking.
*/
status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file); if (status) goto out_unlock;
status = -EAGAIN; if (fp->fi_had_conflict) goto out_unlock;
spin_lock(&state_lock);
spin_lock(&clp->cl_lock);
spin_lock(&fp->fi_lock);
status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
spin_unlock(&clp->cl_lock);
spin_unlock(&state_lock);
/* * Add NFS4_SHARE_ACCESS_READ to the write delegation granted on OPEN * with NFS4_SHARE_ACCESS_WRITE by allocating separate nfsd_file and * struct file to be used for read with delegation stateid. *
*/ staticbool
nfsd4_add_rdaccess_to_wrdeleg(struct svc_rqst *rqstp, struct nfsd4_open *open, struct svc_fh *fh, struct nfs4_ol_stateid *stp)
{ struct nfs4_file *fp; struct nfsd_file *nf = NULL;
/* * The Linux NFS server does not offer write delegations to NFSv4.0 * clients in order to avoid conflicts between write delegations and * GETATTRs requesting CHANGE or SIZE attributes. * * With NFSv4.1 and later minorversions, the SEQUENCE operation that * begins each COMPOUND contains a client ID. Delegation recall can * be avoided when the server recognizes the client sending a * GETATTR also holds write delegation it conflicts with. * * However, the NFSv4.0 protocol does not enable a server to * determine that a GETATTR originated from the client holding the * conflicting delegation versus coming from some other client. Per * RFC 7530 Section 16.7.5, the server must recall or send a * CB_GETATTR even when the GETATTR originates from the client that * holds the conflicting delegation. * * An NFSv4.0 client can trigger a pathological situation if it * always sends a DELEGRETURN preceded by a conflicting GETATTR in * the same COMPOUND. COMPOUND execution will always stop at the * GETATTR and the DELEGRETURN will never get executed. The server * eventually revokes the delegation, which can result in loss of * open or lock state.
*/ staticvoid
nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *currentfh, struct svc_fh *fh)
{ struct nfs4_openowner *oo = openowner(stp->st_stateowner); bool deleg_ts = nfsd4_want_deleg_timestamps(open); struct nfs4_client *clp = stp->st_stid.sc_client; struct svc_fh *parent = NULL; struct nfs4_delegation *dp; struct kstat stat; int status = 0; int cb_up;
cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
open->op_recall = false; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_PREVIOUS: if (!cb_up)
open->op_recall = true; break; case NFS4_OPEN_CLAIM_NULL:
parent = currentfh;
fallthrough; case NFS4_OPEN_CLAIM_FH: /* * Let's not give out any delegations till everyone's * had the chance to reclaim theirs, *and* until * NLM locks have all been reclaimed:
*/ if (locks_in_grace(clp->net)) goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out_no_deleg; if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE &&
!clp->cl_minorversion) goto out_no_deleg; break; default: goto out_no_deleg;
}
dp = nfs4_set_delegation(open, stp, parent); if (IS_ERR(dp)) goto out_no_deleg;
/* 4.1 client asking for a delegation? */ if (open->op_deleg_want)
nfsd4_open_deleg_none_ext(open, status); return;
}
staticvoid nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, struct nfs4_delegation *dp)
{ if (deleg_is_write(dp->dl_type)) { if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_READ_DELEG) {
open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
} elseif (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) {
open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
}
} /* Otherwise the client must be confused wanting a delegation * it already has, therefore we don't return * OPEN_DELEGATE_NONE_EXT and reason.
*/
}
/* Are we returning only a delegation stateid? */ staticbool open_xor_delegation(struct nfsd4_open *open)
{ if (!(open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION)) returnfalse; /* Did we actually get a delegation? */ if (!deleg_is_read(open->op_delegate_type) && !deleg_is_write(open->op_delegate_type)) returnfalse; returntrue;
}
/** * nfsd4_process_open2 - finish open processing * @rqstp: the RPC transaction being executed * @current_fh: NFSv4 COMPOUND's current filehandle * @open: OPEN arguments * * If successful, (1) truncate the file if open->op_truncate was * set, (2) set open->op_stateid, (3) set open->op_delegation. * * Returns %nfs_ok on success; otherwise an nfs4stat value in * network byte order is returned.
*/
__be32
nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
{ struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; struct nfs4_ol_stateid *stp = NULL; struct nfs4_delegation *dp = NULL;
__be32 status; bool new_stp = false;
/* * Lookup file; if found, lookup stateid and check open request, * and check for delegations in the process of being recalled. * If not found, create the nfs4_file struct
*/
fp = nfsd4_file_hash_insert(open->op_file, current_fh); if (unlikely(!fp)) return nfserr_jukebox; if (fp != open->op_file) {
status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; if (dp && nfsd4_is_deleg_cur(open) &&
(dp->dl_stid.sc_file != fp)) { /* * RFC8881 section 8.2.4 mandates the server to return * NFS4ERR_BAD_STATEID if the selected table entry does * not match the current filehandle. However returning * NFS4ERR_BAD_STATEID in the OPEN can cause the client * to repeatedly retry the operation with the same * stateid, since the stateid itself is valid. To avoid * this situation NFSD returns NFS4ERR_INVAL instead.
*/
status = nfserr_inval; goto out;
}
stp = nfsd4_find_and_lock_existing_open(fp, open);
} else {
open->op_file = NULL;
status = nfserr_bad_stateid; if (nfsd4_is_deleg_cur(open)) goto out;
}
if (!stp) {
stp = init_open_stateid(fp, open); if (!stp) {
status = nfserr_jukebox; goto out;
}
if (!open->op_stp)
new_stp = true;
}
/* * OPEN the file, or upgrade an existing OPEN. * If truncate fails, the OPEN fails. * * stp is already locked.
*/ if (!new_stp) { /* Stateid was found, this is an OPEN upgrade */
status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) {
mutex_unlock(&stp->st_mutex); goto out;
}
} else {
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true); if (status) {
release_open_stateid(stp);
mutex_unlock(&stp->st_mutex); goto out;
}
if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_NO_DELEG) {
open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
open->op_why_no_deleg = WND4_NOT_WANTED; goto nodeleg;
}
}
/* * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail.
*/
nfs4_open_delegation(rqstp, open, stp,
&resp->cstate.current_fh, current_fh);
/* * If there is an existing open stateid, it must be updated and * returned. Only respect WANT_OPEN_XOR_DELEGATION when a new * open stateid would have to be created.
*/ if (new_stp && open_xor_delegation(open)) {
memcpy(&open->op_stateid, &zero_stateid, sizeof(open->op_stateid));
open->op_rflags |= OPEN4_RESULT_NO_OPEN_STATEID;
release_open_stateid(stp);
}
nodeleg:
status = nfs_ok;
trace_nfsd_open(&stp->st_stid.sc_stateid);
out: /* 4.1 client trying to upgrade/downgrade delegation? */ if (open->op_delegate_type == OPEN_DELEGATE_NONE && dp &&
open->op_deleg_want)
nfsd4_deleg_xgrade_none_ext(open, dp);
if (fp)
put_nfs4_file(fp); if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; /* * To finish the open response, we just need to set the rflags.
*/
open->op_rflags |= NFS4_OPEN_RESULT_LOCKTYPE_POSIX; if (nfsd4_has_session(&resp->cstate))
open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK; elseif (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED))
open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
if (dp)
nfs4_put_stid(&dp->dl_stid); if (stp)
nfs4_put_stid(&stp->st_stid);
return status;
}
/*
 * Release whatever resources are still attached to @open.
 *
 * Each of the four members is optional; a member that is NULL is simply
 * skipped.  @cstate is not used by this function.
 */
void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
			      struct nfsd4_open *open)
{
	/* Drop the reference held on the open owner. */
	if (open->op_openowner != NULL) {
		nfs4_put_stateowner(&open->op_openowner->oo_owner);
	}

	/* Hand the file object back to its slab cache. */
	if (open->op_file != NULL) {
		kmem_cache_free(file_slab, open->op_file);
	}

	/* Drop the reference held on the open stateid. */
	if (open->op_stp != NULL) {
		nfs4_put_stid(&open->op_stp->st_stid);
	}

	/* Hand the odstate object back to its slab cache. */
	if (open->op_odstate != NULL) {
		kmem_cache_free(odstate_slab, open->op_odstate);
	}
}
trace_nfsd_clid_renew(clid);
status = set_client(clid, cstate, nn); if (status) return status;
clp = cstate->clp; if (!list_empty(&clp->cl_delegations)
&& clp->cl_cb_state != NFSD4_CB_UP) return nfserr_cb_path_down; return nfs_ok;
}
/*
 * End the NFSv4 grace period for @nn.  Calling this again after the grace
 * period has already been ended is a no-op.
 */
void
nfsd4_end_grace(struct nfsd_net *nn)
{
	if (!nn->grace_ended) {
		trace_nfsd_grace_complete(nn);
		nn->grace_ended = true;

		/*
		 * Should the server go down again right now, an NFSv4
		 * client is still allowed to reclaim after the server
		 * comes back up, even if it has not yet had a chance to
		 * reclaim state this time around.
		 */
		nfsd4_record_grace_done(nn);

		/*
		 * From here on NFSv4 clients can still reclaim, but if
		 * the server crashes, any client that has not yet
		 * reclaimed will be out of luck on the next boot.
		 *
		 * (NFSv4.1+ clients count as having reclaimed once they
		 * call RECLAIM_COMPLETE; NFSv4.0 clients count as having
		 * reclaimed after their first OPEN.)
		 */
		locks_end_grace(&nn->nfsd4_manager);

		/*
		 * From here on, and once lockd and/or any other
		 * containers exit their grace period, further reclaims
		 * will fail and regular locking can resume.
		 */
	}
}
/*
 * If we've waited a lease period but there are still clients trying to
 * reclaim, wait a little longer to give them a chance to finish.
 *
 * Returns true if the caller should keep waiting, false otherwise.
 * As a side effect nn->somebody_reclaimed is cleared whenever it was
 * found set, so the next call only sees reclaims made after this one.
 */
static bool clients_still_reclaiming(struct nfsd_net *nn)
{
	time64_t double_grace_period_end = nn->boot_time +
					   2 * nn->nfsd4_lease;

	/* Every tracked client has completed its reclaim: nothing to wait for. */
	if (nn->track_reclaim_completes &&
	    atomic_read(&nn->nr_reclaim_complete) ==
	    nn->reclaim_str_hashtbl_size)
		return false;

	/* Nobody reclaimed anything since the last check. */
	if (!nn->somebody_reclaimed)
		return false;
	nn->somebody_reclaimed = false;

	/*
	 * If we've given them *two* lease times to reclaim, and they're
	 * still not done, give up.
	 *
	 * nn->boot_time is recorded with ktime_get_real_seconds() in
	 * nfs4_state_create_net(), so the deadline has to be checked
	 * against the same clock.  Comparing it with seconds-since-boot
	 * would mix two unrelated time bases and the limit would
	 * effectively never be reached.
	 */
	if (ktime_get_real_seconds() > double_grace_period_end)
		return false;

	return true;
}
/* * This is called when nfsd is being shutdown, after all inter_ssc * cleanup were done, to destroy the ssc delayed unmount list.
*/ staticvoid nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
{ struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd4_ssc_umount_item *tmp;
/*
 * Check if any lock belonging to this lockowner has any blockers.
 *
 * Walks every stateid on @lo's so_stateids list and asks
 * locks_owner_has_blockers() about the lock context of the stateid's
 * file.  Stateids whose inode has no lock context are skipped.
 *
 * Returns true as soon as one blocker is found, false otherwise.
 */
static bool
nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
{
	struct file_lock_context *ctx;
	struct nfs4_ol_stateid *stp;
	struct nfs4_file *nf;

	list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
		nf = stp->st_stid.sc_file;
		ctx = locks_inode_context(nf->fi_inode);
		if (!ctx)
			continue;
		if (locks_owner_has_blockers(ctx, lo))
			return true;
	}

	return false;
}
if (atomic_read(&clp->cl_delegs_in_recall)) returntrue;
spin_lock(&clp->cl_lock); for (i = 0; i < OWNER_HASH_SIZE; i++) {
list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[i],
so_strhash) { if (so->so_is_open_owner) continue;
lo = lockowner(so); if (nfs4_lockowner_has_blockers(lo)) {
spin_unlock(&clp->cl_lock); returntrue;
}
}
}
spin_unlock(&clp->cl_lock); returnfalse;
}
/* * It's possible for a client to try and acquire an already held lock * that is being held for a long time, and then lose interest in it. * So, we clean out any un-revisited request after a lease period * under the assumption that the client is no longer interested. * * RFC5661, sec. 9.6 states that the client must not rely on getting * notifications and must continue to poll for locks, even when the * server supports them. Thus this shouldn't lead to clients blocking * indefinitely once the lock does become free.
*/
BUG_ON(!list_empty(&reaplist));
spin_lock(&nn->blocked_locks_lock); while (!list_empty(&nn->blocked_locks_lru)) {
nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); if (!state_expired(<, nbl->nbl_time)) break;
list_move(&nbl->nbl_lru, &reaplist);
list_del_init(&nbl->nbl_list);
}
spin_unlock(&nn->blocked_locks_lock);
while (!list_empty(&reaplist)) {
nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, nbl_lru);
list_del_init(&nbl->nbl_lru);
free_blocked_lock(nbl);
} #ifdef CONFIG_NFSD_V4_2_INTER_SSC /* service the server-to-server copy delayed unmount list */
nfsd4_ssc_expire_umount(nn); #endif if (atomic_long_read(&num_delegations) >= max_delegations)
deleg_reaper(nn);
out: return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
}
if (clp->cl_state != NFSD4_ACTIVE) continue; if (list_empty(&clp->cl_delegations)) continue; if (atomic_read(&clp->cl_delegs_in_recall)) continue; if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &clp->cl_ra->ra_cb.cb_flags)) continue; if (ktime_get_boottime_seconds() - clp->cl_ra_time < 5) continue; if (clp->cl_cb_state != NFSD4_CB_UP) continue;
/*
 * Check that the open behind @stp permits the access described by @flags.
 *
 * Returns nfserr_openmode if WR_STATE is requested but write access is not
 * permitted, or RD_STATE is requested but read access is not permitted;
 * nfs_ok otherwise.
 */
static
__be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags)
{
	/* A lock stateid is judged by its parent open, not by the lock. */
	if (stp->st_openstp)
		stp = stp->st_openstp;

	if ((flags & WR_STATE) && !access_permit_write(stp))
		return nfserr_openmode;

	if ((flags & RD_STATE) && !access_permit_read(stp))
		return nfserr_openmode;

	return nfs_ok;
}
staticinline __be32
check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid, int flags)
{ if (ONE_STATEID(stateid) && (flags & RD_STATE)) return nfs_ok; elseif (opens_in_grace(net)) { /* Answer in remaining cases depends on existence of
* conflicting state; so we must wait out the grace period. */ return nfserr_grace;
} elseif (flags & WR_STATE) return nfs4_share_conflict(current_fh,
NFS4_SHARE_DENY_WRITE); else/* (flags & RD_STATE) && ZERO_STATEID(stateid) */ return nfs4_share_conflict(current_fh,
NFS4_SHARE_DENY_READ);
}
/*
 * Compare the generation of the client-supplied stateid @in with the
 * server's reference copy @ref.
 *
 * Returns nfs_ok when the generations match (or when sessions are in use
 * and the client sent generation zero), nfserr_bad_stateid when @in is
 * newer than @ref, and nfserr_old_stateid when it is older.
 */
static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref,
				       bool has_session)
{
	/*
	 * With sessions, a zero generation number means "ignore the
	 * generation"; otherwise an exact match is what we want.
	 */
	if ((has_session && in->si_generation == 0) ||
	    in->si_generation == ref->si_generation)
		return nfs_ok;

	/*
	 * A stateid from the future can only come from a buggy client.
	 *
	 * A stateid from the past, however, can come from a well-behaved
	 * one: if the client sends a lock while some IO is outstanding,
	 * the lock may bump si_generation while the IO is still in
	 * flight.  The client could avoid that by waiting for responses
	 * on all the IO requests, but better performance may result from
	 * retrying IO that receives an old_stateid error if requests are
	 * rarely reordered in flight.
	 */
	return nfsd4_stateid_generation_after(in, ref) ?
		nfserr_bad_stateid : nfserr_old_stateid;
}
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid)) return status;
spin_lock(&cl->cl_lock);
s = find_stateid_locked(cl, stateid); if (!s) goto out_unlock;
status = nfsd4_stid_check_stateid_generation(stateid, s, 1); if (status) goto out_unlock;
status = nfsd4_verify_open_stid(s); if (status) goto out_unlock;
switch (s->sc_type) { case SC_TYPE_DELEG:
status = nfs_ok; break; case SC_TYPE_OPEN: case SC_TYPE_LOCK:
status = nfsd4_check_openowner_confirmed(openlockstateid(s)); break; default:
printk("unknown stateid type %x\n", s->sc_type);
status = nfserr_bad_stateid;
}
out_unlock:
spin_unlock(&cl->cl_lock); if (status == nfserr_admin_revoked)
nfsd40_drop_revoked_stid(cl, stateid); return status;
}
/* * only return revoked delegations if explicitly asked. * otherwise we report revoked or bad_stateid status.
*/ if (statusmask & SC_STATUS_REVOKED)
return_revoked = true; if (typemask & SC_TYPE_DELEG) /* Always allow REVOKED for DELEG so we can * return the appropriate error.
*/
statusmask |= SC_STATUS_REVOKED;
switch (s->sc_type) { case SC_TYPE_DELEG: case SC_TYPE_OPEN: case SC_TYPE_LOCK: if (flags & RD_STATE)
ret = find_readable_file(s->sc_file); else
ret = find_writeable_file(s->sc_file);
}
return ret;
}
/*
 * Validate an open/lock stateid for an operation described by @flags:
 * first run nfsd4_check_openowner_confirmed(), and only if that reports
 * no error, check the open mode with nfs4_check_openmode().
 *
 * Returns the first non-zero status encountered, or the result of the
 * open-mode check.
 */
static __be32
nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags)
{
	__be32 confirmed = nfsd4_check_openowner_confirmed(ols);

	return confirmed ? confirmed : nfs4_check_openmode(ols, flags);
}
nf = nfs4_find_file(s, flags); if (nf) {
status = nfsd_permission(&rqstp->rq_cred,
fhp->fh_export, fhp->fh_dentry,
acc | NFSD_MAY_OWNER_OVERRIDE); if (status) {
nfsd_file_put(nf); goto out;
}
} else {
status = nfsd_file_acquire(rqstp, fhp, acc, &nf); if (status) return status;
}
*nfp = nf;
out: return status;
}

/*
 * Drop one reference on the copy-notify state @cps.
 *
 * When the reference count reaches zero the state is unlinked from its
 * cp_list, its id is removed from nn->s2s_cp_stateids, and the memory is
 * freed.  Otherwise nothing else happens.
 *
 * Warns once if @cps is not of type NFS4_COPYNOTIFY_STID.  The caller
 * visible in this file, manage_cpntf_state(), invokes this with
 * nn->s2s_cp_lock held.
 */
static void
_free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
{
	WARN_ON_ONCE(cps->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID);
	if (!refcount_dec_and_test(&cps->cp_stateid.cs_count))
		return;
	list_del(&cps->cp_list);
	idr_remove(&nn->s2s_cp_stateids,
		   cps->cp_stateid.cs_stid.si_opaque.so_id);
	kfree(cps);
}

/*
 * A READ from an inter server to server COPY will have a
 * copy stateid. Look up the copy notify stateid from the
 * idr structure and take a reference on it.
 */
/*
 * manage_cpntf_state - look up a copy-notify state by stateid
 * @nn:  per-net nfsd data holding the s2s_cp_stateids idr
 * @st:  stateid supplied by the client
 * @clp: NULL to take a reference and return the state; non-NULL to drop
 *       a reference on it instead
 * @cps: where the state is returned; written only when @clp is NULL
 *
 * Returns nfserr_bad_stateid if the client id in @st does not match
 * nn->s2s_cp_cl_id, if no entry exists for the id, or if the entry is
 * not of type NFS4_COPYNOTIFY_STID.  Returns 0 otherwise.
 */
__be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
			  struct nfs4_client *clp,
			  struct nfs4_cpntf_state **cps)
{
	struct nfs4_cpntf_state *found = NULL;
	copy_stateid_t *entry;

	if (st->si_opaque.so_clid.cl_id != nn->s2s_cp_cl_id)
		return nfserr_bad_stateid;

	spin_lock(&nn->s2s_cp_lock);
	entry = idr_find(&nn->s2s_cp_stateids, st->si_opaque.so_id);
	if (entry) {
		found = container_of(entry, struct nfs4_cpntf_state,
				     cp_stateid);
		if (found->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID)
			found = NULL;		/* wrong kind of entry */
		else if (clp)
			_free_cpntf_state_locked(nn, found);
		else
			refcount_inc(&found->cp_stateid.cs_count);
	}
	spin_unlock(&nn->s2s_cp_lock);

	if (!found)
		return nfserr_bad_stateid;

	/*
	 * On the "drop a reference" path @found is used only as a
	 * was-it-there flag and is never handed back to the caller.
	 */
	if (!clp)
		*cps = found;
	return 0;
}
status = manage_cpntf_state(nn, st, NULL, &cps); if (status) return status;
cps->cpntf_time = ktime_get_boottime_seconds();
status = nfserr_expired;
found = lookup_clientid(&cps->cp_p_clid, true, nn); if (!found) goto out;
*stid = find_stateid_by_type(found, &cps->cp_p_stateid,
SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK,
0); if (*stid)
status = nfs_ok; else
status = nfserr_bad_stateid;
/**
 * nfs4_preprocess_stateid_op - find and prep stateid for an operation
 * @rqstp: incoming request from client
 * @cstate: current compound state
 * @fhp: filehandle associated with requested stateid
 * @stateid: stateid (provided by client)
 * @flags: flags describing type of operation to be done
 * @nfp: optional nfsd_file return pointer (may be NULL)
 * @cstid: optional returned nfs4_stid pointer (may be NULL)
 *
 * Given info from the client, look up a nfs4_stid for the operation. On
 * success, it returns a reference to the nfs4_stid and/or the nfsd_file
 * associated with it.
 *
 * The special all-zeros and all-ones stateids are not looked up; they are
 * vetted by check_special_stateids() instead.  When a stid was found but
 * is not handed back through @cstid (error, or @cstid is NULL), the
 * reference on it is dropped before returning.
 */
__be32
nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
		struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
		stateid_t *stateid, int flags, struct nfsd_file **nfp,
		struct nfs4_stid **cstid)
{
	struct net *net = SVC_NET(rqstp);
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfs4_stid *stid = NULL;
	__be32 status;

	if (nfp)
		*nfp = NULL;

	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
		status = check_special_stateids(net, fhp, stateid, flags);
		goto done;
	}

	status = nfsd4_lookup_stateid(cstate, stateid,
				      SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK,
				      0, &stid, nn);
	/* Not a regular stateid: try the copy-notify stateids next. */
	if (status == nfserr_bad_stateid)
		status = find_cpntf_state(nn, stateid, &stid);
	if (status)
		return status;

	status = nfsd4_stid_check_stateid_generation(stateid, stid,
						     nfsd4_has_session(cstate));
	if (status)
		goto out;

	/* Mode check depends on the kind of stateid; other types are not checked. */
	if (stid->sc_type == SC_TYPE_DELEG)
		status = nfs4_check_delegmode(delegstateid(stid), flags);
	else if (stid->sc_type == SC_TYPE_OPEN ||
		 stid->sc_type == SC_TYPE_LOCK)
		status = nfs4_check_olstateid(openlockstateid(stid), flags);
	if (status)
		goto out;

	status = nfs4_check_fh(fhp, stid);

done:
	if (status == nfs_ok && nfp)
		status = nfs4_check_file(rqstp, fhp, stid, nfp, flags);
out:
	if (!stid)
		return status;

	/* Either pass the reference to the caller or drop it here. */
	if (!status && cstid)
		*cstid = stid;
	else
		nfs4_put_stid(stid);
	return status;
}
/* * Test if the stateid is valid
*/
__be32
nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
{ struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct nfsd4_test_stateid_id *stateid; struct nfs4_client *cl = cstate->clp;
status = nfsd4_check_seqid(cstate, sop, seqid); if (status) return status;
status = nfsd4_lock_ol_stateid(stp); if (status != nfs_ok) return status;
status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); if (status == nfs_ok)
status = nfs4_check_fh(current_fh, &stp->st_stid); if (status != nfs_ok)
mutex_unlock(&stp->st_mutex); return status;
}
/**
 * nfs4_preprocess_seqid_op - find and prep an ol_stateid for a seqid-morphing op
 * @cstate: compound state
 * @seqid: seqid (provided by client)
 * @stateid: stateid (provided by client)
 * @typemask: mask of allowable types for this operation
 * @statusmask: mask of allowed states: 0 or STID_CLOSED
 * @stpp: return pointer for the stateid found
 * @nn: net namespace for request
 *
 * Given a stateid+seqid from a client, look up an nfs4_ol_stateid and
 * return it in @stpp. On a nfs_ok return, the returned stateid will
 * have its st_mutex locked.
 *
 * On any error *@stpp is left NULL.  If nfs4_seqid_op_checks() fails, the
 * stid reference is dropped before returning.
 */
static __be32
nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
			 stateid_t *stateid,
			 unsigned short typemask, unsigned short statusmask,
			 struct nfs4_ol_stateid **stpp,
			 struct nfsd_net *nn)
{
	__be32 status;
	struct nfs4_stid *s;
	struct nfs4_ol_stateid *stp = NULL;

	trace_nfsd_preprocess(seqid, stateid);

	*stpp = NULL;
retry:
	status = nfsd4_lookup_stateid(cstate, stateid,
				      typemask, statusmask, &s, nn);
	if (status)
		return status;
	stp = openlockstateid(s);
	if (nfsd4_cstate_assign_replay(cstate, stp->st_stateowner) == -EAGAIN) {
		/*
		 * NOTE(review): this path drops a stateowner reference but
		 * not the stid reference that the error path below drops
		 * with nfs4_put_stid() - confirm against the contract of
		 * nfsd4_cstate_assign_replay() that this is intended.
		 */
		nfs4_put_stateowner(stp->st_stateowner);
		goto retry;
	}

	status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp);
	if (!status)
		*stpp = stp;
	else
		nfs4_put_stid(&stp->st_stid);
	return status;
}
/* * Technically we don't _really_ have to increment or copy it, since * it should just be gone after this operation and we clobber the * copied value below, but we continue to do so here just to ensure * that racing ops see that there was a state change.
*/
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
need_move_to_close_list = nfsd4_close_open_stateid(stp);
mutex_unlock(&stp->st_mutex); if (need_move_to_close_list)
move_to_close_lru(stp, net);
/* v4.1+ suggests that we send a special stateid in here, since the * clients should just ignore this anyway. Since this is not useful * for v4.0 clients either, we set it to the special close_stateid * universally. * * See RFC5661 section 18.2.4, and RFC7530 section 16.2.5
*/
memcpy(&close->cl_stateid, &close_stateid, sizeof(close->cl_stateid));
/* put reference from nfs4_preprocess_seqid_op */
nfs4_put_stid(&stp->st_stid);
out: return status;
}
/* last octet in a range */ staticinline u64
last_byte_offset(u64 start, u64 len)
{
u64 end;
WARN_ON_ONCE(!len);
end = start + len; return end > start ? end - 1: NFS4_MAX_UINT64;
}
/*
 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
 * we can't properly handle lock requests that go beyond the (2^63 - 1)-th
 * byte, because of sign extension problems.  Since NFSv4 calls for 64-bit
 * locking, this prevents us from being completely protocol-compliant.  The
 * real solution to this problem is to start using unsigned file offsets in
 * the VFS, but this is a very deep change!
 *
 * Until then, clamp a start or end offset that shows up as negative to
 * OFFSET_MAX.
 */
static inline void
nfs4_transform_lock_offset(struct file_lock *lock)
{
	if (lock->fl_start < 0)
		lock->fl_start = OFFSET_MAX;
	if (lock->fl_end < 0)
		lock->fl_end = OFFSET_MAX;
}
/* An empty list means that something else is going to be using it */
spin_lock(&nn->blocked_locks_lock); if (!list_empty(&nbl->nbl_list)) {
list_del_init(&nbl->nbl_list);
list_del_init(&nbl->nbl_lru);
queue = true;
}
spin_unlock(&nn->blocked_locks_lock);
if (queue) {
trace_nfsd_cb_notify_lock(lo, nbl);
nfsd4_try_run_cb(&nbl->nbl_cb);
}
}
/* If ost is not hashed, ost->st_locks will not be valid */ if (!nfs4_ol_stateid_unhashed(ost))
list_for_each_entry(lst, &ost->st_locks, st_locks) { if (lst->st_stateowner == &lo->lo_owner) {
refcount_inc(&lst->st_stid.sc_count); return lst;
}
} return NULL;
}
lo = find_lockowner_str(cl, &lock->lk_new_owner); if (!lo) {
strhashval = ownerstr_hashval(&lock->lk_new_owner);
lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); if (lo == NULL) return nfserr_jukebox;
} else { /* with an existing lockowner, seqids must be the same */
status = nfserr_bad_seqid; if (!cstate->minorversion &&
lock->lk_new_lock_seqid != lo->lo_owner.so_seqid) goto out;
}
lst = find_or_create_lock_stateid(lo, fi, inode, ost, new); if (lst == NULL) {
status = nfserr_jukebox; goto out;
}
if (check_lock_length(lock->lk_offset, lock->lk_length)) return nfserr_inval;
status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); if (status != nfs_ok) return status; if (exportfs_cannot_lock(cstate->current_fh.fh_dentry->d_sb->s_export_op)) {
status = nfserr_notsupp; goto out;
}
if (lock->lk_is_new) { if (nfsd4_has_session(cstate)) /* See rfc 5661 18.10.3: given clientid is ignored: */
memcpy(&lock->lk_new_clientid,
&cstate->clp->cl_clientid, sizeof(clientid_t));
/* validate and update open stateid and open seqid */
status = nfs4_preprocess_confirmed_seqid_op(cstate,
lock->lk_new_open_seqid,
&lock->lk_new_open_stateid,
&open_stp, nn); if (status) goto out;
mutex_unlock(&open_stp->st_mutex);
open_sop = openowner(open_stp->st_stateowner);
status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
&lock->lk_new_clientid)) goto out;
status = lookup_or_create_lock_state(cstate, open_stp, lock,
&lock_stp, &new);
} else {
status = nfs4_preprocess_seqid_op(cstate,
lock->lk_old_lock_seqid,
&lock->lk_old_lock_stateid,
SC_TYPE_LOCK, 0, &lock_stp,
nn);
} if (status) goto out;
lock_sop = lockowner(lock_stp->st_stateowner);
lkflg = setlkflg(lock->lk_type);
status = nfs4_check_openmode(lock_stp, lkflg); if (status) goto out;
status = nfserr_grace; if (locks_in_grace(net) && !lock->lk_reclaim) goto out;
status = nfserr_no_grace; if (!locks_in_grace(net) && lock->lk_reclaim) goto out;
if (lock->lk_reclaim)
flags |= FL_RECLAIM;
fp = lock_stp->st_stid.sc_file; switch (lock->lk_type) { case NFS4_READW_LT:
fallthrough; case NFS4_READ_LT:
spin_lock(&fp->fi_lock);
nf = find_readable_file_locked(fp); if (nf)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
spin_unlock(&fp->fi_lock);
type = F_RDLCK; break; case NFS4_WRITEW_LT:
fallthrough; case NFS4_WRITE_LT:
spin_lock(&fp->fi_lock);
nf = find_writeable_file_locked(fp); if (nf)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
spin_unlock(&fp->fi_lock);
type = F_WRLCK; break; default:
status = nfserr_inval; goto out;
}
err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, conflock); switch (err) { case 0: /* success! */
nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
status = 0; if (lock->lk_reclaim)
nn->somebody_reclaimed = true; break; case FILE_LOCK_DEFERRED:
kref_put(&nbl->nbl_kref, free_nbl);
nbl = NULL;
fallthrough; case -EAGAIN: /* conflock holds conflicting lock */
status = nfserr_denied;
dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
nfs4_set_lock_denied(conflock, &lock->lk_denied); break; case -EDEADLK:
status = nfserr_deadlock; break; default:
dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err);
status = nfserrno(err); break;
}
out: if (nbl) { /* dequeue it if we queued it before */ if (flags & FL_SLEEP) {
spin_lock(&nn->blocked_locks_lock); if (!list_empty(&nbl->nbl_list) &&
!list_empty(&nbl->nbl_lru)) {
list_del_init(&nbl->nbl_list);
list_del_init(&nbl->nbl_lru);
kref_put(&nbl->nbl_kref, free_nbl);
} /* nbl can use one of lists to be linked to reaplist */
spin_unlock(&nn->blocked_locks_lock);
}
free_blocked_lock(nbl);
} if (nf)
nfsd_file_put(nf); if (lock_stp) { /* Bump seqid manually if the 4.0 replay owner is openowner */ if (cstate->replay_owner &&
cstate->replay_owner != &lock_sop->lo_owner &&
seqid_mutating_err(ntohl(status)))
lock_sop->lo_owner.so_seqid++;
/* * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it.
*/ if (status && new)
release_lock_stateid(lock_stp);
mutex_unlock(&lock_stp->st_mutex);
nfs4_put_stid(&lock_stp->st_stid);
} if (open_stp)
nfs4_put_stid(&open_stp->st_stid);
nfsd4_bump_seqid(cstate, status); if (conflock)
locks_free_lock(conflock); return status;
}
/*
 * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
 * so we do a temporary open here just to get an open file to pass to
 * vfs_test_lock.
 *
 * Returns the error from acquiring the file, from breaking leases, or
 * from vfs_test_lock() (the latter two converted with nfserrno()).  The
 * temporary file is always released again, and lock->c.flc_file is reset
 * to NULL after the test.
 */
static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp,
			     struct file_lock *lock)
{
	struct nfsd_file *tmp_file;
	struct inode *ino;
	__be32 status;

	status = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &tmp_file);
	if (status)
		return status;

	ino = fhp->fh_dentry->d_inode;

	/* Hold the inode lock to block new leases till after test_lock. */
	inode_lock(ino);
	status = nfserrno(nfsd_open_break_lease(ino, NFSD_MAY_READ));
	if (!status) {
		lock->c.flc_file = tmp_file->nf_file;
		status = nfserrno(vfs_test_lock(tmp_file->nf_file, lock));
		lock->c.flc_file = NULL;
	}
	inode_unlock(ino);

	nfsd_file_put(tmp_file);
	return status;
}
out_nfserr:
status = nfserrno(err); goto put_file;
}
/* * returns * true: locks held by lockowner * false: no locks held by lockowner
*/ staticbool
check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
{ struct file_lock *fl; int status = false; struct nfsd_file *nf; struct inode *inode; struct file_lock_context *flctx;
spin_lock(&fp->fi_lock);
nf = find_any_file_locked(fp); if (!nf) { /* Any valid lock stateid should have some sort of access */
WARN_ON_ONCE(1); goto out;
}
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
spin_lock(&flctx->flc_lock);
for_each_file_lock(fl, &flctx->flc_posix) { if (fl->c.flc_owner == (fl_owner_t)lowner) {
status = true; break;
}
}
spin_unlock(&flctx->flc_lock);
}
out:
spin_unlock(&fp->fi_lock); return status;
}
/** * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations * @rqstp: RPC transaction * @cstate: NFSv4 COMPOUND state * @u: RELEASE_LOCKOWNER arguments * * Check if there are any locks still held and if not, free the lockowner * and any lock state that is owned. * * Return values: * %nfs_ok: lockowner released or not found * %nfserr_locks_held: lockowner still in use * %nfserr_stale_clientid: clientid no longer active * %nfserr_expired: clientid not recognized
*/
__be32
nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
{ struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
clientid_t *clid = &rlockowner->rl_clientid; struct nfs4_ol_stateid *stp; struct nfs4_lockowner *lo; struct nfs4_client *clp;
LIST_HEAD(reaplist);
__be32 status;
/* * failure => all reset bets are off, nfserr_no_grace... * * The caller is responsible for freeing name.data if NULL is returned (it * will be freed in nfs4_remove_reclaim_record in the normal case).
*/ struct nfs4_client_reclaim *
nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, struct nfsd_net *nn)
{ unsignedint strhashval; struct nfs4_client_reclaim *crp;
/*
 * Decide whether @clp may perform a reclaim-type open.
 *
 * Returns nfserr_no_grace if the client's NFSD4_CLIENT_RECLAIM_COMPLETE
 * flag is set, nfserr_reclaim_bad if nfsd4_client_record_check() returns
 * non-zero, and nfs_ok otherwise.
 */
__be32
nfs4_check_open_reclaim(struct nfs4_client *clp)
{
	__be32 status;

	if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags))
		status = nfserr_no_grace;
	else if (nfsd4_client_record_check(clp))
		status = nfserr_reclaim_bad;
	else
		status = nfs_ok;

	return status;
}
/*
 * Since the lifetime of a delegation isn't limited to that of an open, a
 * client may quite reasonably hang on to a delegation as long as it has
 * the inode cached.  This becomes an obvious problem the first time a
 * client's inode cache approaches the size of the server's total memory.
 *
 * For now we avoid this problem by imposing a hard limit on the number
 * of delegations, which varies according to the server's memory size.
 */
static void
set_max_delegations(void)
{
	/*
	 * Allow at most 4 delegations per megabyte of RAM.  Quick
	 * estimates suggest that in the worst case (where every delegation
	 * is for a different inode), a delegation could take about 1.5K,
	 * giving a worst case usage of about 6% of memory.
	 *
	 * The shift turns a page count into megabytes (20 - PAGE_SHIFT)
	 * and multiplies by four (the extra - 2) in a single step.
	 */
	max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT);
}
staticint nfs4_state_create_net(struct net *net)
{ struct nfsd_net *nn = net_generic(net, nfsd_net_id); int i;
nn->conf_id_hashtbl = kmalloc_array(CLIENT_HASH_SIZE, sizeof(struct list_head),
GFP_KERNEL); if (!nn->conf_id_hashtbl) goto err;
nn->unconf_id_hashtbl = kmalloc_array(CLIENT_HASH_SIZE, sizeof(struct list_head),
GFP_KERNEL); if (!nn->unconf_id_hashtbl) goto err_unconf_id;
nn->sessionid_hashtbl = kmalloc_array(SESSION_HASH_SIZE, sizeof(struct list_head),
GFP_KERNEL); if (!nn->sessionid_hashtbl) goto err_sessionid;
for (i = 0; i < CLIENT_HASH_SIZE; i++) {
INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]);
} for (i = 0; i < SESSION_HASH_SIZE; i++)
INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
nn->conf_name_tree = RB_ROOT;
nn->unconf_name_tree = RB_ROOT;
nn->boot_time = ktime_get_real_seconds();
nn->grace_ended = false;
nn->nfsd4_manager.block_opens = true;
INIT_LIST_HEAD(&nn->nfsd4_manager.list);
INIT_LIST_HEAD(&nn->client_lru);
INIT_LIST_HEAD(&nn->close_lru);
INIT_LIST_HEAD(&nn->del_recall_lru);
spin_lock_init(&nn->client_lock);
spin_lock_init(&nn->s2s_cp_lock);
idr_init(&nn->s2s_cp_stateids);
atomic_set(&nn->pending_async_copies, 0);
/*
 * functions to set current state id
 */

/*
 * Pass the stateid of the OPEN_DOWNGRADE arguments in @u, together with
 * @cstate, to put_stateid().
 */
void
nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate,
			       union nfsd4_op_u *u)
{
	put_stateid(cstate, &(u->open_downgrade.od_stateid));
}
/** * nfsd4_vet_deleg_time - vet and set the timespec for a delegated timestamp update * @req: timestamp from the client * @orig: original timestamp in the inode * @now: current time * * Given a timestamp from the client response, check it against the * current timestamp in the inode and the current time. Returns true * if the inode's timestamp needs to be updated, and false otherwise. * @req may also be changed if the timestamp needs to be clamped.
*/ bool nfsd4_vet_deleg_time(struct timespec64 *req, conststruct timespec64 *orig, conststruct timespec64 *now)
{
/* * "When the time presented is before the original time, then the * update is ignored." Also no need to update if there is no change.
*/ if (timespec64_compare(req, orig) <= 0) returnfalse;
/* * "When the time presented is in the future, the server can either * clamp the new time to the current time, or it may * return NFS4ERR_DELAY to the client, allowing it to retry."
*/ if (timespec64_compare(req, now) > 0)
*req = *now;
/**
 * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
 * @rqstp: RPC transaction context
 * @dentry: dentry of inode to be checked for a conflict
 * @pdp: returned WRITE delegation, if one was found
 *
 * This function is called when there is a conflict between a write
 * delegation and a change/size GETATTR from another client. The server
 * must either use the CB_GETATTR to get the current values of the
 * attributes from the client that holds the delegation or recall the
 * delegation before replying to the GETATTR. See RFC 8881 section
 * 18.7.4.
 *
 * Returns 0 if there is no conflict; otherwise an nfs_stat
 * code is returned. If @pdp is set to a non-NULL value, then the
 * caller must put the reference.
 */
__be32
nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
			     struct nfs4_delegation **pdp)
{
	__be32 status;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
	struct file_lock_context *ctx;
	struct nfs4_delegation *dp = NULL;
	struct file_lease *fl;
	struct nfs4_cb_fattr *ncf;
	struct inode *inode = d_inode(dentry);

	/* An inode without a lock context has no leases: no conflict. */
	ctx = locks_inode_context(inode);
	if (!ctx)
		return nfs_ok;

/* Sentinel stored in dp for a write lease that is not managed by nfsd. */
#define NON_NFSD_LEASE ((void *)1)

	spin_lock(&ctx->flc_lock);
	/*
	 * Only the first lease that is not a pure layout lease is examined
	 * (note the unconditional break); dp is set only if it is a write
	 * lease.
	 */
	for_each_file_lock(fl, &ctx->flc_lease) {
		if (fl->c.flc_flags == FL_LAYOUT)
			continue;
		if (fl->c.flc_type == F_WRLCK) {
			if (fl->fl_lmops == &nfsd_lease_mng_ops)
				dp = fl->c.flc_owner;
			else
				dp = NON_NFSD_LEASE;
		}
		break;
	}
	/*
	 * Nothing to query via callback: no write lease, a lease that is
	 * not nfsd's, or a delegation held by the client making this
	 * request.
	 */
	if (dp == NULL || dp == NON_NFSD_LEASE ||
	    dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
		spin_unlock(&ctx->flc_lock);
		if (dp == NON_NFSD_LEASE) {
			/* A foreign write lease can only be broken. */
			status = nfserrno(nfsd_open_break_lease(inode,
								NFSD_MAY_READ));
			if (status != nfserr_jukebox ||
			    !nfsd_wait_for_delegreturn(rqstp, inode))
				return status;
		}
		return 0;
	}

	/*
	 * NOTE(review): in this text ncf is read below without ever being
	 * assigned, ctx->flc_lock is still held on this path (only the
	 * early-return branch above releases it), no reference is taken
	 * on dp before the nfs4_put_stid() at out_status, and nn is
	 * unused.  Statements appear to be missing at this point -
	 * restore them from the authoritative source before building.
	 */
	wait_on_bit_timeout(&ncf->ncf_getattr.cb_flags, NFSD4_CALLBACK_RUNNING,
			    TASK_UNINTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT);
	if (ncf->ncf_cb_status) {
		/* Recall delegation only if client didn't respond */
		status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
		if (status != nfserr_jukebox ||
		    !nfsd_wait_for_delegreturn(rqstp, inode))
			goto out_status;
	}
	/*
	 * Treat the file as modified once the change attribute or size
	 * reported by the callback differs from the recorded values.
	 */
	if (!ncf->ncf_file_modified &&
	    (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
	     ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
		ncf->ncf_file_modified = true;
	if (ncf->ncf_file_modified) {
		int err;

		/*
		 * Per section 10.4.3 of RFC 8881, the server would
		 * not update the file's metadata with the client's
		 * modified size
		 */
		err = cb_getattr_update_times(dentry, dp);
		if (err) {
			status = nfserrno(err);
			goto out_status;
		}
		ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
		/* The delegation is handed to the caller, so it is not put here. */
		*pdp = dp;
		return nfs_ok;
	}
	status = nfs_ok;
out_status:
	nfs4_put_stid(&dp->dl_stid);
	return status;
}
Messung V0.5 in Prozent
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.0.199Bemerkung:
(vorverarbeitet am 2026-04-27)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.