// SPDX-License-Identifier: GPL-2.0
/*
 * File operations used by nfsd. Some of these have been ripped from
 * other parts of the kernel because they weren't exported, others
 * are partial duplicates with added or changed functionality.
 *
 * Note that several functions dget() the dentry upon which they want
 * to act, most notably those that create directory entries. Response
 * dentries are dput()'d if necessary in the release callback.
 * So if you notice code paths that apparently fail to dput() the
 * dentry, don't worry--they have been taken care of.
 *
 * Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
 * Zerocopy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
 */
for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { if (nfs_errtbl[i].syserr == errno) return nfs_errtbl[i].nfserr;
}
WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); return nfserr_io;
}
/*
 * Called from nfsd_lookup and encode_dirent. Check if we have crossed
 * a mount point.
 *
 * @rqstp: request being processed
 * @dpp:   IN: dentry that may be a mountpoint; OUT: possibly replaced by
 *         the dentry found on the far side of the mount
 * @expp:  IN: export containing *dpp; OUT: possibly replaced by the export
 *         covering the new dentry
 *
 * Returns a negative errno (e.g. -EAGAIN or -ETIMEDOUT) leaving *dpp and
 * *expp unchanged, or 0 (nfs_ok) having possibly changed *dpp and *expp.
 * When they are changed, the references that were held through the old
 * *dpp and *expp are dropped here and the new values carry their own
 * references.
 */
int
nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
	       struct svc_export **expp)
{
	struct svc_export *exp = *expp, *exp2 = NULL;
	struct dentry *dentry = *dpp;
	struct path path = {.mnt = mntget(exp->ex_path.mnt),
			    .dentry = dget(dentry)};
	unsigned int follow_flags = 0;
	int err = 0;

	if (exp->ex_flags & NFSEXP_CROSSMOUNT)
		follow_flags = LOOKUP_AUTOMOUNT;

	err = follow_down(&path, follow_flags);
	/*
	 * NOTE(review): this error path returns without path_put(&path);
	 * confirm that follow_down() releases the references on failure.
	 */
	if (err < 0)
		goto out;
	if (path.mnt == exp->ex_path.mnt && path.dentry == dentry &&
	    nfsd_mountpoint(dentry, exp) == 2) {
		/* This is only a mountpoint in some other namespace */
		path_put(&path);
		goto out;
	}

	exp2 = rqst_exp_get_by_name(rqstp, &path);
	if (IS_ERR(exp2)) {
		err = PTR_ERR(exp2);
		/*
		 * We normally allow NFS clients to continue
		 * "underneath" a mountpoint that is not exported.
		 * The exception is V4ROOT, where no traversal is ever
		 * allowed without an explicit export of the new
		 * directory.
		 */
		if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
			err = 0;
		path_put(&path);
		goto out;
	}
	if (nfsd_v4client(rqstp) ||
	    (exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
		/* successfully crossed mount point */
		/*
		 * This is subtle: path.dentry is *not* on path.mnt
		 * at this point.  The only reason we are safe is that
		 * original mnt is pinned down by exp, so we should
		 * put path *before* putting exp
		 */
		*dpp = path.dentry;
		path.dentry = dentry;
		*expp = exp2;
		exp2 = exp;
	}
	/* Drops whichever dentry/export pair was NOT handed to the caller. */
	path_put(&path);
	exp_put(exp2);
out:
	return err;
}
/*
 * Classify @dentry as a potential export point within @exp.
 *
 * For nfsd purposes, V4ROOT exports are treated as though there was an
 * export at *every* directory.
 *
 * Return value:
 *   1 - this dentry *must* be an export point (V4ROOT export, or the
 *       dentry is an NFSv4 junction),
 *   2 - it might be one, if there is really a mount here,
 *   0 - there is no chance of an export point here (this includes
 *       negative dentries).
 */
int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
{
	/* A negative dentry can never be an export point. */
	if (!d_inode(dentry))
		return 0;

	if ((exp->ex_flags & NFSEXP_V4ROOT) || nfsd4_is_junction(dentry))
		return 1;

	/*
	 * A managed dentry might only be a mountpoint in a different
	 * namespace, but the caller needs to check.
	 */
	return d_managed(dentry) ? 2 : 0;
}
/** * nfsd_lookup - look up a single path component for nfsd * * @rqstp: the request context * @fhp: the file handle of the directory * @name: the component name, or %NULL to look up parent * @len: length of name to examine * @resfh: pointer to pre-initialised filehandle to hold result. * * Look up one component of a pathname. * N.B. After this call _both_ fhp and resfh need an fh_put * * If the lookup would cross a mountpoint, and the mounted filesystem * is exported to the client with NFSEXP_NOHIDE, then the lookup is * accepted as it stands and the mounted directory is * returned. Otherwise the covered directory is returned. * NOTE: this mountpoint crossing is not supported properly by all * clients and is explicitly disallowed for NFSv3 *
*/
__be32
nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, constchar *name, unsignedint len, struct svc_fh *resfh)
{ struct svc_export *exp; struct dentry *dentry;
__be32 err;
err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); if (err) return err;
err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); if (err) return err;
err = check_nfsd_access(exp, rqstp, false); if (err) goto out; /* * Note: we compose the file handle now, but as the * dentry may be negative, it may need to be updated.
*/
err = fh_compose(resfh, exp, dentry, fhp); if (!err && d_really_is_negative(dentry))
err = nfserr_noent;
out:
dput(dentry);
exp_put(exp); return err;
}
/*
 * Reset the write verifier of @nn in response to error @err, and emit the
 * matching tracepoint, unless @err is one of the errors that says nothing
 * about durable storage.
 */
static void
commit_reset_write_verifier(struct nfsd_net *nn, struct svc_rqst *rqstp,
			    int err)
{
	switch (err) {
	case -EAGAIN:
	case -ESTALE:
		/*
		 * Neither of these are the result of a problem with
		 * durable storage, so avoid a write verifier reset.
		 */
		break;
	default:
		nfsd_reset_write_verifier(nn);
		trace_nfsd_writeverf_reset(nn, rqstp, err);
	}
}
if (!EX_ISSYNC(fhp->fh_export)) return 0; return commit_inode_metadata(inode);
}
/*
 * Go over the attributes and take care of the small differences between
 * NFS semantics and what Linux expects.
 *
 * @inode: object the attributes are about to be applied to
 * @iap:   requested attribute changes; ia_valid and ia_mode may be
 *         modified in place
 */
static void
nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
{
	/* Ignore mode updates on symlinks */
	if (S_ISLNK(inode->i_mode))
		iap->ia_valid &= ~ATTR_MODE;

	/* sanitize the mode change: keep the file type bits of the inode */
	if (iap->ia_valid & ATTR_MODE) {
		iap->ia_mode &= S_IALLUGO;
		iap->ia_mode |= (inode->i_mode & ~S_IALLUGO);
	}

	/* Revoke setuid/setgid on chown */
	if (!S_ISDIR(inode->i_mode) &&
	    ((iap->ia_valid & ATTR_UID) || (iap->ia_valid & ATTR_GID))) {
		iap->ia_valid |= ATTR_KILL_PRIV;
		if (iap->ia_valid & ATTR_MODE) {
			/* we're setting mode too, just clear the s*id bits */
			iap->ia_mode &= ~S_ISUID;
			if (iap->ia_mode & S_IXGRP)
				iap->ia_mode &= ~S_ISGID;
		} else {
			/* set ATTR_KILL_* bits and let VFS handle it */
			iap->ia_valid |= ATTR_KILL_SUID;
			iap->ia_valid |=
				setattr_should_drop_sgid(&nop_mnt_idmap, inode);
		}
	}
}
/* * Avoid the additional setattr call below if the only other * attribute that the client sends is the mtime, as we update * it as part of the size change above.
*/ if ((iap->ia_valid & ~ATTR_MTIME) == 0) return 0;
}
if ((iap->ia_valid & ~ATTR_DELEG) == 0) return 0;
/* * If ATTR_DELEG is set, then this is an update from a client that * holds a delegation. If this is an update for only the atime, the * ctime should not be changed. If the update contains the mtime * too, then ATTR_CTIME should already be set.
*/ if (!(iap->ia_valid & ATTR_DELEG))
iap->ia_valid |= ATTR_CTIME;
/**
 * nfsd_setattr - Set various file attributes.
 * @rqstp: controlling RPC transaction
 * @fhp: filehandle of target
 * @attr: attributes to set
 * @guardtime: do not act if ctime.tv_sec does not match this timestamp
 *
 * This call may adjust the contents of @attr (in particular, this
 * call may change the bits in the na_iattr.ia_valid field).
 *
 * Besides the iattr changes, a security label and POSIX access/default
 * ACLs carried in @attr are applied; their individual results are
 * stored in @attr->na_labelerr and @attr->na_aclerr rather than in the
 * return value.
 *
 * Returns nfs_ok on success, otherwise an NFS status code is
 * returned. Caller must release @fhp by calling fh_put in either
 * case.
 *
 * NOTE(review): as it appears here the body looks truncated -- the
 * "conststruct" in the signature is a fused "const struct", @guardtime is
 * never read, the out_fill_attrs label has no goto, and the retry loop
 * around __nfsd_setattr() has lost its header. Compare against the
 * authoritative source before relying on this text.
 */
__be32
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
	     struct nfsd_attrs *attr, conststruct timespec64 *guardtime)
{
	struct dentry *dentry;
	struct inode *inode;
	struct iattr *iap = attr->na_iattr;
	int accmode = NFSD_MAY_SATTR;
	umode_t ftype = 0;
	__be32 err;
	int host_err = 0;
	bool get_write_count;
	bool size_change = (iap->ia_valid & ATTR_SIZE);
	int retries;

	/*
	 * If utimes(2) and friends are called with times not NULL, we should
	 * not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
	 * will return EACCES, when the caller's effective UID does not match
	 * the owner of the file, and the caller is not privileged. In this
	 * situation, we should return EPERM(notify_change will return this).
	 */
	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME)) {
		accmode |= NFSD_MAY_OWNER_OVERRIDE;
		if (!(iap->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET)))
			accmode |= NFSD_MAY_WRITE;
	}

	/* Callers that do fh_verify should do the fh_want_write: */
	get_write_count = !fhp->fh_dentry;

	/* Get inode */
	err = fh_verify(rqstp, fhp, ftype, accmode);
	if (err)
		return err;
	if (get_write_count) {
		host_err = fh_want_write(fhp);
		if (host_err)
			goto out;
	}

	dentry = fhp->fh_dentry;
	inode = d_inode(dentry);

	/* May rewrite iap->ia_valid / iap->ia_mode in place. */
	nfsd_sanitize_attrs(inode, iap);

	/*
	 * The size case is special, it changes the file in addition to the
	 * attributes, and file systems don't expect it to be mixed with
	 * "random" attribute changes.  We thus split out the size change
	 * into a separate call to ->setattr, and do the rest as a separate
	 * setattr call.
	 */
	if (size_change) {
		err = nfsd_get_write_access(rqstp, fhp, iap);
		if (err)
			return err;
	}

	inode_lock(inode);
	err = fh_fill_pre_attrs(fhp);
	if (err)
		goto out_unlock;

		/*
		 * NOTE(review): a loop header (and the declaration of
		 * "attrs", plus the initial value of "retries") appears to
		 * be missing above this point; the statements below only
		 * make sense inside a retry loop.
		 */
		/*
		 * notify_change() can alter its iattr argument, making
		 * @iap unsuitable for submission multiple times. Make a
		 * copy for every loop iteration.
		 */
		attrs = *iap;
		host_err = __nfsd_setattr(dentry, &attrs);
		/* Only -EAGAIN is retried, and only while retries remain. */
		if (host_err != -EAGAIN || !retries--)
			break;
		if (!nfsd_wait_for_delegreturn(rqstp, inode))
			break;
	}
	/* Apply the optional security label; result goes to na_labelerr. */
	if (attr->na_seclabel && attr->na_seclabel->len)
		attr->na_labelerr = security_inode_setsecctx(dentry,
			attr->na_seclabel->data, attr->na_seclabel->len);
	/* Apply the optional access ACL; result goes to na_aclerr. */
	if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl)
		attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
						dentry, ACL_TYPE_ACCESS,
						attr->na_pacl);
	/* A default ACL is applied only to directories, and only if the
	 * access ACL step did not record an error. */
	if (IS_ENABLED(CONFIG_FS_POSIX_ACL) &&
	    !attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode))
		attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
						dentry, ACL_TYPE_DEFAULT,
						attr->na_dpacl);
out_fill_attrs:
	/*
	 * RFC 1813 Section 3.3.2 does not mandate that an NFS server
	 * returns wcc_data for SETATTR. Some client implementations
	 * depend on receiving wcc_data, however, to sort out partial
	 * updates (eg., the client requested that size and mode be
	 * modified, but the server changed only the file mode).
	 */
	fh_fill_post_attrs(fhp);
out_unlock:
	inode_unlock(inode);
	/* Pairs with nfsd_get_write_access() taken for a size change. */
	if (size_change)
		put_write_access(inode);
out:
	if (!host_err)
		host_err = commit_metadata(fhp);
	/* An NFS-level error takes precedence over the host errno. */
	return err != 0 ? err : nfserrno(host_err);
}
#ifdefined(CONFIG_NFSD_V4) /* * NFS junction information is stored in an extended attribute.
*/ #define NFSD_JUNCTION_XATTR_NAME XATTR_TRUSTED_PREFIX "junction.nfs"
/**
 * nfsd4_is_junction - Test if an object could be an NFS junction
 *
 * @dentry: object to test
 *
 * An object qualifies when it has an inode, none of its execute bits are
 * set, its sticky bit is set, and a size probe of the junction extended
 * attribute reports a positive length.
 *
 * Returns 1 if "dentry" appears to contain NFS junction information.
 * Otherwise 0 is returned.
 */
int nfsd4_is_junction(struct dentry *dentry)
{
	struct inode *ino = d_inode(dentry);

	if (!ino)
		return 0;
	/* Cheap mode-bit tests first; the xattr probe only runs if they pass. */
	if ((ino->i_mode & S_IXUGO) || !(ino->i_mode & S_ISVTX))
		return 0;
	return vfs_getxattr(&nop_mnt_idmap, dentry, NFSD_JUNCTION_XATTR_NAME,
			    NULL, 0) > 0;
}
since = READ_ONCE(dst->f_wb_err);
cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); if (cloned < 0) {
ret = nfserrno(cloned); goto out_err;
} if (count && cloned != count) {
ret = nfserrno(-EINVAL); goto out_err;
} if (sync) {
loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX; int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
if (!status)
status = filemap_check_wb_err(dst->f_mapping, since); if (!status)
status = commit_inode_metadata(file_inode(src)); if (status < 0) { struct nfsd_net *nn = net_generic(nf_dst->nf_net,
nfsd_net_id);
/* * Limit copy to 4MB to prevent indefinitely blocking an nfsd * thread and client rpc slot. The choice of 4MB is somewhat * arbitrary. We might instead base this on r/wsize, or make it * tunable, or use a time instead of a byte limit, or implement * asynchronous copy. In theory a client could also recognize a * limit like this and pipeline multiple COPY requests.
*/
count = min_t(u64, count, 1 << 22);
ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
COPY_FILE_SPLICE); return ret;
}
__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, loff_t len, int flags)
{ int error;
if (!S_ISREG(file_inode(file)->i_mode)) return nfserr_inval;
staticstruct accessmap nfs3_anyaccess[] = { /* Some clients - Solaris 2.6 at least, make an access call * to the server to check for access for things like /dev/null * (which really, the server doesn't care about). So * We provide simple access checking for them, looking * mainly at mode bits, and we make sure to ignore read-only * filesystem checks
*/
{ NFS3_ACCESS_READ, NFSD_MAY_READ },
{ NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
{ NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS },
{ NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS },
query = *access; for (; map->access; map++) { if (map->access & query) {
__be32 err2;
sresult |= map->access;
err2 = nfsd_permission(&rqstp->rq_cred, export,
dentry, map->how); switch (err2) { case nfs_ok:
result |= map->access; break;
/* the following error codes just mean the access was not allowed,
* rather than an error occurred */ case nfserr_rofs: case nfserr_acces: case nfserr_perm: /* simply don't "or" in the access bit. */ break; default:
error = err2; goto out;
}
}
}
*access = result; if (supported)
*supported = sresult;
out: return error;
}
int nfsd_open_break_lease(struct inode *inode, int access)
{ unsignedint mode;
/* * Open an existing file or directory. * The may_flags argument indicates the type of open (read/write/lock) * and additional flags. * N.B. After this call fhp needs an fh_put
*/ staticint
__nfsd_open(struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp)
{ struct path path; struct inode *inode; struct file *file; int flags = O_RDONLY|O_LARGEFILE; int host_err = -EPERM;
/* * If we get here, then the client has already done an "open", * and (hopefully) checked permission - so allow OWNER_OVERRIDE * in case a chmod has now revoked permission. * * Arguably we should also allow the owner override for * directories, but we never have and it doesn't seem to have * caused anyone a problem. If we were to change this, note * also that our filldir callbacks would need a variant of * lookup_one_positive_unlocked() that doesn't check permissions.
*/ if (type == S_IFREG)
may_flags |= NFSD_MAY_OWNER_OVERRIDE;
retry:
err = fh_verify(rqstp, fhp, type, may_flags); if (!err) {
host_err = __nfsd_open(fhp, type, may_flags, filp); if (host_err == -EOPENSTALE && !retried) {
retried = true;
fh_put(fhp); goto retry;
}
err = nfserrno(host_err);
} return err;
}
/**
 * nfsd_open_verified - Open a regular file for the filecache
 * @fhp: NFS filehandle of the file to open
 * @may_flags: internal permission flags
 * @filp: OUT: open "struct file *"
 *
 * Thin wrapper that forwards to __nfsd_open() with the file type fixed
 * to S_IFREG.
 *
 * Returns zero on success, or a negative errno value.
 */
int
nfsd_open_verified(struct svc_fh *fhp, int may_flags, struct file **filp)
{
	int host_err;

	host_err = __nfsd_open(fhp, S_IFREG, may_flags, filp);
	return host_err;
}
/*
 * Grab and keep cached pages associated with a file in the svc_rqst
 * so that they can be passed to the network sendmsg routines
 * directly. They will be released after the sending has completed.
 *
 * @pipe: splice pipe (unused here)
 * @buf:  pipe buffer describing the page(s) and starting offset
 * @sd:   splice descriptor; sd->u.data carries the svc_rqst and sd->len
 *        the number of bytes in this chunk
 *
 * Return values: Number of bytes consumed, or -EIO if there are no
 * remaining pages in rqstp->rq_pages.
 */
static int
nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
		  struct splice_desc *sd)
{
	struct svc_rqst *rqstp = sd->u.data;
	struct page *page = buf->page;	/* may be a compound one */
	unsigned offset = buf->offset;
	struct page *last_page;

	/* Walk every page touched by [offset, offset + sd->len). */
	last_page = page + (offset + sd->len - 1) / PAGE_SIZE;
	for (page += offset / PAGE_SIZE; page <= last_page; page++) {
		/*
		 * Skip page replacement when extending the contents of the
		 * current page.  But note that we may get two zero_pages in a
		 * row from shmem.
		 */
		if (page == *(rqstp->rq_next_page - 1) &&
		    offset_in_page(rqstp->rq_res.page_base +
				   rqstp->rq_res.page_len))
			continue;
		if (unlikely(!svc_rqst_replace_page(rqstp, page)))
			return -EIO;
	}
	if (rqstp->rq_res.page_len == 0)	/* first call */
		rqstp->rq_res.page_base = offset % PAGE_SIZE;
	rqstp->rq_res.page_len += sd->len;
	return sd->len;
}
/** * nfsd_splice_read - Perform a VFS read using a splice pipe * @rqstp: RPC transaction context * @fhp: file handle of file to be read * @file: opened struct file of file to be read * @offset: starting byte offset * @count: IN: requested number of bytes; OUT: number of bytes read * @eof: OUT: set non-zero if operation reached the end of the file * * Returns nfs_ok on success, otherwise an nfserr stat value is * returned.
*/
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsignedlong *count,
u32 *eof)
{ struct splice_desc sd = {
.len = 0,
.total_len = *count,
.pos = offset,
.u.data = rqstp,
};
ssize_t host_err;
/** * nfsd_iter_read - Perform a VFS read using an iterator * @rqstp: RPC transaction context * @fhp: file handle of file to be read * @file: opened struct file of file to be read * @offset: starting byte offset * @count: IN: requested number of bytes; OUT: number of bytes read * @base: offset in first page of read buffer * @eof: OUT: set non-zero if operation reached the end of the file * * Some filesystems or situations cannot use nfsd_splice_read. This * function is the slightly less-performant fallback for those cases. * * Returns nfs_ok on success, otherwise an nfserr stat value is * returned.
*/
__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsignedlong *count, unsignedint base, u32 *eof)
{ unsignedlong v, total; struct iov_iter iter; struct kiocb kiocb;
ssize_t host_err;
size_t len;
/* * Gathered writes: If another process is currently writing to the file, * there's a high chance this is another nfsd (triggered by a bulk write * from a client's biod). Rather than syncing the file with each write * request, we sleep for 10 msec. * * I don't know if this roughly approximates C. Juszak's idea of * gathered writes, but it's a nice and simple solution (IMHO), and it * seems to work:-) * * Note: we do this only in the NFSv2 case, since v3 and higher have a * better tool (separate unstable writes and commits) for solving this * problem.
*/ staticint wait_for_concurrent_writes(struct file *file)
{ struct inode *inode = file_inode(file); static ino_t last_ino; static dev_t last_dev; int err = 0;
if (sb->s_export_op)
exp_op_flags = sb->s_export_op->flags;
if (test_bit(RQ_LOCAL, &rqstp->rq_flags) &&
!(exp_op_flags & EXPORT_OP_REMOTE_FS)) { /* * We want throttling in balance_dirty_pages() * and shrink_inactive_list() to only consider * the backingdev we are writing to, so that nfs to * localhost doesn't cause nfsd to lock up due to all * the client's dirty pages or its congested queue.
*/
current->flags |= PF_LOCAL_THROTTLE;
restore_flags = true;
}
exp = fhp->fh_export;
if (!EX_ISSYNC(exp))
stable = NFS_UNSTABLE;
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = offset; if (stable && !fhp->fh_use_wgather)
kiocb.ki_flags |= IOCB_DSYNC;
/**
 * nfsd_read_splice_ok - check if spliced reading is supported
 * @rqstp: RPC transaction context
 *
 * Return values:
 *   %true: nfsd_splice_read() may be used
 *   %false: nfsd_splice_read() must not be used
 *
 * NFS READ normally uses splice to send data in-place. However the
 * data in cache can change after the reply's MIC is computed but
 * before the RPC reply is sent. To prevent the client from
 * rejecting the server-computed MIC in this somewhat rare case, do
 * not use splice with the GSS integrity and privacy services.
 *
 * Splice is also refused whenever nfsd_disable_splice_read is set.
 */
bool nfsd_read_splice_ok(struct svc_rqst *rqstp)
{
	if (nfsd_disable_splice_read)
		return false;
	switch (svc_auth_flavor(rqstp)) {
	case RPC_AUTH_GSS_KRB5I:
	case RPC_AUTH_GSS_KRB5P:
		return false;
	}
	return true;
}
/** * nfsd_read - Read data from a file * @rqstp: RPC transaction context * @fhp: file handle of file to be read * @offset: starting byte offset * @count: IN: requested number of bytes; OUT: number of bytes read * @eof: OUT: set non-zero if operation reached the end of the file * * The caller must verify that there is enough space in @rqstp.rq_res * to perform this operation. * * N.B. After this call fhp needs an fh_put * * Returns nfs_ok on success, otherwise an nfserr stat value is * returned.
*/
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, unsignedlong *count, u32 *eof)
{ struct nfsd_file *nf; struct file *file;
__be32 err;
/** * nfsd_write - open a file and write data to it * @rqstp: RPC execution context * @fhp: File handle of file to write into; nfsd_write() may modify it * @offset: Byte offset of start * @payload: xdr_buf containing the write payload * @cnt: IN: number of bytes to write, OUT: number of bytes actually written * @stable: An NFS stable_how value * @verf: NFS WRITE verifier * * Upon return, caller must invoke fh_put on @fhp. * * Return values: * An nfsstat value in network byte order.
*/
__be32
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, conststruct xdr_buf *payload, unsignedlong *cnt, int stable,
__be32 *verf)
{ struct nfsd_file *nf;
__be32 err;
trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_WRITE, &nf); if (err) goto out;
/** * nfsd_commit - Commit pending writes to stable storage * @rqstp: RPC request being processed * @fhp: NFS filehandle * @nf: target file * @offset: raw offset from beginning of file * @count: raw count of bytes to sync * @verf: filled in with the server's current write verifier * * Note: we guarantee that data that lies within the range specified * by the 'offset' and 'count' parameters will be synced. The server * is permitted to sync data that lies outside this range at the * same time. * * Unfortunately we cannot lock the file to make sure we return full WCC * data to the client, as locking happens lower down in the filesystem. * * Return values: * An nfsstat value in network byte order.
*/
__be32
nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
u64 offset, u32 count, __be32 *verf)
{
__be32 err = nfs_ok;
u64 maxbytes;
loff_t start, end; struct nfsd_net *nn;
/* * Convert the client-provided (offset, count) range to a * (start, end) range. If the client-provided range falls * outside the maximum file size of the underlying FS, * clamp the sync range appropriately.
*/
start = 0;
end = LLONG_MAX;
maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes; if (offset < maxbytes) {
start = offset; if (count && (offset + count - 1 < maxbytes))
end = offset + count - 1;
}
nn = net_generic(nf->nf_net, nfsd_net_id); if (EX_ISSYNC(fhp->fh_export)) {
errseq_t since = READ_ONCE(nf->nf_file->f_wb_err); int err2;
/**
 * nfsd_create_setattr - Set a created file's attributes
 * @rqstp: RPC transaction being executed
 * @fhp: NFS filehandle of parent directory
 * @resfhp: NFS filehandle of new object
 * @attrs: requested attributes of new object
 *
 * Strips attributes that must not be applied after creation, applies the
 * remainder (or just commits the child's metadata when nothing is left),
 * commits the parent's metadata and finally refreshes @resfhp.
 *
 * Returns nfs_ok on success, or an nfsstat in network byte order.
 */
__be32
nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
		    struct svc_fh *resfhp, struct nfsd_attrs *attrs)
{
	struct iattr *ia = attrs->na_iattr;
	__be32 status;

	/* Mode has already been set by file creation. */
	ia->ia_valid &= ~ATTR_MODE;

	/*
	 * Setting uid/gid works only for root.  Irix appears to
	 * send along the gid on create when it tries to implement
	 * setgid directories via NFS:
	 */
	if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
		ia->ia_valid &= ~(ATTR_UID|ATTR_GID);

	/*
	 * Callers expect new file metadata to be committed even
	 * if the attributes have not changed.
	 */
	if (nfsd_attrs_valid(attrs))
		status = nfsd_setattr(rqstp, resfhp, attrs, NULL);
	else
		status = nfserrno(commit_metadata(resfhp));
	if (status)
		return status;

	/*
	 * Transactional filesystems had a chance to commit changes
	 * for both parent and child simultaneously making the
	 * following commit_metadata a noop in many cases.
	 */
	status = nfserrno(commit_metadata(fhp));
	if (status)
		return status;

	/* Update the new filehandle to pick up the new attributes. */
	return fh_update(resfhp);
}
/*
 * HPUX client sometimes creates a file in mode 000, and sets size to 0.
 * Setting size to 0 may fail for some specific file systems by the
 * permission checking which requires WRITE permission but the mode is 000.
 * We ignore the resizing (to 0) on the just newly created file, since the
 * size is 0 after the file is created.
 *
 * Call this only after vfs_create() is called.
 *
 * @iap: attributes requested for the new file; ATTR_SIZE is cleared from
 *       ia_valid when the requested size is zero.
 */
static void
nfsd_check_ignore_resizing(struct iattr *iap)
{
	if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
		iap->ia_valid &= ~ATTR_SIZE;
}
/*
 * The parent directory should already be locked:
 *
 * NOTE(review): the body shown under this signature does not match that
 * statement -- it takes the parent's inode lock itself, refers to "fname"
 * and "flen" which are not parameters or locals here, and calls
 * nfsd_create_locked() again. It looks like the body of a different
 * (wrapper) function was spliced in; verify against the authoritative
 * source.
 */
__be32
nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
		   struct nfsd_attrs *attrs,
		   int type, dev_t rdev, struct svc_fh *resfhp)
{
	struct dentry *dentry, *dchild;
	struct inode *dirp;
	struct iattr *iap = attrs->na_iattr;
	__be32 err;
	int host_err = 0;

	/* @fhp must resolve to a directory. */
	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_NOP);
	if (err)
		return err;

	dentry = fhp->fh_dentry;

	host_err = fh_want_write(fhp);
	if (host_err)
		return nfserrno(host_err);

	inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
	/* Look up the child name beneath the (now locked) parent. */
	dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
	host_err = PTR_ERR(dchild);
	if (IS_ERR(dchild)) {
		err = nfserrno(host_err);
		goto out_unlock;
	}
	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
	/*
	 * We unconditionally drop our ref to dchild as fh_compose will have
	 * already grabbed its own ref for it.
	 */
	dput(dchild);
	if (err)
		goto out_unlock;
	/* Pre/post attributes of the parent bracket the create itself. */
	err = fh_fill_pre_attrs(fhp);
	if (err != nfs_ok)
		goto out_unlock;
	err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp);
	fh_fill_post_attrs(fhp);
out_unlock:
	inode_unlock(dentry->d_inode);
	return err;
}
/* * Read a symlink. On entry, *lenp must contain the maximum path length that * fits into the buffer. On return, it contains the true length. * N.B. After this call fhp needs an fh_put
*/
__be32
nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
{
__be32 err; constchar *link; struct path path;
DEFINE_DELAYED_CALL(done); int len;
err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP); if (unlikely(err)) return err;
if (unlikely(!d_is_symlink(path.dentry))) return nfserr_inval;
touch_atime(&path);
link = vfs_get_link(path.dentry, &done); if (IS_ERR(link)) return nfserrno(PTR_ERR(link));
len = strlen(link); if (len < *lenp)
*lenp = len;
memcpy(buf, link, *lenp);
do_delayed_call(&done); return 0;
}
/** * nfsd_symlink - Create a symlink and look up its inode * @rqstp: RPC transaction being executed * @fhp: NFS filehandle of parent directory * @fname: filename of the new symlink * @flen: length of @fname * @path: content of the new symlink (NUL-terminated) * @attrs: requested attributes of new object * @resfhp: NFS filehandle of new object * * N.B. After this call _both_ fhp and resfhp need an fh_put * * Returns nfs_ok on success, or an nfsstat in network byte order.
*/
__be32
nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, char *path, struct nfsd_attrs *attrs, struct svc_fh *resfhp)
{ struct dentry *dentry, *dnew;
__be32 err, cerr; int host_err;
/** * nfsd_link - create a link * @rqstp: RPC transaction context * @ffhp: the file handle of the directory where the new link is to be created * @name: the filename of the new link * @len: the length of @name in octets * @tfhp: the file handle of an existing file object * * After this call _both_ ffhp and tfhp need an fh_put. * * Returns a generic NFS status code in network byte-order.
*/
__be32
nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *name, int len, struct svc_fh *tfhp)
{ struct dentry *ddir, *dnew, *dold; struct inode *dirp; int type;
__be32 err; int host_err;
if (inode && S_ISREG(inode->i_mode))
ret = nfsd_file_is_cached(inode); return ret;
}
/**
 * nfsd_rename - rename a directory entry
 * @rqstp: RPC transaction context
 * @ffhp: the file handle of parent directory containing the entry to be renamed
 * @fname: the filename of directory entry to be renamed
 * @flen: the length of @fname in octets
 * @tfhp: the file handle of parent directory to contain the renamed entry
 * @tname: the filename of the new entry
 * @tlen: the length of @tname in octets
 *
 * After this call _both_ ffhp and tfhp need an fh_put.
 *
 * Returns a generic NFS status code in network byte-order.
 *
 * NOTE(review): only the declarations and the tail of this function are
 * present here; the "retry" label targeted below and every assignment to
 * err / ndentry / close_cached are not visible. Verify against the
 * authoritative source.
 */
__be32
nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
	    struct svc_fh *tfhp, char *tname, int tlen)
{
	struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap;
	int type = S_IFDIR;
	__be32 err;
	int host_err;
	bool close_cached = false;

	/*
	 * If the target dentry has cached open files, then we need to
	 * try to close them prior to doing the rename.  Final fput
	 * shouldn't be done with locks held however, so we delay it
	 * until this point and then reattempt the whole shebang.
	 */
	if (close_cached) {
		/* Clear the flag so the retry starts from a clean state. */
		close_cached = false;
		nfsd_close_cached_files(ndentry);
		dput(ndentry);
		goto retry;
	}
out:
	return err;
}
/**
 * nfsd_unlink - remove a directory entry
 * @rqstp: RPC transaction context
 * @fhp: the file handle of the parent directory to be modified
 * @type: enforced file type of the object to be removed
 * @fname: the name of directory entry to be removed
 * @flen: length of @fname in octets
 *
 * After this call fhp needs an fh_put.
 *
 * Returns a generic NFS status code in network byte-order. An empty name
 * or a dot entry ("." / "..", per isdotent()) yields nfserr_acces.
 *
 * NOTE(review): the middle of this function (the lookup and the actual
 * removal, including every assignment to dirp, rdentry and rinode and
 * every jump to out_unlock / out_drop_write) is not visible here. Verify
 * against the authoritative source.
 */
__be32
nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
	    char *fname, int flen)
{
	struct dentry *dentry, *rdentry;
	struct inode *dirp;
	struct inode *rinode;
	__be32 err;
	int host_err;

	trace_nfsd_vfs_unlink(rqstp, fhp, fname, flen);

	/* Reject empty names and dot entries up front. */
	err = nfserr_acces;
	if (!flen || isdotent(fname, flen))
		goto out;
	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE);
	if (err)
		goto out;

	host_err = fh_want_write(fhp);
	if (host_err)
		goto out_nfserr;

	inode_unlock(dirp);
	if (!host_err)
		host_err = commit_metadata(fhp);
	dput(rdentry);
	iput(rinode);    /* truncate the inode here */

out_drop_write:
	/* Pairs with the fh_want_write() above. */
	fh_drop_write(fhp);
out_nfserr:
	if (host_err == -EBUSY) {
		/*
		 * See RFC 8881 Section 18.25.4 para 4: NFSv4 REMOVE
		 * wants a status unique to the object type.
		 */
		if (type != S_IFDIR)
			err = nfserr_file_open;
		else
			err = nfserr_acces;
	}
out:
	/* An NFS-level status takes precedence over the host errno. */
	return err != nfs_ok ? err : nfserrno(host_err);
out_unlock:
	inode_unlock(dirp);
	goto out_drop_write;
}
/*
 * We do this buffering because we must not call back into the file
 * system's ->lookup() method from the filldir callback. That may well
 * deadlock a number of file systems.
 *
 * This is based heavily on the implementation of same in XFS.
 *
 * One buffered directory entry. Records are variable length: the name
 * bytes follow the fixed header in the flexible array member.
 */
struct buffered_dirent {
	u64		ino;
	loff_t		offset;
	int		namlen;		/* number of bytes in name[] */
	unsigned int	d_type;
	char		name[];
};
reclen = ALIGN(sizeof(*de) + de->namlen, sizeof(u64));
size -= reclen;
de = (struct buffered_dirent *)((char *)de + reclen);
} if (size > 0) /* We bailed out early */ break;
offset = vfs_llseek(file, 0, SEEK_CUR);
}
free_page((unsignedlong)(buf.dirent));
if (host_err) return nfserrno(host_err);
*offsetp = offset; return cdp->err;
}
/**
 * nfsd_readdir - Read entries from a directory
 * @rqstp: RPC transaction context
 * @fhp: NFS file handle of directory to be read
 * @offsetp: OUT: seek offset of final entry that was read
 * @cdp: OUT: an eof error value
 * @func: entry filler actor
 *
 * This implementation ignores the NFSv3/4 verifier cookie.
 *
 * NB: normal system calls hold file->f_pos_lock when calling
 * ->iterate_shared and ->llseek, but nfsd_readdir() does not.
 * Because the struct file acquired here is not visible to other
 * threads, its internal state does not need mutex protection.
 *
 * Returns nfs_ok on success, otherwise an nfsstat code is
 * returned.
 *
 * NOTE(review): the part of the body that opens the directory, seeks and
 * iterates (and so assigns err and file) is not visible here. Verify
 * against the authoritative source.
 */
__be32
nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
	     struct readdir_cd *cdp, nfsd_filldir_t func)
{
	__be32 err;
	struct file *file;
	loff_t offset = *offsetp;
	int may_flags = NFSD_MAY_READ;

	/* EOF and "buffer too small" are not failures of the call itself. */
	if (err == nfserr_eof || err == nfserr_toosmall)
		err = nfs_ok; /* can still be found in ->err */
out_close:
	nfsd_filp_close(file);
out:
	return err;
}
/**
 * nfsd_filp_close: close a file synchronously
 * @fp: the file to close
 *
 * nfsd_filp_close() is similar in behaviour to filp_close().
 * The difference is that if this is the final close on the
 * file, then that finalisation happens immediately, rather than
 * being handed over to a work_queue, as is the case for
 * filp_close().
 * When a user-space process closes a file (even when using
 * filp_close()) the finalisation happens before returning to
 * userspace, so it is effectively synchronous.  When a kernel thread
 * uses filp_close(), on the other hand, the handling is completely
 * asynchronous.  This means that any cost imposed by that finalisation
 * is not imposed on the nfsd thread, and nfsd could potentially
 * close files more quickly than the work queue finalises the close,
 * which would lead to unbounded growth in the queue.
 *
 * In some contexts it is not safe to synchronously wait for
 * close finalisation (see comment for __fput_sync()), but nfsd
 * does not match those contexts.  In particular it does not, at the
 * time that this function is called, hold any locks and no finalisation
 * of any file, socket, or device driver would have any cause to wait
 * for nfsd to make progress.
 */
void nfsd_filp_close(struct file *fp)
{
	/*
	 * Take an extra reference first so that filp_close() cannot drop
	 * the last one; the final put is then done by __fput_sync().
	 */
	get_file(fp);
	filp_close(fp, NULL);
	__fput_sync(fp);
}
/* * Get file system stats * N.B. After this call fhp needs an fh_put
*/
__be32
nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access)
{
__be32 err;
#ifdef CONFIG_NFSD_V4
/*
 * Map a negative errno from an xattr operation to an NFS status code.
 *
 * Three values get xattr-specific treatment before the generic
 * translation is consulted:
 *
 *   -ENODATA -> nfserr_noxattr
 *   -E2BIG   -> nfserr_xattr2big
 *   -ERANGE  -> nfserr_toosmall
 *
 * The -ERANGE case exists because vfs_listxattr can return it when a
 * file carries more extended attributes than fit in an XATTR_LIST_MAX
 * sized buffer.  Filesystems accept new attributes up to their own
 * internal limit, which may exceed XATTR_LIST_MAX, so the attributes are
 * present and valid yet cannot be listed because the upper level xattr
 * code enforces XATTR_LIST_MAX.  TOOSMALL is the best available mapping
 * for that situation.
 *
 * Every other value is handed to nfserrno().
 */
static __be32
nfsd_xattr_errno(int err)
{
	if (err == -ENODATA)
		return nfserr_noxattr;

	if (err == -E2BIG)
		return nfserr_xattr2big;

	if (err == -ERANGE)
		return nfserr_toosmall;

	return nfserrno(err);
}
/*
 * Retrieve the specified user extended attribute. To avoid always
 * having to allocate the maximum size (since we are not getting
 * a maximum size from the RPC), do a probe + alloc. Hold a reader
 * lock on i_rwsem to prevent the extended attribute from changing
 * size while we're doing this.
 *
 * The file handle is verified with NFSD_MAY_READ first; a verification
 * failure is returned directly.  After the vfs_getxattr() call, its
 * return value is stored in *lenp and the value buffer in *bufp; the
 * buffer is freed and NULL is stored instead when that call returned
 * <= 0, in which case the status comes from nfsd_xattr_errno().
 */
__be32
nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, void **bufp, int *lenp)
{
	ssize_t len;
	__be32 err;
	char *buf;
	struct inode *inode;
	struct dentry *dentry;

	err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ);
	if (err)
		return err;

	/*
	 * NOTE(review): the statements that set 'dentry' and 'inode', take
	 * the shared lock, probe the attribute size and allocate 'buf' are
	 * not visible in this listing, nor is any 'goto out' -- confirm
	 * against the complete source.
	 */

	/* Fetch the attribute value into buf; len doubles as the buffer size. */
	len = vfs_getxattr(&nop_mnt_idmap, dentry, name, buf, len);
	if (len <= 0) {
		/* Nothing usable was read: drop the buffer and translate len. */
		kvfree(buf);
		buf = NULL;
		err = nfsd_xattr_errno(len);
	}

	/* Reached on both outcomes above, so *lenp may be <= 0 and *bufp NULL. */
	*lenp = len;
	*bufp = buf;
out:
	inode_unlock_shared(inode);
	return err;
}
/*
 * Retrieve the xattr names. Since we can't know how many are
 * user extended attributes, we must get all attributes here,
 * and have the XDR encode filter out the "user." ones.
 *
 * While this could always just allocate an XATTR_LIST_MAX
 * buffer, that's a waste, so do a probe + allocate. To
 * avoid any changes between the probe and allocate, this is
 * done with the inode lock held (it is released with
 * inode_unlock_shared() before returning).
 *
 * On success the name list is stored in *bufp and its length in
 * *lenp, and nfs_ok is returned.  When vfs_listxattr() returns <= 0
 * the buffer is freed, *bufp and *lenp are not written by the code
 * shown here, and the status comes from nfsd_xattr_errno().
 */
__be32
nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char **bufp, int *lenp)
{
	ssize_t len;
	__be32 err;
	char *buf;
	struct inode *inode;
	struct dentry *dentry;

	err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ);
	if (err)
		return err;

	/*
	 * NOTE(review): the statements that set 'dentry' and 'inode', take
	 * the lock, probe the list size and allocate 'buf' are not visible
	 * in this listing -- confirm against the complete source.
	 */

	/* Read the attribute name list into buf; len doubles as its size. */
	len = vfs_listxattr(dentry, buf, len);
	if (len <= 0) {
		/* Failure or empty result: free the buffer, outputs untouched. */
		kvfree(buf);
		err = nfsd_xattr_errno(len);
		goto out;
	}

	*lenp = len;
	*bufp = buf;

	err = nfs_ok;
out:
	inode_unlock_shared(inode);
	return err;
}
/** * nfsd_removexattr - Remove an extended attribute * @rqstp: RPC transaction being executed * @fhp: NFS filehandle of object with xattr to remove * @name: name of xattr to remove (NUL-terminate) * * Pass in a NULL pointer for delegated_inode, and let the client deal * with NFS4ERR_DELAY (same as with e.g. setattr and remove). *
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.62 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.