/* * Module for the pnfs nfs4 file layout driver. * Defines all I/O and Policy interface operations, plus code * to register itself with the pNFS client. * * Copyright (c) 2002 * The Regents of the University of Michigan * All Rights Reserved * * Dean Hildebrand <dhildebz@umich.edu> * * Permission is granted to use, copy, create derivative works, and * redistribute this software and such derivative works for any purpose, * so long as the name of the University of Michigan is not used in * any advertising or publicity pertaining to the use or distribution * of this software without specific, written prior authorization. If * the above copyright notice or any other identification of the * University of Michigan is included in any copy of any portion of * this software, then the disclaimer below must also be included. * * This software is provided as is, without representation or warranty * of any kind either express or implied, including without limitation * the implied warranties of merchantability, fitness for a particular * purpose, or noninfringement. The Regents of the University of * Michigan shall not be liable for any damages, including special, * indirect, incidental, or consequential damages, with respect to any * claim arising out of or in connection with the use of the software, * even if it has been or is hereafter advised of the possibility of * such damages.
*/
/* This function is used by the layout driver to calculate the * offset of the file on the dserver based on whether the * layout type is STRIPE_DENSE or STRIPE_SPARSE
*/ static loff_t
filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
{ struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
switch (flseg->stripe_type) { case STRIPE_SPARSE: return offset;
case STRIPE_DENSE: return filelayout_get_dense_offset(flseg, offset);
}
switch (task->tk_status) { /* DS session errors */ case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: case -NFS4ERR_BAD_HIGH_SLOT: case -NFS4ERR_DEADSESSION: case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: case -NFS4ERR_SEQ_FALSE_RETRY: case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session. Exchangeid " "flags 0x%x\n", __func__, task->tk_status,
clp->cl_exchange_flags);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); break; case -NFS4ERR_DELAY: case -NFS4ERR_GRACE:
rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); break; case -NFS4ERR_RETRY_UNCACHED_REP: break; /* Invalidate Layout errors */ case -NFS4ERR_ACCESS: case -NFS4ERR_PNFS_NO_LAYOUT: case -ESTALE: /* mapped NFS4ERR_STALE */ case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ case -EISDIR: /* mapped NFS4ERR_ISDIR */ case -NFS4ERR_FHEXPIRED: case -NFS4ERR_WRONG_TYPE:
dprintk("%s Invalid layout error %d\n", __func__,
task->tk_status); /* * Destroy layout so new i/o will get a new layout. * Layout will not be destroyed until all current lseg * references are put. Mark layout as invalid to resend failed * i/o and all i/o waiting on the slot table to the MDS until * layout is destroyed and a new valid layout is obtained.
*/
pnfs_destroy_layout(NFS_I(inode));
rpc_wake_up(&tbl->slot_tbl_waitq); goto reset; /* RPC connection errors */ case -ECONNREFUSED: case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: case -EIO: case -ETIMEDOUT: case -EPIPE: case -EPROTO: case -ENODEV:
dprintk("%s DS connection error %d\n", __func__,
task->tk_status);
nfs4_mark_deviceid_unavailable(devid);
pnfs_error_mark_layout_for_return(inode, lseg);
pnfs_set_lo_fail(lseg);
rpc_wake_up(&tbl->slot_tbl_waitq);
fallthrough; default:
reset:
dprintk("%s Retry through MDS. Error %d\n", __func__,
task->tk_status); return -NFS4ERR_RESET_TO_MDS;
}
task->tk_status = 0; return -EAGAIN;
}
/* NFS_PROTO call done callback routines */
/*
 * Read "done" callback; filelayout_read_prepare() installs it as
 * hdr->pgio_done_cb.
 *
 * Dispatches on the error code held in 'err':
 *   -NFS4ERR_RESET_TO_MDS  call filelayout_reset_read(hdr) and return the
 *                          task's current tk_status
 *   -EAGAIN                call rpc_restart_call_prepare(task) and return
 *                          -EAGAIN
 *   anything else          return 0
 *
 * NOTE(review): in this copy of the file nothing assigns 'err' before the
 * switch reads it, so the dispatch runs on an indeterminate value.  The
 * statement that computes it (presumably a call into the data-server error
 * handler that yields -NFS4ERR_RESET_TO_MDS / -EAGAIN) appears to have been
 * lost -- TODO restore it from the authoritative source before building.
 */
static int filelayout_read_done_cb(struct rpc_task *task,
				   struct nfs_pgio_header *hdr)
{
	int err;

	switch (err) {
	case -NFS4ERR_RESET_TO_MDS:
		filelayout_reset_read(hdr);
		return task->tk_status;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	return 0;
}
/*
 * Note on the pnfs layer that a LAYOUTCOMMIT is needed for a completed write.
 *
 * We reference the rpc_cred of the first WRITE that triggers the need for
 * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
 * RFC 5661 is not clear about which credential should be used.
 *
 * @hdr: header of the write; its lseg, inode, mds_offset and result
 *       (verifier commit level and byte count) are read here.
 *
 * Returns without doing anything when the segment commits through the MDS
 * or when the write was reported as NFS_FILE_SYNC.
 */
static void
filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
{
	loff_t end_offs = 0;

	if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
	    hdr->res.verf->committed == NFS_FILE_SYNC)
		return;

	/* Only a DATA_SYNC write supplies an end offset at this point. */
	if (hdr->res.verf->committed == NFS_DATA_SYNC)
		end_offs = hdr->mds_offset + (loff_t)hdr->res.count;

	/* Note: if the write is unstable, don't set end_offs until commit */
	pnfs_set_layoutcommit(hdr->inode, hdr->lseg, end_offs);
	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}
/*
 * Call ops for the async read/write cases
 * In the case of dense layouts, the offset needs to be reset to its
 * original value.
 */

/*
 * Prepare step for a read task.
 *
 * @task: the RPC task being prepared
 * @data: the struct nfs_pgio_header for this read
 *
 * The function returns early in each of these cases:
 *   - the open context is flagged NFS_CONTEXT_BAD: the task is exited
 *     with -EIO;
 *   - filelayout_reset_to_mds() reports true for the segment: the read is
 *     handed to filelayout_reset_read() and the task is exited with 0;
 *   - nfs4_setup_sequence() returns non-zero.
 * Otherwise the read stateid is selected; if that yields -EIO the task is
 * exited with -EIO.
 */
static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return;
	}

	if (filelayout_reset_to_mds(hdr->lseg)) {
		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
		filelayout_reset_read(hdr);
		rpc_exit(task, 0);
		return;
	}

	/* Install the completion hook before the sequence is set up. */
	hdr->pgio_done_cb = filelayout_read_done_cb;

	if (nfs4_setup_sequence(hdr->ds_clp,
				&hdr->args.seq_args,
				&hdr->res.seq_res,
				task))
		return;

	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
				hdr->args.lock_context, FMODE_READ) == -EIO)
		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}
/*
 * Resolve and validate the device backing a file-layout segment.
 *
 * @lo:        layout header; supplies the inode (for the server) and the
 *             credential used for the device lookup
 * @fl:        segment whose deviceid is looked up and whose dsaddr pointer
 *             is filled in on success
 * @gfp_flags: allocation flags forwarded to the device lookup
 *
 * Returns 0 if fl->dsaddr was already set, or once a device has been found
 * and passed all checks.  Returns -EINVAL if the lookup returns NULL, the
 * device tests as unavailable, first_stripe_index is not below the device's
 * stripe_count, or num_fh is inconsistent with the stripe type:
 *   STRIPE_SPARSE: a num_fh greater than 1 must equal dsaddr->ds_num
 *   STRIPE_DENSE:  num_fh must equal dsaddr->stripe_count
 *
 * On every path that does not install the device into fl->dsaddr, the
 * reference obtained by the lookup is dropped again.
 */
static int
filelayout_check_deviceid(struct pnfs_layout_hdr *lo,
			  struct nfs4_filelayout_segment *fl,
			  gfp_t gfp_flags)
{
	struct nfs4_deviceid_node *d;
	struct nfs4_file_layout_dsaddr *dsaddr;
	int status = -EINVAL;

	/* Is the deviceid already set? If so, we're good. */
	if (fl->dsaddr != NULL)
		return 0;

	/* find and reference the deviceid */
	d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid,
				   lo->plh_lc_cred, gfp_flags);
	if (d == NULL)
		goto out;

	dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
	/* Found deviceid is unavailable */
	if (filelayout_test_devid_unavailable(&dsaddr->id_node))
		goto out_put;

	if (fl->first_stripe_index >= dsaddr->stripe_count) {
		dprintk("%s Bad first_stripe_index %u\n",
			__func__, fl->first_stripe_index);
		goto out_put;
	}

	if ((fl->stripe_type == STRIPE_SPARSE &&
	     fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
	    (fl->stripe_type == STRIPE_DENSE &&
	     fl->num_fh != dsaddr->stripe_count)) {
		dprintk("%s num_fh %u not valid for given packing\n",
			__func__, fl->num_fh);
		goto out_put;
	}
	status = 0;

	/*
	 * Atomic compare and exchange to ensure we don't scribble
	 * over a non-NULL pointer.  If the slot was filled in the meantime,
	 * status stays 0 and only our extra reference is released.
	 */
	if (cmpxchg(&fl->dsaddr, NULL, dsaddr) != NULL)
		goto out_put;
out:
	return status;
out_put:
	nfs4_fl_put_deviceid(dsaddr);
	goto out;
}
/* * filelayout_check_layout() * * Make sure layout segment parameters are sane WRT the device. * At this point no generic layer initialization of the lseg has occurred, * and nothing has been added to the layout_hdr cache. *
*/ staticint
filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr,
gfp_t gfp_flags)
{ int status = -EINVAL;
dprintk("--> %s\n", __func__);
if (fl->pattern_offset > lgr->range.offset) {
dprintk("%s pattern_offset %lld too large\n",
__func__, fl->pattern_offset); goto out;
}
if (!fl->stripe_unit) {
dprintk("%s Invalid stripe unit (%u)\n",
__func__, fl->stripe_unit); goto out;
}
/* Note that a zero value for num_fh is legal for STRIPE_SPARSE.
* Further checking is done in filelayout_check_layout */ if (fl->num_fh >
max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT)) goto out_err;
if (fl->num_fh > 0) {
fl->fh_array = kcalloc(fl->num_fh, sizeof(fl->fh_array[0]),
gfp_flags); if (!fl->fh_array) goto out_err;
}
for (i = 0; i < fl->num_fh; i++) { /* Do we want to use a mempool here? */
fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); if (!fl->fh_array[i]) goto out_err;
p = xdr_inline_decode(&stream, 4); if (unlikely(!p)) goto out_err;
fl->fh_array[i]->size = be32_to_cpup(p++); if (fl->fh_array[i]->size > NFS_MAXFHSIZE) {
printk(KERN_ERR "NFS: Too big fh %d received %d\n",
i, fl->fh_array[i]->size); goto out_err;
}
p = xdr_inline_decode(&stream, fl->fh_array[i]->size); if (unlikely(!p)) goto out_err;
memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
dprintk("DEBUG: %s: fh len %d\n", __func__,
fl->fh_array[i]->size);
}
/* see if req and prev are in the same stripe */ if (prev) {
p_stripe = (u64)req_offset(prev) - segment_offset;
r_stripe = (u64)req_offset(req) - segment_offset;
do_div(p_stripe, stripe_unit);
do_div(r_stripe, stripe_unit);
if (p_stripe != r_stripe) return 0;
}
/* calculate remaining bytes in the current stripe */
div_u64_rem((u64)req_offset(req) - segment_offset,
stripe_unit,
&stripe_offset);
WARN_ON_ONCE(stripe_offset > stripe_unit); if (stripe_offset >= stripe_unit) return 0; return min(stripe_unit - (unsignedint)stripe_offset, size);
}
{ struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
u32 i, j;
if (fl->commit_through_mds) {
nfs_request_add_commit_list(req, cinfo);
} else { /* Note that we are calling nfs4_fl_calc_j_index on each page * that ends up being committed to a data server. An attractive * alternative is to add a field to nfs_write_data and nfs_page * to store the value calculated in filelayout_write_pagelist * and just use that here.
*/
j = nfs4_fl_calc_j_index(lseg, req_offset(req));
i = select_bucket_index(fl, j);
pnfs_layout_mark_request_commit(req, lseg, cinfo, i);
}
}
if (flseg->stripe_type == STRIPE_SPARSE) { if (flseg->num_fh == 1)
i = 0; elseif (flseg->num_fh == 0) /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ return NULL;
} return flseg->fh_array[i];
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.