/* * linux/fs/nfs/blocklayout/blocklayout.c * * Module for the NFSv4.1 pNFS block layout driver. * * Copyright (c) 2006 The Regents of the University of Michigan. * All rights reserved. * * Andy Adamson <andros@citi.umich.edu> * Fred Isaman <iisaman@umich.edu> * * permission is granted to use, copy, create derivative works and * redistribute this software and such derivative works for any purpose, * so long as the name of the university of michigan is not used in * any advertising or publicity pertaining to the use or distribution * of this software without specific, written prior authorization. if * the above copyright notice or any other identification of the * university of michigan is included in any copy of any portion of * this software, then the disclaimer below must also be included. * * this software is provided as is, without representation from the * university of michigan as to its fitness for any purpose, and without * warranty by the university of michigan of any kind, either express * or implied, including without limitation the implied warranties of * merchantability and fitness for a particular purpose. the regents * of the university of michigan shall not be liable for any damages, * including special, indirect, incidental, or consequential damages, * with respect to any claim arising out or in connection with the use * of the software, even if it has been or is hereafter advised of the * possibility of such damages.
*/
/*
 * One request handed to this driver may end up split across several bios,
 * so a shared tracking object is needed to notice when the last of them
 * has finished.
 *
 * @refcnt:        reference count for this tracking object.
 * @pnfs_callback: completion hook for the request as a whole.
 * @data:          opaque pointer kept alongside the hook
 *                 (presumably what @pnfs_callback is called with -- confirm
 *                 against the code that drops the last reference).
 */
struct parallel_io {
	struct kref refcnt;
	void (*pnfs_callback)(void *data);
	void *data;
};
par = alloc_parallel(header); if (!par) return PNFS_NOT_ATTEMPTED;
par->pnfs_callback = bl_end_par_io_read;
blk_start_plug(&plug);
isect = (sector_t) (f_offset >> SECTOR_SHIFT); /* Code assumes extents are page-aligned */ for (i = pg_index; i < header->page_array.npages; i++) { if (extent_length <= 0) { /* We've used up the previous extent */
bio = bl_submit_bio(bio);
/* Get the next one */ if (!ext_tree_lookup(bl, isect, &be, false)) {
header->pnfs_error = -EIO; goto out;
}
extent_length = be.be_length - (isect - be.be_f_offset);
}
if (bio->bi_status) { if (!header->pnfs_error)
header->pnfs_error = -EIO;
pnfs_set_lo_fail(header->lseg);
bl_mark_devices_unavailable(header, true);
}
bio_put(bio);
put_parallel(par);
}
/* Function scheduled for call during bl_end_par_io_write, * it marks sectors as written and extends the commitlist.
*/ staticvoid bl_write_cleanup(struct work_struct *work)
{ struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work); struct nfs_pgio_header *hdr =
container_of(task, struct nfs_pgio_header, task);
/* Called when last of bios associated with a bl_write_pagelist call finishes */ staticvoid bl_end_par_io_write(void *data)
{ struct nfs_pgio_header *hdr = data;
/* At this point, header->page_array is a (sequential) list of nfs_pages.
 * We want to write each, and if there is an error set pnfs_error
 * to have it redone using nfs.
*/
par = alloc_parallel(header); if (!par) return PNFS_NOT_ATTEMPTED;
par->pnfs_callback = bl_end_par_io_write;
blk_start_plug(&plug);
/* we always write out the whole page */
offset = offset & (loff_t)PAGE_MASK;
isect = offset >> SECTOR_SHIFT;
for (i = pg_index; i < header->page_array.npages; i++) { if (extent_length <= 0) { /* We've used up the previous extent */
bio = bl_submit_bio(bio); /* Get the next one */ if (!ext_tree_lookup(bl, isect, &be, true)) {
header->pnfs_error = -EINVAL; goto out;
}
/*
 * Running state carried from one extent to the next while a layout is
 * checked, so that the extents can be held to the constraints of the spec.
 */
struct layout_verification {
	u32 mode;	/* layout iomode: R or RW */
	u64 start;	/* where the next non-COW extent is expected to begin */
	u64 inval;	/* beginning of the INVAL coverage */
	u64 cowread;	/* end of the COW read coverage */
};
/* Verify the extent meets the layout requirements of the pnfs-block draft, * section 2.3.1.
*/ staticint verify_extent(struct pnfs_block_extent *be, struct layout_verification *lv)
{ if (lv->mode == IOMODE_READ) { if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
be->be_state == PNFS_BLOCK_INVALID_DATA) return -EIO; if (be->be_f_offset != lv->start) return -EIO;
lv->start += be->be_length; return 0;
} /* lv->mode == IOMODE_RW */ if (be->be_state == PNFS_BLOCK_READWRITE_DATA) { if (be->be_f_offset != lv->start) return -EIO; if (lv->cowread > lv->start) return -EIO;
lv->start += be->be_length;
lv->inval = lv->start; return 0;
} elseif (be->be_state == PNFS_BLOCK_INVALID_DATA) { if (be->be_f_offset != lv->start) return -EIO;
lv->start += be->be_length; return 0;
} elseif (be->be_state == PNFS_BLOCK_READ_DATA) { if (be->be_f_offset > lv->start) return -EIO; if (be->be_f_offset < lv->inval) return -EIO; if (be->be_f_offset < lv->cowread) return -EIO; /* It looks like you might want to min this with lv->start, * but you really don't.
*/
lv->inval = lv->inval + be->be_length;
lv->cowread = be->be_f_offset + be->be_length; return 0;
} else return -EIO;
}
/* * Devices that are marked unavailable are left in the cache with a * timeout to avoid sending GETDEVINFO after every LAYOUTGET, or * constantly attempting to register the device. Once marked as * unavailable they must be deleted and never reused.
*/ if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) { unsignedlong end = jiffies; unsignedlong start = end - PNFS_DEVICE_RETRY_TIMEOUT;
if (!time_in_range(node->timestamp_unavailable, start, end)) { /* Uncork subsequent GETDEVINFO operations for this device */
nfs4_delete_deviceid(node->ld, node->nfs_client, id); goto retry;
} goto out_put;
}
if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node))) { /* * If we cannot register, treat this device as transient: * Make a negative cache entry for the device
*/
nfs4_mark_deviceid_unavailable(node); goto out_put;
}
/* * The next three values are read in as bytes, but stored in the * extent structure in 512-byte granularity.
*/
error = -EIO; if (decode_sector_number(&p, &be->be_f_offset) < 0) goto out_put_deviceid; if (decode_sector_number(&p, &be->be_length) < 0) goto out_put_deviceid; if (decode_sector_number(&p, &be->be_v_offset) < 0) goto out_put_deviceid;
be->be_state = be32_to_cpup(p++);
status = -EIO;
p = xdr_inline_decode(&xdr, 4); if (unlikely(!p)) goto out_free_scratch;
count = be32_to_cpup(p++);
dprintk("%s: number of extents %d\n", __func__, count);
/* * Decode individual extents, putting them in temporary staging area * until whole layout is decoded to make error recovery easier.
*/ for (i = 0; i < count; i++) {
status = bl_alloc_extent(&xdr, lo, &lv, &extents, gfp_mask); if (status) goto process_extents;
}
if (lgr->range.offset + lgr->range.length !=
lv.start << SECTOR_SHIFT) {
dprintk("%s Final length mismatch\n", __func__);
status = -EIO; goto process_extents;
}
if (lv.start < lv.cowread) {
dprintk("%s Final uncovered COW extent\n", __func__);
status = -EIO;
}
if (server->pnfs_blksize == 0) {
dprintk("%s Server did not return blksize\n", __func__); return -EINVAL;
} if (server->pnfs_blksize > PAGE_SIZE) {
printk(KERN_ERR "%s: pNFS blksize %d not supported.\n",
__func__, server->pnfs_blksize); return -EINVAL;
}
return 0;
}
staticbool
is_aligned_req(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, unsignedint alignment, bool is_write)
{ /* * Always accept buffered writes, higher layers take care of the * right alignment.
*/ if (pgio->pg_dreq == NULL) returntrue;
if (!IS_ALIGNED(req->wb_offset, alignment)) returnfalse;
if (IS_ALIGNED(req->wb_bytes, alignment)) returntrue;
if (is_write &&
(req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode))) { /* * If the write goes up to the inode size, just write * the full page. Data past the inode size is * guaranteed to be zeroed by the higher level client * code, and this behaviour is mandated by RFC 5663 * section 2.3.2.
*/ returntrue;
}
/*
 * Coalescing test for reads.  @req first has to pass is_aligned_req() with
 * SECTOR_SIZE alignment; the decision is then handed to
 * pnfs_generic_pg_test().
 *
 * Returns 0 if @req cannot be coalesced into @pgio, otherwise the number of
 * bytes (maximum @req->wb_bytes) that can be coalesced.
 */
static size_t
bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
		struct nfs_page *req)
{
	bool aligned = is_aligned_req(pgio, req, SECTOR_SIZE, false);

	return aligned ? pnfs_generic_pg_test(pgio, prev, req) : 0;
}
/* * Return the number of contiguous bytes for a given inode * starting at page frame idx.
*/ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
{ struct address_space *mapping = inode->i_mapping;
pgoff_t end;
/* Optimize common case that writes from 0 to end of file */
end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (end != inode->i_mapping->nrpages) {
rcu_read_lock();
end = page_cache_next_miss(mapping, idx + 1, ULONG_MAX);
rcu_read_unlock();
}
/*
 * Coalescing test for writes.  @req first has to pass is_aligned_req() with
 * PAGE_SIZE alignment (checked as a write); the decision is then handed to
 * pnfs_generic_pg_test().
 *
 * Returns 0 if @req cannot be coalesced into @pgio, otherwise the number of
 * bytes (maximum @req->wb_bytes) that can be coalesced.
 */
static size_t
bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
		 struct nfs_page *req)
{
	if (is_aligned_req(pgio, req, PAGE_SIZE, true))
		return pnfs_generic_pg_test(pgio, prev, req);

	return 0;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.