if (iocb->ki_flags & IOCB_ALLOC_CACHE)
opf |= REQ_ALLOC_CACHE;
bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
&blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
atomic_set(&dio->ref, 1); /* * Grab an extra reference to ensure the dio structure which is embedded * into the first bio stays around.
*/
bio_get(bio);
ret = bio_iov_iter_get_pages(bio, iter); if (unlikely(ret)) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio); break;
} if (iocb->ki_flags & IOCB_NOWAIT) { /* * This is nonblocking IO, and we need to allocate * another bio if we have data left to map. As we * cannot guarantee that one of the sub bios will not * fail getting issued FOR NOWAIT and as error results * are coalesced across all of them, be safe and ask for * a retry of this from blocking context.
*/ if (unlikely(iov_iter_count(iter))) {
ret = -EAGAIN; goto fail;
}
bio->bi_opf |= REQ_NOWAIT;
} if (iocb->ki_flags & IOCB_HAS_METADATA) {
ret = bio_integrity_map_iter(bio, iocb->private); if (unlikely(ret)) goto fail;
}
if (is_read) { if (dio->flags & DIO_SHOULD_DIRTY)
bio_set_pages_dirty(bio);
} else {
task_io_account_write(bio->bi_iter.bi_size);
}
dio->size += bio->bi_iter.bi_size;
pos += bio->bi_iter.bi_size;
nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS); if (!nr_pages) {
submit_bio(bio); break;
}
atomic_inc(&dio->ref);
submit_bio(bio);
bio = bio_alloc(bdev, nr_pages, opf, GFP_KERNEL);
}
blk_finish_plug(&plug);
if (!is_sync) return -EIOCBQUEUED;
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE); if (!READ_ONCE(dio->waiter)) break;
blk_io_schedule();
}
__set_current_state(TASK_RUNNING);
if (!ret)
ret = blk_status_to_errno(dio->bio.bi_status); if (likely(!ret))
ret = dio->size;
if (iov_iter_is_bvec(iter)) { /* * Users don't rely on the iterator being in any particular * state for async I/O returning -EIOCBQUEUED, hence we can * avoid expensive iov_iter_advance(). Bypass * bio_iov_iter_get_pages() and set the bvec directly.
*/
bio_iov_bvec_set(bio, iter);
} else {
ret = bio_iov_iter_get_pages(bio, iter); if (unlikely(ret)) goto out_bio_put;
}
dio->size = bio->bi_iter.bi_size;
if (is_read) { if (user_backed_iter(iter)) {
dio->flags |= DIO_SHOULD_DIRTY;
bio_set_pages_dirty(bio);
}
} else {
task_io_account_write(bio->bi_iter.bi_size);
}
if (iocb->ki_flags & IOCB_HAS_METADATA) {
ret = bio_integrity_map_iter(bio, iocb->private);
WRITE_ONCE(iocb->private, NULL); if (unlikely(ret)) goto out_bio_put;
}
if (iocb->ki_flags & IOCB_ATOMIC)
bio->bi_opf |= REQ_ATOMIC;
if (iocb->ki_flags & IOCB_NOWAIT)
bio->bi_opf |= REQ_NOWAIT;
if (blkdev_dio_invalid(bdev, iocb, iter)) return -EINVAL;
if (iov_iter_rw(iter) == WRITE) {
u16 max_write_streams = bdev_max_write_streams(bdev);
if (iocb->ki_write_stream) { if (iocb->ki_write_stream > max_write_streams) return -EINVAL;
} elseif (max_write_streams) { enum rw_hint write_hint =
file_inode(iocb->ki_filp)->i_write_hint;
/* * Just use the write hint as write stream for block * device writes. This assumes no file system is * mounted that would use the streams differently.
*/ if (write_hint <= max_write_streams)
iocb->ki_write_stream = write_hint;
}
}
/* * We cannot call mpage_writepages() as it does not take the buffer lock. * We must use block_write_full_folio() directly which holds the buffer * lock. The buffer lock provides the synchronisation with writeback * that filesystems rely on when they use the blockdev's mapping.
*/ staticint blkdev_writepages(struct address_space *mapping, struct writeback_control *wbc)
{ struct folio *folio = NULL; struct blk_plug plug; int err;
/* * for a block special file file_inode(file)->i_size is zero * so we compute the size by hand (just as in block_read/write above)
*/ static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence)
{ struct inode *bd_inode = bdev_file_inode(file);
loff_t retval;
/*
 * blkdev_fsync - flush a block device's dirty data to stable storage
 * @filp:     open block-device file
 * @start:    first byte of the range to sync
 * @end:      last byte of the range to sync
 * @datasync: unused here; data and metadata are handled the same way
 *
 * Write back and wait on the requested byte range, then issue a cache
 * flush so that completed writes reach the medium.  Devices without a
 * writeback cache return -EOPNOTSUPP from the flush, which is treated
 * as success.
 */
static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
		int datasync)
{
	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
	int error;

	error = file_write_and_wait_range(filp, start, end);
	if (error)
		return error;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	error = blkdev_issue_flush(bdev);
	if (error == -EOPNOTSUPP)
		error = 0;

	return error;
}
/**
 * file_to_blk_mode - get block open flags from file flags
 * @file: file whose open flags should be converted
 *
 * Look at file open flags and generate corresponding block open flags from
 * them. The function works both for file just being open (e.g. during ->open
 * callback) and for file that is already open. This is actually non-trivial
 * (see comment in the function).
 */
blk_mode_t file_to_blk_mode(struct file *file)
{
	blk_mode_t mode = 0;

	if (file->f_mode & FMODE_READ)
		mode |= BLK_OPEN_READ;
	if (file->f_mode & FMODE_WRITE)
		mode |= BLK_OPEN_WRITE;
	/*
	 * do_dentry_open() clears O_EXCL from f_flags, use file->private_data
	 * to determine whether the open was exclusive for already open files.
	 */
	if (file->private_data)
		mode |= BLK_OPEN_EXCL;
	else if (file->f_flags & O_EXCL)
		mode |= BLK_OPEN_EXCL;
	if (file->f_flags & O_NDELAY)
		mode |= BLK_OPEN_NDELAY;

	/*
	 * If all bits in O_ACCMODE set (aka O_RDWR | O_WRONLY), the floppy
	 * driver has historically allowed ioctls as if the file was opened for
	 * writing, but does not allow any actual reads or writes.
	 */
	if ((file->f_flags & O_ACCMODE) == (O_RDWR | O_WRONLY))
		mode |= BLK_OPEN_WRITE_IOCTL;

	return mode;
}
mode = file_to_blk_mode(filp); /* Use the file as the holder. */ if (mode & BLK_OPEN_EXCL)
filp->private_data = filp;
ret = bdev_permission(inode->i_rdev, mode, filp->private_data); if (ret) return ret;
bdev = blkdev_get_no_open(inode->i_rdev, true); if (!bdev) return -ENXIO;
if (bdev_can_atomic_write(bdev))
filp->f_mode |= FMODE_CAN_ATOMIC_WRITE; if (blk_get_integrity(bdev->bd_disk))
filp->f_mode |= FMODE_HAS_METADATA;
ret = bdev_open(bdev, mode, filp->private_data, NULL, filp); if (ret)
blkdev_put_no_open(bdev); return ret;
}
/*
 * Write data to the block device. Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */
static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *bd_inode = bdev_file_inode(file);
	struct block_device *bdev = I_BDEV(bd_inode);
	bool atomic = iocb->ki_flags & IOCB_ATOMIC;
	loff_t size = bdev_nr_bytes(bdev);
	size_t shorted = 0;
	ssize_t ret;

	/* Up-front validation: permissions, busy swap device, range. */
	if (bdev_read_only(bdev))
		return -EPERM;
	if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
		return -ETXTBSY;
	if (!iov_iter_count(from))
		return 0;
	if (iocb->ki_pos >= size)
		return -ENOSPC;
	/* Nonblocking I/O is only supported in combination with O_DIRECT. */
	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
		return -EOPNOTSUPP;

	if (atomic) {
		ret = generic_atomic_write_valid(iocb, from);
		if (ret)
			return ret;
	}

	/* Clamp the write to the end of the device; atomic writes must fit. */
	size -= iocb->ki_pos;
	if (iov_iter_count(from) > size) {
		if (atomic)
			return -EINVAL;
		shorted = iov_iter_count(from) - size;
		iov_iter_truncate(from, size);
	}

	ret = file_update_time(file);
	if (ret)
		return ret;

	if (iocb->ki_flags & IOCB_DIRECT) {
		ret = blkdev_direct_write(iocb, from);
		if (ret >= 0 && iov_iter_count(from))
			ret = direct_write_fallback(iocb, from, ret,
					blkdev_buffered_write(iocb, from));
	} else {
		/*
		 * Take i_rwsem and invalidate_lock to avoid racing with
		 * set_blocksize changing i_blkbits/folio order and punching
		 * out the pagecache.
		 */
		inode_lock_shared(bd_inode);
		ret = blkdev_buffered_write(iocb, from);
		inode_unlock_shared(bd_inode);
	}

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	/* Restore any length trimmed above so the caller sees the iter intact. */
	iov_iter_reexpand(from, iov_iter_count(from) + shorted);
	return ret;
}
if (iocb->ki_flags & IOCB_DIRECT) {
ret = kiocb_write_and_wait(iocb, count); if (ret < 0) goto reexpand;
file_accessed(iocb->ki_filp);
ret = blkdev_direct_IO(iocb, to); if (ret > 0) {
iocb->ki_pos += ret;
count -= ret;
} if (ret != -EIOCBQUEUED)
iov_iter_revert(to, count - iov_iter_count(to)); if (ret < 0 || !count) goto reexpand;
}
/* * Take i_rwsem and invalidate_lock to avoid racing with set_blocksize * changing i_blkbits/folio order and punching out the pagecache.
*/
inode_lock_shared(bd_inode);
ret = filemap_read(iocb, to, ret);
inode_unlock_shared(bd_inode);
reexpand: if (unlikely(shorted))
iov_iter_reexpand(to, iov_iter_count(to) + shorted); return ret;
}
staticlong blkdev_fallocate(struct file *file, int mode, loff_t start,
loff_t len)
{ struct inode *inode = bdev_file_inode(file); struct block_device *bdev = I_BDEV(inode);
loff_t end = start + len - 1;
loff_t isize; unsignedint flags; int error;
/* Fail if we don't recognize the flags. */ if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED) return -EOPNOTSUPP; /* * Don't allow writing zeroes if the device does not enable the * unmap write zeroes operation.
*/ if ((mode & FALLOC_FL_WRITE_ZEROES) &&
!bdev_write_zeroes_unmap_sectors(bdev)) return -EOPNOTSUPP;
/* Don't go off the end of the device. */
isize = bdev_nr_bytes(bdev); if (start >= isize) return -EINVAL; if (end >= isize) { if (mode & FALLOC_FL_KEEP_SIZE) {
len = isize - start;
end = start + len - 1;
} else return -EINVAL;
}
/* * Don't allow IO that isn't aligned to logical block size.
*/ if ((start | len) & (bdev_logical_block_size(bdev) - 1)) return -EINVAL;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.