/*
 * Used to help calculate the FUSE connection's max_pages limit for a
 * request's size. Parts of the struct fuse_req are sliced into
 * scattergather lists in addition to the pages used, so this can help
 * account for that overhead.
 */
#define FUSE_HEADER_OVERHEAD	4
/*
 * List of virtio-fs device instances and a lock for the list. Also provides
 * mutual exclusion in device removal and mounting path.
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);
/* The /sys/fs/virtio_fs/ kset */
static struct kset *virtio_fs_kset;
/* Virtqueue indices: the hiprio queue comes first, request queues follow. */
enum {
VQ_HIPRIO,
VQ_REQUEST
};
/* Maximum length of the per-virtqueue name stored in struct virtio_fs_vq. */
#define VQ_NAME_LEN 24
/* Per-virtqueue state */
struct virtio_fs_vq {
	spinlock_t lock;
	struct virtqueue *vq;     /* protected by ->lock */
	struct work_struct done_work;
	struct list_head queued_reqs;
	struct list_head end_reqs;	/* End these requests */
	struct work_struct dispatch_work;
	struct fuse_dev *fud;
	bool connected;
	long in_flight;
	struct completion in_flight_zero; /* No inflight requests */
	struct kobject *kobj;
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;
/* A virtio-fs device instance */
struct virtio_fs {
	struct kobject kobj;
	struct kobject *mqs_kobj;
	struct list_head list;    /* on virtio_fs_instances */
	char *tag;
	struct virtio_fs_vq *vqs;
	unsigned int nvqs;               /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
	struct dax_device *dax_dev;

	unsigned int *mq_map; /* index = cpu id, value = request vq id */

	/* DAX memory window where file contents are mapped */
	void *window_kaddr;
	phys_addr_t window_phys_addr;
	size_t window_len;
};
/*
 * Bump the count of requests in flight on @fsvq.
 * Should be called with fsvq->lock held.
 */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
	fsvq->in_flight++;
}
/*
 * Drop the in-flight request count on @fsvq and signal in_flight_zero
 * once it reaches zero, waking any drain waiter.
 * Should be called with fsvq->lock held.
 */
static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight <= 0);
	fsvq->in_flight--;
	if (!fsvq->in_flight)
		complete(&fsvq->in_flight_zero);
}
/*
 * NOTE(review): the enclosing function header was lost in extraction —
 * this looks like the body of a drain-queue helper (presumably
 * virtio_fs_drain_queue(fsvq)); confirm against the upstream file.
 * It blocks until fsvq->in_flight reaches zero, using the
 * in_flight_zero completion signalled by dec_in_flight_req().
 */
/* Wait for in flight requests to finish.*/
spin_lock(&fsvq->lock); if (fsvq->in_flight) { /* We are holding virtio_fs_mutex. There should not be any * waiters waiting for completion.
*/
reinit_completion(&fsvq->in_flight_zero);
spin_unlock(&fsvq->lock);
/* Sleep until dec_in_flight_req() completes in_flight_zero. */
wait_for_completion(&fsvq->in_flight_zero);
} else {
spin_unlock(&fsvq->lock);
}
/*
 * Drain every virtqueue of @fs.
 * Caller must hold virtio_fs_mutex (see virtio_fs_drain_all_queues()).
 */
static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		virtio_fs_drain_queue(fsvq);
	}
}
/* Drain all virtqueues of @fs, taking virtio_fs_mutex for exclusion. */
static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
	/* Provides mutual exclusion between ->remove and ->kill_sb
	 * paths. We don't want both of these draining queue at the
	 * same time. Current completion logic reinits completion
	 * and that means there should not be any other thread
	 * doing reinit or waiting for completion already.
	 */
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_drain_all_queues_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}
/* Mark every virtqueue of @fs connected again, e.g. on a fresh mount. */
static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = true;
		spin_unlock(&fsvq->lock);
	}
}
/* Drop the per-queue sysfs kobjects created by virtio_fs_add_queues_sysfs(). */
static void virtio_fs_delete_queues_sysfs(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		kobject_put(fsvq->kobj);
	}
}
/*
 * Create one sysfs directory (named by queue index) under fs->mqs_kobj for
 * each virtqueue and attach virtio_fs_vq_attr_group to it.
 * NOTE(review): truncated in extraction — the success `return 0` and the
 * `out_del:` unwind path referenced by the gotos are missing below; the
 * local `j` is presumably used by that lost unwind loop. Restore from the
 * upstream file before building.
 */
staticint virtio_fs_add_queues_sysfs(struct virtio_fs *fs)
{ struct virtio_fs_vq *fsvq; char buff[12]; int i, j, ret;
for (i = 0; i < fs->nvqs; i++) {
fsvq = &fs->vqs[i];
sprintf(buff, "%d", i);
fsvq->kobj = kobject_create_and_add(buff, fs->mqs_kobj); if (!fsvq->kobj) {
ret = -ENOMEM; goto out_del;
}
ret = sysfs_create_group(fsvq->kobj, &virtio_fs_vq_attr_group); if (ret) {
kobject_put(fsvq->kobj); goto out_del;
}
}
/*
 * NOTE(review): this span splices two different functions together.
 * L81-L91 is the front half of virtio_fs_add_instance() (kobject setup,
 * "mqs" subdirectory, "device" symlink, per-queue sysfs); its list-insert,
 * success return and the out_unlock/out_del/out_put/out_remove unwind
 * labels are missing. L92-L97 is a fud-freeing loop that belongs to a
 * device-teardown helper (fuse_dev_free per queue) — its own function
 * header is lost. Restore both from the upstream file.
 */
/* Add a new instance to the list or return -EEXIST if tag name exists*/ staticint virtio_fs_add_instance(struct virtio_device *vdev, struct virtio_fs *fs)
{ struct virtio_fs *fs2; int ret;
/* Use the virtio_device's index as a unique identifier, there is no * need to allocate our own identifiers because the virtio_fs instance * is only visible to userspace as long as the underlying virtio_device * exists.
*/
fs->kobj.kset = virtio_fs_kset;
ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index); if (ret < 0) goto out_unlock;
fs->mqs_kobj = kobject_create_and_add("mqs", &fs->kobj); if (!fs->mqs_kobj) {
ret = -ENOMEM; goto out_del;
}
ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device"); if (ret < 0) goto out_put;
ret = virtio_fs_add_queues_sysfs(fs); if (ret) goto out_remove;
/* --- unrelated fragment: free each queue's fuse_dev --- */
for (i = 0; i < fs->nvqs; i++) { struct virtio_fs_vq *fsvq = &fs->vqs[i];
if (!fsvq->fud) continue;
fuse_dev_free(fsvq->fud);
fsvq->fud = NULL;
}
}
/*
 * Read the filesystem tag from virtio config space into fs->tag.
 * The tag is allocated with devm_kmalloc(), so it is released with the
 * device — no explicit kfree() is required.
 * Returns 0, -EINVAL for an empty tag or one containing a newline,
 * or -ENOMEM on allocation failure.
 */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
	char *end;
	size_t len;

	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
			   &tag_buf, sizeof(tag_buf));
	end = memchr(tag_buf, '\0', sizeof(tag_buf));
	if (end == tag_buf)
		return -EINVAL; /* empty tag */
	if (!end)
		end = &tag_buf[sizeof(tag_buf)]; /* tag fills the whole field */

	len = end - tag_buf;
	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
	if (!fs->tag)
		return -ENOMEM;
	memcpy(fs->tag, tag_buf, len);
	fs->tag[len] = '\0';

	/* While the VIRTIO specification allows any character, newlines are
	 * awkward on mount(8) command-lines and cause problems in the sysfs
	 * "tag" attr and uevent TAG= properties. Forbid them.
	 */
	if (strchr(fs->tag, '\n')) {
		dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n");
		return -EINVAL;
	}

	dev_info(&vdev->dev, "discovered new tag: %s\n", fs->tag);
	return 0;
}
/*
 * NOTE(review): truncated in extraction — only the header and two local
 * declarations survive; the completion loop and closing brace are missing.
 * Restore the body from the upstream file.
 */
/* Work function for hiprio completion */ staticvoid virtio_fs_hiprio_done_work(struct work_struct *work)
{ struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
done_work); struct virtqueue *vq = fsvq->vq;
/*
 * Queue a FORGET request on the hiprio virtqueue.
 * Returns 1 if queue is full and sender should wait a bit before sending
 * next request, 0 otherwise (including the drop/queued error paths).
 * @in_flight tells whether @forget is already counted in fsvq->in_flight.
 */
static int send_forget_request(struct virtio_fs_vq *fsvq,
			       struct virtio_fs_forget *forget,
			       bool in_flight)
{
	struct scatterlist sg;
	struct virtqueue *vq;
	int ret = 0;
	bool notify;
	struct virtio_fs_forget_req *req = &forget->req;

	spin_lock(&fsvq->lock);
	if (!fsvq->connected) {
		/* Queue is going away; drop the forget silently. */
		if (in_flight)
			dec_in_flight_req(fsvq);
		kfree(forget);
		goto out;
	}

	/*
	 * NOTE(review): restored — the mangled original used @vq and @sg
	 * uninitialized here; these two lines were lost in extraction.
	 */
	sg_init_one(&sg, req, sizeof(*req));
	vq = fsvq->vq;

	ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOSPC) {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
				 ret);
			list_add_tail(&forget->list, &fsvq->queued_reqs);
			if (!in_flight)
				inc_in_flight_req(fsvq);
			/* Queue is full */
			ret = 1;
		} else {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
				 ret);
			kfree(forget);
			if (in_flight)
				dec_in_flight_req(fsvq);
		}
		goto out;
	}

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);
	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);
	return ret;

out:
	spin_unlock(&fsvq->lock);
	return ret;
}
/*
 * NOTE(review): two unrelated fragments spliced together. L147-L152 is
 * the tail of a completion work function (reschedules dispatch_work if
 * requests were queued while the ring was full); its function header is
 * lost. L153-L157 is the `fallback:` path of a CPU->queue mapping routine
 * (presumably virtio_fs_map_queues) that spreads CPUs over the request
 * queues via group_cpus_evenly(), defaulting all CPUs to VQ_REQUEST if
 * even that fails. Restore both from the upstream file.
 */
/* Try to push previously queued requests, as the queue might no longer be full */
spin_lock(&fsvq->lock); if (!list_empty(&fsvq->queued_reqs))
schedule_work(&fsvq->dispatch_work);
spin_unlock(&fsvq->lock);
}
return;
fallback: /* Attempt to map evenly in groups over the CPUs */
masks = group_cpus_evenly(fs->num_request_queues, &nr_masks); /* If even this fails we default to all CPUs use first request queue */ if (!masks) {
for_each_possible_cpu(cpu)
fs->mq_map[cpu] = VQ_REQUEST; return;
}
/*
 * NOTE(review): truncated in extraction — only the header and local
 * declarations survive; the vq allocation/naming body and closing brace
 * are missing. Restore from the upstream file.
 */
/* Initialize virtqueues */ staticint virtio_fs_setup_vqs(struct virtio_device *vdev, struct virtio_fs *fs)
{ struct virtqueue_info *vqs_info; struct virtqueue **vqs; /* Specify pre_vectors to ensure that the queues before the * request queues (e.g. hiprio) don't claim any of the CPUs in * the multi-queue mapping and interrupt affinities
*/ struct irq_affinity desc = { .pre_vectors = VQ_REQUEST }; unsignedint i; int ret = 0;
/* Free virtqueues (device must already be reset) */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev)
{
	vdev->config->del_vqs(vdev);
}
/*
 * NOTE(review): two fragments spliced together. L165-L169 is the front of
 * virtio_fs_direct_access() (DAX ->direct_access: translates a window
 * pgoff to kaddr/pfn, clamped to the window length) — its body after the
 * max_nr_pages computation is missing. L170-L179 is part of a DAX setup
 * routine mapping the device's cache BAR via devm_memremap_pages();
 * its enclosing function header is lost. Restore from the upstream file.
 */
/* Map a window offset to a page frame number. The window offset will have * been produced by .iomap_begin(), which maps a file offset to a window * offset.
*/ staticlong virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, enum dax_access_mode mode, void **kaddr, unsignedlong *pfn)
{ struct virtio_fs *fs = dax_get_private(dax_dev);
phys_addr_t offset = PFN_PHYS(pgoff);
size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff;
/* --- unrelated fragment: DAX window pgmap setup --- */
pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL); if (!pgmap) return -ENOMEM;
pgmap->type = MEMORY_DEVICE_FS_DAX;
/* Ideally we would directly use the PCI BAR resource but * devm_memremap_pages() wants its own copy in pgmap. So * initialize a struct resource from scratch (only the start * and end fields will be used).
*/
pgmap->range = (struct range) {
.start = (phys_addr_t) cache_reg.addr,
.end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
};
pgmap->nr_range = 1;
fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); if (IS_ERR(fs->window_kaddr)) return PTR_ERR(fs->window_kaddr);
/*
 * NOTE(review): function header lost in extraction — this is the body of
 * the virtio ->remove handler (presumably virtio_fs_remove): it unlinks
 * the instance, tears down sysfs, stops and drains the queues, resets the
 * device, deletes the vqs, and drops the device's reference on the
 * virtio_fs object, all under virtio_fs_mutex. Restore the header from
 * the upstream file.
 */
mutex_lock(&virtio_fs_mutex); /* This device is going away. No one should get new reference */
list_del_init(&fs->list);
virtio_fs_delete_queues_sysfs(fs);
sysfs_remove_link(&fs->kobj, "device");
kobject_put(fs->mqs_kobj);
kobject_del(&fs->kobj);
virtio_fs_stop_all_queues(fs);
virtio_fs_drain_all_queues_locked(fs);
/* Device reset must precede deleting the virtqueues. */
virtio_reset_device(vdev);
virtio_fs_cleanup_vqs(vdev);
vdev->priv = NULL; /* Put device reference on virtio_fs object */
virtio_fs_put_locked(fs);
mutex_unlock(&virtio_fs_mutex);
}
#ifdef CONFIG_PM_SLEEP
/* PM freeze hook: suspend/resume is not implemented, refuse to freeze. */
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
	return -EOPNOTSUPP;
}

/* PM restore hook: nothing to restore since freeze never succeeds. */
static int virtio_fs_restore(struct virtio_device *vdev)
{
	/* TODO need to restore state here */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */
/* FUSE_INTERRUPT is intentionally a no-op for now; see TODO below. */
static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	/*
	 * TODO interrupts.
	 *
	 * Normal fs operations on a local filesystems aren't interruptible.
	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
	 * with shared lock between host and guest.
	 */
}
/* Count number of scatter-gather elements required */ staticunsignedint sg_count_fuse_folios(struct fuse_folio_desc *folio_descs, unsignedint num_folios, unsignedint total_len)
{ unsignedint i; unsignedint this_len;
for (i = 0; i < num_folios && total_len; i++) {
this_len = min(folio_descs[i].length, total_len);
total_len -= this_len;
}
return i;
}
/*
 * NOTE(review): truncated in extraction — only the header, locals and the
 * in-args accounting survive; the folio counting, out-args accounting,
 * return statement and closing brace are missing. Restore from the
 * upstream file.
 */
/* Return the number of scatter-gather list elements required */ staticunsignedint sg_count_fuse_req(struct fuse_req *req)
{ struct fuse_args *args = req->args; struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); unsignedint size, total_sgs = 1 /* fuse_in_header */;
if (args->in_numargs - args->in_pages)
total_sgs += 1;
/* Add folios to scatter-gather list and return number of elements used */ staticunsignedint sg_init_fuse_folios(struct scatterlist *sg, struct folio **folios, struct fuse_folio_desc *folio_descs, unsignedint num_folios, unsignedint total_len)
{ unsignedint i; unsignedint this_len;
for (i = 0; i < num_folios && total_len; i++) {
sg_init_table(&sg[i], 1);
this_len = min(folio_descs[i].length, total_len);
sg_set_folio(&sg[i], folios[i], this_len, folio_descs[i].offset);
total_len -= this_len;
}
return i;
}
/*
 * NOTE(review): truncated in extraction — only the header, locals and the
 * argbuf element survive; the folio handling, *len_used assignment, return
 * and closing brace are missing. Restore from the upstream file.
 */
/* Add args to scatter-gather list and return number of elements used */ staticunsignedint sg_init_fuse_args(struct scatterlist *sg, struct fuse_req *req, struct fuse_arg *args, unsignedint numargs, bool argpages, void *argbuf, unsignedint *len_used)
{ struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); unsignedint total_sgs = 0; unsignedint len;
len = fuse_len_args(numargs - argpages, args); if (len)
sg_init_one(&sg[total_sgs++], argbuf, len);
/*
 * NOTE(review): truncated in extraction — the fragment ends right after
 * the argbuf bounce-buffer copy; the sg setup, virtqueue_add_sgs() call,
 * kick, error unwind (`out:` label used by the gotos below) and closing
 * brace are missing. Restore from the upstream file.
 */
/* Add a request to a virtqueue and kick the device */ staticint virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, struct fuse_req *req, bool in_flight,
gfp_t gfp)
{ /* requests need at least 4 elements */ struct scatterlist *stack_sgs[6]; struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; struct scatterlist **sgs = stack_sgs; struct scatterlist *sg = stack_sg; struct virtqueue *vq; struct fuse_args *args = req->args; unsignedint argbuf_used = 0; unsignedint out_sgs = 0; unsignedint in_sgs = 0; unsignedint total_sgs; unsignedint i, hash; int ret; bool notify; struct fuse_pqueue *fpq;
/* Does the sglist fit on the stack? */
total_sgs = sg_count_fuse_req(req); if (total_sgs > ARRAY_SIZE(stack_sgs)) {
sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), gfp);
sg = kmalloc_array(total_sgs, sizeof(sg[0]), gfp); if (!sgs || !sg) {
ret = -ENOMEM; goto out;
}
}
/* Use a bounce buffer since stack args cannot be mapped */
ret = copy_args_to_argbuf(req, gfp); if (ret < 0) goto out;
/*
 * NOTE(review): function header lost in extraction — this is the tail of
 * the request submission path (presumably virtio_fs_send_req): it picks
 * the request virtqueue and, on -ENOSPC, defers the request to
 * dispatch_work; other errors end the request from a worker because a
 * request cannot be completed in submission context. Restore the header
 * from the upstream file.
 */
fsvq = &fs->vqs[queue_id];
ret = virtio_fs_enqueue_req(fsvq, req, false, GFP_ATOMIC); if (ret < 0) { if (ret == -ENOSPC) { /* * Virtqueue full. Retry submission from worker * context as we might be holding fc->bg_lock.
*/
spin_lock(&fsvq->lock);
list_add_tail(&req->list, &fsvq->queued_reqs);
inc_in_flight_req(fsvq);
spin_unlock(&fsvq->lock); return;
}
req->out.h.error = ret;
pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
/* Can't end request in submission context. Use a worker */
spin_lock(&fsvq->lock);
list_add_tail(&req->list, &fsvq->end_reqs);
schedule_work(&fsvq->dispatch_work);
spin_unlock(&fsvq->lock); return;
}
}
/*
 * NOTE(review): function header lost in extraction — this is the middle of
 * the fill_super path (presumably virtio_fs_fill_super): re-validates the
 * instance under virtio_fs_mutex, allocates a fuse_dev per queue,
 * configures DAX, calls fuse_fill_super_common(), installs the fuse
 * devices, restarts the queues and sends FUSE_INIT. The `err:` and
 * `err_free_fuse_devs:` labels referenced by the gotos are missing.
 * Restore from the upstream file.
 */
/* After holding mutex, make sure virtiofs device is still there. * Though we are holding a reference to it, drive ->remove might * still have cleaned up virtual queues. In that case bail out.
*/
err = -EINVAL; if (list_empty(&fs->list)) {
pr_info("virtio-fs: tag <%s> not found\n", fs->tag); goto err;
}
err = -ENOMEM; /* Allocate fuse_dev for hiprio and notification queues */ for (i = 0; i < fs->nvqs; i++) { struct virtio_fs_vq *fsvq = &fs->vqs[i];
fsvq->fud = fuse_dev_alloc(); if (!fsvq->fud) goto err_free_fuse_devs;
}
/* virtiofs allocates and installs its own fuse devices */
ctx->fudptr = NULL; if (ctx->dax_mode != FUSE_DAX_NEVER) { if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) {
err = -EINVAL;
pr_err("virtio-fs: dax can't be enabled as filesystem" " device does not support it.\n"); goto err_free_fuse_devs;
}
ctx->dax_dev = fs->dax_dev;
}
err = fuse_fill_super_common(sb, ctx); if (err < 0) goto err_free_fuse_devs;
for (i = 0; i < fs->nvqs; i++) { struct virtio_fs_vq *fsvq = &fs->vqs[i];
fuse_dev_install(fsvq->fud, fc);
}
/* Previous unmount will stop all queues. Start these again */
virtio_fs_start_all_queues(fs);
fuse_send_init(fm);
mutex_unlock(&virtio_fs_mutex); return 0;
/*
 * NOTE(review): function header lost in extraction — this is the body of
 * the connection-teardown helper (presumably virtio_fs_conn_destroy):
 * cancels DAX work, disconnects the hiprio queue, drains, sends DESTROY
 * via fuse_conn_destroy(), then stops/drains again and frees the fuse
 * devices so their refs on fuse_conn — and transitively on the virtio_fs
 * object — get dropped. Restore the header from the upstream file.
 */
/* Stop dax worker. Soon evict_inodes() will be called which * will free all memory ranges belonging to all inodes.
*/ if (IS_ENABLED(CONFIG_FUSE_DAX))
fuse_dax_cancel_work(fc);
/* Stop forget queue. Soon destroy will be sent */
spin_lock(&fsvq->lock);
fsvq->connected = false;
spin_unlock(&fsvq->lock);
virtio_fs_drain_all_queues(vfs);
fuse_conn_destroy(fm);
/* fuse_conn_destroy() must have sent destroy. Stop all queues * and drain one more time and free fuse devices. Freeing fuse * devices will drop their reference on fuse_conn and that in * turn will drop its reference on virtio_fs object.
*/
virtio_fs_stop_all_queues(vfs);
virtio_fs_drain_all_queues(vfs);
virtio_fs_free_devs(vfs);
}
/*
 * NOTE(review): function header lost in extraction — this is the body of
 * the ->kill_sb handler (presumably virtio_kill_sb): destroys the
 * connection only if the mount completed (sb->s_root set and this was the
 * last mount), then tears down the superblock and fuse_mount. Restore the
 * header from the upstream file.
 */
/* If mount failed, we can still be called without any fc */ if (sb->s_root) {
last = fuse_mount_remove(fm); if (last)
virtio_fs_conn_destroy(fm);
}
kill_anon_super(sb);
fuse_mount_destroy(fm);
}
/*
 * NOTE(review): function header and tail lost in extraction — this is the
 * front of the ->get_tree handler (presumably virtio_fs_get_tree): looks
 * up the instance by mount source tag, sanity-checks the request
 * virtqueue size against FUSE_HEADER_OVERHEAD, and allocates
 * fuse_conn/fuse_mount. The `out_err:` unwind referenced by the gotos is
 * missing. Restore from the upstream file.
 */
if (!fsc->source) return invalf(fsc, "No source specified");
/* This gets a reference on virtio_fs object. This ptr gets installed * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() * to drop the reference to this object.
*/
fs = virtio_fs_find_instance(fsc->source); if (!fs) {
pr_info("virtio-fs: tag <%s> not found\n", fsc->source); return -EINVAL;
}
virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq); if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD)) goto out_err;
err = -ENOMEM;
fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); if (!fc) goto out_err;
fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); if (!fm) goto out_err;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.