staticbool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing)
{ int i;
for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) if (fuse_request_expired(fc, &processing[i])) returntrue;
returnfalse;
}
/*
 * Check if any requests aren't being completed by the time the request timeout
 * elapses. To do so, we:
 * - check the fiq pending list
 * - check the bg queue
 * - check the fpq io and processing lists
 *
 * To make this fast, we only check against the head request on each list since
 * these are generally queued in order of creation time (eg newer requests get
 * queued to the tail). We might miss a few edge cases (eg requests transitioning
 * between lists, re-sent requests at the head of the pending list having a
 * later creation time than other requests on that list, etc.) but that is fine
 * since if the request never gets fulfilled, it will eventually be caught.
 */
void fuse_check_timeout(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct fuse_conn *fc = container_of(dwork, struct fuse_conn,
					    timeout.work);
	struct fuse_iqueue *fiq = &fc->iq;
	struct fuse_dev *fud;
	struct fuse_pqueue *fpq;
	bool expired = false;

	/* Fast path: no request is awaiting a reply, nothing can be expired. */
	if (!atomic_read(&fc->num_waiting))
		goto out;

	/* Head of the input queue's pending list is the oldest candidate. */
	spin_lock(&fiq->lock);
	expired = fuse_request_expired(fc, &fiq->pending);
	spin_unlock(&fiq->lock);
	if (expired)
		goto abort_conn;

	/* Background queue is likewise checked head-first under bg_lock. */
	spin_lock(&fc->bg_lock);
	expired = fuse_request_expired(fc, &fc->bg_queue);
	spin_unlock(&fc->bg_lock);
	if (expired)
		goto abort_conn;
	/*
	 * NOTE(review): the remainder of this function (the per-device fpq
	 * io/processing checks plus the abort_conn and out labels referenced
	 * above, where fud and fpq are used) is not visible in this excerpt —
	 * TODO confirm against the full file.
	 */
/*
 * Drop one num_waiting reference; if the connection is already gone,
 * wake anyone blocked in the abort path.
 */
static void fuse_drop_waiting(struct fuse_conn *fc)
{
	/*
	 * The lockless read of fc->connected is fine here:
	 * atomic_dec_and_test() provides a memory barrier that pairs with
	 * the one in fuse_wait_aborted(), so no wake-up can be missed.
	 */
	if (!atomic_dec_and_test(&fc->num_waiting))
		return;

	if (!READ_ONCE(fc->connected))
		wake_up_all(&fc->blocked_waitq);	/* wake up aborters */
}
	/*
	 * NOTE(review): interior fragment — the enclosing function's header is
	 * not visible in this excerpt. This part marks a freshly allocated
	 * request as waiting/background and stamps caller credentials into the
	 * request header.
	 */
	__set_bit(FR_WAITING, &req->flags);
	if (for_background)
		__set_bit(FR_BACKGROUND, &req->flags);

	/*
	 * Keep the old behavior when idmappings support was not
	 * declared by a FUSE server.
	 *
	 * For those FUSE servers who support idmapped mounts,
	 * we send UID/GID only along with "inode creation"
	 * fuse requests, otherwise idmap == &invalid_mnt_idmap and
	 * req->in.h.{u,g}id will be equal to FUSE_INVALID_UIDGID.
	 */
	fsuid = no_idmap ? current_fsuid() : mapped_fsuid(idmap, fc->user_ns);
	fsgid = no_idmap ? current_fsgid() : mapped_fsgid(idmap, fc->user_ns);
	req->in.h.uid = from_kuid(fc->user_ns, fsuid);
	req->in.h.gid = from_kgid(fc->user_ns, fsgid);
	/*
	 * NOTE(review): interior fragment (looks like the last-reference path
	 * of a request put helper); the function header and the tail that
	 * actually frees the request are not visible in this excerpt.
	 */
	if (refcount_dec_and_test(&req->count)) {
		if (test_bit(FR_BACKGROUND, &req->flags)) {
			/*
			 * We get here in the unlikely case that a background
			 * request was allocated but not sent
			 */
			spin_lock(&fc->bg_lock);
			if (!fc->blocked)
				wake_up(&fc->blocked_waitq);
			spin_unlock(&fc->bg_lock);
		}

		if (test_bit(FR_WAITING, &req->flags)) {
			/* Balance the num_waiting reference taken at setup. */
			__clear_bit(FR_WAITING, &req->flags);
			fuse_drop_waiting(fc);
		}
/*
 * This function is called when a request is finished. Either a reply
 * has arrived or it was aborted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed. The requester thread is woken up (if still waiting),
 * the 'end' callback is called if given, else the reference to the
 * request is released
 */
void fuse_request_end(struct fuse_req *req)
{
	struct fuse_mount *fm = req->fm;
	struct fuse_conn *fc = fm->fc;
	struct fuse_iqueue *fiq = &fc->iq;

	/* Only the first caller to set FR_FINISHED performs the teardown. */
	if (test_and_set_bit(FR_FINISHED, &req->flags))
		goto put_request;

	trace_fuse_request_end(req);
	/*
	 * test_and_set_bit() implies smp_mb() between bit
	 * changing and below FR_INTERRUPTED check. Pairs with
	 * smp_mb() from queue_interrupt().
	 */
	if (test_bit(FR_INTERRUPTED, &req->flags)) {
		/* Remove any queued interrupt request for this req. */
		spin_lock(&fiq->lock);
		list_del_init(&req->intr_entry);
		spin_unlock(&fiq->lock);
	}
	WARN_ON(test_bit(FR_PENDING, &req->flags));
	WARN_ON(test_bit(FR_SENT, &req->flags));
	if (test_bit(FR_BACKGROUND, &req->flags)) {
		/* Background accounting: unblock queueing if we were at max. */
		spin_lock(&fc->bg_lock);
		clear_bit(FR_BACKGROUND, &req->flags);
		if (fc->num_background == fc->max_background) {
			fc->blocked = 0;
			wake_up(&fc->blocked_waitq);
		} else if (!fc->blocked) {
			/*
			 * Wake up next waiter, if any. It's okay to use
			 * waitqueue_active(), as we've already synced up
			 * fc->blocked with waiters with the wake_up() call
			 * above.
			 */
			if (waitqueue_active(&fc->blocked_waitq))
				wake_up(&fc->blocked_waitq);
		}

		fc->num_background--;
		fc->active_background--;
		flush_bg_queue(fc);
		spin_unlock(&fc->bg_lock);
	} else {
		/* Wake up waiter sleeping in request_wait_answer() */
		wake_up(&req->waitq);
	}

	/* Async completion callback, invoked with the request's error code. */
	if (test_bit(FR_ASYNC, &req->flags))
		req->args->end(fm, req->args, req->out.h.error);
put_request:
	fuse_put_request(req);
}
EXPORT_SYMBOL_GPL(fuse_request_end);
	/*
	 * NOTE(review): tail fragment — the enclosing function's header is not
	 * visible here (presumably the interrupt-queueing path; confirm
	 * against the full file).
	 */
	/* Check that FR_INTERRUPTED was set for this request before sending */
	if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags)))
		return -EINVAL;

	fiq->ops->send_interrupt(fiq, req);

	return 0;
}
/*
 * Try to unhook a still-pending request from its list under @lock.
 * Returns true if the request was removed (its error is set to -EINTR
 * and one reference is dropped), false if it was no longer pending.
 */
bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock)
{
	spin_lock(lock);
	if (!test_bit(FR_PENDING, &req->flags)) {
		spin_unlock(lock);
		return false;
	}

	/*
	 * FR_PENDING is deliberately left set: the request is headed for
	 * destruction anyway.
	 */
	list_del(&req->list);
	spin_unlock(lock);
	__fuse_put_request(req);
	req->out.h.error = -EINTR;
	return true;
}
	/*
	 * NOTE(review): interior fragment — the enclosing function's header is
	 * not visible in this excerpt (this is the wait-for-reply logic of a
	 * request; confirm against the full file).
	 */
	if (!fc->no_interrupt) {
		/* Any signal may interrupt this */
		err = wait_event_interruptible(req->waitq,
					test_bit(FR_FINISHED, &req->flags));
		if (!err)
			return;

		set_bit(FR_INTERRUPTED, &req->flags);
		/* matches barrier in fuse_dev_do_read() */
		smp_mb__after_atomic();
		if (test_bit(FR_SENT, &req->flags))
			queue_interrupt(req);
	}

	if (!test_bit(FR_FORCE, &req->flags)) {
		bool removed;

		/* Only fatal signals may interrupt this */
		err = wait_event_killable(req->waitq,
					test_bit(FR_FINISHED, &req->flags));
		if (!err)
			return;

		/* Request was interrupted: try to pull it off its queue. */
		if (test_bit(FR_URING, &req->flags))
			removed = fuse_uring_remove_pending_req(req);
		else
			removed = fuse_remove_pending_req(req, &fiq->lock);
		if (removed)
			return;
	}

	/*
	 * Either request is already in userspace, or it was forced.
	 * Wait it out.
	 */
	wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
}
	/*
	 * NOTE(review): tail fragment — the enclosing function's header is not
	 * visible in this excerpt. This shrinks argument sizes for old FUSE
	 * protocol minor versions so replies/requests match the legacy layout.
	 */
	if (fc->minor < 9) {
		switch (args->opcode) {
		case FUSE_LOOKUP:
		case FUSE_CREATE:
		case FUSE_MKNOD:
		case FUSE_MKDIR:
		case FUSE_SYMLINK:
		case FUSE_LINK:
			args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
			break;
		case FUSE_GETATTR:
		case FUSE_SETATTR:
			args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
			break;
		}
	}
	if (fc->minor < 12) {
		switch (args->opcode) {
		case FUSE_CREATE:
			args->in_args[0].size = sizeof(struct fuse_open_in);
			break;
		case FUSE_MKNOD:
			args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
			break;
		}
	}
}
/*
 * Lock the request. Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault. If the request was already
 * aborted bail out.
 */
static int lock_request(struct fuse_req *req)
{
	int ret = 0;

	if (!req)
		return 0;

	spin_lock(&req->waitq.lock);
	if (test_bit(FR_ABORTED, &req->flags))
		ret = -ENOENT;
	else
		set_bit(FR_LOCKED, &req->flags);
	spin_unlock(&req->waitq.lock);

	return ret;
}
/*
 * Unlock request. If it was aborted while locked, caller is responsible
 * for unlocking and ending the request.
 */
static int unlock_request(struct fuse_req *req)
{
	int ret = 0;

	if (req) {
		spin_lock(&req->waitq.lock);
		if (!test_bit(FR_ABORTED, &req->flags))
			clear_bit(FR_LOCKED, &req->flags);
		else
			ret = -ENOENT;
		spin_unlock(&req->waitq.lock);
	}
	return ret;
}
/*
 * Get another pagefull of userspace buffer, and map it to kernel
 * address space, and lock request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
	struct page *page;
	int err;

	/* Must drop the request lock before anything that may fault. */
	err = unlock_request(cs->req);
	if (err)
		return err;

	fuse_copy_finish(cs);
	if (cs->pipebufs) {
		struct pipe_buffer *buf = cs->pipebufs;

		if (!cs->write) {
			err = pipe_buf_confirm(cs->pipe, buf);
			if (err)
				return err;
	/*
	 * NOTE(review): the function is truncated here in this excerpt — the
	 * rest of the pipe/page handling is not visible.
	 */
/*
 * Attempt to steal a page from the splice() pipe and move it into the
 * pagecache. If successful, the pointer in @pagep will be updated. The
 * folio that was originally in @pagep will lose a reference and the new
 * folio returned in @pagep will carry a reference.
 */
static int fuse_try_move_folio(struct fuse_copy_state *cs, struct folio **foliop)
{
	int err;
	struct folio *oldfolio = *foliop;
	struct folio *newfolio;
	struct pipe_buffer *buf = cs->pipebufs;

	/* Hold an extra reference on the old folio across the unlock. */
	folio_get(oldfolio);
	err = unlock_request(cs->req);
	if (err)
		goto out_put_old;

	fuse_copy_finish(cs);

	err = pipe_buf_confirm(cs->pipe, buf);
	if (err)
		goto out_put_old;

	/*
	 * NOTE(review): a chunk is missing here — newfolio is used below but
	 * the code that obtains it from the pipe buffer is not visible in
	 * this excerpt.
	 */
	if (fuse_check_folio(newfolio) != 0)
		goto out_fallback_unlock;

	/*
	 * This is a new and locked page, it shouldn't be mapped or
	 * have any special flags on it
	 */
	if (WARN_ON(folio_mapped(oldfolio)))
		goto out_fallback_unlock;
	if (WARN_ON(folio_has_private(oldfolio)))
		goto out_fallback_unlock;
	if (WARN_ON(folio_test_dirty(oldfolio) ||
		    folio_test_writeback(oldfolio)))
		goto out_fallback_unlock;
	if (WARN_ON(folio_test_mlocked(oldfolio)))
		goto out_fallback_unlock;

	replace_page_cache_folio(oldfolio, newfolio);

	folio_get(newfolio);

	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
		folio_add_lru(newfolio);

	/*
	 * Release while we have extra ref on stolen page. Otherwise
	 * anon_pipe_buf_release() might think the page can be reused.
	 */
	pipe_buf_release(cs->pipe, buf);
	/*
	 * NOTE(review): the function tail (labels out_put_old and
	 * out_fallback_unlock) is not visible in this excerpt.
	 */
/*
 * Transfer an interrupt request to userspace
 *
 * Unlike other requests this is assembled on demand, without a need
 * to allocate a separate fuse_req structure.
 *
 * Called with fiq->lock held, releases it
 */
static int fuse_read_interrupt(struct fuse_iqueue *fiq,
			       struct fuse_copy_state *cs,
			       size_t nbytes, struct fuse_req *req)
__releases(fiq->lock)
{
	struct fuse_in_header ih;
	struct fuse_interrupt_in arg;
	unsigned reqsize = sizeof(ih) + sizeof(arg);
	int err;
	/* NOTE(review): the function body is truncated in this excerpt. */
/*
 * Read a single request into the userspace filesystem's buffer. This
 * function waits until a request is available, then removes it from
 * the pending list and copies request data to userspace buffer. If
 * no reply is needed (FORGET) or request has been aborted or there
 * was an error during the copying then it's finished by calling
 * fuse_request_end(). Otherwise add it to the processing list, and set
 * the 'sent' flag.
 */
static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
				struct fuse_copy_state *cs, size_t nbytes)
{
	ssize_t err;
	struct fuse_conn *fc = fud->fc;
	struct fuse_iqueue *fiq = &fc->iq;
	struct fuse_pqueue *fpq = &fud->pq;
	struct fuse_req *req;
	struct fuse_args *args;
	unsigned reqsize;
	unsigned int hash;

	/*
	 * Require sane minimum read buffer - that has capacity for fixed part
	 * of any request header + negotiated max_write room for data.
	 *
	 * Historically libfuse reserves 4K for fixed header room, but e.g.
	 * GlusterFS reserves only 80 bytes
	 *
	 *	= `sizeof(fuse_in_header) + sizeof(fuse_write_in)`
	 *
	 * which is the absolute minimum any sane filesystem should be using
	 * for header room.
	 */
	if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER,
			   sizeof(struct fuse_in_header) +
			   sizeof(struct fuse_write_in) +
			   fc->max_write))
		return -EINVAL;

restart:
	/* Block (unless O_NONBLOCK) until a request is pending or conn dies. */
	for (;;) {
		spin_lock(&fiq->lock);
		if (!fiq->connected || request_pending(fiq))
			break;
		spin_unlock(&fiq->lock);

		if (file->f_flags & O_NONBLOCK)
			return -EAGAIN;
		err = wait_event_interruptible_exclusive(fiq->waitq,
				!fiq->connected || request_pending(fiq));
		if (err)
			return err;
	}

	/*
	 * NOTE(review): a chunk is missing here — the code that dequeues the
	 * request and initializes req/args/reqsize/hash is not visible in this
	 * excerpt.
	 */

	/* If request is too large, reply with an error and restart the read */
	if (nbytes < reqsize) {
		req->out.h.error = -EIO;
		/* SETXATTR is special, since it may contain too large data */
		if (args->opcode == FUSE_SETXATTR)
			req->out.h.error = -E2BIG;
		fuse_request_end(req);
		goto restart;
	}

	spin_lock(&fpq->lock);
	/*
	 * Must not put request on fpq->io queue after having been shut down by
	 * fuse_abort_conn()
	 */
	if (!fpq->connected) {
		req->out.h.error = err = -ECONNABORTED;
		goto out_end;
	/* NOTE(review): the function is truncated here in this excerpt. */
static int fuse_dev_open(struct inode *inode, struct file *file)
{
	/*
	 * private_data doubles as the mounted-state marker for this device
	 * file: it stays NULL until the connection is mounted, after which it
	 * holds the fuse_conn(ection).
	 */
	file->private_data = NULL;

	return 0;
}
	/*
	 * NOTE(review): interior fragment — the enclosing function's header is
	 * not visible in this excerpt (this looks like a splice-read path that
	 * moves request data into pipe buffers; confirm against the full
	 * file). The tail that frees bufs and returns is also missing.
	 */
	bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
			      GFP_KERNEL);
	if (!bufs)
		return -ENOMEM;

	fuse_copy_init(&cs, true, NULL);
	cs.pipebufs = bufs;
	cs.pipe = pipe;
	ret = fuse_dev_do_read(fud, in, &cs, len);
	if (ret < 0)
		goto out;

	/* Refuse if the filled segments would not all fit in the pipe. */
	if (pipe_buf_usage(pipe) + cs.nr_segs > pipe->max_usage) {
		ret = -EIO;
		goto out;
	}

	for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
		/*
		 * Need to be careful about this. Having buf->ops in module
		 * code can Oops if the buffer persists after module unload.
		 */
		bufs[page_nr].ops = &nosteal_pipe_buf_ops;
		bufs[page_nr].flags = 0;
		ret = add_to_pipe(pipe, &bufs[page_nr++]);
		if (unlikely(ret < 0))
			break;
	}
	if (total)
		ret = total;
out:
	/* Drop references on any segments that were not handed to the pipe. */
	for (; page_nr < cs.nr_segs; page_nr++)
		put_page(bufs[page_nr].page);
	/*
	 * NOTE(review): interior fragment — clamps a transfer length to both
	 * max_write and the current file size; the enclosing function is not
	 * visible in this excerpt.
	 */
	num = min(outarg->size, fc->max_write);
	if (outarg->offset > file_size)
		num = 0;
	else if (outarg->offset + num > file_size)
		num = file_size - outarg->offset;
/*
 * Resending all processing queue requests.
 *
 * During a FUSE daemon panics and failover, it is possible for some inflight
 * requests to be lost and never returned. As a result, applications awaiting
 * replies would become stuck forever. To address this, we can use notification
 * to trigger resending of these pending requests to the FUSE daemon, ensuring
 * they are properly processed again.
 *
 * Please note that this strategy is applicable only to idempotent requests or
 * if the FUSE daemon takes careful measures to avoid processing duplicated
 * non-idempotent requests.
 */
static void fuse_resend(struct fuse_conn *fc)
{
	struct fuse_dev *fud;
	struct fuse_req *req, *next;
	struct fuse_iqueue *fiq = &fc->iq;
	LIST_HEAD(to_queue);
	unsigned int i;

	spin_lock(&fc->lock);
	if (!fc->connected) {
		spin_unlock(&fc->lock);
		return;
	}

	/*
	 * NOTE(review): the opening of the per-device loop (iterating the
	 * connection's devices to reach each fpq) is missing from this
	 * excerpt; the closing brace below belongs to it.
	 */
		spin_lock(&fpq->lock);
		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
			list_splice_tail_init(&fpq->processing[i], &to_queue);
		spin_unlock(&fpq->lock);
	}
	spin_unlock(&fc->lock);

	/* Re-mark everything as pending before handing it back to the iq. */
	list_for_each_entry_safe(req, next, &to_queue, list) {
		set_bit(FR_PENDING, &req->flags);
		clear_bit(FR_SENT, &req->flags);
		/* mark the request as resend request */
		req->in.h.unique |= FUSE_UNIQUE_RESEND;
	}

	spin_lock(&fiq->lock);
	if (!fiq->connected) {
		/* Input queue already dead: end the requests instead. */
		spin_unlock(&fiq->lock);
		list_for_each_entry(req, &to_queue, list)
			clear_bit(FR_PENDING, &req->flags);
		fuse_dev_end_requests(&to_queue);
		return;
	}
	/* iq and pq requests are both oldest to newest */
	list_splice(&to_queue, &fiq->pending);
	fuse_dev_wake_and_unlock(fiq);
}
/*
 * Increments the fuse connection epoch. This will result in dentries from
 * previous epochs being invalidated.
 *
 * XXX optimization: add call to shrink_dcache_sb()?
 */
static int fuse_notify_inc_epoch(struct fuse_conn *fc)
{
	atomic_inc(&fc->epoch);
	/*
	 * NOTE(review): the function tail (presumably "return 0;" and the
	 * closing brace) is not visible in this excerpt.
	 */
/*
 * Write a single reply to a request. First the header is copied from
 * the write buffer. The request is then searched on the processing
 * list by the unique ID found in the header. If found, then remove
 * it from the list and copy the rest of the buffer to the request.
 * The request is finished by calling fuse_request_end().
 */
static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
				 struct fuse_copy_state *cs, size_t nbytes)
{
	int err;
	struct fuse_conn *fc = fud->fc;
	struct fuse_pqueue *fpq = &fud->pq;
	struct fuse_req *req;
	struct fuse_out_header oh;

	/* Reply must at least contain a complete out header. */
	err = -EINVAL;
	if (nbytes < sizeof(struct fuse_out_header))
		goto out;

	err = fuse_copy_one(cs, &oh, sizeof(oh));
	if (err)
		goto copy_finish;

	/* Header's declared length must match what was actually written. */
	err = -EINVAL;
	if (oh.len != nbytes)
		goto copy_finish;

	/*
	 * Zero oh.unique indicates unsolicited notification message
	 * and error contains notification code.
	 */
	if (!oh.unique) {
		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
		goto copy_finish;
	}
	/* NOTE(review): the function is truncated here in this excerpt. */
/*
 * Abort all requests.
 *
 * Emergency exit in case of a malicious or accidental deadlock, or just a hung
 * filesystem.
 *
 * The same effect is usually achievable through killing the filesystem daemon
 * and all users of the filesystem. The exception is the combination of an
 * asynchronous request and the tricky deadlock (see
 * Documentation/filesystems/fuse.rst).
 *
 * Aborting requests under I/O goes as follows: 1: Separate out unlocked
 * requests, they should be finished off immediately. Locked requests will be
 * finished after unlock; see unlock_request(). 2: Finish off the unlocked
 * requests. It is possible that some request will finish before we can. This
 * is OK, the request will in that case be removed from the list before we touch
 * it.
 */
void fuse_abort_conn(struct fuse_conn *fc)
{
	struct fuse_iqueue *fiq = &fc->iq;

	/*
	 * NOTE(review): a large chunk of this function is missing in this
	 * excerpt — the body that checks connectivity, tears down queues and
	 * ends requests is not visible; the dangling "} else {" below belongs
	 * to that missing code.
	 */
		/*
		 * fc->lock must not be taken to avoid conflicts with io-uring
		 * locks
		 */
		fuse_uring_abort(fc);
	} else {
		spin_unlock(&fc->lock);
	}
}
EXPORT_SYMBOL_GPL(fuse_abort_conn);
	/*
	 * NOTE(review): interior fragment of a device-release path — the
	 * function header and the opening of the conditional closed by the
	 * brace after fuse_dev_free() are not visible in this excerpt.
	 */
	spin_lock(&fpq->lock);
	WARN_ON(!list_empty(&fpq->io));
	for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
		list_splice_init(&fpq->processing[i], &to_end);
	spin_unlock(&fpq->lock);

	fuse_dev_end_requests(&to_end);

	/* Are we the last open device? */
	if (atomic_dec_and_test(&fc->dev_count)) {
		WARN_ON(fc->iq.fasync != NULL);
		fuse_abort_conn(fc);
	}
	fuse_dev_free(fud);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(fuse_dev_release);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.