// SPDX-License-Identifier: GPL-2.0-only
/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell. All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 */
/* * Since resampler irqfds share an IRQ source ID, we de-assert once * then notify all of the resampler irqfds using this GSI. We can't * do multiple de-asserts or we risk racing with incoming re-asserts.
*/ staticvoid
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
/*
 * NOTE(review): this block is corrupted by extraction and does not compile
 * as-is ("staticvoid" above is a fused token). The local declarations match
 * a resampler-ack handler, but everything from the
 * eventfd_ctx_remove_wait_queue() call onward operates on an 'irqfd'
 * variable that is never declared here and reads like a separate irqfd
 * shutdown/teardown routine that was fused into this one. In addition,
 * 'kvm' is read below without ever being assigned, 'resampler' and 'idx'
 * are never used, and 'cnt' is undeclared. Recover both original functions
 * from the upstream source before relying on this text.
 */
{ struct kvm_kernel_irqfd_resampler *resampler; struct kvm *kvm; int idx;
/* Make sure irqfd has been initialized in assign path. */
/* NOTE(review): 'kvm' is still uninitialized at this point — use-before-init. */
synchronize_srcu_expedited(&kvm->irq_srcu);
/* * Synchronize with the wait-queue and unhook ourselves to prevent * further events.
*/
/* NOTE(review): from here on the code tears down an undeclared 'irqfd'. */
eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
/* * We know no new events will be scheduled at this point, so block * until all previously outstanding events have completed
*/
flush_work(&irqfd->inject);
if (irqfd->resampler) {
irqfd_resampler_shutdown(irqfd);
eventfd_ctx_put(irqfd->resamplefd);
}
/* * It is now safe to release the object's resources
*/ #if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
irq_bypass_unregister_consumer(&irqfd->consumer); #endif
eventfd_ctx_put(irqfd->eventfd);
kfree(irqfd);
}
/* assumes kvm->irqfds.lock is held */ staticbool
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{ return list_empty(&irqfd->list) ? false : true;
}
/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
	/* Deactivating an already-inactive irqfd is a caller bug. */
	BUG_ON(!irqfd_is_active(irqfd));

	/*
	 * list_del_init() (not plain list_del) so that irqfd_is_active()
	 * reports false for this irqfd from here on.
	 */
	list_del_init(&irqfd->list);

	/* Defer the actual teardown to the dedicated cleanup workqueue. */
	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}
/*
 * Weak default implementation for architectures that cannot inject an
 * interrupt from atomic (wait-queue callback) context.  Returning
 * -EWOULDBLOCK tells the caller to fall back to process-context injection
 * (see the schedule_work() fallback in irqfd_wakeup()).
 */
int __attribute__((weak)) kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq,
						    struct kvm *kvm,
						    int irq_source_id, int level,
						    bool line_status)
{
	return -EWOULDBLOCK;
}
/* * Called with wqh->lock held and interrupts disabled
*/ staticint
irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
/*
 * NOTE(review): wait-queue wake-up callback for an irqfd's eventfd.  The
 * EPOLLIN path below is coherent, but the block is corrupted past the
 * EPOLLHUP branch: beginning at the "Note, irqfds.lock protects ..."
 * comment the text switches to what reads like the irqfd *assignment*
 * path of a different function (irqfd_update()/vfs_poll() registration;
 * 'p', 'wqh', 'irqfd_pt', 'events' and 'f' are undeclared here; goto
 * targets 'out'/'fail_poll' have no visible labels), and this function's
 * unlock, return and closing brace are missing from the visible text.
 * "staticint" above and "unsignedlong" below are fused extraction tokens.
 */
{ struct kvm_kernel_irqfd *irqfd =
container_of(wait, struct kvm_kernel_irqfd, wait);
__poll_t flags = key_to_poll(key); struct kvm_kernel_irq_routing_entry irq; struct kvm *kvm = irqfd->kvm; unsigned seq; int idx; int ret = 0;
if (flags & EPOLLIN) { /* * WARNING: Do NOT take irqfds.lock in any path except EPOLLHUP, * as KVM holds irqfds.lock when registering the irqfd with the * eventfd.
*/
u64 cnt;
/* Consume the pending event so the eventfd counter drops back to zero. */
eventfd_ctx_do_read(irqfd->eventfd, &cnt);
/* Seqcount retry loop: snapshot a consistent copy of the routing entry. */
idx = srcu_read_lock(&kvm->irq_srcu); do {
seq = read_seqcount_begin(&irqfd->irq_entry_sc);
irq = irqfd->irq_entry;
} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); /* An event has been signaled, inject an interrupt */ if (kvm_arch_set_irq_inatomic(&irq, kvm,
KVM_USERSPACE_IRQ_SOURCE_ID, 1, false) == -EWOULDBLOCK)
/* Atomic injection unsupported here: defer to process context. */
schedule_work(&irqfd->inject);
srcu_read_unlock(&kvm->irq_srcu, idx);
ret = 1;
}
if (flags & EPOLLHUP) { /* The eventfd is closing, detach from KVM */ unsignedlong iflags;
/* * Taking irqfds.lock is safe here, as KVM holds a reference to * the eventfd when registering the irqfd, i.e. this path can't * be reached while kvm_irqfd_add() is running.
*/
spin_lock_irqsave(&kvm->irqfds.lock, iflags);
/* * We must check if someone deactivated the irqfd before * we could acquire the irqfds.lock since the item is * deactivated from the KVM side before it is unhooked from * the wait-queue. If it is already deactivated, we can * simply return knowing the other side will cleanup for us. * We cannot race against the irqfd going away since the * other side is required to acquire wqh->lock, which we hold
*/ if (irqfd_is_active(irqfd))
irqfd_deactivate(irqfd);
/*
 * NOTE(review): corruption begins here — the spin_unlock_irqrestore()
 * matching the lock above is missing, and everything that follows
 * belongs to a different (assignment-path) function.
 */
/* * Note, irqfds.lock protects the irqfd's irq_entry, i.e. its routing, * and irqfds.items. It does NOT protect registering with the eventfd.
*/
spin_lock_irq(&kvm->irqfds.lock);
/* * Initialize the routing information prior to adding the irqfd to the * eventfd's waitqueue, as irqfd_wakeup() can be invoked as soon as the * irqfd is registered.
*/
irqfd_update(kvm, irqfd);
/* * Add the irqfd as a priority waiter on the eventfd, with a custom * wake-up handler, so that KVM *and only KVM* is notified whenever the * underlying eventfd is signaled.
*/
init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
/* * Temporarily lie to lockdep about holding irqfds.lock to avoid a * false positive regarding potential deadlock with irqfd_wakeup() * (see irqfd_wakeup() for details). * * Adding to the wait queue will fail if there is already a priority * waiter, i.e. if the eventfd is associated with another irqfd (in any * VM). Note, kvm_irqfd_deassign() waits for all in-flight shutdown * jobs to complete, i.e. ensures the irqfd has been removed from the * eventfd's waitqueue before returning to userspace.
*/
spin_release(&kvm->irqfds.lock.dep_map, _RET_IP_);
p->ret = add_wait_queue_priority_exclusive(wqh, &irqfd->wait);
spin_acquire(&kvm->irqfds.lock.dep_map, 0, 0, _RET_IP_); if (p->ret) goto out;
/* * Set the irqfd routing and add it to KVM's list before registering * the irqfd with the eventfd, so that the routing information is valid * and stays valid, e.g. if there are GSI routing changes, prior to * making the irqfd visible, i.e. before it might be signaled. * * Note, holding SRCU ensures a stable read of routing information, and * also prevents irqfd_shutdown() from freeing the irqfd before it's * fully initialized.
*/
idx = srcu_read_lock(&kvm->irq_srcu);
/* * Register the irqfd with the eventfd by polling on the eventfd, and * simultaneously and the irqfd to KVM's list. If there was en event * pending on the eventfd prior to registering, manually trigger IRQ * injection.
*/
irqfd_pt.irqfd = irqfd;
irqfd_pt.kvm = kvm;
init_poll_funcptr(&irqfd_pt.pt, kvm_irqfd_register);
events = vfs_poll(fd_file(f), &irqfd_pt.pt);
ret = irqfd_pt.ret; if (ret) goto fail_poll;
if (events & EPOLLIN)
schedule_work(&irqfd->inject);
/* * shutdown any irqfd's that match fd+gsi
*/ staticint
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{ struct kvm_kernel_irqfd *irqfd, *tmp; struct eventfd_ctx *eventfd;
eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd);
spin_lock_irq(&kvm->irqfds.lock);
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { /* * This clearing of irq_entry.type is needed for when * another thread calls kvm_irq_routing_update before * we flush workqueue below (we synchronize with * kvm_irq_routing_update using irqfds.lock).
*/
write_seqcount_begin(&irqfd->irq_entry_sc);
irqfd->irq_entry.type = 0;
write_seqcount_end(&irqfd->irq_entry_sc);
irqfd_deactivate(irqfd);
}
}
/* * Block until we know all outstanding shutdown jobs have completed * so that we guarantee there will not be any more interrupts on this * gsi once this deassign function returns.
*/
flush_workqueue(irqfd_cleanup_wq);
return 0;
}
/*
 * Entry point for the KVM_IRQFD ioctl: validate the flags, then dispatch
 * to either the deassign or the assign path.
 */
int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	/* Reject any flag bits this kernel does not understand. */
	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
		return -EINVAL;

	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, args);

	return kvm_irqfd_assign(kvm, args);
}
/* * This function is called as the kvm VM fd is being released. Shutdown all * irqfds that still remain open
*/ void
kvm_irqfd_release(struct kvm *kvm)
/*
 * NOTE(review): truncated by extraction.  'irqfd' and 'tmp' are declared
 * but never used below, which strongly suggests the locked
 * list_for_each_entry_safe() walk that deactivates each remaining irqfd
 * (before the workqueue flush) was dropped.  Recover it from the upstream
 * source before use.
 */
{ struct kvm_kernel_irqfd *irqfd, *tmp;
/* * Block until we know all outstanding shutdown jobs have completed * since we do not take a kvm* reference.
*/
flush_workqueue(irqfd_cleanup_wq);
}
/* * Take note of a change in irq routing. * Caller must invoke synchronize_srcu_expedited(&kvm->irq_srcu) afterwards.
*/ void kvm_irq_routing_update(struct kvm *kvm)
/*
 * NOTE(review): truncated.  irqfds.lock is acquired below but the matching
 * spin_unlock_irq() and this function's closing brace are not present in
 * the visible text — only the loop's closing brace is.  Preprocessor
 * directives ('#if'/'#endif') are fused onto code lines and must be put
 * back on their own lines before this can compile.
 */
{ struct kvm_kernel_irqfd *irqfd;
spin_lock_irq(&kvm->irqfds.lock);
list_for_each_entry(irqfd, &kvm->irqfds.items, list) { #if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) /* Under irqfds.lock, so can read irq_entry safely */ struct kvm_kernel_irq_routing_entry old = irqfd->irq_entry; #endif
/* Refresh this irqfd's cached routing entry from the new table. */
irqfd_update(kvm, irqfd);
#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) if (irqfd->producer)
kvm_arch_update_irqfd_routing(irqfd, &old, &irqfd->irq_entry); #endif
}
/* * create a host-wide workqueue for issuing deferred shutdown requests * aggregated from all vm* instances. We need our own isolated * queue to ease flushing work items when a VM exits.
*/ int kvm_irqfd_init(void)
/*
 * NOTE(review): truncated — the success path ("return 0;") and this
 * function's closing brace are missing from the visible text.
 */
{
irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0); if (!irqfd_cleanup_wq) return -ENOMEM;
/* * -------------------------------------------------------------------- * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal. * * userspace can register a PIO/MMIO address with an eventfd for receiving * notification when the memory has been touched. * --------------------------------------------------------------------
*/
/*
 * NOTE(review): the function header is missing from the visible text.
 * Judging by the fields tested ('p->addr', 'p->length', 'p->wildcard',
 * 'p->datamatch') this is the body of the address/length/datamatch
 * predicate used by ioeventfd_write() below (presumably
 * ioeventfd_in_range() — TODO confirm against upstream); 'p', 'addr',
 * 'len', 'val' and '_val' are all undeclared here.
 * "returnfalse"/"returntrue"/"unsignedlong" are fused extraction tokens.
 */
if (addr != p->addr) /* address must be precise for a hit */ returnfalse;
if (!p->length) /* length = 0 means only look at the address, so always a hit */ returntrue;
if (len != p->length) /* address-range must be precise for a hit */ returnfalse;
if (p->wildcard) /* all else equal, wildcard is always a hit */ returntrue;
/* otherwise, we have to actually compare the data */
BUG_ON(!IS_ALIGNED((unsignedlong)val, len));
/* Load the written value at its natural width for the comparison. */
switch (len) { case 1:
_val = *(u8 *)val; break; case 2:
_val = *(u16 *)val; break; case 4:
_val = *(u32 *)val; break; case 8:
_val = *(u64 *)val; break; default: returnfalse;
}
return _val == p->datamatch;
}
/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
		int len, const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	/* Not our address/length/datamatch: let another device claim the write. */
	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	/* Match: signal the eventfd so the registered consumer is notified. */
	eventfd_signal(p->eventfd);
	return 0;
}
/* * This function is called as KVM is completely shutting down. We do not * need to worry about locking just nuke anything we have as quickly as possible
*/ staticvoid
ioeventfd_destructor(struct kvm_io_device *this)
/*
 * NOTE(review): corrupted fusion.  The signature says destructor, but
 * after the first line the body switches to fragments of the ioeventfd
 * *assign* path (datamatch/wildcard setup, collision check, bus
 * registration) and of the argument-validation code from what reads like
 * kvm_ioeventfd() — note the args->len / overflow / flags checks appear
 * *after* the registration they should precede, and 'args', 'ret',
 * 'bus_idx' are undeclared here.  The destructor's real body (freeing
 * 'p') and every closing brace are missing; goto targets ('unlock_fail',
 * 'fail', 'fast_fail') have no visible labels.  "staticvoid" above and
 * the fused tokens below are extraction damage.  Recover the original
 * functions from upstream before relying on this text.
 */
{ struct _ioeventfd *p = to_ioeventfd(this);
/* The datamatch feature is optional, otherwise this is a wildcard */ if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
p->datamatch = args->datamatch; else
p->wildcard = true;
mutex_lock(&kvm->slots_lock);
/* Verify that there isn't a match already */ if (ioeventfd_check_collision(kvm, p)) {
ret = -EEXIST; goto unlock_fail;
}
kvm_iodevice_init(&p->dev, &ioeventfd_ops);
ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
&p->dev); if (ret < 0) goto unlock_fail;
bus_idx = ioeventfd_bus_from_flags(args->flags); /* must be natural-word sized, or 0 to ignore length */ switch (args->len) { case 0: case 1: case 2: case 4: case 8: break; default: return -EINVAL;
}
/* check for range overflow */ if (args->addr + args->len < args->addr) return -EINVAL;
/* check for extra flags that we don't understand */ if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) return -EINVAL;
/* ioeventfd with no length can't be combined with DATAMATCH */ if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)) return -EINVAL;
ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); if (ret) goto fail;
/* When length is ignored, MMIO is also put on a separate bus, for * faster lookups.
*/ if (!args->len && bus_idx == KVM_MMIO_BUS) {
ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); if (ret < 0) goto fast_fail;
}
/*
 * NOTE(review): trailing boilerplate from the web page this file was
 * scraped from (a German site disclaimer, not source code).  Wrapped in a
 * comment so the file remains parseable; remove entirely once confirmed.
 * Original text (English: "The information on this website has been
 * carefully compiled to the best of our knowledge. However, no guarantee
 * is given of completeness, correctness, or quality of the information
 * provided.  Remark: the syntax colouring and the measurement are still
 * experimental."):
 *
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen
 * sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit,
 * noch Richtigkeit, noch Qualität der bereit gestellten Informationen
 * zugesichert.
 * Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind noch
 * experimentell.
 */