// SPDX-License-Identifier: GPL-2.0-only
/*
 * eventfd support for mshv
 *
 * Heavily inspired from KVM implementation of irqfd/ioeventfd. The basic
 * framework code is taken from the kvm implementation.
 *
 * All credits to kvm developers.
 */
/*
 * NOTE(review): fragment — the enclosing function's opening (apparently the
 * irq-resampler notification path that walks the resampler's irqfd list,
 * clears the virtual interrupt and signals each resample eventfd) lies
 * outside this chunk.  Verify against the full file; do not edit in
 * isolation.
 */
hlist_for_each_entry_rcu(irqfd, &resampler->rsmplr_irqfd_list,
irqfd_resampler_hnode) { if (hv_should_clear_interrupt(irqfd->irqfd_lapic_irq.lapic_control.interrupt_type))
hv_call_clear_virtual_interrupt(partition->pt_id);
eventfd_signal(irqfd->irqfd_resamplefd);
}
srcu_read_unlock(&partition->pt_irq_srcu, idx);
}
#if IS_ENABLED(CONFIG_X86_64)
/*
 * Return true if @vector is already present in @iv, the by-value snapshot of
 * the vp register page's shared interrupt vector array taken by the caller.
 */
static bool
mshv_vp_irq_vector_injected(union hv_vp_register_page_interrupt_vectors iv,
			    u32 vector)
{
	int i;

	for (i = 0; i < iv.vector_count; i++) {
		if (iv.vector[i] == vector)
			return true;
	}

	return false;
}
/*
 * Try once to publish @vector in @vp's register-page vector array.
 *
 * Returns 0 on success or when the vector is already present, -ENOSPC when
 * the array is full, and -EAGAIN when the cmpxchg lost a race with a
 * concurrent update (caller may retry).
 */
static int mshv_vp_irq_try_set_vector(struct mshv_vp *vp, u32 vector)
{
	union hv_vp_register_page_interrupt_vectors iv, new_iv;

	iv = vp->vp_register_page->interrupt_vectors;
	new_iv = iv;

	if (mshv_vp_irq_vector_injected(iv, vector))
		return 0;

	if (iv.vector_count >= HV_VP_REGISTER_PAGE_MAX_VECTOR_COUNT)
		return -ENOSPC;

	new_iv.vector[new_iv.vector_count++] = vector;

	/* Publish the whole array atomically; detect racing writers. */
	if (cmpxchg(&vp->vp_register_page->interrupt_vectors.as_uint64,
		    iv.as_uint64, new_iv.as_uint64) != iv.as_uint64)
		return -EAGAIN;

	return 0;
}
/*
 * Publish @vector in @vp's register page, retrying on cmpxchg races until
 * we succeed, hit a terminal error, or this task needs rescheduling (in
 * which case the last -EAGAIN is returned and the caller falls back to the
 * slow path).
 */
static int mshv_vp_irq_set_vector(struct mshv_vp *vp, u32 vector)
{
	int ret;

	do {
		ret = mshv_vp_irq_try_set_vector(vp, vector);
	} while (ret == -EAGAIN && !need_resched());

	return ret;
}
/*
 * NOTE(review): this span appears to splice two different functions together.
 * mshv_try_assert_irq_fast() below never returns/closes — after the -EBUSY
 * check the text jumps into what looks like the tail of an irqfd shutdown
 * routine (remove_wait_queue()/eventfd_ctx_put()/kfree()).  Lines were
 * probably lost in extraction; recover them from the original file rather
 * than reconstructing here.
 */
/* * Try to raise irq for guest via shared vector array. hyp does the actual * inject of the interrupt.
 */ staticint mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd)
{ struct mshv_partition *partition = irqfd->irqfd_partn; struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq; struct mshv_vp *vp;
/* Fast path requires hypervisor dispatch-interrupt-injection support. */
if (!(ms_hyperv.ext_features &
HV_VP_DISPATCH_INTERRUPT_INJECTION_AVAILABLE)) return -EOPNOTSUPP;
if (hv_scheduler_type != HV_SCHEDULER_TYPE_ROOT) return -EOPNOTSUPP;
/* Only physical destination mode is handled by this path. */
if (irq->lapic_control.logical_dest_mode) return -EOPNOTSUPP;
vp = partition->pt_vp_array[irq->lapic_apic_id];
if (!vp->vp_register_page) return -EOPNOTSUPP;
if (mshv_vp_irq_set_vector(vp, irq->lapic_vector)) return -EINVAL;
if (vp->run.flags.root_sched_dispatched &&
vp->vp_register_page->interrupt_vectors.as_uint64) return -EBUSY;
/* ---- NOTE(review): splice point; shutdown-routine tail follows ---- */
/* * Synchronize with the wait-queue and unhook ourselves to prevent * further events.
 */
remove_wait_queue(irqfd->irqfd_wqh, &irqfd->irqfd_wait);
if (irqfd->irqfd_resampler) {
mshv_irqfd_resampler_shutdown(irqfd);
eventfd_ctx_put(irqfd->irqfd_resamplefd);
}
/* * It is now safe to release the object's resources
 */
eventfd_ctx_put(irqfd->irqfd_eventfd_ctx);
kfree(irqfd);
}
/* assumes partition->pt_irqfds_lock is held */ staticbool mshv_irqfd_is_active(struct mshv_irqfd *irqfd)
{ return !hlist_unhashed(&irqfd->irqfd_hnode);
}
/* * Mark the irqfd as inactive and schedule it for removal * * assumes partition->pt_irqfds_lock is held
 */ staticvoid mshv_irqfd_deactivate(struct mshv_irqfd *irqfd)
{ if (!mshv_irqfd_is_active(irqfd)) return;
/*
 * NOTE(review): truncated — the unhash + work-queueing body and closing
 * brace of mshv_irqfd_deactivate() are missing from this chunk; recover
 * from the original file.
 */
/*
 * NOTE(review): this span splices mshv_irqfd_wakeup() with the tail of the
 * poll-table queue callback (mshv_irqfd_queue_proc — note the `wqh`
 * parameter used below that is not in wakeup's scope).  The wakeup
 * function's unlock/return and the queue-proc's opening are missing;
 * recover from the original file rather than reconstructing here.
 */
/* * Called with wqh->lock held and interrupts disabled
 */ staticint mshv_irqfd_wakeup(wait_queue_entry_t *wait, unsignedint mode, int sync, void *key)
{ struct mshv_irqfd *irqfd = container_of(wait, struct mshv_irqfd,
irqfd_wait); unsignedlong flags = (unsignedlong)key; int idx; unsignedint seq; struct mshv_partition *pt = irqfd->irqfd_partn; int ret = 0;
if (flags & POLLIN) {
u64 cnt;
/* Consume the eventfd count so the eventfd is re-armed. */
eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt);
/* Wait for any in-flight update of the cached irq entry to settle. */
idx = srcu_read_lock(&pt->pt_irq_srcu); do {
seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
} while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
/* An event has been signaled, raise an interrupt */
ret = mshv_try_assert_irq_fast(irqfd); if (ret)
mshv_assert_irq_slow(irqfd);
srcu_read_unlock(&pt->pt_irq_srcu, idx);
ret = 1;
}
if (flags & POLLHUP) { /* The eventfd is closing, detach from the partition */ unsignedlong flags;
spin_lock_irqsave(&pt->pt_irqfds_lock, flags);
/* * We must check if someone deactivated the irqfd before * we could acquire the pt_irqfds_lock since the item is * deactivated from the mshv side before it is unhooked from * the wait-queue. If it is already deactivated, we can * simply return knowing the other side will cleanup for us. * We cannot race against the irqfd going away since the * other side is required to acquire wqh->lock, which we hold
 */ if (mshv_irqfd_is_active(irqfd))
mshv_irqfd_deactivate(irqfd);
/* ---- NOTE(review): splice point; queue-proc tail follows ---- */
/* * TODO: Ensure there isn't already an exclusive, priority waiter, e.g. * that the irqfd isn't already bound to another partition. Only the * first exclusive waiter encountered will be notified, and * add_wait_queue_priority() doesn't enforce exclusivity.
 */
irqfd->irqfd_wait.flags |= WQ_FLAG_EXCLUSIVE;
add_wait_queue_priority(wqh, &irqfd->irqfd_wait);
}
/*
 * NOTE(review): interior fragments of mshv_irqfd_assign() — the function
 * opening, local declarations (rp, tmp, events, eventfd, resamplefd, f,
 * args), resampler-list wiring, and the list insertion/pending-event check
 * are missing from this chunk.  Recover from the original file; only the
 * error/cleanup structure is fully visible here.
 */
if (!irqfd->irqfd_resampler) {
/* Allocate a new resampler for this partition if none matched. */
rp = kzalloc(sizeof(*rp), GFP_KERNEL_ACCOUNT); if (!rp) {
ret = -ENOMEM;
mutex_unlock(&pt->irqfds_resampler_lock); goto fail;
}
/* * Install our own custom wake-up handling so we are notified via * a callback whenever someone signals the underlying eventfd
 */
init_waitqueue_func_entry(&irqfd->irqfd_wait, mshv_irqfd_wakeup);
init_poll_funcptr(&irqfd->irqfd_polltbl, mshv_irqfd_queue_proc);
spin_lock_irq(&pt->pt_irqfds_lock); if (args->flags & BIT(MSHV_IRQFD_BIT_RESAMPLE) &&
!irqfd->irqfd_lapic_irq.lapic_control.level_triggered) { /* * Resample Fd must be for level triggered interrupt * Otherwise return with failure
 */
spin_unlock_irq(&pt->pt_irqfds_lock);
ret = -EINVAL; goto fail;
}
ret = 0;
hlist_for_each_entry(tmp, &pt->pt_irqfds_list, irqfd_hnode) { if (irqfd->irqfd_eventfd_ctx != tmp->irqfd_eventfd_ctx) continue; /* This fd is used for another irq already. */
ret = -EBUSY;
spin_unlock_irq(&pt->pt_irqfds_lock); goto fail;
}
/* * Check if there was an event already pending on the eventfd * before we registered, and trigger it as if we didn't miss it.
 */
events = vfs_poll(fd_file(f), &irqfd->irqfd_polltbl);
/* Error path: undo resampler hookup and drop eventfd references. */
fail: if (irqfd->irqfd_resampler)
mshv_irqfd_resampler_shutdown(irqfd);
if (resamplefd && !IS_ERR(resamplefd))
eventfd_ctx_put(resamplefd);
if (eventfd && !IS_ERR(eventfd))
eventfd_ctx_put(eventfd);
out:
kfree(irqfd); return ret;
}
/*
 * Shutdown any irqfds that match fd+gsi.
 *
 * NOTE(review): mshv_irqfd_deactivate() documents that pt_irqfds_lock must
 * be held (see its header comment), and mshv_irqfd_release() takes the lock
 * around the identical list walk.  The original text walked the list
 * unlocked; take the lock here for consistency — confirm against upstream.
 */
static int mshv_irqfd_deassign(struct mshv_partition *pt,
			       struct mshv_user_irqfd *args)
{
	struct mshv_irqfd *irqfd;
	struct hlist_node *n;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&pt->pt_irqfds_lock);

	hlist_for_each_entry_safe(irqfd, n, &pt->pt_irqfds_list,
				  irqfd_hnode) {
		if (irqfd->irqfd_eventfd_ctx == eventfd &&
		    irqfd->irqfd_irqnum == args->gsi)
			mshv_irqfd_deactivate(irqfd);
	}

	spin_unlock_irq(&pt->pt_irqfds_lock);

	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}
/*
 * Userspace entry point for irqfd (de)assignment: validate the flag mask,
 * then dispatch to deassign or assign.
 */
int mshv_set_unset_irqfd(struct mshv_partition *pt,
			 struct mshv_user_irqfd *args)
{
	if (args->flags & ~MSHV_IRQFD_FLAGS_MASK)
		return -EINVAL;

	if (args->flags & BIT(MSHV_IRQFD_BIT_DEASSIGN))
		return mshv_irqfd_deassign(pt, args);

	return mshv_irqfd_assign(pt, args);
}
/*
 * This function is called as the mshv VM fd is being released.
 * Shutdown all irqfds that still remain open.
 */
static void mshv_irqfd_release(struct mshv_partition *pt)
{
	struct mshv_irqfd *irqfd;
	struct hlist_node *n;

	spin_lock_irq(&pt->pt_irqfds_lock);

	hlist_for_each_entry_safe(irqfd, n, &pt->pt_irqfds_list, irqfd_hnode)
		mshv_irqfd_deactivate(irqfd);

	spin_unlock_irq(&pt->pt_irqfds_lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a mshv_partition* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);
}
/*
 * NOTE(review): mshv_irqfd_wq_init() below is truncated — its `return 0;`
 * and closing brace are missing, and the text then jumps into an interior
 * fragment of the ioeventfd assign path (function opening, locals p/args/
 * pt/doorbell_flags and the unlock_fail label are not visible).  Recover
 * the lost lines from the original file.
 */
int mshv_irqfd_wq_init(void)
{
irqfd_cleanup_wq = alloc_workqueue("mshv-irqfd-cleanup", 0, 0); if (!irqfd_cleanup_wq) return -ENOMEM;
/* ---- NOTE(review): splice point; ioeventfd fragment follows ---- */
/* * -------------------------------------------------------------------- * ioeventfd: translate a MMIO memory write to an eventfd signal. * * userspace can register a MMIO address with an eventfd for receiving * notification when the memory has been touched. * --------------------------------------------------------------------
 */
/* The datamatch feature is optional, otherwise this is a wildcard */ if (args->flags & BIT(MSHV_IOEVENTFD_BIT_DATAMATCH)) {
p->iovntfd_datamatch = args->datamatch;
} else {
p->iovntfd_wildcard = true;
doorbell_flags |= HV_DOORBELL_FLAG_TRIGGER_ANY_VALUE;
}
if (ioeventfd_check_collision(pt, p)) {
ret = -EEXIST; goto unlock_fail;
}
ret = mshv_register_doorbell(pt->pt_id, ioeventfd_mmio_write,
(void *)pt, p->iovntfd_addr,
p->iovntfd_datamatch, doorbell_flags); if (ret < 0) goto unlock_fail;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.