/*
 * Bits 16 ~ 31 of kvm_userspace_memory_region::flags are internally
 * used in KVM; the other bits are visible to userspace and are defined in
 * include/uapi/linux/kvm.h.
 */
#define KVM_MEMSLOT_INVALID     (1UL << 16)
/*
 * Bit 63 of the memslot generation number is an "update in-progress flag",
 * e.g. is temporarily set for the duration of kvm_swap_active_memslots().
 * This flag effectively creates a unique generation number that is used to
 * mark cached memslot data, e.g. MMIO accesses, as potentially being stale,
 * i.e. may (or may not) have come from the previous memslots generation.
 *
 * This is necessary because the actual memslots update is not atomic with
 * respect to the generation number update.  Updating the generation number
 * first would allow a vCPU to cache a spte from the old memslots using the
 * new generation number, and updating the generation number after switching
 * to the new memslots would allow cache hits using the old generation number
 * to reference the defunct memslots.
 *
 * This mechanism is used to prevent getting hits in KVM's caches while a
 * memslot update is in-progress, and to prevent cache hits *after* updating
 * the actual generation number against accesses that were inserted into the
 * cache *before* the memslots were updated.
 */
#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS      BIT_ULL(63)
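
/*
 * Illustrative sketch (not part of this header): how a consumer of the memslot
 * generation might use it to detect stale cached data.  The cache structure and
 * helper below are made up for illustration; KVM's real MMIO caches live in
 * arch code.
 */
struct example_cached_translation {
        u64 generation;         /* slots->generation sampled when the entry was filled */
        gpa_t gpa;
        kvm_pfn_t pfn;
};

static inline bool example_cached_translation_is_stale(u64 cur_slots_generation,
                                                        const struct example_cached_translation *c)
{
        /*
         * An entry filled while KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS was set can
         * never match a "real" generation, so anything cached during
         * kvm_swap_active_memslots() is automatically discarded as stale.
         */
        return c->generation != cur_slots_generation;
}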
/* Two fragments for cross MMIO pages. */
#define KVM_MAX_MMIO_FRAGMENTS  2
/*
 * For a normal pfn, the highest 12 bits should be zero,
 * so we can mask bits 52 ~ 62 to indicate an error pfn,
 * and bit 63 to indicate the noslot pfn.
 */
#define KVM_PFN_ERR_MASK        (0x7ffULL << 52)
#define KVM_PFN_ERR_NOSLOT_MASK (0xfffULL << 52)
#define KVM_PFN_NOSLOT          (0x1ULL << 63)
/*
 * Error pfns indicate that the gfn is in a slot but failed to be
 * translated to a pfn on the host.
 */
static inline bool is_error_pfn(kvm_pfn_t pfn)
{
        return !!(pfn & KVM_PFN_ERR_MASK);
}
/*
 * KVM_PFN_ERR_SIGPENDING indicates that fetching the PFN was interrupted
 * by a pending signal.  Note, the signal may or may not be fatal.
 */
static inline bool is_sigpending_pfn(kvm_pfn_t pfn)
{
        return pfn == KVM_PFN_ERR_SIGPENDING;
}
/*
 * Error-noslot pfns indicate that the gfn can not be translated to a pfn:
 * either it is not backed by a memslot, or the translation to a pfn failed.
 */
static inline bool is_error_noslot_pfn(kvm_pfn_t pfn)
{
        return !!(pfn & KVM_PFN_ERR_NOSLOT_MASK);
}
/* Noslot pfns indicate that the gfn is not backed by any memslot. */
static inline bool is_noslot_pfn(kvm_pfn_t pfn)
{
        return pfn == KVM_PFN_NOSLOT;
}
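
/*
 * Illustrative sketch (not part of this header): a typical caller-side check of
 * the pfn returned by a gfn->pfn translation.  The error codes chosen here are
 * arbitrary; only the helpers above are real.
 */
static inline int example_check_translated_pfn(kvm_pfn_t pfn)
{
        if (is_noslot_pfn(pfn))
                return -ENOENT; /* gfn is not backed by any memslot, e.g. emulated MMIO */
        if (is_error_pfn(pfn))
                return -EFAULT; /* gfn is in a memslot but could not be translated */
        return 0;               /* normal, usable host pfn */
}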
/*
 * Architectures with a KVM_HVA_ERR_BAD other than PAGE_OFFSET (e.g. s390)
 * provide their own defines and kvm_is_error_hva().
 */
#ifndef KVM_HVA_ERR_BAD

#define KVM_HVA_ERR_BAD         (PAGE_OFFSET)
#define KVM_HVA_ERR_RO_BAD      (PAGE_OFFSET + PAGE_SIZE)

static inline bool kvm_is_error_hva(unsigned long addr)
{
        return addr >= PAGE_OFFSET;
}

#endif
/*
 * KVM_REQ_OUTSIDE_GUEST_MODE exists purely as a way to force the vCPU to
 * OUTSIDE_GUEST_MODE.  KVM_REQ_OUTSIDE_GUEST_MODE differs from a vCPU "kick"
 * in that it ensures the vCPU has reached OUTSIDE_GUEST_MODE before continuing
 * on.  A kick only guarantees that the vCPU is on its way out, e.g. a previous
 * kick may have set vcpu->mode to EXITING_GUEST_MODE, and so there's no
 * guarantee the vCPU received an IPI and has actually exited guest mode.
 */
#define KVM_REQ_OUTSIDE_GUEST_MODE      (KVM_REQUEST_NO_ACTION | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
struct kvm_host_map {
        /*
         * Only valid if the 'pfn' is managed by the host kernel (i.e. there is
         * a 'struct page' for it).  When using the mem= kernel parameter, some
         * memory can be used as guest memory without being managed by the
         * host kernel.
         */
        struct page *pinned_page;
        struct page *page;
        void *hva;
        kvm_pfn_t pfn;
        kvm_pfn_t gfn;
        bool writable;
};
/*
 * Used to check if the mapping is valid or not.  Never use 'kvm_host_map'
 * directly to check for that.
 */
static inline bool kvm_vcpu_mapped(struct kvm_host_map *map)
{
        return !!map->hva;
}
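
/*
 * Illustrative sketch (not part of this header): the usual lifecycle of a
 * kvm_host_map.  The kvm_vcpu_map()/kvm_vcpu_unmap() signatures used here are
 * assumptions based on this structure; see their declarations in the full
 * header for the authoritative prototypes.
 */
static inline int example_peek_guest_u32(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *val)
{
        struct kvm_host_map map;

        if (kvm_vcpu_map(vcpu, gpa, &map))      /* assumed to return 0 on success */
                return -EFAULT;

        if (kvm_vcpu_mapped(&map))
                *val = *(u32 *)(map.hva + offset_in_page(gpa));

        kvm_vcpu_unmap(vcpu, &map);             /* assumed signature */
        return 0;
}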
/*
 * Sometimes a large or cross-page mmio needs to be broken up into separate
 * exits for userspace servicing.
 */
struct kvm_mmio_fragment {
        gpa_t gpa;
        void *data;
        unsigned len;
};
struct kvm_vcpu {
        struct kvm *kvm;
#ifdef CONFIG_PREEMPT_NOTIFIERS
        struct preempt_notifier preempt_notifier;
#endif
        int cpu;
        int vcpu_id; /* id given by userspace at creation */
        int vcpu_idx; /* index into kvm->vcpu_array */
        int ____srcu_idx; /* Don't use this directly.  You've been warned. */
#ifdef CONFIG_PROVE_RCU
        int srcu_depth;
#endif
        int mode;
        u64 requests;
        unsigned long guest_debug;

#ifdef CONFIG_HAS_IOMEM
        int mmio_needed;
        int mmio_read_completed;
        int mmio_is_write;
        int mmio_cur_fragment;
        int mmio_nr_fragments;
        struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS];
#endif

#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
        /*
         * Cpu relax intercept or pause loop exit optimization
         * in_spin_loop: set when a vcpu does a pause loop exit
         *  or cpu relax intercepted.
         * dy_eligible: indicates whether vcpu is eligible for directed yield.
         */
        struct {
                bool in_spin_loop;
                bool dy_eligible;
        } spin_loop;
#endif
        bool wants_to_run;
        bool preempted;
        bool ready;
        bool scheduled_out;
        struct kvm_vcpu_arch arch;
        struct kvm_vcpu_stat stat;
        char stats_id[KVM_STATS_NAME_SIZE];
        struct kvm_dirty_ring dirty_ring;

        /*
         * The most recently used memslot by this vCPU and the slots generation
         * for which it is valid.
         * No wraparound protection is needed since generations won't overflow in
         * thousands of years, even assuming 1M memslot operations per second.
         */
        struct kvm_memory_slot *last_used_slot;
u64 last_used_slot_gen;
};
/*
 * Start accounting time towards a guest.
 * Must be called before entering guest context.
 */
static __always_inline void guest_timing_enter_irqoff(void)
{
        /*
         * This is running in ioctl context so it's safe to assume that it's the
         * stime pending cputime to flush.
         */
instrumentation_begin();
vtime_account_guest_enter();
instrumentation_end();
}
/*
 * Enter guest context and enter an RCU extended quiescent state.
 *
 * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
 * unsafe to use any code which may directly or indirectly use RCU, tracing
 * (including IRQ flag tracing), or lockdep.  All code in this period must be
 * non-instrumentable.
 */
static __always_inline void guest_context_enter_irqoff(void)
{
        /*
         * KVM does not hold any references to rcu protected data when it
         * switches CPU into a guest mode.  In fact switching to a guest mode
         * is very similar to exiting to userspace from an RCU point of view.
         * In addition a CPU may stay in guest mode for quite a long time (up
         * to one time slice).  Let's treat guest mode as a quiescent state,
         * just like we do with user-mode execution.
         */
        if (!context_tracking_guest_enter()) {
instrumentation_begin();
rcu_virt_note_context_switch();
instrumentation_end();
}
}
/*
 * Deprecated.  Architectures should move to guest_timing_enter_irqoff() and
 * guest_state_enter_irqoff().
 */
static __always_inline void guest_enter_irqoff(void)
{
guest_timing_enter_irqoff();
guest_context_enter_irqoff();
}
/**
 * guest_state_enter_irqoff - Fixup state when entering a guest
 *
 * Entry to a guest will enable interrupts, but the kernel state is interrupts
 * disabled when this is invoked.  Also tell RCU about it.
 *
 * 1) Trace interrupts on state
 * 2) Invoke context tracking if enabled to adjust RCU state
 * 3) Tell lockdep that interrupts are enabled
 *
 * Invoked from architecture specific code before entering a guest.
 * Must be called with interrupts disabled and the caller must be
 * non-instrumentable.
 * The caller has to invoke guest_timing_enter_irqoff() before this.
 *
 * Note: this is analogous to exit_to_user_mode().
 */
static __always_inline void guest_state_enter_irqoff(void)
{
        instrumentation_begin();
        trace_hardirqs_on_prepare();
        lockdep_hardirqs_on_prepare();
        instrumentation_end();

        guest_context_enter_irqoff();
        lockdep_hardirqs_on(CALLER_ADDR0);
}

/*
 * Exit guest context and exit an RCU extended quiescent state.
 *
 * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
 * unsafe to use any code which may directly or indirectly use RCU, tracing
 * (including IRQ flag tracing), or lockdep.  All code in this period must be
 * non-instrumentable.
 */
static __always_inline void guest_context_exit_irqoff(void)
{
        /*
         * Guest mode is treated as a quiescent state, see
         * guest_context_enter_irqoff() for more details.
         */
        if (!context_tracking_guest_exit()) {
instrumentation_begin();
rcu_virt_note_context_switch();
instrumentation_end();
}
}
/*
 * Stop accounting time towards a guest.
 * Must be called after exiting guest context.
 */
static __always_inline void guest_timing_exit_irqoff(void)
{
        instrumentation_begin();
        /* Flush the guest cputime we spent on the guest */
vtime_account_guest_exit();
instrumentation_end();
}
/*
 * Deprecated.  Architectures should move to guest_state_exit_irqoff() and
 * guest_timing_exit_irqoff().
 */
static __always_inline void guest_exit_irqoff(void)
{
guest_context_exit_irqoff();
guest_timing_exit_irqoff();
}
/**
 * guest_state_exit_irqoff - Establish state when returning from guest mode
 *
 * Entry from a guest disables interrupts, but guest mode is traced as
 * interrupts enabled.  Also with NO_HZ_FULL RCU might be idle.
 *
 * 1) Tell lockdep that interrupts are disabled
 * 2) Invoke context tracking if enabled to reactivate RCU
 * 3) Trace interrupts off state
 *
 * Invoked from architecture specific code after exiting a guest.
 * Must be invoked with interrupts disabled and the caller must be
 * non-instrumentable.
 * The caller has to invoke guest_timing_exit_irqoff() after this.
 *
 * Note: this is analogous to enter_from_user_mode().
 */
static __always_inline void guest_state_exit_irqoff(void)
{
lockdep_hardirqs_off(CALLER_ADDR0);
        guest_context_exit_irqoff();

        instrumentation_begin();
        trace_hardirqs_off_finish();
        instrumentation_end();
}

static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
{
        /*
         * The memory barrier ensures a previous write to vcpu->requests cannot
         * be reordered with the read of vcpu->mode.  It pairs with the general
         * memory barrier following the write of vcpu->mode in VCPU RUN.
         */
        smp_mb__before_atomic();
        return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
}
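
/*
 * Illustrative sketch (not part of this header): the ordering an architecture's
 * vCPU run loop is expected to follow, per the comments on
 * guest_timing_*_irqoff() and guest_state_*_irqoff() above.  The world-switch
 * helper is a placeholder for arch-specific, non-instrumentable code.
 */
void example_world_switch(struct kvm_vcpu *vcpu);       /* hypothetical */

static inline void example_vcpu_enter_exit(struct kvm_vcpu *vcpu)
{
        guest_timing_enter_irqoff();    /* start guest time accounting */
        guest_state_enter_irqoff();     /* tracing/lockdep/RCU fixups for entry */

        example_world_switch(vcpu);     /* run the guest */

        guest_state_exit_irqoff();      /* re-establish host tracing/lockdep/RCU state */
        guest_timing_exit_irqoff();     /* flush guest cputime */
}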
/*
 * Some of the bitops functions do not support very long bitmaps;
 * this number must be chosen so as not to exceed those limits.
 */
#define KVM_MEM_MAX_NR_PAGES    ((1UL << 31) - 1)
/*
 * Since at idle each memslot belongs to two memslot sets it has to contain
 * two embedded nodes for each data structure that it forms a part of.
 *
 * Two memslot sets (one active and one inactive) are necessary so the VM
 * continues to run on one memslot set while the other is being modified.
 *
 * These two memslot sets normally point to the same set of memslots.
 * They can, however, be desynchronized when performing a memslot management
 * operation by replacing the memslot to be modified by its copy.
 * After the operation is complete, both memslot sets once again point to
 * the same, common set of memslot data.
 *
 * The memslots themselves are independent of each other so they can be
 * individually added or deleted.
 */
struct kvm_memory_slot {
        struct hlist_node id_node[2];
        struct interval_tree_node hva_node[2];
        struct rb_node gfn_node[2];
        gfn_t base_gfn;
        unsigned long npages;
        unsigned long *dirty_bitmap;
        struct kvm_arch_memory_slot arch;
        unsigned long userspace_addr;
        u32 flags;
        short id;
        u16 as_id;

#ifdef CONFIG_KVM_PRIVATE_MEM
        struct {
                /*
                 * Writes protected by kvm->slots_lock.  Acquiring a
                 * reference via kvm_gmem_get_file() is protected by
                 * either kvm->slots_lock or kvm->srcu.
                 */
                struct file *file;
                pgoff_t pgoff;
        } gmem;
#endif
};
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
struct kvm_irq_routing_table {
        int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
        u32 nr_rt_entries;
        /*
         * Array indexed by gsi.  Each entry contains list of irq chips
         * the gsi is connected to.
         */
        struct hlist_head map[] __counted_by(nr_rt_entries);
};
#endif
struct kvm_memslots {
        u64 generation;
        atomic_long_t last_used_slot;
        struct rb_root_cached hva_tree;
        struct rb_root gfn_tree;
        /*
         * The mapping table from slot id to memslot.
         *
         * 7-bit bucket count matches the size of the old id to index array for
         * 512 slots, while giving good performance with this slot count.
         * Higher bucket counts bring only small performance improvements but
         * always result in higher memory usage (even for lower memslot counts).
         */
        DECLARE_HASHTABLE(id_hash, 7);
        int node_idx;
};
        /*
         * Protects the arch-specific fields of struct kvm_memory_slots in
         * use by the VM.  To be used under the slots_lock (above) or in a
         * kvm->srcu critical section where acquiring the slots_lock would
         * lead to deadlock with the synchronize_srcu in
         * kvm_swap_active_memslots().
         */
        struct mutex slots_arch_lock;
        struct mm_struct *mm; /* userspace tied to this vm */
        unsigned long nr_memslot_pages;
        /* The two memslot sets - active and inactive (per address space) */
        struct kvm_memslots __memslots[KVM_MAX_NR_ADDRESS_SPACES][2];
        /* The current active memslot set for each address space */
        struct kvm_memslots __rcu *memslots[KVM_MAX_NR_ADDRESS_SPACES];
        struct xarray vcpu_array;
        /*
         * Protected by slots_lock, but can be read outside if an
         * incorrect answer is acceptable.
         */
        atomic_t nr_memslots_dirty_logging;

        /* Used to wait for completion of MMU notifiers. */
        spinlock_t mn_invalidate_lock;
        unsigned long mn_active_invalidate_count;
        struct rcuwait mn_memslots_update_rcuwait;

        /* For management / invalidation of gfn_to_pfn_caches */
        spinlock_t gpc_lock;
        struct list_head gpc_list;

        /*
         * created_vcpus is protected by kvm->lock, and is incremented
         * at the beginning of KVM_CREATE_VCPU.  online_vcpus is only
         * incremented after storing the kvm_vcpu pointer in vcpus,
         * and is accessed atomically.
         */
        atomic_t online_vcpus;
        int max_vcpus;
        int created_vcpus;
        int last_boosted_vcpu;
        struct list_head vm_list;
        struct mutex lock;
        struct kvm_io_bus __rcu *buses[KVM_NR_BUSES];
#ifdef CONFIG_HAVE_KVM_IRQCHIP
        struct {
                spinlock_t lock;
                struct list_head items;
                /* resampler_list update side is protected by resampler_lock. */
                struct list_head resampler_list;
                struct mutex resampler_lock;
        } irqfds;
#endif
        struct list_head ioeventfds;
        struct kvm_vm_stat stat;
        struct kvm_arch arch;
        refcount_t users_count;
#ifdef CONFIG_KVM_MMIO
        struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
        spinlock_t ring_lock;
        struct list_head coalesced_zones;
#endif
        struct mutex irq_lock;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
        /*
         * Update side is protected by irq_lock.
         */
        struct kvm_irq_routing_table __rcu *irq_routing;
/*
 * Note, "data corruption" refers to corruption of host kernel data structures,
 * not guest data.  Guest data corruption, suspected or confirmed, that is tied
 * and contained to a single VM should *never* BUG() and potentially panic the
 * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure
 * is corrupted and that corruption can have a cascading effect to other parts
 * of the host and/or to other VMs.
 */
#define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm)                           \
({                                                                      \
        bool __ret = !!(cond);                                          \
                                                                        \
        if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION))                  \
                BUG_ON(__ret);                                          \
        else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged))              \
                kvm_vm_bugged(kvm);                                     \
        unlikely(__ret);                                                \
})
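
/*
 * Illustrative sketch (not part of this header): typical use of
 * KVM_BUG_ON_DATA_CORRUPTION().  The invariant being checked is made up; the
 * point is that the macro both reports the corruption and evaluates to the
 * condition so the caller can bail out.
 */
static inline int example_validate_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
        /* Hypothetical invariant: a tracked memslot must have a non-zero size. */
        if (KVM_BUG_ON_DATA_CORRUPTION(slot && !slot->npages, kvm))
                return -EIO;

        return 0;
}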
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
        int num_vcpus = atomic_read(&kvm->online_vcpus);

        /*
         * Explicitly verify the target vCPU is online, as the anti-speculation
         * logic only limits the CPU's ability to speculate, e.g. given a "bad"
         * index, clamping the index to 0 would return vCPU0, not NULL.
         */
        if (i >= num_vcpus)
                return NULL;
        i = array_index_nospec(i, num_vcpus);

        /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu.  */
        smp_rmb();
        return xa_load(&kvm->vcpu_array, i);
}
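
/*
 * Illustrative sketch (not part of this header): iterating online vCPUs with
 * kvm_get_vcpu().  In-tree code normally uses the kvm_for_each_vcpu() iterator;
 * this open-coded loop just shows the online/NULL checks.
 */
static inline void example_kick_all_vcpus(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        int i;

        for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) {
                vcpu = kvm_get_vcpu(kvm, i);
                if (!vcpu)
                        break;
                kvm_vcpu_kick(vcpu);
        }
}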
static inline void kvm_memslot_iter_start(struct kvm_memslot_iter *iter,
                                          struct kvm_memslots *slots,
                                          gfn_t start)
{
        int idx = slots->node_idx;
        struct rb_node *tmp;
        struct kvm_memory_slot *slot;

        /*
         * Find the so called "upper bound" of a key - the first node that has
         * its key strictly greater than the searched one (the start gfn in our case).
         */
        iter->node = NULL;
        for (tmp = slots->gfn_tree.rb_node; tmp; ) {
                slot = container_of(tmp, struct kvm_memory_slot, gfn_node[idx]);
                if (start < slot->base_gfn) {
iter->node = tmp;
tmp = tmp->rb_left;
} else {
tmp = tmp->rb_right;
}
}
        /*
         * Find the slot with the lowest gfn that can possibly intersect with
         * the range, so we'll ideally have slot start <= range start
         */
        if (iter->node) {
                /*
                 * A NULL previous node means that the very first slot
                 * already has a higher start gfn.
                 * In this case slot start > range start.
                 */
                tmp = rb_prev(iter->node);
                if (tmp)
iter->node = tmp;
        } else {
                /* a NULL node below means no slots */
iter->node = rb_last(&slots->gfn_tree);
}
if (iter->node) {
iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[idx]);
                /*
                 * It is possible in the slot start < range start case that the
                 * found slot ends before or at range start (slot end <= range start)
                 * and so it does not overlap the requested range.
                 *
                 * In such non-overlapping case the next slot (if it exists) will
                 * already have slot start > range start, otherwise the logic above
                 * would have found it instead of the current slot.
                 */
                if (iter->slot->base_gfn + iter->slot->npages <= start)
kvm_memslot_iter_next(iter);
}
}
static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_t end)
{
        if (!iter->node)
                return false;

        /*
         * If this slot starts beyond or at the end of the range, so does
         * every next one
         */
        return iter->slot->base_gfn < end;
}
/* Iterate over each memslot at least partially intersecting [start, end) range */
#define kvm_for_each_memslot_in_gfn_range(iter, slots, start, end)     \
        for (kvm_memslot_iter_start(iter, slots, start);               \
             kvm_memslot_iter_is_valid(iter, end);                     \
             kvm_memslot_iter_next(iter))
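
/*
 * Illustrative sketch (not part of this header): walking every memslot that
 * intersects a gfn range, e.g. to count how many pages of the range are backed
 * by memslots.  The helper itself is made up for illustration.
 */
static inline unsigned long example_count_backed_pages(struct kvm_memslots *slots,
                                                       gfn_t start, gfn_t end)
{
        struct kvm_memslot_iter iter;
        unsigned long pages = 0;

        kvm_for_each_memslot_in_gfn_range(&iter, slots, start, end) {
                struct kvm_memory_slot *slot = iter.slot;
                gfn_t first = max(start, slot->base_gfn);
                gfn_t last = min(end, slot->base_gfn + (gfn_t)slot->npages);

                pages += last - first;
        }

        return pages;
}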
/*
 * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
 * - create a new memory slot
 * - delete an existing memory slot
 * - modify an existing memory slot
 *   -- move it in the guest physical memory space
 *   -- just change its flags
 *
 * Since flags can be changed by some of these operations, the following
 * differentiation is the best we can do for kvm_set_memory_region():
 */
enum kvm_mr_change {
KVM_MR_CREATE,
KVM_MR_DELETE,
KVM_MR_MOVE,
KVM_MR_FLAGS_ONLY,
};
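
/*
 * Illustrative sketch (not part of this header): how code handed an
 * enum kvm_mr_change typically dispatches on it.  The actions in the comments
 * are generic; the real consumers are kvm_arch_prepare_memory_region() and
 * related arch hooks.
 */
static inline void example_handle_mr_change(enum kvm_mr_change change)
{
        switch (change) {
        case KVM_MR_CREATE:
        case KVM_MR_MOVE:
                /* (re)build mappings for the new guest physical range */
                break;
        case KVM_MR_DELETE:
                /* tear down mappings for the range being removed */
                break;
        case KVM_MR_FLAGS_ONLY:
                /* e.g. dirty logging was toggled; mappings stay in place */
                break;
        }
}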
        /*
         * If the page that KVM got from the *primary MMU* is writable, and KVM
         * installed or reused a SPTE, mark the page/folio dirty.  Note, this
         * may mark a folio dirty even if KVM created a read-only SPTE, e.g. if
         * the GFN is write-protected.  Folios can't be safely marked dirty
         * outside of mmu_lock as doing so could race with writeback on the
         * folio.  As a result, KVM can't mark folios dirty in the fast page
         * fault handler, and so KVM must (somewhat) speculatively mark the
         * folio dirty if KVM could locklessly make the SPTE writable.
         */
        if (unused)
                kvm_release_page_unused(page);
        else if (dirty)
                kvm_release_page_dirty(page);
        else
                kvm_release_page_clean(page);
}
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
                             int len);
int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data,
                               unsigned long len);
int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data,
                        unsigned long len);
int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data,
                              int offset, int len);
int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
                         unsigned long len);
void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
/**
 * kvm_gpc_init - initialize gfn_to_pfn_cache.
 *
 * @gpc:           struct gfn_to_pfn_cache object.
 * @kvm:           pointer to kvm instance.
 *
 * This sets up a gfn_to_pfn_cache by initializing locks and assigning the
 * immutable attributes.  Note, the cache must be zero-allocated (or zeroed by
 * the caller before init).
 */
void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm);
/**
 * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
 *                    physical address.
 *
 * @gpc:           struct gfn_to_pfn_cache object.
 * @gpa:           guest physical address to map.
 * @len:           sanity check; the range being accessed must fit a single page.
 *
 * @return:        0 for success.
 *                 -EINVAL for a mapping which would cross a page boundary.
 *                 -EFAULT for an untranslatable guest physical address.
 *
 * This primes a gfn_to_pfn_cache and links it into the @gpc->kvm's list for
 * invalidations to be processed.  Callers are required to use kvm_gpc_check()
 * to ensure that the cache is valid before accessing the target page.
 */
int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
/**
 * kvm_gpc_activate_hva - prepare a cached kernel mapping and HPA for a given HVA.
 *
 * @gpc:           struct gfn_to_pfn_cache object.
 * @hva:           userspace virtual address to map.
 * @len:           sanity check; the range being accessed must fit a single page.
 *
 * @return:        0 for success.
 *                 -EINVAL for a mapping which would cross a page boundary.
 *                 -EFAULT for an untranslatable guest physical address.
 *
 * The semantics of this function are the same as those of kvm_gpc_activate().  It
 * merely bypasses a layer of address translation.
 */
int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long hva, unsigned long len);
/**
 * kvm_gpc_check - check validity of a gfn_to_pfn_cache.
 *
 * @gpc:           struct gfn_to_pfn_cache object.
 * @len:           sanity check; the range being accessed must fit a single page.
 *
 * @return:        %true if the cache is still valid and the address matches.
 *                 %false if the cache is not valid.
 *
 * Callers outside IN_GUEST_MODE context should hold a read lock on @gpc->lock
 * while calling this function, and then continue to hold the lock until the
 * access is complete.
 *
 * Callers in IN_GUEST_MODE may do so without locking, although they should
 * still hold a read lock on kvm->srcu for the memslot checks.
 */
bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len);
/**
 * kvm_gpc_refresh - update a previously initialized cache.
 *
 * @gpc:           struct gfn_to_pfn_cache object.
 * @len:           sanity check; the range being accessed must fit a single page.
 *
 * @return:        0 for success.
 *                 -EINVAL for a mapping which would cross a page boundary.
 *                 -EFAULT for an untranslatable guest physical address.
 *
 * This will attempt to refresh a gfn_to_pfn_cache.  Note that a successful
 * return from this function does not mean the page can be immediately
 * accessed because it may have raced with an invalidation.  Callers must
 * still lock and check the cache status, as this function does not return
 * with the lock still held to permit access.
 */
int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len);
/**
 * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache.
 *
 * @gpc:           struct gfn_to_pfn_cache object.
 *
 * This removes a cache from the VM's list to be processed on MMU notifier
 * invocation.
 */
void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc);
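
/*
 * Illustrative sketch (not part of this header): the documented
 * init/activate/check/refresh/deactivate lifecycle of a gfn_to_pfn_cache,
 * modelled on typical in-tree users.  The gpc->lock rwlock referenced below is
 * an assumption about the cache layout, and real users access the mapped data
 * through a field such as gpc->khva.
 */
static inline void example_gpc_lifecycle(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
                                         gpa_t gpa)
{
        kvm_gpc_init(gpc, kvm);

        if (kvm_gpc_activate(gpc, gpa, sizeof(u64)))
                return;

        read_lock(&gpc->lock);                          /* assumed: rwlock */
        while (!kvm_gpc_check(gpc, sizeof(u64))) {
                read_unlock(&gpc->lock);

                /* Refresh outside the read lock, then re-check under it. */
                if (kvm_gpc_refresh(gpc, sizeof(u64)))
                        goto out;

                read_lock(&gpc->lock);
        }

        /* ... access the cached mapping here ... */

        read_unlock(&gpc->lock);
out:
        kvm_gpc_deactivate(gpc);
}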
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min);
int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc);
void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
#endif
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
/*
 * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under
 * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of
 * kvm_usage_count, i.e. at the beginning of the generic hardware enabling
 * sequence, and at the end of the generic hardware disabling sequence.
 */
void kvm_arch_enable_virtualization(void);
void kvm_arch_disable_virtualization(void);
/*
 * kvm_arch_{enable,disable}_virtualization_cpu() are called on "every" CPU to
 * do the actual twiddling of hardware bits.  The hooks are called on all
 * online CPUs when KVM enables/disables virtualization, and on a single CPU
 * when that CPU is onlined/offlined (including for Resume/Suspend).
 */
int kvm_arch_enable_virtualization_cpu(void);
void kvm_arch_disable_virtualization_cpu(void);
#endif

bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu);
void kvm_arch_pre_destroy_vm(struct kvm *kvm);
void kvm_arch_create_vm_debugfs(struct kvm *kvm);
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
/*
 * All architectures that want to use vzalloc currently also
 * need their own kvm_arch_alloc_vm implementation.
 */
static inline struct kvm *kvm_arch_alloc_vm(void)
{
        return kzalloc(sizeof(struct kvm), GFP_KERNEL_ACCOUNT);
}
#endif
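
/*
 * Illustrative sketch (not part of this header): what an architecture's
 * override looks like when 'struct kvm' is too large for kzalloc().  The arch
 * defines __KVM_HAVE_ARCH_VM_ALLOC in its own headers and supplies something
 * along these lines (x86, for example, uses a vzalloc-style allocation).
 */
#if 0   /* lives in an arch header, shown here for illustration only */
#define __KVM_HAVE_ARCH_VM_ALLOC
static inline struct kvm *kvm_arch_alloc_vm(void)
{
        return __vmalloc(sizeof(struct kvm), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
}
#endif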
/*
 * Wake a vCPU if necessary, but don't do any stats/metadata updates.  Returns
 * true if the vCPU was blocking and was awakened, false otherwise.
 */
static inline bool __kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
{
        return !!rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
}
#ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED
/*
 * Returns true if the virtual interrupt controller is initialized and
 * ready to accept virtual IRQs.  On some architectures the virtual interrupt
 * controller is dynamically instantiated and this is not always true.
 */
bool kvm_arch_intc_initialized(struct kvm *kvm);
#else
static inline bool kvm_arch_intc_initialized(struct kvm *kvm)
{
        return true;
}
#endif
int kvm_irq_map_gsi(struct kvm *kvm,
                    struct kvm_kernel_irq_routing_entry *entries, int gsi);
int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);

int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
                bool line_status);
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
                int irq_source_id, int level, bool line_status);
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
                              struct kvm *kvm, int irq_source_id,
                              int level, bool line_status);
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_notify_acked_gsi(struct kvm *kvm, int gsi);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_register_irq_ack_notifier(struct kvm *kvm,
                                   struct kvm_irq_ack_notifier *kian);
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
                                     struct kvm_irq_ack_notifier *kian);
bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
/*
 * Returns a pointer to the memslot if it contains gfn.
 * Otherwise returns NULL.
 */
static inline struct kvm_memory_slot *
try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
        if (!slot)
                return NULL;

        if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages)
                return slot;
        else
                return NULL;
}
/*
 * Returns a pointer to the memslot that contains gfn.  Otherwise returns NULL.
 *
 * With "approx" set returns the memslot also when the address falls
 * in a hole.  In that case one of the memslots bordering the hole is
 * returned.
 */
static inline struct kvm_memory_slot *
search_memslots(struct kvm_memslots *slots, gfn_t gfn, bool approx)
{
        struct kvm_memory_slot *slot;
        struct rb_node *node;
        int idx = slots->node_idx;
/*
 * __gfn_to_memslot() and its descendants are here to allow arch code to inline
 * the lookups in hot paths.  gfn_to_memslot() itself isn't here as an inline
 * because that would bloat other code too much.
 */
static inline struct kvm_memory_slot *
__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
{
        return ____gfn_to_memslot(slots, gfn, false);
}
static inline unsigned long
__gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
{
        /*
         * The index was checked originally in search_memslots.  To avoid
         * that a malicious guest builds a Spectre gadget out of e.g. page
         * table walks, do not let the processor speculate loads outside
         * the guest's registered memslots.
         */
        unsigned long offset = gfn - slot->base_gfn;

        offset = array_index_nospec(offset, slot->npages);
        return slot->userspace_addr + offset * PAGE_SIZE;
}
/**
 * kvm_stats_linear_hist_update() - Update bucket value for linear histogram
 * statistics data.
 *
 * @data: start address of the stats data
 * @size: the number of buckets of the stats data
 * @value: the new value used to update the linear histogram's bucket
 * @bucket_size: the size (width) of a bucket
 */
static inline void kvm_stats_linear_hist_update(u64 *data, size_t size,
                                                u64 value, size_t bucket_size)
{
size_t index = div64_u64(value, bucket_size);
index = min(index, size - 1);
++data[index];
}
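
/*
 * Worked example (illustrative): with bucket_size = 1000 and size = 8 buckets,
 * a value of 2500 increments bucket 2500 / 1000 = 2, while a value of 100000
 * computes index 100 and is clamped to the last bucket, index 7.
 */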
/**
 * kvm_stats_log_hist_update() - Update bucket value for logarithmic histogram
 * statistics data.
 *
 * @data: start address of the stats data
 * @size: the number of buckets of the stats data
 * @value: the new value used to update the logarithmic histogram's bucket
 */
static inline void kvm_stats_log_hist_update(u64 *data, size_t size, u64 value)
{
        size_t index = fls64(value);

        index = min(index, size - 1);
        ++data[index];
}
#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
{
        if (unlikely(kvm->mmu_invalidate_in_progress))
                return 1;
        /*
         * Ensure the read of mmu_invalidate_in_progress happens before
         * the read of mmu_invalidate_seq.  This interacts with the
         * smp_wmb() in mmu_notifier_invalidate_range_end to make sure
         * that the caller either sees the old (non-zero) value of
         * mmu_invalidate_in_progress or the new (incremented) value of
         * mmu_invalidate_seq.
         *
         * PowerPC Book3s HV KVM calls this under a per-page lock rather
         * than under kvm->mmu_lock, for scalability, so can't rely on
         * kvm->mmu_lock to keep things ordered.
         */
        smp_rmb();
        if (kvm->mmu_invalidate_seq != mmu_seq)
                return 1;
        return 0;
}
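
/*
 * Illustrative sketch (not part of this header): the canonical retry pattern
 * around mmu_invalidate_retry() in an arch page fault path.  The pfn-resolution
 * helper is a placeholder, and taking mmu_lock as a write-mode rwlock assumes
 * KVM_HAVE_MMU_RWLOCK.
 */
kvm_pfn_t example_resolve_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);        /* hypothetical */

static inline int example_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
        struct kvm *kvm = vcpu->kvm;
        unsigned long mmu_seq;
        kvm_pfn_t pfn;

retry:
        mmu_seq = kvm->mmu_invalidate_seq;
        smp_rmb();

        /* Resolve the pfn outside of mmu_lock; this may sleep. */
        pfn = example_resolve_pfn(vcpu, gfn);

        write_lock(&kvm->mmu_lock);
        if (mmu_invalidate_retry(kvm, mmu_seq)) {
                /* An MMU notifier invalidation ran concurrently; start over. */
                write_unlock(&kvm->mmu_lock);
                goto retry;
        }

        /* ... install the mapping for pfn under mmu_lock ... */

        write_unlock(&kvm->mmu_lock);
        return 0;
}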
static inline int mmu_invalidate_retry_gfn(struct kvm *kvm,
                                           unsigned long mmu_seq,
                                           gfn_t gfn)
{
        lockdep_assert_held(&kvm->mmu_lock);
        /*
         * If mmu_invalidate_in_progress is non-zero, then the range maintained
         * by kvm_mmu_notifier_invalidate_range_start contains all addresses
         * that might be being invalidated.
         */