raw_spin_lock(&b->lock);
e = _find_apf_task(b, token); if (e) { /* dummy entry exist -> wake up was delivered ahead of PF */
hlist_del(&e->link);
raw_spin_unlock(&b->lock);
kfree(e); returnfalse;
}
/* * kvm_async_pf_task_wait_schedule - Wait for pagefault to be handled * @token: Token to identify the sleep node entry * * Invoked from the async pagefault handling code or from the VM exit page * fault handler. In both cases RCU is watching.
*/ void kvm_async_pf_task_wait_schedule(u32 token)
{ struct kvm_task_sleep_node n;
DECLARE_SWAITQUEUE(wait);
lockdep_assert_irqs_disabled();
if (!kvm_async_pf_queue_task(token, &n)) return;
for (;;) {
prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE); if (hlist_unhashed(&n.link)) break;
again:
raw_spin_lock(&b->lock);
n = _find_apf_task(b, token); if (!n) { /* * Async #PF not yet handled, add a dummy entry for the token. * Allocating the token must be down outside of the raw lock * as the allocator is preemptible on PREEMPT_RT kernels.
*/ if (!dummy) {
raw_spin_unlock(&b->lock);
dummy = kzalloc(sizeof(*dummy), GFP_ATOMIC);
/* * Continue looping on allocation failure, eventually * the async #PF will be handled and allocating a new * node will be unnecessary.
*/ if (!dummy)
cpu_relax();
/* * Recheck for async #PF completion before enqueueing * the dummy token to avoid duplicate list entries.
*/ goto again;
}
dummy->token = token;
dummy->cpu = smp_processor_id();
init_swait_queue_head(&dummy->wq);
hlist_add_head(&dummy->link, &b->list);
dummy = NULL;
} else {
apf_task_wake_one(n);
}
raw_spin_unlock(&b->lock);
/* A dummy token might be allocated and ultimately not used. */
kfree(dummy);
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
state = irqentry_enter(regs);
instrumentation_begin();
/* * If the host managed to inject an async #PF into an interrupt * disabled region, then die hard as this is not going to end well * and the host side is seriously broken.
*/ if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
panic("Host injected async #PF in interrupt disabled region\n");
if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) { if (unlikely(!(user_mode(regs))))
panic("Host injected async #PF in kernel mode\n"); /* Page is swapped out by the host. */
kvm_async_pf_task_wait_schedule(token);
} else {
WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
}
static notrace __maybe_unused void kvm_guest_apic_eoi_write(void)
{
	/*
	 * This relies on __test_and_clear_bit to modify the memory
	 * in a way that is atomic with respect to the local CPU.
	 * The hypervisor only accesses this memory from the local CPU so
	 * there's no need for lock or memory barriers.
	 * An optimization barrier is implied in apic write.
	 *
	 * NOTE: plain comment, not "/**" — this is not a kerneldoc header.
	 */
	if (__test_and_clear_bit(KVM_PV_EOI_BIT, this_cpu_ptr(&kvm_apic_eoi)))
		return;
	apic_native_eoi();
}
staticvoid kvm_guest_cpu_init(void)
{ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
u64 pa;
wrmsrq(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(async_pf_enabled, true);
pr_debug("setup async PF for cpu %d\n", smp_processor_id());
}
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) { unsignedlong pa;
/* Size alignment is implied but just to make it explicit. */
BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
__this_cpu_write(kvm_apic_eoi, 0);
pa = slow_virt_to_phys(this_cpu_ptr(&kvm_apic_eoi))
| KVM_MSR_ENABLED;
wrmsrq(MSR_KVM_PV_EOI_EN, pa);
}
if (has_steal_clock)
kvm_register_steal_time();
}
staticvoid kvm_pv_disable_apf(void)
{ if (!__this_cpu_read(async_pf_enabled)) return;
/* * Iterate through all possible CPUs and map the memory region pointed * by apf_reason, steal_time and kvm_apic_eoi as decrypted at once. * * Note: we iterate through all possible CPUs to ensure that CPUs * hotplugged will have their per-cpu variable already mapped as * decrypted.
*/ staticvoid __init sev_map_percpu_data(void)
{ int cpu;
if (cc_vendor != CC_VENDOR_AMD ||
!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) return;
staticvoid kvm_smp_send_call_func_ipi(conststruct cpumask *mask)
{ int cpu;
native_send_call_func_ipi(mask);
/* Make sure other vCPUs get a chance to run if they need to. */
for_each_cpu(cpu, mask) { if (!idle_cpu(cpu) && vcpu_is_preempted(cpu)) {
kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu)); break;
}
}
}
cpumask_copy(flushmask, cpumask); /* * We have to call flush only on online vCPUs. And * queue flush_on_enter for pre-empted vCPUs
*/
for_each_cpu(cpu, flushmask) { /* * The local vCPU is never preempted, so we do not explicitly * skip check for local vCPU - it will never be cleared from * flushmask.
*/
src = &per_cpu(steal_time, cpu);
state = READ_ONCE(src->preempted); if ((state & KVM_VCPU_PREEMPTED)) { if (try_cmpxchg(&src->preempted, &state,
state | KVM_VCPU_FLUSH_TLB))
__cpumask_clear_cpu(cpu, flushmask);
}
}
native_flush_tlb_multi(flushmask, info);
}
static __init int kvm_alloc_cpumask(void)
{ int cpu;
static void __init kvm_smp_prepare_boot_cpu(void)
{
	/*
	 * Map the per-cpu variables as decrypted before kvm_guest_cpu_init()
	 * shares the guest physical address with the hypervisor.
	 *
	 * NOTE(review): the closing brace and the kvm_guest_cpu_init() call
	 * (referenced by the comment above) were lost in this extract;
	 * restored per upstream arch/x86/kernel/kvm.c — TODO confirm.
	 */
	sev_map_percpu_data();
	kvm_guest_cpu_init();
}
/* * After a PV feature is registered, the host will keep writing to the * registered memory location. If the guest happens to shutdown, this memory * won't be valid. In cases like kexec, in which you install a new kernel, this * means a random memory location will be kept being written.
*/ #ifdef CONFIG_CRASH_DUMP staticvoid kvm_crash_shutdown(struct pt_regs *regs)
{
kvm_guest_cpu_offline(true);
native_machine_crash_shutdown(regs);
} #endif
paravirt_ops_setup();
register_reboot_notifier(&kvm_pv_reboot_nb); for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
raw_spin_lock_init(&async_pf_sleepers[i].lock);
if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
has_steal_clock = 1;
static_call_update(pv_steal_clock, kvm_steal_clock);
/* * Hard lockup detection is enabled by default. Disable it, as guests * can get false positives too easily, for example if the host is * overcommitted.
*/
hardlockup_detector_disable();
}
/*
 * Return the CPUID leaf base of the KVM hypervisor signature, or 0 when not
 * running under a hypervisor (or on pre-CPUID processors).
 */
static noinline uint32_t __kvm_cpuid_base(void)
{
	if (boot_cpu_data.cpuid_level < 0)
		return 0;	/* So we don't blow up on old processors */

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return cpuid_base_hypervisor(KVM_SIGNATURE, 0);

	/*
	 * NOTE(review): the fallthrough return was truncated in this extract;
	 * a non-void function must return on every path.
	 */
	return 0;
}
/* * Reset the host's shared pages list related to kernel * specific page encryption status settings before we load a * new kernel by kexec. Reset the page encryption status * during early boot instead of just before kexec to avoid SMP * races during kvm_pv_guest_cpu_reboot(). * NOTE: We cannot reset the complete shared pages list * here as we need to retain the UEFI/OVMF firmware * specific settings.
*/
for (i = 0; i < e820_table->nr_entries; i++) { struct e820_entry *entry = &e820_table->entries[i];
/* * Ensure that _bss_decrypted section is marked as decrypted in the * shared pages list.
*/
early_set_mem_enc_dec_hypercall((unsignedlong)__start_bss_decrypted,
__end_bss_decrypted - __start_bss_decrypted, 0);
/* * If not booted using EFI, enable Live migration support.
*/ if (!efi_enabled(EFI_BOOT))
wrmsrq(MSR_KVM_MIGRATION_CONTROL,
KVM_MIGRATION_READY);
}
kvmclock_init();
x86_platform.apic_post_init = kvm_apic_init;
/* * Set WB as the default cache mode for SEV-SNP and TDX, with a single * UC range for the legacy PCI hole, e.g. so that devices that expect * to get UC/WC mappings don't get surprised with WB.
*/
guest_force_mtrr_state(&pci_hole, 1, MTRR_TYPE_WRBACK);
}
#ifdefined(CONFIG_AMD_MEM_ENCRYPT) staticvoid kvm_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs)
{ /* RAX and CPL are already in the GHCB */
ghcb_set_rbx(ghcb, regs->bx);
ghcb_set_rcx(ghcb, regs->cx);
ghcb_set_rdx(ghcb, regs->dx);
ghcb_set_rsi(ghcb, regs->si);
}
staticbool kvm_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
{ /* No checking of the return state needed */ returntrue;
} #endif
static __init int activate_jump_labels(void)
{ if (has_steal_clock) {
static_key_slow_inc(¶virt_steal_enabled); if (steal_acc)
static_key_slow_inc(¶virt_steal_rq_enabled);
}
return 0;
}
arch_initcall(activate_jump_labels);
#ifdef CONFIG_PARAVIRT_SPINLOCKS

/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
static void kvm_kick_cpu(int cpu)
{
	unsigned long flags = 0;
	u32 apicid;

	/*
	 * NOTE(review): the body and closing brace were truncated in this
	 * extract; restored per upstream arch/x86/kernel/kvm.c (look up the
	 * target's APIC ID and issue KVM_HC_KICK_CPU) — TODO confirm.
	 */
	apicid = per_cpu(x86_cpu_to_apicid, cpu);
	kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
}
/*
 * PV qspinlock wait callback: halt this vCPU until kicked, but only if the
 * lock byte still holds the expected value (avoids a lost-wakeup window).
 */
static void kvm_wait(u8 *ptr, u8 val)
{
	/* Halting inside an NMI could deadlock; just spin in that case. */
	if (in_nmi())
		return;

	/*
	 * halt until it's our turn and kicked. Note that we do safe halt
	 * for irq enabled case to avoid hang when lock info is overwritten
	 * in irq spinlock slowpath and no spurious interrupt occur to save us.
	 */
	if (irqs_disabled()) {
		if (READ_ONCE(*ptr) == val)
			halt();
	} else {
		local_irq_disable();

		/* safe_halt() will enable IRQ */
		if (READ_ONCE(*ptr) == val)
			safe_halt();
		else
			local_irq_enable();
	}
}
/*
 * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
 */
void __init kvm_spinlock_init(void)
{
	/*
	 * In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an
	 * advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is
	 * preferred over native qspinlock when vCPU is preempted.
	 *
	 * NOTE(review): this check must come first and 'return' (not goto out)
	 * so the key stays enabled; the extract had it after the goto-out
	 * checks, which contradicts the comment's intent.
	 */
	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) {
		pr_info("PV spinlocks disabled, no host support\n");
		return;
	}

	/*
	 * Disable PV spinlocks and use native qspinlock when dedicated pCPUs
	 * are available.
	 */
	if (kvm_para_has_hint(KVM_HINTS_REALTIME)) {
		pr_info("PV spinlocks disabled with KVM_HINTS_REALTIME hints\n");
		goto out;
	}

	if (num_possible_cpus() == 1) {
		pr_info("PV spinlocks disabled, single CPU\n");
		goto out;
	}

	if (nopvspin) {
		pr_info("PV spinlocks disabled, forced by \"nopvspin\" parameter\n");
		goto out;
	}

	pr_info("PV spinlocks enabled\n");

	/*
	 * NOTE(review): the extract fell straight through to 'out:' and never
	 * installed the PV lock ops, so PV spinlocks could never be enabled.
	 * Restored per upstream arch/x86/kernel/kvm.c — TODO confirm against
	 * the tree this was taken from.
	 */
	__pv_init_lock_hash();
	pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
	pv_ops.lock.queued_spin_unlock =
		PV_CALLEE_SAVE(__pv_queued_spin_unlock);
	pv_ops.lock.wait = kvm_wait;
	pv_ops.lock.kick = kvm_kick_cpu;
	return;

	/*
	 * When PV spinlock is enabled which is preferred over
	 * virt_spin_lock(), virt_spin_lock_key's value is meaningless.
	 * Just disable it anyway.
	 */
out:
	static_branch_disable(&virt_spin_lock_key);
}
void arch_haltpoll_enable(unsignedint cpu)
{ if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
pr_err_once("host does not support poll control\n");
pr_err_once("host upgrade recommended\n"); return;
}
/* Enable guest halt poll disables host halt poll */
smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1);
}
EXPORT_SYMBOL_GPL(arch_haltpoll_enable);
void arch_haltpoll_disable(unsignedint cpu)
{ if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) return;
/* Disable guest halt poll enables host halt poll */
smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
}
EXPORT_SYMBOL_GPL(arch_haltpoll_disable); #endif
Messung V0.5
- Dauer der Verarbeitung: 0.15 Sekunden (vorverarbeitet)
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.