/*
 * Raise osvw_len once updated Revision Guides have been published and the
 * meaning of the newly defined status bits is known.
 */
static uint64_t osvw_len = 4;
static uint64_t osvw_status;

static DEFINE_PER_CPU(u64, current_tsc_ratio);
/*
 * These 2 parameters are used to configure the controls for Pause-Loop Exiting:
 * pause_filter_count: On processors that support Pause filtering (indicated
 *	by CPUID Fn8000_000A_EDX), the VMCB provides a 16 bit pause filter
 *	count value. On VMRUN this value is loaded into an internal counter.
 *	Each time a pause instruction is executed, this counter is decremented
 *	until it reaches zero at which time a #VMEXIT is generated if pause
 *	intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause
 *	Intercept Filtering for more details.
 *	This also indicates whether PLE logic is enabled.
 *
 * pause_filter_thresh: In addition, some processor families support advanced
 *	pause filtering (indicated by CPUID Fn8000_000A_EDX), which provides an
 *	upper bound on the amount of time a guest is allowed to execute in a
 *	pause loop.
 *	In this mode, a 16-bit pause filter threshold field is added in the
 *	VMCB. The threshold value is a cycle count that is used to reset the
 *	pause counter. As with simple pause filtering, VMRUN loads the pause
 *	count value from VMCB into an internal counter. Then, on each pause
 *	instruction the hardware checks the elapsed number of cycles since
 *	the most recent pause instruction against the pause filter threshold.
 *	If the elapsed cycle count is greater than the pause filter threshold,
 *	then the internal pause count is reloaded from the VMCB and execution
 *	continues. If the elapsed cycle count is less than the pause filter
 *	threshold, then the internal pause count is decremented. If the count
 *	value is less than zero and PAUSE intercept is enabled, a #VMEXIT is
 *	triggered. If advanced pause filtering is supported and pause filter
 *	threshold field is set to zero, the filter will operate in the simpler,
 *	count only mode.
 */
/* By default the per-vcpu window is reset to pause_filter_count on every exit. */
static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
module_param(pause_filter_count_shrink, ushort, 0444);

/* By default the maximum is computed so that the window can never overflow. */
static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
module_param(pause_filter_count_max, ushort, 0444);

/*
 * Nested page tables are used by default.  Note that svm_hardware_setup()
 * may force NPT off if the hardware or the host kernel does not support it.
 */
bool npt_enabled = true;
module_param_named(npt, npt_enabled, bool, 0444);

/*
 * Enable / disable AVIC.  module_param_named cannot be used here because
 * the APICv defaults differ between VMX and SVM.
 */
static bool avic;
module_param(avic, bool, 0444);
module_param(enable_ipiv, bool, 0444);

/*
 * MSR_TSC_AUX is the only MSR switched via the user return hook.  EFER is
 * switched via the VMCB, and VMLOAD/VMSAVE take care of the SYSCALL/SYSENTER
 * MSRs.
 *
 * The kernel does not use RDTSCP or RDPID, specifically so that KVM can
 * defer restoring TSC_AUX until the CPU returns to userspace.
 */
int tsc_aux_uret_slot __ro_after_init = -1;
if (!npt_enabled) { /* Shadow paging assumes NX to be available. */
efer |= EFER_NX;
if (!(efer & EFER_LMA))
efer &= ~EFER_LME;
}
if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) { if (!(efer & EFER_SVME)) {
svm_leave_nested(vcpu);
svm_set_gif(svm, true); /* #GP intercept is still needed for vmware backdoor */ if (!enable_vmware_backdoor)
clr_exception_intercept(svm, GP_VECTOR);
/* * Free the nested guest state, unless we are in SMM. * In this case we will return to the nested guest * as soon as we leave SMM.
*/ if (!is_smm(vcpu))
svm_free_nested(svm);
} else { int ret = svm_allocate_nested(svm);
if (ret) {
vcpu->arch.efer = old_efer; return ret;
}
/* * Never intercept #GP for SEV guests, KVM can't * decrypt guest memory to workaround the erratum.
*/ if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm))
set_exception_intercept(svm, GP_VECTOR);
}
}
/* * SEV-ES does not expose the next RIP. The RIP update is controlled by * the type of exit and the #VC handler in the guest.
*/ if (sev_es_guest(vcpu->kvm)) goto done;
/* * Due to architectural shortcomings, the CPU doesn't always provide * NextRIP, e.g. if KVM intercepted an exception that occurred while * the CPU was vectoring an INTO/INT3 in the guest. Temporarily skip * the instruction even if NextRIP is supported to acquire the next * RIP so that it can be shoved into the NextRIP field, otherwise * hardware will fail to advance guest RIP during event injection. * Drop the exception/interrupt if emulation fails and effectively * retry the instruction, it's the least awful option. If NRIPS is * in use, the skip must not commit any side effects such as clearing * the interrupt shadow or RFLAGS.RF.
*/ if (!__svm_skip_emulated_instruction(vcpu, !nrips)) return -EIO;
rip = kvm_rip_read(vcpu);
/* * Save the injection information, even when using next_rip, as the * VMCB's next_rip will be lost (cleared on VM-Exit) if the injection * doesn't complete due to a VM-Exit occurring while the CPU is * vectoring the event. Decoding the instruction isn't guaranteed to * work as there may be no backing instruction, e.g. if the event is * being injected by L1 for L2, or if the guest is patching INT3 into * a different instruction.
*/
svm->soft_int_injected = true;
svm->soft_int_csbase = svm->vmcb->save.cs.base;
svm->soft_int_old_rip = old_rip;
svm->soft_int_next_rip = rip;
if (nrips)
kvm_rip_write(vcpu, old_rip);
if (static_cpu_has(X86_FEATURE_NRIPS))
svm->vmcb->control.next_rip = rip;
if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH)) return;
/* Use _safe variants to not break nested virtualization */ if (native_read_msr_safe(MSR_AMD64_DC_CFG, &val)) return;
val |= (1ULL << 47);
native_write_msr_safe(MSR_AMD64_DC_CFG, val);
erratum_383_found = true;
}
/*
 * Initialize the vCPU's OSVW length and status from the host-wide
 * osvw_len / osvw_status values.
 */
static void svm_init_osvw(struct kvm_vcpu *vcpu)
{
	/*
	 * Errata 400 and 415 should look fixed to the guest (assuming that
	 * HLT and IO instructions are intercepted), so status bits 1 and 2
	 * (mask 0x6) are cleared and the advertised length is never below 3.
	 */
	vcpu->arch.osvw.status = osvw_status & ~(6ULL);

	if (osvw_len >= 3)
		vcpu->arch.osvw.length = osvw_len;
	else
		vcpu->arch.osvw.length = 3;

	/*
	 * Advertising a length of 3 tells the guest that every osvw.status
	 * bit below that length is valid, including bit 0 (reserved for
	 * erratum 298).  When the host's osvw_len is 0, osvw_status[0] carries
	 * no information, so be conservative and report erratum 298 as
	 * present, since its real state is unknown.
	 */
	if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
		vcpu->arch.osvw.status |= 1;
}
staticbool __kvm_is_svm_supported(void)
{ int cpu = smp_processor_id(); struct cpuinfo_x86 *c = &cpu_data(cpu);
if (c->x86_vendor != X86_VENDOR_AMD &&
c->x86_vendor != X86_VENDOR_HYGON) {
pr_err("CPU %d isn't AMD or Hygon\n", cpu); returnfalse;
}
if (!cpu_has(c, X86_FEATURE_SVM)) {
pr_err("SVM not supported by CPU %d\n", cpu); returnfalse;
}
if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
pr_info("KVM is unsupported when running as an SEV guest\n"); returnfalse;
}
/*
 * Per-CPU teardown: restore the default TSC multiplier when TSC scaling is
 * in use, then disable SVM and PMU virtualization on this CPU.
 */
static void svm_disable_virtualization_cpu(void)
{
	/* Clean up after ourselves: put the TSC ratio back to its default. */
	if (tsc_scaling)
		__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);

	kvm_cpu_svm_disable();
	amd_pmu_disable_virt();
}
staticint svm_enable_virtualization_cpu(void)
{
struct svm_cpu_data *sd;
uint64_t efer; int me = raw_smp_processor_id();
rdmsrq(MSR_EFER, efer); if (efer & EFER_SVME) return -EBUSY;
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { /* * Set the default value, even if we don't use TSC scaling * to avoid having stale value in the msr
*/
__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
}
/* * Get OSVW bits. * * Note that it is possible to have a system with mixed processor * revisions and therefore different OSVW bits. If bits are not the same * on different processors then choose the worst case (i.e. if erratum * is present on one processor and not on another then assume that the * erratum is present everywhere).
*/ if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
u64 len, status = 0; int err;
err = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &len); if (!err)
err = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, &status);
staticbool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
{ /* * For non-nested case: * If the L01 MSR bitmap does not intercept the MSR, then we need to * save it. * * For nested case: * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it.
*/ void *msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm :
to_svm(vcpu)->msrpm;
/* * Set all bits in the permissions map so that all MSR and I/O accesses * are intercepted by default.
*/
pm = page_address(pages);
memset(pm, 0xff, PAGE_SIZE * (1 << order));
/* * Note! Always intercept LVTT, as TSC-deadline timer mode * isn't virtualized by hardware, and the CPU will generate a * #GP instead of a #VMEXIT.
*/
X2APIC_MSR(APIC_LVTTHMR),
X2APIC_MSR(APIC_LVTPC),
X2APIC_MSR(APIC_LVT0),
X2APIC_MSR(APIC_LVT1),
X2APIC_MSR(APIC_LVTERR),
X2APIC_MSR(APIC_TMICT),
X2APIC_MSR(APIC_TMCCT),
X2APIC_MSR(APIC_TDCR),
}; int i;
if (intercept == svm->x2avic_msrs_intercepted) return;
if (!x2avic_enabled) return;
for (i = 0; i < ARRAY_SIZE(x2avic_passthrough_msrs); i++)
svm_set_intercept_for_msr(&svm->vcpu, x2avic_passthrough_msrs[i],
MSR_TYPE_RW, intercept);
if (cpu_feature_enabled(X86_FEATURE_IBPB))
svm_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
!guest_has_pred_cmd_msr(vcpu));
if (cpu_feature_enabled(X86_FEATURE_FLUSH_L1D))
svm_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
!guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
/* * Disable interception of SPEC_CTRL if KVM doesn't need to manually * context switch the MSR (SPEC_CTRL is virtualized by the CPU), or if * the guest has a non-zero SPEC_CTRL value, i.e. is likely actively * using SPEC_CTRL.
*/ if (cpu_feature_enabled(X86_FEATURE_V_SPEC_CTRL))
svm_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW,
!guest_has_spec_ctrl_msr(vcpu)); else
svm_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW,
!svm->spec_ctrl);
/* * Intercept SYSENTER_EIP and SYSENTER_ESP when emulating an Intel CPU, * as AMD hardware only stores 32 bits, whereas Intel CPUs track 64 bits.
*/
svm_set_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW,
guest_cpuid_is_intel_compatible(vcpu));
svm_set_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW,
guest_cpuid_is_intel_compatible(vcpu));
if (kvm_aperfmperf_in_guest(vcpu->kvm)) {
svm_disable_intercept_for_msr(vcpu, MSR_IA32_APERF, MSR_TYPE_R);
svm_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R);
}
if (sev_es_guest(vcpu->kvm))
sev_es_recalc_msr_intercepts(vcpu);
/* * x2APIC intercepts are modified on-demand and cannot be filtered by * userspace.
*/
}
if (enable_lbrv && !current_enable_lbrv)
__svm_enable_lbrv(vcpu); elseif (!enable_lbrv && current_enable_lbrv)
__svm_disable_lbrv(vcpu);
/* * During nested transitions, it is possible that the current VMCB has * LBR_CTL set, but the previous LBR_CTL had it cleared (or vice versa). * In this case, even though LBR_CTL does not need an update, intercepts * do, so always recalculate the intercepts here.
*/
svm_recalc_lbr_msr_intercepts(vcpu);
}
if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { /* Clear our flags if they were not set by the guest */ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
svm->vmcb->save.rflags &= ~X86_EFLAGS_TF; if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
}
}
staticvoid grow_ple_window(struct kvm_vcpu *vcpu)
{ struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count;
/* * Intercept INVPCID if shadow paging is enabled to sync/free shadow * roots, or if INVPCID is disabled in the guest to inject #UD.
*/ if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) { if (!npt_enabled ||
!guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_INVPCID))
svm_set_intercept(svm, INTERCEPT_INVPCID); else
svm_clr_intercept(svm, INTERCEPT_INVPCID);
}
if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) { if (guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP))
svm_clr_intercept(svm, INTERCEPT_RDTSCP); else
svm_set_intercept(svm, INTERCEPT_RDTSCP);
}
if (guest_cpuid_is_intel_compatible(vcpu)) {
svm_set_intercept(svm, INTERCEPT_VMLOAD);
svm_set_intercept(svm, INTERCEPT_VMSAVE);
svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
} else { /* * If hardware supports Virtual VMLOAD VMSAVE then enable it * in VMCB and clear intercepts to avoid #VMEXIT.
*/ if (vls) {
svm_clr_intercept(svm, INTERCEPT_VMLOAD);
svm_clr_intercept(svm, INTERCEPT_VMSAVE);
svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
}
}
}
set_exception_intercept(svm, PF_VECTOR);
set_exception_intercept(svm, UD_VECTOR);
set_exception_intercept(svm, MC_VECTOR);
set_exception_intercept(svm, AC_VECTOR);
set_exception_intercept(svm, DB_VECTOR); /* * Guest access to VMware backdoor ports could legitimately * trigger #GP because of TSS I/O permission bitmap. * We intercept those #GP and allow access to them anyway * as VMware does.
*/ if (enable_vmware_backdoor)
set_exception_intercept(svm, GP_VECTOR);
err = -ENOMEM;
vmcb01_page = snp_safe_alloc_page(); if (!vmcb01_page) goto out;
if (sev_es_guest(vcpu->kvm)) { /* * SEV-ES guests require a separate VMSA page used to contain * the encrypted register state of the guest.
*/
vmsa_page = snp_safe_alloc_page(); if (!vmsa_page) goto error_free_vmcb_page;
}
err = avic_init_vcpu(svm); if (err) goto error_free_vmsa_page;
staticvoid svm_srso_vm_destroy(void)
{ if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) return;
if (atomic_dec_return(&srso_nr_vms)) return;
guard(spinlock)(&srso_lock);
/* * Verify a new VM didn't come along, acquire the lock, and increment * the count before this task acquired the lock.
*/ if (atomic_read(&srso_nr_vms)) return;
staticvoid svm_srso_vm_init(void)
{ if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) return;
/* * Acquire the lock on 0 => 1 transitions to ensure a potential 1 => 0 * transition, i.e. destroying the last VM, is fully complete, e.g. so * that a delayed IPI doesn't clear BP_SPEC_REDUCE after a vCPU runs.
*/ if (atomic_inc_not_zero(&srso_nr_vms)) return;
if (sev_es_guest(vcpu->kvm))
sev_es_unmap_ghcb(svm);
if (svm->guest_state_loaded) return;
/* * Save additional host state that will be restored on VMEXIT (sev-es) * or subsequent vmload of host save area.
*/
vmsave(sd->save_area_pa); if (sev_es_guest(vcpu->kvm))
sev_es_prepare_switch_to_guest(svm, sev_es_host_save_area(sd));
if (tsc_scaling)
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
/* * TSC_AUX is always virtualized (context switched by hardware) for * SEV-ES guests when the feature is available. For non-SEV-ES guests, * context switch TSC_AUX via the user_return MSR infrastructure (not * all CPUs support TSC_AUX virtualization).
*/ if (likely(tsc_aux_uret_slot >= 0) &&
(!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm)))
kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
if (svm->nmi_singlestep) { /* Hide our flags if they were not set by the guest */ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
rflags &= ~X86_EFLAGS_TF; if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
rflags &= ~X86_EFLAGS_RF;
} return rflags;
}
/* * Any change of EFLAGS.VM is accompanied by a reload of SS * (caused by either a task switch or an inter-privilege IRET), * so we do not need to update the CPL here.
*/
to_svm(vcpu)->vmcb->save.rflags = rflags;
}
switch (reg) { case VCPU_EXREG_PDPTR: /* * When !npt_enabled, mmu->pdptrs[] is already available since * it is always updated per SDM when moving to CRs.
*/ if (npt_enabled)
load_pdptrs(vcpu, kvm_read_cr3(vcpu)); break; default:
KVM_BUG_ON(1, vcpu->kvm);
}
}
/* * The following fields are ignored when AVIC is enabled
*/
WARN_ON(kvm_vcpu_apicv_activated(&svm->vcpu));
svm_set_intercept(svm, INTERCEPT_VINTR);
/* * Recalculating intercepts may have cleared the VINTR intercept. If * V_INTR_MASKING is enabled in vmcb12, then the effective RFLAGS.IF * for L1 physical interrupts is L1's RFLAGS.IF at the time of VMRUN. * Requesting an interrupt window if save.RFLAGS.IF=0 is pointless as * interrupts will never be unblocked while L2 is running.
*/ if (!svm_is_intercept(svm, INTERCEPT_VINTR)) return;
/* * This is just a dummy VINTR to actually cause a vmexit to happen. * Actual injection of virtual interrupts happens through EVENTINJ.
*/
control = &svm->vmcb->control;
control->int_vector = 0x0;
control->int_ctl &= ~V_INTR_PRIO_MASK;
control->int_ctl |= V_IRQ_MASK |
((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
}
/* Drop int_ctl fields related to VINTR injection. */
svm->vmcb->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK; if (is_guest_mode(&svm->vcpu)) {
svm->vmcb01.ptr->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
switch (seg) { case VCPU_SREG_CS: return &save->cs; case VCPU_SREG_DS: return &save->ds; case VCPU_SREG_ES: return &save->es; case VCPU_SREG_FS: return &save01->fs; case VCPU_SREG_GS: return &save01->gs; case VCPU_SREG_SS: return &save->ss; case VCPU_SREG_TR: return &save01->tr; case VCPU_SREG_LDTR: return &save01->ldtr;
}
BUG(); return NULL;
}
/* * AMD CPUs circa 2014 track the G bit for all segments except CS. * However, the SVM spec states that the G bit is not observed by the * CPU, and some VMware virtual CPUs drop the G bit for all segments. * So let's synthesize a legal G bit for all segments, this helps * running KVM nested. It also helps cross-vendor migration, because * Intel's vmentry has a check on the 'G' bit.
*/
var->g = s->limit > 0xfffff;
/* * AMD's VMCB does not have an explicit unusable field, so emulate it * for cross vendor migration purposes by "not present"
*/
var->unusable = !var->present;
switch (seg) { case VCPU_SREG_TR: /* * Work around a bug where the busy flag in the tr selector * isn't exposed
*/
var->type |= 0x2; break; case VCPU_SREG_DS: case VCPU_SREG_ES: case VCPU_SREG_FS: case VCPU_SREG_GS: /* * The accessed bit must always be set in the segment * descriptor cache, although it can be cleared in the * descriptor, the cached bit always remains at 1. Since * Intel has a check on this, set it here to support * cross-vendor migration.
*/ if (!var->unusable)
var->type |= 0x1; break; case VCPU_SREG_SS: /* * On AMD CPUs sometimes the DB bit in the segment * descriptor is left as 1, although the whole segment has * been made unusable. Clear it here to pass an Intel VMX * entry check when cross vendor migrating.
*/ if (var->unusable)
var->db = 0; /* This is symmetric with svm_set_segment() */
var->dpl = to_svm(vcpu)->vmcb->save.cpl; break;
}
}
/* * For guests that don't set guest_state_protected, the cr3 update is * handled via kvm_mmu_load() while entering the guest. For guests * that do (SEV-ES/SEV-SNP), the cr3 update needs to be written to * VMCB save area now, since the save area will become the initial * contents of the VMSA, and future VMCB save area updates won't be * seen.
*/ if (sev_es_guest(vcpu->kvm)) {
svm->vmcb->save.cr3 = cr3;
vmcb_mark_dirty(svm->vmcb, VMCB_CR);
}
}
if (!npt_enabled) {
hcr0 |= X86_CR0_PG | X86_CR0_WP; if (old_paging != is_paging(vcpu))
svm_set_cr4(vcpu, kvm_read_cr4(vcpu));
}
/* * re-enable caching here because the QEMU bios * does not do it - this results in some delay at * reboot
*/ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
hcr0 &= ~(X86_CR0_CD | X86_CR0_NW);
/* * This is always accurate, except if SYSRET returned to a segment * with SS.DPL != 3. Intel does not have this quirk, and always * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it * would entail passing the CPL to userspace and back.
*/ if (seg == VCPU_SREG_SS) /* This is symmetric with svm_get_segment() */
svm->vmcb->save.cpl = (var->dpl & 3);
if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm))) return;
get_debugreg(vcpu->arch.db[0], 0);
get_debugreg(vcpu->arch.db[1], 1);
get_debugreg(vcpu->arch.db[2], 2);
get_debugreg(vcpu->arch.db[3], 3); /* * We cannot reset svm->vmcb->save.dr6 to DR6_ACTIVE_LOW here, * because db_interception might need it. We can do it before vmentry.
*/
vcpu->arch.dr6 = svm->vmcb->save.dr6;
vcpu->arch.dr7 = svm->vmcb->save.dr7;
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
set_dr_intercepts(svm);
}
/* * WARN if hardware generates a fault with an error code that collides * with KVM-defined synthetic flags. Clear the flags and continue on, * i.e. don't terminate the VM, as KVM can't possibly be relying on a * flag that KVM doesn't know about.
*/ if (WARN_ON_ONCE(error_code & PFERR_SYNTHETIC_MASK))
error_code &= ~PFERR_SYNTHETIC_MASK;
if (sev_snp_guest(vcpu->kvm) && (error_code & PFERR_GUEST_ENC_MASK))
error_code |= PFERR_PRIVATE_ACCESS;
/* * VMCB is undefined after a SHUTDOWN intercept. INIT the vCPU to put * the VMCB in a known good state. Unfortunately, KVM doesn't have * KVM_MP_STATE_SHUTDOWN and can't add it without potentially breaking * userspace. At a platform view, INIT is acceptable behavior as * there exist bare metal platforms that automatically INIT the CPU * in response to shutdown. * * The VM save area for SEV-ES guests has already been encrypted so it * cannot be reinitialized, i.e. synthesizing INIT is futile.
*/ if (!sev_es_guest(vcpu->kvm)) {
clear_page(svm->vmcb); #ifdef CONFIG_KVM_SMM if (is_smm(vcpu))
kvm_smm_changed(vcpu, false); #endif
kvm_vcpu_reset(vcpu, true);
}
if (is_guest_mode(vcpu)) { /* Returns '1' or -errno on failure, '0' on success. */
ret = nested_svm_simple_vmexit(svm, guest_mode_exit_codes[opcode]); if (ret) return ret; return 1;
} return svm_instr_handlers[opcode](vcpu);
}
/* * #GP handling code. Note that #GP can be triggered under the following two * cases: * 1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on * some AMD CPUs when EAX of these instructions are in the reserved memory * regions (e.g. SMM memory on host). * 2) VMware backdoor
*/ staticint gp_interception(struct kvm_vcpu *vcpu)
{ struct vcpu_svm *svm = to_svm(vcpu);
u32 error_code = svm->vmcb->control.exit_info_1; int opcode;
/* Both #GP cases have zero error_code */ if (error_code) goto reinject;
/* Decode the instruction for usage later */ if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK) goto reinject;
opcode = svm_instr_opcode(vcpu);
if (opcode == NONE_SVM_INSTR) { if (!enable_vmware_backdoor) goto reinject;
/* * VMware backdoor emulation on #GP interception only handles * IN{S}, OUT{S}, and RDPMC.
*/ if (!is_guest_mode(vcpu)) return kvm_emulate_instruction(vcpu,
EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
} else { /* All SVM instructions expect page aligned RAX */ if (svm->vmcb->save.rax & ~PAGE_MASK) goto reinject;
/*
 * Adjust intercepts for a change of the global interrupt flag.
 *
 * @svm:   vCPU whose intercepts are updated
 * @value: requested GIF state; the code visible here only acts when true
 *
 * When @value is true: the STGI intercept is cleared if vgif is set, and
 * svm_clear_vintr() is called if the VINTR intercept is currently set and
 * again if vgif is not set.
 *
 * NOTE(review): the "After a CLGI" comment below sits inside the @value
 * branch and no GIF state is written anywhere in this body -- confirm
 * whether an else branch and the actual GIF update are missing here.
 */
void svm_set_gif(struct vcpu_svm *svm, bool value)
{ if (value) { /* * If VGIF is enabled, the STGI intercept is only added to * detect the opening of the SMI/NMI window; remove it now. * Likewise, clear the VINTR intercept, we will set it * again while processing KVM_REQ_EVENT if needed.
*/ if (vgif)
svm_clr_intercept(svm, INTERCEPT_STGI); if (svm_is_intercept(svm, INTERCEPT_VINTR))
svm_clear_vintr(svm);
/* * After a CLGI no interrupts should come. But if vGIF is * in use, we still rely on the VINTR intercept (rather than * STGI) to detect an open interrupt window.
*/ if (!vgif)
svm_clear_vintr(svm);
}
}
/*
 * Handle an intercepted STGI: after the nested-SVM permission check passes,
 * skip the instruction and set GIF.
 *
 * Returns 1 if the permission check fails, otherwise the result of
 * kvm_skip_emulated_instruction().
 */
static int stgi_interception(struct kvm_vcpu *vcpu)
{
	int rc;

	if (nested_svm_check_permissions(vcpu))
		return 1;

	/* Skip the instruction first, then set GIF. */
	rc = kvm_skip_emulated_instruction(vcpu);
	svm_set_gif(to_svm(vcpu), true);

	return rc;
}
/*
 * Handle an intercepted CLGI: after the nested-SVM permission check passes,
 * skip the instruction and clear GIF.
 *
 * Returns 1 if the permission check fails, otherwise the result of
 * kvm_skip_emulated_instruction().
 */
static int clgi_interception(struct kvm_vcpu *vcpu)
{
	int rc;

	if (nested_svm_check_permissions(vcpu))
		return 1;

	/* Skip the instruction first, then clear GIF. */
	rc = kvm_skip_emulated_instruction(vcpu);
	svm_set_gif(to_svm(vcpu), false);

	return rc;
}
/*
 * Handle an intercepted debug-register access (MOV to/from DRn).
 *
 * Returns 1 without emulating anything for SEV-ES guests and when guest
 * debugging is off (after dropping the DR intercepts).  Falls back to
 * emulate_on_interception() when decode assists are unavailable.  Otherwise
 * performs the access and returns the result of kvm_complete_insn_gp().
 */
static int dr_interception(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int gpr, dr_idx;
	int err;

	/*
	 * SEV-ES intercepts DR7 only to disable guest debugging, and the
	 * guest issues a VMGEXIT for DR7 writes only.  KVM cannot change DR7
	 * (always swapped as type 'A'), so there is nothing to do.
	 */
	if (sev_es_guest(vcpu->kvm))
		return 1;

	if (!vcpu->guest_debug) {
		/*
		 * Stop taking DR vmexits: force a reload of the debug
		 * registers and re-enter on this instruction.  The full
		 * debug register state is retrieved on the next vmexit.
		 */
		clr_dr_intercepts(svm);
		vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
		return 1;
	}

	if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
		return emulate_on_interception(vcpu);

	gpr = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
	dr_idx = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;

	if (dr_idx < 16) {
		/* mov from DRn: copy the debug register into the GPR */
		kvm_register_write(vcpu, gpr, kvm_get_dr(vcpu, dr_idx));
		err = 0;
	} else {
		/* mov to DRn: exit codes 16 and above denote writes */
		dr_idx -= 16;
		err = kvm_set_dr(vcpu, dr_idx, kvm_register_read(vcpu, gpr));
	}

	return kvm_complete_insn_gp(vcpu, err);
}
/*
 * Handle an intercepted CR8 write.
 *
 * The write goes through cr_interception().  Its result is returned
 * unchanged when the local APIC is in the kernel or when CR8 did not
 * decrease.  Otherwise exit_reason is set to KVM_EXIT_SET_TPR and 0 is
 * returned.
 */
static int cr8_write_interception(struct kvm_vcpu *vcpu)
{
	u8 old_cr8 = kvm_get_cr8(vcpu);
	int rc;

	/* instruction emulation calls kvm_set_cr8() */
	rc = cr_interception(vcpu);

	if (lapic_in_kernel(vcpu) || old_cr8 <= kvm_get_cr8(vcpu))
		return rc;

	vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
	return 0;
}
/*
 * Handle an EFER write trap.
 *
 * Builds a guest-initiated (host_initiated = false) MSR_EFER write whose
 * value is exit_info_1 with EFER_SVME masked off, and passes it to
 * kvm_set_msr_common().
 *
 * Returns 1 when svm_dis is set while the vCPU's EFER still has EFER_SVME,
 * 0 otherwise.
 *
 * NOTE(review): 'ret' is assigned from kvm_set_msr_common() but never read,
 * and 'svm_dis' is not declared inside this block -- confirm that the tail
 * of this function belongs here and that dropping 'ret' is intentional.
 */
staticint efer_trap(struct kvm_vcpu *vcpu)
{ struct msr_data msr_info; int ret;
/* * Clear the EFER_SVME bit from EFER. The SVM code always sets this * bit in svm_set_efer(), but __kvm_valid_efer() checks it against * whether the guest has X86_FEATURE_SVM - this avoids a failure if * the guest doesn't have X86_FEATURE_SVM.
*/
msr_info.host_initiated = false;
msr_info.index = MSR_EFER;
msr_info.data = to_svm(vcpu)->vmcb->control.exit_info_1 & ~EFER_SVME;
ret = kvm_set_msr_common(vcpu, &msr_info);
/* check for svm_disable while efer.svme is set */ if (svm_dis && (vcpu->arch.efer & EFER_SVME)) return 1;
return 0;
}
staticint svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{ struct vcpu_svm *svm = to_svm(vcpu); int ret = 0;
u32 ecx = msr->index;
u64 data = msr->data;
if (sev_es_prevent_msr_access(vcpu, msr)) return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
switch (ecx) { case MSR_AMD64_TSC_RATIO:
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR)) {
if (!msr->host_initiated) return 1; /* * In case TSC scaling is not enabled, always * leave this MSR at the default value. * * Due to bug in qemu 6.2.0, it would try to set * this msr to 0 if tsc scaling is not enabled. * Ignore this value as well.
*/ if (data != 0 && data != svm->tsc_ratio_msr) return 1; break;
}
if (data & SVM_TSC_RATIO_RSVD) return 1;
svm->tsc_ratio_msr = data;
if (guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR) &&
is_guest_mode(vcpu))
nested_svm_update_tsc_ratio_msr(vcpu);
break; case MSR_IA32_CR_PAT:
ret = kvm_set_msr_common(vcpu, msr); if (ret) break;
svm->vmcb01.ptr->save.g_pat = data; if (is_guest_mode(vcpu))
nested_vmcb02_compute_g_pat(svm);
vmcb_mark_dirty(svm->vmcb, VMCB_NPT); break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated &&
!guest_has_spec_ctrl_msr(vcpu)) return 1;
if (kvm_spec_ctrl_test_value(data)) return 1;
if (boot_cpu_has(X86_FEATURE_V_SPEC_CTRL))
svm->vmcb->save.spec_ctrl = data; else
svm->spec_ctrl = data; if (!data) break;
/* * For non-nested: * When it's written (to non-zero) for the first time, pass * it through. * * For nested: * The handling of the MSR bitmap for L2 guests is done in * nested_svm_merge_msrpm(). * We update the L1 MSR bit as well since it will end up * touching the MSR anyway now.
*/
svm_disable_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW); break; case MSR_AMD64_VIRT_SPEC_CTRL: if (!msr->host_initiated &&
!guest_cpu_cap_has(vcpu, X86_FEATURE_VIRT_SSBD)) return 1;
if (data & ~SPEC_CTRL_SSBD) return 1;
svm->virt_spec_ctrl = data; break; case MSR_STAR:
svm->vmcb01.ptr->save.star = data; break; #ifdef CONFIG_X86_64 case MSR_LSTAR:
svm->vmcb01.ptr->save.lstar = data; break; case MSR_CSTAR:
svm->vmcb01.ptr->save.cstar = data; break; case MSR_GS_BASE:
svm->vmcb01.ptr->save.gs.base = data; break; case MSR_FS_BASE:
svm->vmcb01.ptr->save.fs.base = data; break; case MSR_KERNEL_GS_BASE:
svm->vmcb01.ptr->save.kernel_gs_base = data; break; case MSR_SYSCALL_MASK:
svm->vmcb01.ptr->save.sfmask = data; break; #endif case MSR_IA32_SYSENTER_CS:
svm->vmcb01.ptr->save.sysenter_cs = data; break; case MSR_IA32_SYSENTER_EIP:
svm->vmcb01.ptr->save.sysenter_eip = (u32)data; /* * We only intercept the MSR_IA32_SYSENTER_{EIP|ESP} msrs * when we spoof an Intel vendor ID (for cross vendor migration). * In this case we use this intercept to track the high * 32 bit part of these msrs to support Intel's * implementation of SYSENTER/SYSEXIT.
*/
svm->sysenter_eip_hi = guest_cpuid_is_intel_compatible(vcpu) ? (data >> 32) : 0; break; case MSR_IA32_SYSENTER_ESP:
svm->vmcb01.ptr->save.sysenter_esp = (u32)data;
svm->sysenter_esp_hi = guest_cpuid_is_intel_compatible(vcpu) ? (data >> 32) : 0; break; case MSR_TSC_AUX: /* * TSC_AUX is always virtualized for SEV-ES guests when the * feature is available. The user return MSR support is not * required in this case because TSC_AUX is restored on #VMEXIT * from the host save area.
*/ if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && sev_es_guest(vcpu->kvm)) break;
/* * TSC_AUX is usually changed only during boot and never read * directly. Intercept TSC_AUX and switch it via user return.
*/
preempt_disable();
ret = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
preempt_enable(); if (ret) break;
svm->tsc_aux = data; break; case MSR_IA32_DEBUGCTLMSR: if (!lbrv) {
kvm_pr_unimpl_wrmsr(vcpu, ecx, data); break;
}
/* * Suppress BTF as KVM doesn't virtualize BTF, but there's no * way to communicate lack of support to the guest.
*/ if (data & DEBUGCTLMSR_BTF) {
kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data);
data &= ~DEBUGCTLMSR_BTF;
}
if (data & DEBUGCTL_RESERVED_BITS) return 1;
if (svm->vmcb->save.dbgctl == data) break;
svm->vmcb->save.dbgctl = data;
vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
svm_update_lbrv(vcpu); break; case MSR_VM_HSAVE_PA: /* * Old kernels did not validate the value written to * MSR_VM_HSAVE_PA. Allow KVM_SET_MSR to set an invalid * value to allow live migrating buggy or malicious guests * originating from those kernels.
*/ if (!msr->host_initiated && !page_address_valid(vcpu, data)) return 1;
svm->nested.hsave_msr = data & PAGE_MASK; break; case MSR_VM_CR: return svm_set_vm_cr(vcpu, data); case MSR_VM_IGNNE:
kvm_pr_unimpl_wrmsr(vcpu, ecx, data); break; case MSR_AMD64_DE_CFG: {
u64 supported_de_cfg;
if (svm_get_feature_msr(ecx, &supported_de_cfg)) return 1;
/* * If not running nested, for AVIC, the only reason to end up here is ExtINTs. * In this case AVIC was temporarily disabled for * requesting the IRQ window and we have to re-enable it. * * If running nested, still remove the VM wide AVIC inhibit to * support case in which the interrupt window was requested when the * vCPU was not running nested.
* All vCPUs that are still running nested will keep their * AVIC inhibited due to per-cpu AVIC inhibition.
*/
kvm_clear_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
++vcpu->stat.irq_window_exits; return 1;
}
staticint pause_interception(struct kvm_vcpu *vcpu)
{ bool in_kernel; /* * CPL is not made available for an SEV-ES guest, therefore * vcpu->arch.preempted_in_kernel can never be true. Just * set in_kernel to false as well.
*/
in_kernel = !sev_es_guest(vcpu->kvm) && svm_get_cpl(vcpu) == 0;
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_INVPCID)) {
kvm_queue_exception(vcpu, UD_VECTOR); return 1;
}
/* * For an INVPCID intercept: * EXITINFO1 provides the linear address of the memory operand. * EXITINFO2 provides the contents of the register operand.
*/
type = svm->vmcb->control.exit_info_2;
gva = svm->vmcb->control.exit_info_1;
/* * FIXME: Perform segment checks for 32-bit mode, and inject #SS if the * stack segment is used. The intercept takes priority over all * #GP checks except CPL>0, but somehow still generates a linear * address? The APM is sorely lacking.
*/ if (is_noncanonical_address(gva, vcpu, 0)) {
kvm_queue_exception_e(vcpu, GP_VECTOR, 0); return 1;
}
/* * If userspace has NOT changed RIP, then KVM's ABI is to let the guest * execute the bus-locking instruction. Set the bus lock counter to '1' * to effectively step past the bus lock.
*/ if (kvm_is_linear_rip(vcpu, vcpu->arch.cui_linear_rip))
svm->vmcb->control.bus_lock_counter = 1;
/* SEV-ES guests must use the CR write traps to track CR registers. */ if (!sev_es_guest(vcpu->kvm)) { if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
vcpu->arch.cr0 = svm->vmcb->save.cr0; if (npt_enabled)
vcpu->arch.cr3 = svm->vmcb->save.cr3;
}
if (is_guest_mode(vcpu)) { int vmexit;
trace_kvm_nested_vmexit(vcpu, KVM_ISA_SVM);
vmexit = nested_svm_exit_special(svm);
if (vmexit == NESTED_EXIT_CONTINUE)
vmexit = nested_svm_exit_handled(svm);
/* * If the previous vmrun of the vmcb occurred on a different physical * cpu, then mark the vmcb dirty and assign a new asid. Hardware's * vmcb clean bits are per logical CPU, as are KVM's asid assignments.
*/ if (unlikely(svm->current_vmcb->cpu != vcpu->cpu)) {
svm->current_vmcb->asid_generation = 0;
vmcb_mark_all_dirty(svm->vmcb);
svm->current_vmcb->cpu = vcpu->cpu;
}
if (sev_guest(vcpu->kvm)) return pre_sev_run(svm, vcpu->cpu);
/* FIXME: handle wraparound of asid_generation */ if (svm->current_vmcb->asid_generation != sd->asid_generation)
new_asid(svm, sd);
/* * No need to manually track NMI masking when vNMI is enabled, hardware * automatically sets V_NMI_BLOCKING_MASK as appropriate, including the * case where software directly injects an NMI.
*/ if (!is_vnmi_enabled(svm)) {
svm->nmi_masked = true;
svm_set_iret_intercept(svm);
}
++vcpu->stat.nmi_injections;
}
/* * Because the pending NMI is serviced by hardware, KVM can't know when * the NMI is "injected", but for all intents and purposes, passing the * NMI off to hardware counts as injection.
*/
++vcpu->stat.nmi_injections;
/*
 * Finish delivering an interrupt whose vector has already been recorded for
 * @vcpu.
 *
 * @delivery_mode, @trig_mode and @vector are only forwarded to the
 * kvm_apicv_accept_irq tracepoint; the routing decision depends solely on
 * whether APICv is active for the vCPU and whether the vCPU is currently in
 * guest mode.
 */
void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
				     int trig_mode, int vector)
{
	/*
	 * Sample vcpu->mode with acquire semantics first: apic->apicv_active
	 * must be read after vcpu->mode.  Pairs with smp_store_release in
	 * vcpu_enter_guest.
	 */
	bool vcpu_in_guest = smp_load_acquire(&vcpu->mode) == IN_GUEST_MODE;

	/* Note, this is called iff the local APIC is in-kernel. */
	if (READ_ONCE(vcpu->arch.apic->apicv_active)) {
		trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
					   trig_mode, vector);

		if (vcpu_in_guest) {
			/*
			 * Signal the doorbell to tell hardware to inject the
			 * IRQ.  If the vCPU exits the guest before the
			 * doorbell chimes, hardware will automatically process
			 * AVIC interrupts at the next VMRUN.
			 */
			avic_ring_doorbell(vcpu);
		} else {
			/*
			 * Wake the vCPU if it was blocking.  KVM will then
			 * detect the pending IRQ when checking if the vCPU has
			 * a wake event.
			 */
			kvm_vcpu_wake_up(vcpu);
		}
		return;
	}

	/*
	 * APICv is not active: process the interrupt via
	 * kvm_check_and_inject_events().
	 */
	kvm_make_request(KVM_REQ_EVENT, vcpu);
	kvm_vcpu_kick(vcpu);
}
/*
 * Deliver an interrupt to the vCPU that owns @apic: record @vector in the
 * local APIC's IRR, then hand off to svm_complete_interrupt_delivery() to
 * notify the target vCPU.
 *
 * @delivery_mode and @trig_mode are passed through unchanged.
 */
static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
				  int trig_mode, int vector)
{
	kvm_lapic_set_irr(vector, apic);

	/*
	 * Pairs with the smp_mb_*() after setting vcpu->guest_mode in
	 * vcpu_enter_guest() to ensure the write to the vIRR is ordered before
	 * the read of guest_mode.  This guarantees that either VMRUN will see
	 * and process the new vIRR entry, or that
	 * svm_complete_interrupt_delivery will signal the doorbell if the CPU
	 * has already entered the guest.
	 */
	smp_mb__after_atomic();

	svm_complete_interrupt_delivery(apic->vcpu, delivery_mode, trig_mode,
					vector);
}
/*
 * Recompute the CR8 write intercept for @vcpu from the current task priority
 * (@tpr) and @irr.
 *
 * The intercept is first cleared, then re-armed only when @irr is not -1 and
 * @tpr is greater than or equal to @irr.
 */
static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/*
	 * SEV-ES guests must always keep the CR intercepts cleared.  CR
	 * tracking is done using the CR write traps.
	 */
	if (sev_es_guest(vcpu->kvm))
		return;

	/*
	 * Leave the intercept untouched when nested TPR virtualization is in
	 * effect.
	 * NOTE(review): exact semantics of nested_svm_virtualize_tpr() are not
	 * visible here - confirm against its definition.
	 */
	if (nested_svm_virtualize_tpr(vcpu))
		return;

	svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);

	/* irr == -1: nothing further to arm (presumably "no pending IRQ"). */
	if (irr == -1)
		return;

	if (tpr >= irr)
		svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
}
/* An NMI must not be injected into L2 if it's supposed to VM-Exit. */ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm)) return -EBUSY; return 1;
}
if (is_guest_mode(vcpu)) { /* As long as interrupts are being delivered... */ if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF)
: !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF)) returntrue;
/* ... vmexits aren't blocked by the interrupt shadow */ if (nested_exit_on_intr(svm)) returnfalse;
} else { if (!svm_get_if_flag(vcpu)) returntrue;
}
if (svm->nested.nested_run_pending) return -EBUSY;
if (svm_interrupt_blocked(vcpu)) return 0;
/* * An IRQ must not be injected into L2 if it's supposed to VM-Exit, * e.g. if the IRQ arrived asynchronously after checking nested events.
*/ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm)) return -EBUSY;
/* * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes * 1, because that's a separate STGI/VMRUN intercept. The next time we * get that intercept, this function will be called again though and * we'll get the vintr intercept. However, if the vGIF feature is * enabled, the STGI interception will not occur. Enable the irq * window under the assumption that the hardware will set the GIF.
*/ if (vgif || gif_set(svm)) { /* * IRQ window is not needed when AVIC is enabled, * unless we have pending ExtINT since it cannot be injected * via AVIC. In such case, KVM needs to temporarily disable AVIC, * and fallback to injecting IRQ via V_IRQ. * * If running nested, AVIC is already locally inhibited * on this vCPU, therefore there is no need to request * the VM wide AVIC inhibition.
*/ if (!is_guest_mode(vcpu))
kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
/* * If NMIs are outright masked, i.e. the vCPU is already handling an * NMI, and KVM has not yet intercepted an IRET, then there is nothing * more to do at this time as KVM has already enabled IRET intercepts. * If KVM has already intercepted IRET, then single-step over the IRET, * as NMIs aren't architecturally unmasked until the IRET completes. * * If vNMI is enabled, KVM should never request an NMI window if NMIs * are masked, as KVM allows at most one to-be-injected NMI and one * pending NMI. If two NMIs arrive simultaneously, KVM will inject one * NMI and set V_NMI_PENDING for the other, but if and only if NMIs are * unmasked. KVM _will_ request an NMI window in some situations, e.g. * if the vCPU is in an STI shadow or if GIF=0, KVM can't immediately * inject the NMI. In those situations, KVM needs to single-step over * the STI shadow or intercept STGI.
*/ if (svm_get_nmi_mask(vcpu)) {
WARN_ON_ONCE(is_vnmi_enabled(svm));
if (!svm->awaiting_iret_completion) return; /* IRET will cause a vm exit */
}
/* * SEV-ES guests are responsible for signaling when a vCPU is ready to * receive a new NMI, as SEV-ES guests can't be single-stepped, i.e. * KVM can't intercept and single-step IRET to detect when NMIs are * unblocked (architecturally speaking). See SVM_VMGEXIT_NMI_COMPLETE. * * Note, GIF is guaranteed to be '1' for SEV-ES guests as hardware * ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not * supported NAEs in the GHCB protocol.
*/ if (sev_es_guest(vcpu->kvm)) return;
if (!gif_set(svm)) { if (vgif)
svm_set_intercept(svm, INTERCEPT_STGI); return; /* STGI will cause a vm exit */
}
/* * Something prevents NMI from being injected. Single step over possible * problem (IRET or exception injection or interrupt shadow)
*/
svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
}
/* * Unlike VMX, SVM doesn't provide a way to flush only NPT TLB entries. * A TLB flush for the current ASID flushes both "host" and "guest" TLB * entries, and thus is a superset of Hyper-V's fine grained flushing.
*/
kvm_hv_vcpu_purge_flush_tlb(vcpu);
/* * Flush only the current ASID even if the TLB flush was invoked via * kvm_flush_remote_tlbs(). Although flushing remote TLBs requires all * ASIDs to be flushed, KVM uses a single ASID for L1 and L2, and * unconditionally does a TLB flush on both nested VM-Enter and nested * VM-Exit (via kvm_mmu_reset_context()).
*/ if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; else
svm->current_vmcb->asid_generation--;
}
/* * When running on Hyper-V with EnlightenedNptTlb enabled, explicitly * flush the NPT mappings via hypercall as flushing the ASID only * affects virtual to physical mappings, it does not invalidate guest * physical to host physical mappings.
*/ if (svm_hv_is_enlightened_tlb_enabled(vcpu) && VALID_PAGE(root_tdp))
hyperv_flush_guest_mapping(root_tdp);
svm_flush_tlb_asid(vcpu);
}
staticvoid svm_flush_tlb_all(struct kvm_vcpu *vcpu)
{ /* * When running on Hyper-V with EnlightenedNptTlb enabled, remote TLB * flushes should be routed to hv_flush_remote_tlbs() without requesting * a "regular" remote flush. Reaching this point means either there's * a KVM bug or a prior hv_flush_remote_tlbs() call failed, both of * which might be fatal to the guest. Yell, but try to recover.
*/ if (WARN_ON_ONCE(svm_hv_is_enlightened_tlb_enabled(vcpu)))
hv_flush_remote_tlbs(vcpu->kvm);
/* * If NRIPS is enabled, KVM must snapshot the pre-VMRUN next_rip that's * associated with the original soft exception/interrupt. next_rip is * cleared on all exits that can occur while vectoring an event, so KVM * needs to manually set next_rip for re-injection. Unlike the !nrips * case below, this needs to be done if and only if KVM is re-injecting * the same event, i.e. if the event is a soft exception/interrupt, * otherwise next_rip is unused on VMRUN.
*/ if (nrips && (is_soft || (is_exception && kvm_exception_is_soft(vector))) &&
kvm_is_linear_rip(vcpu, svm->soft_int_old_rip + svm->soft_int_csbase))
svm->vmcb->control.next_rip = svm->soft_int_next_rip; /* * If NRIPS isn't enabled, KVM must manually advance RIP prior to * injecting the soft exception/interrupt. That advancement needs to * be unwound if vectoring didn't complete. Note, the new event may * not be the injected event, e.g. if KVM injected an INTn, the INTn * hit a #NP in the guest, and the #NP encountered a #PF, the #NP will * be the reported vectored event, but RIP still needs to be unwound.
*/ elseif (!nrips && (is_soft || is_exception) &&
kvm_is_linear_rip(vcpu, svm->soft_int_next_rip + svm->soft_int_csbase))
kvm_rip_write(vcpu, svm->soft_int_old_rip);
}
/* * Next RIP must be provided as IRQs are disabled, and accessing guest * memory to decode the instruction might fault, i.e. might sleep.
*/ if (!nrips || !control->next_rip) return EXIT_FASTPATH_NONE;
if (is_guest_mode(vcpu)) return EXIT_FASTPATH_NONE;
switch (control->exit_code) { case SVM_EXIT_MSR: if (!control->exit_info_1) break; return handle_fastpath_set_msr_irqoff(vcpu); case SVM_EXIT_HLT: return handle_fastpath_hlt(vcpu); default: break;
}
/* * Set RFLAGS.IF prior to VMRUN, as the host's RFLAGS.IF at the time of * VMRUN controls whether or not physical IRQs are masked (KVM always * runs with V_INTR_MASKING_MASK). Toggle RFLAGS.IF here to avoid the * temptation to do STI+VMRUN+CLI, as AMD CPUs bleed the STI shadow * into guest state if delivery of an event during VMRUN triggers a * #VMEXIT, and the guest_state transitions already tell lockdep that * IRQs are being enabled/disabled. Note! GIF=0 for the entirety of * this path, so IRQs aren't actually unmasked while running host code.
*/
raw_local_irq_enable();
amd_clear_divider();
if (sev_es_guest(vcpu->kvm))
__svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted,
sev_es_host_save_area(sd)); else
__svm_vcpu_run(svm, spec_ctrl_intercepted);
/* * Disable singlestep if we're injecting an interrupt/exception. * We don't want our modified rflags to be pushed on the stack where * we might not be able to easily reset them if we disabled NMI * singlestep later.
*/ if (svm->nmi_singlestep && svm->vmcb->control.event_inj) { /* * Event injection happens before external interrupts cause a * vmexit and interrupts are disabled here, so smp_send_reschedule * is enough to force an immediate vmexit.
*/
disable_nmi_singlestep(svm);
force_immediate_exit = true;
}
if (force_immediate_exit)
smp_send_reschedule(vcpu->cpu);
/* * Run with all-zero DR6 unless the guest can write DR6 freely, so that * KVM can get the exact cause of a #DB. Note, loading guest DR6 from * KVM's snapshot is only necessary when DR accesses won't exit.
*/ if (unlikely(run_flags & KVM_RUN_LOAD_GUEST_DR6))
svm_set_dr6(vcpu, vcpu->arch.dr6); elseif (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)))
svm_set_dr6(vcpu, DR6_ACTIVE_LOW);
clgi();
kvm_load_guest_xsave_state(vcpu);
/* * Hardware only context switches DEBUGCTL if LBR virtualization is * enabled. Manually load DEBUGCTL if necessary (and restore it after * VM-Exit), as running with the host's DEBUGCTL can negatively affect * guest state and can even be fatal, e.g. due to Bus Lock Detect.
*/ if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) &&
vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl)
update_debugctlmsr(svm->vmcb->save.dbgctl);
kvm_wait_lapic_expire(vcpu);
/* * If this vCPU has touched SPEC_CTRL, restore the guest's value if * it's non-zero. Since vmentry is serialising on affected CPUs, there * is no need to worry about the conditional branch over the wrmsr * being speculatively taken.
*/ if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
x86_spec_ctrl_set_guest(svm->virt_spec_ctrl);
svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted);
if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
x86_spec_ctrl_restore_host(svm->virt_spec_ctrl);
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) &&
vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl)
update_debugctlmsr(vcpu->arch.host_debugctl);
kvm_load_host_xsave_state(vcpu);
stgi();
/* Any pending NMI will happen here */
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_after_interrupt(vcpu);
sync_cr8_to_lapic(vcpu);
svm->next_rip = 0; if (is_guest_mode(vcpu)) {
nested_sync_control_from_vmcb02(svm);
/* Track VMRUNs that have made past consistency checking */ if (svm->nested.nested_run_pending &&
svm->vmcb->control.exit_code != SVM_EXIT_ERR)
++vcpu->stat.nested_run;
/* if exit due to PF check for async PF */ if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
vcpu->arch.apf.host_apf_flags =
kvm_read_and_reset_apf_flags();
vcpu->arch.regs_avail &= ~SVM_REGS_LAZY_LOAD_SET;
/* * We need to handle MC intercepts here before the vcpu has a chance to * change the physical cpu
*/ if (unlikely(svm->vmcb->control.exit_code ==
SVM_EXIT_EXCP_BASE + MC_VECTOR))
svm_handle_mce(vcpu);
/* * The kvm parameter can be NULL (module initialization, or invocation before * VM creation). Be sure to check the kvm parameter before using it.
*/ staticbool svm_has_emulated_msr(struct kvm *kvm, u32 index)
{ switch (index) { case MSR_IA32_MCG_EXT_CTL: case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: returnfalse; case MSR_IA32_SMBASE: if (!IS_ENABLED(CONFIG_KVM_SMM)) returnfalse; /* SEV-ES guests do not support SMM, so report false */ if (kvm && sev_es_guest(kvm)) returnfalse; break; default: break;
}
/* * SVM doesn't provide a way to disable just XSAVES in the guest, KVM * can only disable all variants of XSAVE by disallowing CR4.OSXSAVE from * being set. As a result, if the host has XSAVE and XSAVES, and the * guest has XSAVE enabled, the guest can execute XSAVES without * faulting. Treat XSAVES as enabled in this case regardless of * whether it's advertised to the guest so that KVM context switches * XSS on VM-Enter/VM-Exit. Failure to do so would effectively give * the guest read/write access to the host's XSS.
*/
guest_cpu_cap_change(vcpu, X86_FEATURE_XSAVES,
boot_cpu_has(X86_FEATURE_XSAVES) &&
guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE));
/* * Intercept VMLOAD if the vCPU model is Intel in order to emulate that * VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing * SVM on Intel is bonkers and extremely unlikely to work).
*/ if (guest_cpuid_is_intel_compatible(vcpu))
guest_cpu_cap_clear(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
if (sev_guest(vcpu->kvm))
sev_vcpu_after_set_cpuid(svm);
if (info->intercept >= ARRAY_SIZE(x86_intercept_map)) goto out;
icpt_info = x86_intercept_map[info->intercept];
if (stage != icpt_info.stage) goto out;
switch (icpt_info.exit_code) { case SVM_EXIT_READ_CR0: if (info->intercept == x86_intercept_cr_read)
icpt_info.exit_code += info->modrm_reg; break; case SVM_EXIT_WRITE_CR0: { unsignedlong cr0, val;
if (info->intercept == x86_intercept_cr_write)
icpt_info.exit_code += info->modrm_reg;
if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
info->intercept == x86_intercept_clts) break;
if (!(vmcb12_is_intercept(&svm->nested.ctl,
INTERCEPT_SELECTIVE_CR0))) break;
cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
if (info->intercept == x86_intercept_lmsw) {
cr0 &= 0xfUL;
val &= 0xfUL; /* lmsw can't clear PE - catch this here */ if (cr0 & X86_CR0_PE)
val |= X86_CR0_PE;
}
if (cr0 ^ val)
icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
break;
} case SVM_EXIT_READ_DR0: case SVM_EXIT_WRITE_DR0:
icpt_info.exit_code += info->modrm_reg; break; case SVM_EXIT_MSR: if (info->intercept == x86_intercept_wrmsr)
vmcb->control.exit_info_1 = 1; else
vmcb->control.exit_info_1 = 0; break; case SVM_EXIT_PAUSE: /* * We get this for NOP only, but pause * is rep not, check this here
*/ if (info->rep_prefix != REPE_PREFIX) goto out; break; case SVM_EXIT_IOIO: {
u64 exit_info;
u32 bytes;
/* An SMI must not be injected into L2 if it's supposed to VM-Exit. */ if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm)) return -EBUSY;
return 1;
}
staticint svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{ struct vcpu_svm *svm = to_svm(vcpu); struct kvm_host_map map_save; int ret;
if (!is_guest_mode(vcpu)) return 0;
/* * 32-bit SMRAM format doesn't preserve EFER and SVM state. Userspace is * responsible for ensuring nested SVM and SMIs are mutually exclusive.
*/
if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) return 1;
ret = nested_svm_simple_vmexit(svm, SVM_EXIT_SW); if (ret) return ret;
/* * KVM uses VMCB01 to store L1 host state while L2 runs but * VMCB01 is going to be used during SMM and thus the state will * be lost. Temporarily save non-VMLOAD/VMSAVE state to the host save * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the * format of the area is identical to guest save area offset * by 0x400 (matches the offset of 'struct vmcb_save_area' * within 'struct vmcb'). Note: HSAVE area may also be used by * L1 hypervisor to save additional host context (e.g. KVM does * that, see svm_prepare_switch_to_guest()) which must be * preserved.
*/ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save)) return 1;
if (!gif_set(svm)) { if (vgif)
svm_set_intercept(svm, INTERCEPT_STGI); /* STGI will cause a vm exit */
} else { /* We must be in SMM; RSM will cause a vmexit anyway. */
}
} #endif
/* Check that emulation is possible during event vectoring */ if ((svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK) &&
!kvm_can_emulate_event_vectoring(emul_type)) return X86EMUL_UNHANDLEABLE_VECTORING;
/* Emulation is always possible when KVM has access to all guest state. */ if (!sev_guest(vcpu->kvm)) return X86EMUL_CONTINUE;
/* #UD and #GP should never be intercepted for SEV guests. */
WARN_ON_ONCE(emul_type & (EMULTYPE_TRAP_UD |
EMULTYPE_TRAP_UD_FORCED |
EMULTYPE_VMWARE_GP));
/* * Emulation is impossible for SEV-ES guests as KVM doesn't have access * to guest register state.
*/ if (sev_es_guest(vcpu->kvm)) return X86EMUL_RETRY_INSTR;
/* * Emulation is possible if the instruction is already decoded, e.g. * when completing I/O after returning from userspace.
*/ if (emul_type & EMULTYPE_NO_DECODE) return X86EMUL_CONTINUE;
/* * Emulation is possible for SEV guests if and only if a prefilled * buffer containing the bytes of the intercepted instruction is * available. SEV guest memory is encrypted with a guest specific key * and cannot be decrypted by KVM, i.e. KVM would read ciphertext and * decode garbage. * * If KVM is NOT trying to simply skip an instruction, inject #UD if * KVM reached this point without an instruction buffer. In practice, * this path should never be hit by a well-behaved guest, e.g. KVM * doesn't intercept #UD or #GP for SEV guests, but this path is still * theoretically reachable, e.g. via unaccelerated fault-like AVIC * access, and needs to be handled by KVM to avoid putting the guest * into an infinite loop. Injecting #UD is somewhat arbitrary, but * its the least awful option given lack of insight into the guest. * * If KVM is trying to skip an instruction, simply resume the guest. * If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM * will attempt to re-inject the INT3/INTO and skip the instruction. * In that scenario, retrying the INT3/INTO and hoping the guest will * make forward progress is the only option that has a chance of * success (and in practice it will work the vast majority of the time).
*/ if (unlikely(!insn)) { if (emul_type & EMULTYPE_SKIP) return X86EMUL_UNHANDLEABLE;
/* * Emulate for SEV guests if the insn buffer is not empty. The buffer * will be empty if the DecodeAssist microcode cannot fetch bytes for * the faulting instruction because the code fetch itself faulted, e.g. * the guest attempted to fetch from emulated MMIO or a guest page * table used to translate CS:RIP resides in emulated MMIO.
*/ if (likely(insn_len)) return X86EMUL_CONTINUE;
/* * Detect and work around Errata 1096 Fam_17h_00_0Fh. * * Errata: * When CPU raises #NPF on guest data access and vCPU CR4.SMAP=1, it is * possible that CPU microcode implementing DecodeAssist will fail to * read guest memory at CS:RIP and vmcb.GuestIntrBytes will incorrectly * be '0'. This happens because microcode reads CS:RIP using a _data_ * load uop with CPL=0 privileges. If the load hits a SMAP #PF, ucode * gives up and does not fill the instruction bytes buffer. * * As above, KVM reaches this point iff the VM is an SEV guest, the CPU * supports DecodeAssist, a #NPF was raised, KVM's page fault handler * triggered emulation (e.g. for MMIO), and the CPU returned 0 in the * GuestIntrBytes field of the VMCB. * * This does _not_ mean that the erratum has been encountered, as the * DecodeAssist will also fail if the load for CS:RIP hits a legitimate * #PF, e.g. if the guest attempted to execute from emulated MMIO and * encountered a reserved/not-present #PF. * * To hit the erratum, the following conditions must be true: * 1. CR4.SMAP=1 (obviously). * 2. CR4.SMEP=0 || CPL=3. If SMEP=1 and CPL<3, the erratum cannot * have been hit as the guest would have encountered a SMEP * violation #PF, not a #NPF. * 3. The #NPF is not due to a code fetch, in which case failure to * retrieve the instruction bytes is legitimate (see above). * * In addition, don't apply the erratum workaround if the #NPF occurred * while translating guest page tables (see below).
*/
error_code = svm->vmcb->control.exit_info_1; if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK)) goto resume_guest;
/* * If the fault occurred in userspace, arbitrarily inject #GP * to avoid killing the guest and to hopefully avoid confusing * the guest kernel too much, e.g. injecting #PF would not be * coherent with respect to the guest's page tables. Request * triple fault if the fault occurred in the kernel as there's * no fault that KVM can inject without confusing the guest. * In practice, the triple fault is moot as no sane SEV kernel * will execute from user memory while also running with SMAP=1.
*/ if (is_user)
kvm_inject_gp(vcpu, 0); else
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return X86EMUL_PROPAGATE_FAULT;
}
resume_guest: /* * If the erratum was not hit, simply resume the guest and let it fault * again. While awful, e.g. the vCPU may get stuck in an infinite loop * if the fault is at CPL=0, it's the lesser of all evils. Exiting to * userspace will kill the guest, and letting the emulator read garbage * will yield random behavior and potentially corrupt the guest. * * Simply resuming the guest is technically not a violation of the SEV * architecture. AMD's APM states that all code fetches and page table * accesses for SEV guest are encrypted, regardless of the C-Bit. The * APM also states that encrypted accesses to MMIO are "ignored", but * doesn't explicitly define "ignored", i.e. doing nothing and letting * the guest spin is technically "ignoring" the access.
*/ return X86EMUL_RETRY_INSTR;
}
/* * The default MMIO mask is a single bit (excluding the present bit), * which could conflict with the memory encryption bit. Check for * memory encryption support and override the default MMIO mask if * memory encryption is enabled.
*/ static __init void svm_adjust_mmio_mask(void)
{ unsignedint enc_bit, mask_bit;
u64 msr, mask;
/* If there is no memory encryption support, use existing mask */ if (cpuid_eax(0x80000000) < 0x8000001f) return;
/* If memory encryption is not enabled, use existing mask */
rdmsrq(MSR_AMD64_SYSCFG, msr); if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) return;
/* Increment the mask bit if it is the same as the encryption bit */ if (enc_bit == mask_bit)
mask_bit++;
/* * If the mask bit location is below 52, then some bits above the * physical addressing limit will always be reserved, so use the * rsvd_bits() function to generate the mask. This mask, along with * the present bit, will be used to generate a page fault with * PFER.RSV = 1. * * If the mask bit location is 52 (or above), then clear the mask.
*/
mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
/* CPUID 0x80000001 and 0x8000000A (SVM features) */ if (nested) {
kvm_cpu_cap_set(X86_FEATURE_SVM);
kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);
/* * KVM currently flushes TLBs on *every* nested SVM transition, * and so for all intents and purposes KVM supports flushing by * ASID, i.e. KVM is guaranteed to honor every L1 ASID flush.
*/
kvm_cpu_cap_set(X86_FEATURE_FLUSHBYASID);
if (nrips)
kvm_cpu_cap_set(X86_FEATURE_NRIPS);
if (npt_enabled)
kvm_cpu_cap_set(X86_FEATURE_NPT);
if (tsc_scaling)
kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);
if (vls)
kvm_cpu_cap_set(X86_FEATURE_V_VMSAVE_VMLOAD); if (lbrv)
kvm_cpu_cap_set(X86_FEATURE_LBRV);
if (boot_cpu_has(X86_FEATURE_PAUSEFILTER))
kvm_cpu_cap_set(X86_FEATURE_PAUSEFILTER);
if (boot_cpu_has(X86_FEATURE_PFTHRESHOLD))
kvm_cpu_cap_set(X86_FEATURE_PFTHRESHOLD);
if (vgif)
kvm_cpu_cap_set(X86_FEATURE_VGIF);
if (vnmi)
kvm_cpu_cap_set(X86_FEATURE_VNMI);
/* Nested VM can receive #VMEXIT instead of triggering #GP */
kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
}
if (cpu_feature_enabled(X86_FEATURE_BUS_LOCK_THRESHOLD))
kvm_caps.has_bus_lock_exit = true;
/* CPUID 0x80000008 */ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
boot_cpu_has(X86_FEATURE_AMD_SSBD))
kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
if (enable_pmu) { /* * Enumerate support for PERFCTR_CORE if and only if KVM has * access to enough counters to virtualize "core" support, * otherwise limit vPMU support to the legacy number of counters.
*/ if (kvm_pmu_cap.num_counters_gp < AMD64_NUM_COUNTERS_CORE)
kvm_pmu_cap.num_counters_gp = min(AMD64_NUM_COUNTERS,
kvm_pmu_cap.num_counters_gp); else
kvm_cpu_cap_check_and_set(X86_FEATURE_PERFCTR_CORE);
if (kvm_pmu_cap.version != 2 ||
!kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE))
kvm_cpu_cap_clear(X86_FEATURE_PERFMON_V2);
}
/* Don't advertise Bus Lock Detect to guest if SVM support is absent */
kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
}
static __init int svm_hardware_setup(void)
{ void *iopm_va; int cpu, r;
/* * NX is required for shadow paging and for NPT if the NX huge pages * mitigation is enabled.
*/ if (!boot_cpu_has(X86_FEATURE_NX)) {
pr_err_ratelimited("NX (Execute Disable) not supported\n"); return -EOPNOTSUPP;
}
kvm_enable_efer_bits(EFER_NX);
if (boot_cpu_has(X86_FEATURE_AUTOIBRS))
kvm_enable_efer_bits(EFER_AUTOIBRS);
/* Check for pause filtering support */ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
pause_filter_count = 0;
pause_filter_thresh = 0;
} elseif (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
pause_filter_thresh = 0;
}
if (nested) {
pr_info("Nested Virtualization enabled\n");
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
r = nested_svm_init_msrpm_merge_offsets(); if (r) return r;
}
/* * KVM's MMU doesn't support using 2-level paging for itself, and thus * NPT isn't supported if the host is using 2-level paging since host * CR4 is unchanged on VMRUN.
*/ if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
npt_enabled = false;
if (!boot_cpu_has(X86_FEATURE_NPT))
npt_enabled = false;
/* Force VM NPT level equal to the host's paging level */
kvm_configure_mmu(npt_enabled, get_npt_level(),
get_npt_level(), PG_LEVEL_1G);
pr_info("Nested Paging %s\n", str_enabled_disabled(npt_enabled));
/* Setup shadow_me_value and shadow_me_mask */
kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask);
svm_adjust_mmio_mask();
nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
if (lbrv) { if (!boot_cpu_has(X86_FEATURE_LBRV))
lbrv = false; else
pr_info("LBR virtualization supported\n");
}
iopm_va = svm_alloc_permissions_map(IOPM_SIZE, GFP_KERNEL); if (!iopm_va) return -ENOMEM;
iopm_base = __sme_set(__pa(iopm_va));
/* * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which * may be modified by svm_adjust_mmio_mask()), as well as nrips.
*/
sev_hardware_setup();
svm_hv_hardware_setup();
for_each_possible_cpu(cpu) {
r = svm_cpu_init(cpu); if (r) goto err;
}
if (!vnmi) {
svm_x86_ops.is_vnmi_pending = NULL;
svm_x86_ops.set_vnmi_pending = NULL;
}
if (!enable_pmu)
pr_info("PMU virtualization is disabled\n");
svm_set_cpu_caps();
/* * It seems that on AMD processors PTE's accessed bit is * being set by the CPU hardware before the NPF vmexit. * This is not expected behaviour and our tests fail because * of it. * A workaround here is to disable support for * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled. * In this case userspace can know if there is support using * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle * it * If future AMD CPU models change the behaviour described above, * this variable can be changed accordingly
*/
allow_smaller_maxphyaddr = !npt_enabled;
r = kvm_x86_vendor_init(&svm_init_ops); if (r) return r;
/* * Common KVM initialization _must_ come last, after this, /dev/kvm is * exposed to userspace!
*/
r = kvm_init(sizeof(struct vcpu_svm), __alignof__(struct vcpu_svm),
THIS_MODULE); if (r) goto err_kvm_init;
¤ Diese beiden folgenden Angebotsgruppen bietet das Unternehmen0.75Angebot
(Wie Sie bei der Firma Beratungs- und Dienstleistungen beauftragen können 2026-04-26)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.