/*
 * Set osvw_len to higher value when updated Revision Guides
 * are published and we know what the new status bits are
 */
static uint64_t osvw_len = 4, osvw_status;

/*
 * Per-CPU copy of the current TSC ratio — presumably caches the most
 * recently programmed TSC scaling value to avoid redundant MSR writes;
 * TODO(review): confirm against the users of current_tsc_ratio.
 */
static DEFINE_PER_CPU(u64, current_tsc_ratio);
/*
 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
 * pause_filter_count: On processors that support Pause filtering (indicated
 *	by CPUID Fn8000_000A_EDX), the VMCB provides a 16 bit pause filter
 *	count value. On VMRUN this value is loaded into an internal counter.
 *	Each time a pause instruction is executed, this counter is decremented
 *	until it reaches zero at which time a #VMEXIT is generated if pause
 *	intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause
 *	Intercept Filtering for more details.
 *	This also indicates if ple logic is enabled.
 *
 * pause_filter_thresh: In addition, some processor families support advanced
 *	pause filtering (indicated by CPUID Fn8000_000A_EDX) upper bound on
 *	the amount of time a guest is allowed to execute in a pause loop.
 *	In this mode, a 16-bit pause filter threshold field is added in the
 *	VMCB. The threshold value is a cycle count that is used to reset the
 *	pause counter. As with simple pause filtering, VMRUN loads the pause
 *	count value from VMCB into an internal counter. Then, on each pause
 *	instruction the hardware checks the elapsed number of cycles since
 *	the most recent pause instruction against the pause filter threshold.
 *	If the elapsed cycle count is greater than the pause filter threshold,
 *	then the internal pause count is reloaded from the VMCB and execution
 *	continues. If the elapsed cycle count is less than the pause filter
 *	threshold, then the internal pause count is decremented. If the count
 *	value is less than zero and PAUSE intercept is enabled, a #VMEXIT is
 *	triggered. If advanced pause filtering is supported and pause filter
 *	threshold field is set to zero, the filter will operate in the simpler,
 *	count only mode.
 */
/* Default resets per-vcpu window every exit to pause_filter_count. */
static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
module_param(pause_filter_count_shrink, ushort, 0444);

/* Default is to compute the maximum so we can never overflow. */
static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
module_param(pause_filter_count_max, ushort, 0444);
/*
 * Use nested page tables by default. Note, NPT may get forced off by
 * svm_hardware_setup() if it's unsupported by hardware or the host kernel.
 */
bool npt_enabled = true;
module_param_named(npt, npt_enabled, bool, 0444);
/* * enable / disable AVIC. Because the defaults differ for APICv * support between VMX and SVM we cannot use module_param_named.
*/ staticbool avic;
module_param(avic, bool, 0444);
module_param(enable_ipiv, bool, 0444);
/*
 * Only MSR_TSC_AUX is switched via the user return hook. EFER is switched via
 * the VMCB, and the SYSCALL/SYSENTER MSRs are handled by VMLOAD/VMSAVE.
 *
 * RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to
 * defer the restoration of TSC_AUX until the CPU returns to userspace.
 *
 * A negative value means no user-return slot is in use (callers test
 * tsc_aux_uret_slot >= 0 before calling kvm_set_user_return_msr()).
 */
int tsc_aux_uret_slot __ro_after_init = -1;
if (!npt_enabled) { /* Shadow paging assumes NX to be available. */
efer |= EFER_NX;
if (!(efer & EFER_LMA))
efer &= ~EFER_LME;
}
if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) { if (!(efer & EFER_SVME)) {
svm_leave_nested(vcpu);
svm_set_gif(svm, true); /* #GP intercept is still needed for vmware backdoor */ if (!enable_vmware_backdoor)
clr_exception_intercept(svm, GP_VECTOR);
/* * Free the nested guest state, unless we are in SMM. * In this case we will return to the nested guest * as soon as we leave SMM.
*/ if (!is_smm(vcpu))
svm_free_nested(svm);
} else { int ret = svm_allocate_nested(svm);
if (ret) {
vcpu->arch.efer = old_efer; return ret;
}
/* * Never intercept #GP for SEV guests, KVM can't * decrypt guest memory to workaround the erratum.
*/ if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm))
set_exception_intercept(svm, GP_VECTOR);
}
}
/* * SEV-ES does not expose the next RIP. The RIP update is controlled by * the type of exit and the #VC handler in the guest.
*/ if (sev_es_guest(vcpu->kvm)) goto done;
/* * Due to architectural shortcomings, the CPU doesn't always provide * NextRIP, e.g. if KVM intercepted an exception that occurred while * the CPU was vectoring an INTO/INT3 in the guest. Temporarily skip * the instruction even if NextRIP is supported to acquire the next * RIP so that it can be shoved into the NextRIP field, otherwise * hardware will fail to advance guest RIP during event injection. * Drop the exception/interrupt if emulation fails and effectively * retry the instruction, it's the least awful option. If NRIPS is * in use, the skip must not commit any side effects such as clearing * the interrupt shadow or RFLAGS.RF.
*/ if (!__svm_skip_emulated_instruction(vcpu, !nrips)) return -EIO;
rip = kvm_rip_read(vcpu);
/* * Save the injection information, even when using next_rip, as the * VMCB's next_rip will be lost (cleared on VM-Exit) if the injection * doesn't complete due to a VM-Exit occurring while the CPU is * vectoring the event. Decoding the instruction isn't guaranteed to * work as there may be no backing instruction, e.g. if the event is * being injected by L1 for L2, or if the guest is patching INT3 into * a different instruction.
*/
svm->soft_int_injected = true;
svm->soft_int_csbase = svm->vmcb->save.cs.base;
svm->soft_int_old_rip = old_rip;
svm->soft_int_next_rip = rip;
if (nrips)
kvm_rip_write(vcpu, old_rip);
if (static_cpu_has(X86_FEATURE_NRIPS))
svm->vmcb->control.next_rip = rip;
if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH)) return;
/* Use _safe variants to not break nested virtualization */ if (native_read_msr_safe(MSR_AMD64_DC_CFG, &val)) return;
val |= (1ULL << 47);
native_write_msr_safe(MSR_AMD64_DC_CFG, val);
erratum_383_found = true;
}
/*
 * Initialize the guest-visible OS Visible Workaround (OSVW) state for a
 * vCPU from the host-wide osvw_len/osvw_status snapshot.
 */
static void svm_init_osvw(struct kvm_vcpu *vcpu)
{
	/*
	 * Guests should see errata 400 and 415 as fixed (assuming that
	 * HLT and IO instructions are intercepted).
	 */
	vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
	/* Clear bits 1 and 2 (errata 400 and 415) in the reported status. */
	vcpu->arch.osvw.status = osvw_status & ~(6ULL);

	/*
	 * By increasing VCPU's osvw.length to 3 we are telling the guest that
	 * all osvw.status bits inside that length, including bit 0 (which is
	 * reserved for erratum 298), are valid. However, if host processor's
	 * osvw_len is 0 then osvw_status[0] carries no information. We need to
	 * be conservative here and therefore we tell the guest that erratum 298
	 * is present (because we really don't know).
	 */
	if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
		vcpu->arch.osvw.status |= 1;
}
staticbool __kvm_is_svm_supported(void)
{ int cpu = smp_processor_id(); struct cpuinfo_x86 *c = &cpu_data(cpu);
if (c->x86_vendor != X86_VENDOR_AMD &&
c->x86_vendor != X86_VENDOR_HYGON) {
pr_err("CPU %d isn't AMD or Hygon\n", cpu); returnfalse;
}
if (!cpu_has(c, X86_FEATURE_SVM)) {
pr_err("SVM not supported by CPU %d\n", cpu); returnfalse;
}
if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
pr_info("KVM is unsupported when running as an SEV guest\n"); returnfalse;
}
/*
 * Tear down SVM support on the current CPU: restore the default TSC
 * scaling ratio (if scaling is in use), clear EFER.SVME, and hand the
 * PMU back to the host.
 */
static void svm_disable_virtualization_cpu(void)
{
	/* Make sure we clean up behind us */
	if (tsc_scaling)
		__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);

	kvm_cpu_svm_disable();

	amd_pmu_disable_virt();
}
staticint svm_enable_virtualization_cpu(void)
{
struct svm_cpu_data *sd;
uint64_t efer; int me = raw_smp_processor_id();
rdmsrq(MSR_EFER, efer); if (efer & EFER_SVME) return -EBUSY;
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { /* * Set the default value, even if we don't use TSC scaling * to avoid having stale value in the msr
*/
__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
}
/* * Get OSVW bits. * * Note that it is possible to have a system with mixed processor * revisions and therefore different OSVW bits. If bits are not the same * on different processors then choose the worst case (i.e. if erratum * is present on one processor and not on another then assume that the * erratum is present everywhere).
*/ if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
u64 len, status = 0; int err;
err = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &len); if (!err)
err = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, &status);
staticbool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
{ /* * For non-nested case: * If the L01 MSR bitmap does not intercept the MSR, then we need to * save it. * * For nested case: * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it.
*/ void *msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm :
to_svm(vcpu)->msrpm;
/* * Set all bits in the permissions map so that all MSR and I/O accesses * are intercepted by default.
*/
pm = page_address(pages);
memset(pm, 0xff, PAGE_SIZE * (1 << order));
/* * Note! Always intercept LVTT, as TSC-deadline timer mode * isn't virtualized by hardware, and the CPU will generate a * #GP instead of a #VMEXIT.
*/
X2APIC_MSR(APIC_LVTTHMR),
X2APIC_MSR(APIC_LVTPC),
X2APIC_MSR(APIC_LVT0),
X2APIC_MSR(APIC_LVT1),
X2APIC_MSR(APIC_LVTERR),
X2APIC_MSR(APIC_TMICT),
X2APIC_MSR(APIC_TMCCT),
X2APIC_MSR(APIC_TDCR),
}; int i;
if (intercept == svm->x2avic_msrs_intercepted) return;
if (!x2avic_enabled) return;
for (i = 0; i < ARRAY_SIZE(x2avic_passthrough_msrs); i++)
svm_set_intercept_for_msr(&svm->vcpu, x2avic_passthrough_msrs[i],
MSR_TYPE_RW, intercept);
if (cpu_feature_enabled(X86_FEATURE_IBPB))
svm_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
!guest_has_pred_cmd_msr(vcpu));
if (cpu_feature_enabled(X86_FEATURE_FLUSH_L1D))
svm_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
!guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
/* * Disable interception of SPEC_CTRL if KVM doesn't need to manually * context switch the MSR (SPEC_CTRL is virtualized by the CPU), or if * the guest has a non-zero SPEC_CTRL value, i.e. is likely actively * using SPEC_CTRL.
*/ if (cpu_feature_enabled(X86_FEATURE_V_SPEC_CTRL))
svm_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW,
!guest_has_spec_ctrl_msr(vcpu)); else
svm_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW,
!svm->spec_ctrl);
/* * Intercept SYSENTER_EIP and SYSENTER_ESP when emulating an Intel CPU, * as AMD hardware only store 32 bits, whereas Intel CPUs track 64 bits.
*/
svm_set_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW,
guest_cpuid_is_intel_compatible(vcpu));
svm_set_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW,
guest_cpuid_is_intel_compatible(vcpu));
if (kvm_aperfmperf_in_guest(vcpu->kvm)) {
svm_disable_intercept_for_msr(vcpu, MSR_IA32_APERF, MSR_TYPE_R);
svm_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R);
}
if (sev_es_guest(vcpu->kvm))
sev_es_recalc_msr_intercepts(vcpu);
/* * x2APIC intercepts are modified on-demand and cannot be filtered by * userspace.
*/
}
if (enable_lbrv && !current_enable_lbrv)
__svm_enable_lbrv(vcpu); elseif (!enable_lbrv && current_enable_lbrv)
__svm_disable_lbrv(vcpu);
/* * During nested transitions, it is possible that the current VMCB has * LBR_CTL set, but the previous LBR_CTL had it cleared (or vice versa). * In this case, even though LBR_CTL does not need an update, intercepts * do, so always recalculate the intercepts here.
*/
svm_recalc_lbr_msr_intercepts(vcpu);
}
if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { /* Clear our flags if they were not set by the guest */ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
svm->vmcb->save.rflags &= ~X86_EFLAGS_TF; if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
}
}
/*
 * NOTE(review): despite the name, the visible body only reconfigures the
 * INVPCID/RDTSCP/VMLOAD/VMSAVE intercepts and never touches
 * pause_filter_count; the locals 'control' and 'old' are unused. This
 * looks like intercept-recalculation code spliced under the wrong
 * function name — verify against the upstream implementation.
 */
staticvoid grow_ple_window(struct kvm_vcpu *vcpu)
{ struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; int old = control->pause_filter_count;
/*
 * Intercept INVPCID if shadow paging is enabled to sync/free shadow
 * roots, or if INVPCID is disabled in the guest to inject #UD.
 */
if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) { if (!npt_enabled ||
!guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_INVPCID))
svm_set_intercept(svm, INTERCEPT_INVPCID); else
svm_clr_intercept(svm, INTERCEPT_INVPCID);
}
/* Pass RDTSCP through only when the guest is allowed to use it. */
if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) { if (guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP))
svm_clr_intercept(svm, INTERCEPT_RDTSCP); else
svm_set_intercept(svm, INTERCEPT_RDTSCP);
}
/* Intel-compatible guests must #VMEXIT on the AMD-only VMLOAD/VMSAVE. */
if (guest_cpuid_is_intel_compatible(vcpu)) {
svm_set_intercept(svm, INTERCEPT_VMLOAD);
svm_set_intercept(svm, INTERCEPT_VMSAVE);
svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
} else {
/*
 * If hardware supports Virtual VMLOAD VMSAVE then enable it
 * in VMCB and clear intercepts to avoid #VMEXIT.
 */
if (vls) {
svm_clr_intercept(svm, INTERCEPT_VMLOAD);
svm_clr_intercept(svm, INTERCEPT_VMSAVE);
svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
}
}
}
set_exception_intercept(svm, PF_VECTOR);
set_exception_intercept(svm, UD_VECTOR);
set_exception_intercept(svm, MC_VECTOR);
set_exception_intercept(svm, AC_VECTOR);
set_exception_intercept(svm, DB_VECTOR); /* * Guest access to VMware backdoor ports could legitimately * trigger #GP because of TSS I/O permission bitmap. * We intercept those #GP and allow access to them anyway * as VMware does.
*/ if (enable_vmware_backdoor)
set_exception_intercept(svm, GP_VECTOR);
err = -ENOMEM;
vmcb01_page = snp_safe_alloc_page(); if (!vmcb01_page) goto out;
if (sev_es_guest(vcpu->kvm)) { /* * SEV-ES guests require a separate VMSA page used to contain * the encrypted register state of the guest.
*/
vmsa_page = snp_safe_alloc_page(); if (!vmsa_page) goto error_free_vmcb_page;
}
err = avic_init_vcpu(svm); if (err) goto error_free_vmsa_page;
staticvoid svm_srso_vm_destroy(void)
{ if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) return;
if (atomic_dec_return(&srso_nr_vms)) return;
guard(spinlock)(&srso_lock);
/* * Verify a new VM didn't come along, acquire the lock, and increment * the count before this task acquired the lock.
*/ if (atomic_read(&srso_nr_vms)) return;
staticvoid svm_srso_vm_init(void)
{ if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) return;
/* * Acquire the lock on 0 => 1 transitions to ensure a potential 1 => 0 * transition, i.e. destroying the last VM, is fully complete, e.g. so * that a delayed IPI doesn't clear BP_SPEC_REDUCE after a vCPU runs.
*/ if (atomic_inc_not_zero(&srso_nr_vms)) return;
if (sev_es_guest(vcpu->kvm))
sev_es_unmap_ghcb(svm);
if (svm->guest_state_loaded) return;
/* * Save additional host state that will be restored on VMEXIT (sev-es) * or subsequent vmload of host save area.
*/
vmsave(sd->save_area_pa); if (sev_es_guest(vcpu->kvm))
sev_es_prepare_switch_to_guest(svm, sev_es_host_save_area(sd));
if (tsc_scaling)
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
/* * TSC_AUX is always virtualized (context switched by hardware) for * SEV-ES guests when the feature is available. For non-SEV-ES guests, * context switch TSC_AUX via the user_return MSR infrastructure (not * all CPUs support TSC_AUX virtualization).
*/ if (likely(tsc_aux_uret_slot >= 0) &&
(!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm)))
kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
if (svm->nmi_singlestep) { /* Hide our flags if they were not set by the guest */ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
rflags &= ~X86_EFLAGS_TF; if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
rflags &= ~X86_EFLAGS_RF;
} return rflags;
}
/* * Any change of EFLAGS.VM is accompanied by a reload of SS * (caused by either a task switch or an inter-privilege IRET), * so we do not need to update the CPL here.
*/
to_svm(vcpu)->vmcb->save.rflags = rflags;
}
switch (reg) { case VCPU_EXREG_PDPTR: /* * When !npt_enabled, mmu->pdptrs[] is already available since * it is always updated per SDM when moving to CRs.
*/ if (npt_enabled)
load_pdptrs(vcpu, kvm_read_cr3(vcpu)); break; default:
KVM_BUG_ON(1, vcpu->kvm);
}
}
/* * The following fields are ignored when AVIC is enabled
*/
WARN_ON(kvm_vcpu_apicv_activated(&svm->vcpu));
svm_set_intercept(svm, INTERCEPT_VINTR);
/* * Recalculating intercepts may have cleared the VINTR intercept. If * V_INTR_MASKING is enabled in vmcb12, then the effective RFLAGS.IF * for L1 physical interrupts is L1's RFLAGS.IF at the time of VMRUN. * Requesting an interrupt window if save.RFLAGS.IF=0 is pointless as * interrupts will never be unblocked while L2 is running.
*/ if (!svm_is_intercept(svm, INTERCEPT_VINTR)) return;
/* * This is just a dummy VINTR to actually cause a vmexit to happen. * Actual injection of virtual interrupts happens through EVENTINJ.
*/
control = &svm->vmcb->control;
control->int_vector = 0x0;
control->int_ctl &= ~V_INTR_PRIO_MASK;
control->int_ctl |= V_IRQ_MASK |
((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
}
/* Drop int_ctl fields related to VINTR injection. */
svm->vmcb->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK; if (is_guest_mode(&svm->vcpu)) {
svm->vmcb01.ptr->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
switch (seg) { case VCPU_SREG_CS: return &save->cs; case VCPU_SREG_DS: return &save->ds; case VCPU_SREG_ES: return &save->es; case VCPU_SREG_FS: return &save01->fs; case VCPU_SREG_GS: return &save01->gs; case VCPU_SREG_SS: return &save->ss; case VCPU_SREG_TR: return &save01->tr; case VCPU_SREG_LDTR: return &save01->ldtr;
}
BUG(); return NULL;
}
/* * AMD CPUs circa 2014 track the G bit for all segments except CS. * However, the SVM spec states that the G bit is not observed by the * CPU, and some VMware virtual CPUs drop the G bit for all segments. * So let's synthesize a legal G bit for all segments, this helps * running KVM nested. It also helps cross-vendor migration, because * Intel's vmentry has a check on the 'G' bit.
*/
var->g = s->limit > 0xfffff;
/* * AMD's VMCB does not have an explicit unusable field, so emulate it * for cross vendor migration purposes by "not present"
*/
var->unusable = !var->present;
switch (seg) { case VCPU_SREG_TR: /* * Work around a bug where the busy flag in the tr selector * isn't exposed
*/
var->type |= 0x2; break; case VCPU_SREG_DS: case VCPU_SREG_ES: case VCPU_SREG_FS: case VCPU_SREG_GS: /* * The accessed bit must always be set in the segment * descriptor cache, although it can be cleared in the * descriptor, the cached bit always remains at 1. Since * Intel has a check on this, set it here to support * cross-vendor migration.
*/ if (!var->unusable)
var->type |= 0x1; break; case VCPU_SREG_SS: /* * On AMD CPUs sometimes the DB bit in the segment * descriptor is left as 1, although the whole segment has * been made unusable. Clear it here to pass an Intel VMX * entry check when cross vendor migrating.
*/ if (var->unusable)
var->db = 0; /* This is symmetric with svm_set_segment() */
var->dpl = to_svm(vcpu)->vmcb->save.cpl; break;
}
}
/* * For guests that don't set guest_state_protected, the cr3 update is * handled via kvm_mmu_load() while entering the guest. For guests * that do (SEV-ES/SEV-SNP), the cr3 update needs to be written to * VMCB save area now, since the save area will become the initial * contents of the VMSA, and future VMCB save area updates won't be * seen.
*/ if (sev_es_guest(vcpu->kvm)) {
svm->vmcb->save.cr3 = cr3;
vmcb_mark_dirty(svm->vmcb, VMCB_CR);
}
}
if (!npt_enabled) {
hcr0 |= X86_CR0_PG | X86_CR0_WP; if (old_paging != is_paging(vcpu))
svm_set_cr4(vcpu, kvm_read_cr4(vcpu));
}
/* * re-enable caching here because the QEMU bios * does not do it - this results in some delay at * reboot
*/ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
hcr0 &= ~(X86_CR0_CD | X86_CR0_NW);
/* * This is always accurate, except if SYSRET returned to a segment * with SS.DPL != 3. Intel does not have this quirk, and always * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it * would entail passing the CPL to userspace and back.
*/ if (seg == VCPU_SREG_SS) /* This is symmetric with svm_get_segment() */
svm->vmcb->save.cpl = (var->dpl & 3);
if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm))) return;
get_debugreg(vcpu->arch.db[0], 0);
get_debugreg(vcpu->arch.db[1], 1);
get_debugreg(vcpu->arch.db[2], 2);
get_debugreg(vcpu->arch.db[3], 3); /* * We cannot reset svm->vmcb->save.dr6 to DR6_ACTIVE_LOW here, * because db_interception might need it. We can do it before vmentry.
*/
vcpu->arch.dr6 = svm->vmcb->save.dr6;
vcpu->arch.dr7 = svm->vmcb->save.dr7;
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
set_dr_intercepts(svm);
}
/* * WARN if hardware generates a fault with an error code that collides * with KVM-defined sythentic flags. Clear the flags and continue on, * i.e. don't terminate the VM, as KVM can't possibly be relying on a * flag that KVM doesn't know about.
*/ if (WARN_ON_ONCE(error_code & PFERR_SYNTHETIC_MASK))
error_code &= ~PFERR_SYNTHETIC_MASK;
if (sev_snp_guest(vcpu->kvm) && (error_code & PFERR_GUEST_ENC_MASK))
error_code |= PFERR_PRIVATE_ACCESS;
/* * VMCB is undefined after a SHUTDOWN intercept. INIT the vCPU to put * the VMCB in a known good state. Unfortuately, KVM doesn't have * KVM_MP_STATE_SHUTDOWN and can't add it without potentially breaking * userspace. At a platform view, INIT is acceptable behavior as * there exist bare metal platforms that automatically INIT the CPU * in response to shutdown. * * The VM save area for SEV-ES guests has already been encrypted so it * cannot be reinitialized, i.e. synthesizing INIT is futile.
*/ if (!sev_es_guest(vcpu->kvm)) {
clear_page(svm->vmcb); #ifdef CONFIG_KVM_SMM if (is_smm(vcpu))
kvm_smm_changed(vcpu, false); #endif
kvm_vcpu_reset(vcpu, true);
}
if (is_guest_mode(vcpu)) { /* Returns '1' or -errno on failure, '0' on success. */
ret = nested_svm_simple_vmexit(svm, guest_mode_exit_codes[opcode]); if (ret) return ret; return 1;
} return svm_instr_handlers[opcode](vcpu);
}
/* * #GP handling code. Note that #GP can be triggered under the following two * cases: * 1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on * some AMD CPUs when EAX of these instructions are in the reserved memory * regions (e.g. SMM memory on host). * 2) VMware backdoor
*/ staticint gp_interception(struct kvm_vcpu *vcpu)
{ struct vcpu_svm *svm = to_svm(vcpu);
u32 error_code = svm->vmcb->control.exit_info_1; int opcode;
/* Both #GP cases have zero error_code */ if (error_code) goto reinject;
/* Decode the instruction for usage later */ if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK) goto reinject;
opcode = svm_instr_opcode(vcpu);
if (opcode == NONE_SVM_INSTR) { if (!enable_vmware_backdoor) goto reinject;
/* * VMware backdoor emulation on #GP interception only handles * IN{S}, OUT{S}, and RDPMC.
*/ if (!is_guest_mode(vcpu)) return kvm_emulate_instruction(vcpu,
EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
} else { /* All SVM instructions expect page aligned RAX */ if (svm->vmcb->save.rax & ~PAGE_MASK) goto reinject;
/*
 * Set or clear the virtual Global Interrupt Flag for this vCPU.
 *
 * NOTE(review): as written, the CLGI comment and its !vgif check below
 * execute inside the value==true branch, directly contradicting the
 * comment's own text. An "} else {" (and possibly gif enable/disable
 * calls) appear to have been lost from this function — verify the
 * control flow against the upstream implementation before relying on it.
 */
void svm_set_gif(struct vcpu_svm *svm, bool value)
{ if (value) {
/*
 * If VGIF is enabled, the STGI intercept is only added to
 * detect the opening of the SMI/NMI window; remove it now.
 * Likewise, clear the VINTR intercept, we will set it
 * again while processing KVM_REQ_EVENT if needed.
 */
if (vgif)
svm_clr_intercept(svm, INTERCEPT_STGI); if (svm_is_intercept(svm, INTERCEPT_VINTR))
svm_clear_vintr(svm);
/*
 * After a CLGI no interrupts should come. But if vGIF is
 * in use, we still rely on the VINTR intercept (rather than
 * STGI) to detect an open interrupt window.
 */
if (!vgif)
svm_clear_vintr(svm);
}
}
/*
 * Handle a #VMEXIT for the STGI instruction: verify nested-SVM
 * permissions, skip the instruction, and set the virtual GIF.
 */
static int stgi_interception(struct kvm_vcpu *vcpu)
{
	int ret;

	if (nested_svm_check_permissions(vcpu))
		return 1;

	/* Skip first so the injected #GP/#UD (if any) wins over GIF update. */
	ret = kvm_skip_emulated_instruction(vcpu);
	svm_set_gif(to_svm(vcpu), true);
	return ret;
}
/*
 * Handle a #VMEXIT for the CLGI instruction: verify nested-SVM
 * permissions, skip the instruction, and clear the virtual GIF.
 */
static int clgi_interception(struct kvm_vcpu *vcpu)
{
	int ret;

	if (nested_svm_check_permissions(vcpu))
		return 1;

	ret = kvm_skip_emulated_instruction(vcpu);
	svm_set_gif(to_svm(vcpu), false);
	return ret;
}
/*
 * NOTE(review): the following German text is extraneous website
 * boilerplate that leaked into the file; it is kept verbatim inside a
 * comment so the translation unit remains well-formed. (English gloss:
 * "The information on this website has been carefully compiled to the
 * best of our knowledge. However, no guarantee is given as to the
 * completeness, correctness, or quality of the information provided.
 * Note: the colored syntax highlighting and the measurement are still
 * experimental.")
 *
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereit gestellten Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */