/* * Note, kvm_caps fields should *never* have default values, all fields must be * recomputed from scratch during vendor module load, e.g. to account for a * vendor module being reloaded with different module parameters.
*/ struct kvm_caps kvm_caps __read_mostly;
EXPORT_SYMBOL_GPL(kvm_caps);
/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, 0644);
/* * Restoring the host value for MSRs that are only consumed when running in * usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU * returns to userspace, i.e. the kernel can run with the guest's value.
*/ #define KVM_MAX_NR_USER_RETURN_MSRS 16
/* * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features) track * the set of MSRs that KVM exposes to userspace through KVM_GET_MSRS, * KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. msrs_to_save holds MSRs that * require host support, i.e. should be probed via RDMSR. emulated_msrs holds * MSRs that KVM emulates without strictly requiring host support. * msr_based_features holds MSRs that enumerate features, i.e. are effectively * CPUID leafs. Note, msr_based_features isn't mutually exclusive with * msrs_to_save and emulated_msrs.
*/
/* This part of MSRs should match KVM_MAX_NR_AMD_GP_COUNTERS. */
MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
/* * KVM always supports the "true" VMX control MSRs, even if the host * does not. The VMX MSRs as a whole are considered "emulated" as KVM * doesn't strictly require them to exist in the host (ignoring that * KVM would refuse to load in the first place if the core set of MSRs * aren't supported).
*/
MSR_IA32_VMX_BASIC,
MSR_IA32_VMX_TRUE_PINBASED_CTLS,
MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
MSR_IA32_VMX_TRUE_EXIT_CTLS,
MSR_IA32_VMX_TRUE_ENTRY_CTLS,
MSR_IA32_VMX_MISC,
MSR_IA32_VMX_CR0_FIXED0,
MSR_IA32_VMX_CR4_FIXED0,
MSR_IA32_VMX_VMCS_ENUM,
MSR_IA32_VMX_PROCBASED_CTLS2,
MSR_IA32_VMX_EPT_VPID_CAP,
MSR_IA32_VMX_VMFUNC,
/* * List of MSRs that control the existence of MSR-based features, i.e. MSRs * that are effectively CPUID leafs. VMX MSRs are also included in the set of * feature MSRs, but are handled separately to allow expedited lookups.
*/ staticconst u32 msr_based_features_all_except_vmx[] = {
MSR_AMD64_DE_CFG,
MSR_IA32_UCODE_REV,
MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES,
MSR_PLATFORM_INFO,
};
/* * All feature MSRs except uCode revID, which tracks the currently loaded uCode * patch, are immutable once the vCPU model is defined.
*/ staticbool kvm_is_immutable_feature_msr(u32 msr)
{ int i;
if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR) returntrue;
for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) { if (msr == msr_based_features_all_except_vmx[i]) return msr != MSR_IA32_UCODE_REV;
}
/* * Zero the data on read failures to avoid leaking stack data to the * guest and/or userspace, e.g. if the failure is ignored below.
*/
ret = msr_access_fn(vcpu, msr, data, host_initiated); if (ret && rw == MSR_TYPE_R)
*data = 0;
if (ret != KVM_MSR_RET_UNSUPPORTED) return ret;
/* * Userspace is allowed to read MSRs, and write '0' to MSRs, that KVM * advertises to userspace, even if an MSR isn't fully supported. * Simply check that @data is '0', which covers both the write '0' case * and all reads (in which case @data is zeroed on failure; see above).
*/ if (host_initiated && !*data && kvm_is_advertised_msr(msr)) return 0;
if (!ignore_msrs) {
kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
op, msr, *data); return ret;
}
if (report_ignored_msrs)
kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", op, msr, *data);
/* * Disabling irqs at this point since the following code could be * interrupted and executed through kvm_arch_disable_virtualization_cpu()
*/
local_irq_save(flags); if (msrs->registered) {
msrs->registered = false;
user_return_notifier_unregister(urn);
}
local_irq_restore(flags); for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
values = &msrs->values[slot]; if (values->host != values->curr) {
wrmsrq(kvm_uret_msrs_list[slot], values->host);
values->curr = values->host;
}
}
}
staticint kvm_probe_user_return_msr(u32 msr)
{
u64 val; int ret;
preempt_disable();
ret = rdmsrq_safe(msr, &val); if (ret) goto out;
ret = wrmsrq_safe(msr, val);
out:
preempt_enable(); return ret;
}
int kvm_add_user_return_msr(u32 msr)
{
BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);
if (msrs->registered)
kvm_on_user_return(&msrs->urn);
}
/* * Handle a fault on a hardware virtualization (VMX or SVM) instruction. * * Hardware virtualization extension instructions may fault if a reboot turns * off virtualization while processes are running. Usually after catching the * fault we just panic; during reboot instead the instruction is ignored.
*/
noinstr void kvm_spurious_fault(void)
{ /* Fault while not rebooting. We want the trace. */
BUG_ON(!kvm_rebooting);
}
EXPORT_SYMBOL_GPL(kvm_spurious_fault);
staticint exception_class(int vector)
{ switch (vector) { case PF_VECTOR: return EXCPT_PF; case DE_VECTOR: case TS_VECTOR: case NP_VECTOR: case SS_VECTOR: case GP_VECTOR: return EXCPT_CONTRIBUTORY; default: break;
} return EXCPT_BENIGN;
}
if (WARN_ON(vector > 31 || vector == NMI_VECTOR)) return EXCPT_INTERRUPT;
mask = 1 << vector;
/* * #DBs can be trap-like or fault-like, the caller must check other CPU * state, e.g. DR6, to determine whether a #DB is a trap or fault.
*/ if (mask & (1 << DB_VECTOR)) return EXCPT_DB;
switch (ex->vector) { case DB_VECTOR: /* * "Certain debug exceptions may clear bit 0-3. The * remaining contents of the DR6 register are never * cleared by the processor".
*/
vcpu->arch.dr6 &= ~DR_TRAP_BITS; /* * In order to reflect the #DB exception payload in guest * dr6, three components need to be considered: active low * bit, FIXED_1 bits and active high bits (e.g. DR6_BD, * DR6_BS and DR6_BT) * DR6_ACTIVE_LOW contains the FIXED_1 and active low bits. * In the target guest dr6: * FIXED_1 bits should always be set. * Active low bits should be cleared if 1-setting in payload. * Active high bits should be set if 1-setting in payload. * * Note, the payload is compatible with the pending debug * exceptions/exit qualification under VMX, that active_low bits * are active high in payload. * So they need to be flipped for DR6.
*/
vcpu->arch.dr6 |= DR6_ACTIVE_LOW;
vcpu->arch.dr6 |= ex->payload;
vcpu->arch.dr6 ^= ex->payload & DR6_ACTIVE_LOW;
/* * The #DB payload is defined as compatible with the 'pending * debug exceptions' field under VMX, not DR6. While bit 12 is * defined in the 'pending debug exceptions' field (enabled * breakpoint), it is reserved and must be zero in DR6.
*/
vcpu->arch.dr6 &= ~BIT(12); break; case PF_VECTOR:
vcpu->arch.cr2 = ex->payload; break;
}
/* * If the exception is destined for L2, morph it to a VM-Exit if L1 * wants to intercept the exception.
*/ if (is_guest_mode(vcpu) &&
kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, nr, error_code)) {
kvm_queue_exception_vmexit(vcpu, nr, has_error, error_code,
has_payload, payload); return;
}
/* to check exception */
prev_nr = vcpu->arch.exception.vector; if (prev_nr == DF_VECTOR) { /* triple fault -> shutdown */
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); return;
}
class1 = exception_class(prev_nr);
class2 = exception_class(nr); if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) ||
(class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { /* * Synthesize #DF. Clear the previously injected or pending * exception so as not to incorrectly trigger shutdown.
*/
vcpu->arch.exception.injected = false;
vcpu->arch.exception.pending = false;
kvm_queue_exception_e(vcpu, DF_VECTOR, 0);
} else { /* replace previous exception with a new one in a hope that instruction re-execution will regenerate lost
exception */ goto queue;
}
}
/* * On VM-Entry, an exception can be pending if and only if event * injection was blocked by nested_run_pending. In that case, however, * vcpu_enter_guest() requests an immediate exit, and the guest * shouldn't proceed far enough to need reinjection.
*/
WARN_ON_ONCE(kvm_is_exception_pending(vcpu));
/* * Do not check for interception when injecting an event for L2, as the * exception was checked for intercept when it was original queued, and * re-checking is incorrect if _L1_ injected the exception, in which * case it's exempt from interception.
*/
kvm_make_request(KVM_REQ_EVENT, vcpu);
/* * Async #PF in L2 is always forwarded to L1 as a VM-Exit regardless of * whether or not L1 wants to intercept "regular" #PF.
*/ if (is_guest_mode(vcpu) && fault->async_page_fault)
kvm_queue_exception_vmexit(vcpu, PF_VECTOR, true, fault->error_code, true, fault->address); else
kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
fault->address);
}
/* * Invalidate the TLB entry for the faulting address, if it exists, * else the access will fault indefinitely (and to emulate hardware).
*/ if ((fault->error_code & PFERR_PRESENT_MASK) &&
!(fault->error_code & PFERR_RSVD_MASK))
kvm_mmu_invalidate_addr(vcpu, fault_mmu, fault->address,
KVM_MMU_ROOT_CURRENT);
/* * Load the pae pdptrs. Return 1 if they are all valid, 0 otherwise.
*/ int load_pdptrs(struct kvm_vcpu *vcpu, unsignedlong cr3)
{ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
gpa_t real_gpa; int i; int ret;
u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
/* * If the MMU is nested, CR3 holds an L2 GPA and needs to be translated * to an L1 GPA.
*/
real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(pdpt_gfn),
PFERR_USER_MASK | PFERR_WRITE_MASK, NULL); if (real_gpa == INVALID_GPA) return 0;
/* Note the offset, PDPTRs are 32 byte aligned when using PAE paging. */
ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(real_gpa), pdpte,
cr3 & GENMASK(11, 5), sizeof(pdpte)); if (ret < 0) return 0;
for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { if ((pdpte[i] & PT_PRESENT_MASK) &&
(pdpte[i] & pdptr_rsvd_bits(vcpu))) { return 0;
}
}
/* * Marking VCPU_EXREG_PDPTR dirty doesn't work for !tdp_enabled. * Shadow page roots need to be reconstructed instead.
*/ if (!tdp_enabled && memcmp(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)))
kvm_mmu_free_roots(vcpu->kvm, mmu, KVM_MMU_ROOT_CURRENT);
if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) returnfalse;
if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) returnfalse;
return kvm_x86_call(is_valid_cr0)(vcpu, cr0);
}
void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsignedlong old_cr0, unsignedlong cr0)
{ /* * CR0.WP is incorporated into the MMU role, but only for non-nested, * indirect shadow MMUs. If paging is disabled, no updates are needed * as there are no permission bits to emulate. If TDP is enabled, the * MMU's metadata needs to be updated, e.g. so that emulating guest * translations does the right thing, but there's no need to unload the * root as CR0.WP doesn't affect SPTEs.
*/ if ((cr0 ^ old_cr0) == X86_CR0_WP) { if (!(cr0 & X86_CR0_PG)) return;
if (tdp_enabled) {
kvm_init_mmu(vcpu); return;
}
}
if ((cr0 ^ old_cr0) & X86_CR0_PG) {
kvm_clear_async_pf_completion_queue(vcpu);
kvm_async_pf_hash_reset(vcpu);
/* * Clearing CR0.PG is defined to flush the TLB from the guest's * perspective.
*/ if (!(cr0 & X86_CR0_PG))
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
}
if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS)
kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ if (index != XCR_XFEATURE_ENABLED_MASK) return 1; if (!(xcr0 & XFEATURE_MASK_FP)) return 1; if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE)) return 1;
/* * Do not allow the guest to set bits that we do not support * saving. However, xcr0 bit 0 is always set, even if the * emulated CPU does not support XSAVE (see kvm_vcpu_reset()).
*/
valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP; if (xcr0 & ~valid_bits) return 1;
if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
(!(xcr0 & XFEATURE_MASK_BNDCSR))) return 1;
if (xcr0 & XFEATURE_MASK_AVX512) { if (!(xcr0 & XFEATURE_MASK_YMM)) return 1; if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512) return 1;
}
int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
{ /* Note, #UD due to CR4.OSXSAVE=0 has priority over the intercept. */ if (kvm_x86_call(get_cpl)(vcpu) != 0 ||
__kvm_set_xcr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu))) {
kvm_inject_gp(vcpu, 0); return 1;
}
/* * If CR4.PCIDE is changed 0 -> 1, there is no need to flush the TLB * according to the SDM; however, stale prev_roots could be reused * incorrectly in the future after a MOV to CR3 with NOFLUSH=1, so we * free them all. This is *not* a superset of KVM_REQ_TLB_FLUSH_GUEST * or KVM_REQ_TLB_FLUSH_CURRENT, because the hardware TLB is not flushed, * so fall through.
*/ if (!tdp_enabled &&
(cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE))
kvm_mmu_unload(vcpu);
/* * The TLB has to be flushed for all PCIDs if any of the following * (architecturally required) changes happen: * - CR4.PCIDE is changed from 1 to 0 * - CR4.PGE is toggled * * This is a superset of KVM_REQ_TLB_FLUSH_CURRENT.
*/ if (((cr4 ^ old_cr4) & X86_CR4_PGE) ||
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
/* * The TLB has to be flushed for the current PCID if any of the * following (architecturally required) changes happen: * - CR4.SMEP is changed from 0 to 1 * - CR4.PAE is toggled
*/ elseif (((cr4 ^ old_cr4) & X86_CR4_PAE) ||
((cr4 & X86_CR4_SMEP) && !(old_cr4 & X86_CR4_SMEP)))
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) { /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu)) return 1;
}
/* * MOV CR3 and INVPCID are usually not intercepted when using TDP, but * this is reachable when running EPT=1 and unrestricted_guest=0, and * also via the emulator. KVM's TDP page tables are not in the scope of * the invalidation, but the guest's TLB entries need to be flushed as * the CPU may have cached entries in its TLB for the target PCID.
*/ if (unlikely(tdp_enabled)) {
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); return;
}
/* * If neither the current CR3 nor any of the prev_roots use the given * PCID, then nothing needs to be done here because a resync will * happen anyway before switching to any other CR3.
*/ if (kvm_get_active_pcid(vcpu) == pcid) {
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
/* * If PCID is disabled, there is no need to free prev_roots even if the * PCIDs for them are also 0, because MOV to CR3 always flushes the TLB * with PCIDE=0.
*/ if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)) return;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) if (kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd) == pcid)
roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
/* PDPTRs are always reloaded for PAE paging. */ if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu)) goto handle_tlb_flush;
/* * Do not condition the GPA check on long mode, this helper is used to * stuff CR3, e.g. for RSM emulation, and there is no guarantee that * the current vCPU mode is accurate.
*/ if (!kvm_vcpu_is_legal_cr3(vcpu, cr3)) return 1;
if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, cr3)) return 1;
if (cr3 != kvm_read_cr3(vcpu))
kvm_mmu_new_pgd(vcpu, cr3);
vcpu->arch.cr3 = cr3;
kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3); /* Do not call post_set_cr3, we do not get here for confidential guests. */
handle_tlb_flush: /* * A load of CR3 that flushes the TLB flushes only the current PCID, * even if PCID is disabled, in which case PCID=0 is flushed. It's a * moot point in the end because _disabling_ PCID will flush all PCIDs, * and it's impossible to use a non-zero PCID when PCID is disabled, * i.e. only PCID=0 can be relevant.
*/ if (!skip_tlb_flush)
kvm_invalidate_pcid(vcpu, pcid);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);
int kvm_set_cr8(struct kvm_vcpu *vcpu, unsignedlong cr8)
{ if (cr8 & CR8_RESERVED_BITS) return 1; if (lapic_in_kernel(vcpu))
kvm_lapic_set_tpr(vcpu, cr8); else
vcpu->arch.cr8 = cr8; return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr8);
static u64 kvm_get_arch_capabilities(void)
{
u64 data = kvm_host.arch_capabilities & KVM_SUPPORTED_ARCH_CAP;
/* * If nx_huge_pages is enabled, KVM's shadow paging will ensure that * the nested hypervisor runs with NX huge pages. If it is not, * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other * L1 guests, so it need not worry about its own (L2) guests.
*/
data |= ARCH_CAP_PSCHANGE_MC_NO;
/* * If we're doing cache flushes (either "always" or "cond") * we will do one whenever the guest does a vmlaunch/vmresume. * If an outer hypervisor is doing the cache flush for us * (ARCH_CAP_SKIP_VMENTRY_L1DFLUSH), we can safely pass that * capability to the guest too, and if EPT is disabled we're not * vulnerable. Overall, only VMENTER_L1D_FLUSH_NEVER will * require a nested hypervisor to do a flush of its own.
*/ if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
data |= ARCH_CAP_RDCL_NO; if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
data |= ARCH_CAP_SSB_NO; if (!boot_cpu_has_bug(X86_BUG_MDS))
data |= ARCH_CAP_MDS_NO; if (!boot_cpu_has_bug(X86_BUG_RFDS))
data |= ARCH_CAP_RFDS_NO; if (!boot_cpu_has_bug(X86_BUG_ITS))
data |= ARCH_CAP_ITS_NO;
if (!boot_cpu_has(X86_FEATURE_RTM)) { /* * If RTM=0 because the kernel has disabled TSX, the host might * have TAA_NO or TSX_CTRL. Clear TAA_NO (the guest sees RTM=0 * and therefore knows that there cannot be TAA) but keep * TSX_CTRL: some buggy userspaces leave it set on tsx=on hosts, * and we want to allow migrating those guests to tsx=off hosts.
*/
data &= ~ARCH_CAP_TAA_NO;
} elseif (!boot_cpu_has_bug(X86_BUG_TAA)) {
data |= ARCH_CAP_TAA_NO;
} else { /* * Nothing to do here; we emulate TSX_CTRL if present on the * host so the guest can choose between disabling TSX or * using VERW to clear CPU buffers.
*/
}
if (!boot_cpu_has_bug(X86_BUG_GDS) || gds_ucode_mitigated())
data |= ARCH_CAP_GDS_NO;
/* * Write @data into the MSR specified by @index. Select MSR specific fault * checks are bypassed if @host_initiated is %true. * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called.
*/ staticint __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, bool host_initiated)
{ struct msr_data msr;
switch (index) { case MSR_FS_BASE: case MSR_GS_BASE: case MSR_KERNEL_GS_BASE: case MSR_CSTAR: case MSR_LSTAR: if (is_noncanonical_msr_address(data, vcpu)) return 1; break; case MSR_IA32_SYSENTER_EIP: case MSR_IA32_SYSENTER_ESP: /* * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if * non-canonical address is written on Intel but not on * AMD (which ignores the top 32-bits, because it does * not implement 64-bit SYSENTER). * * 64-bit code should hence be able to write a non-canonical * value on AMD. Making the address canonical ensures that * vmentry does not fail on Intel after writing a non-canonical * value, and that something deterministic happens if the guest * invokes 64-bit SYSENTER.
*/
data = __canonical_address(data, max_host_virt_addr_bits()); break; case MSR_TSC_AUX: if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX)) return 1;
if (!host_initiated &&
!guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) &&
!guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID)) return 1;
/* * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has * incomplete and conflicting architectural behavior. Current * AMD CPUs completely ignore bits 63:32, i.e. they aren't * reserved and always read as zeros. Enforce Intel's reserved * bits check if the guest CPU is Intel compatible, otherwise * clear the bits. This ensures cross-vendor migration will * provide consistent behavior for the guest.
*/ if (guest_cpuid_is_intel_compatible(vcpu) && (data >> 32) != 0) return 1;
/* * Read the MSR specified by @index into @data. Select MSR specific fault * checks are bypassed if @host_initiated is %true. * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called.
*/ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated)
{ struct msr_data msr; int ret;
switch (index) { case MSR_TSC_AUX: if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX)) return 1;
r = kvm_set_msr_with_filter(vcpu, msr, data); if (!r) {
trace_kvm_msr_write(msr, data);
} else { /* MSR write failed? See if we should ask user space */ if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_WRMSR, data,
complete_fast_msr_access, r)) return 0; /* Signal all other negative errors to userspace */ if (r < 0) return r;
trace_kvm_msr_write_ex(msr, data);
}
int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg)
{ return __kvm_emulate_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg));
}
EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr_imm);
int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
{ return kvm_skip_emulated_instruction(vcpu);
}
int kvm_emulate_invd(struct kvm_vcpu *vcpu)
{ /* Treat an INVD instruction as a NOP and just skip it. */ return kvm_emulate_as_nop(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_invd);
/* * The fast path for frequent and performance sensitive wrmsr emulation, * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces * the latency of virtual IPI by avoiding the expensive bits of transitioning * from guest to host, e.g. reacquiring KVM's SRCU lock. In contrast to the * other cases which must be called after interrupts are enabled on the host.
*/ staticint handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
{ if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic)) return 1;
switch (msr) { case APIC_BASE_MSR + (APIC_ICR >> 4):
data = kvm_read_edx_eax(vcpu);
handled = !handle_fastpath_set_x2apic_icr_irqoff(vcpu, data); break; case MSR_IA32_TSC_DEADLINE:
data = kvm_read_edx_eax(vcpu);
handled = !handle_fastpath_set_tscdeadline(vcpu, data); break; default:
handled = false; break;
}
if (handled) { if (!kvm_skip_emulated_instruction(vcpu))
ret = EXIT_FASTPATH_EXIT_USERSPACE; else
ret = EXIT_FASTPATH_REENTER_GUEST;
trace_kvm_msr_write(msr, data);
} else {
ret = EXIT_FASTPATH_NONE;
}
/* * Disallow writes to immutable feature MSRs after KVM_RUN. KVM does * not support modifying the guest vCPU model on the fly, e.g. changing * the nVMX capabilities while L2 is running is nonsensical. Allow * writes of the same value, e.g. to allow userspace to blindly stuff * all MSRs when emulating RESET.
*/ if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index) &&
(do_get_msr(vcpu, index, &val) || *data != val)) return -EINVAL;
static s64 get_kvmclock_base_ns(void)
{ /* Count up from boot time, but with the frequency of the raw clock. */ return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot));
} #else static s64 get_kvmclock_base_ns(void)
{ /* Master clock not used, so we can just use CLOCK_BOOTTIME. */ return ktime_get_boottime_ns();
}
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Diese beiden folgenden Angebotsgruppen bietet das Unternehmen0.61Angebot
Wie Sie bei der Firma Beratungs- und Dienstleistungen beauftragen können
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.