Source-code library listing — products/sources/formal languages/C/Linux/arch/x86/kvm/vmx/ (open-source operating system, version 6.17.9)
File: nested.c — Language: C — dated 2025-10-24 — size: 228 kB (image not shown)
// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/objtool.h>
#include <linux/percpu.h>

#include <asm/debugreg.h>
#include <asm/mmu_context.h>
#include <asm/msr.h>

#include "x86.h"
#include "cpuid.h"
#include "hyperv.h"
#include "mmu.h"
#include "nested.h"
#include "pmu.h"
#include "posted_intr.h"
#include "sgx.h"
#include "trace.h"
#include "vmx.h"
#include "smm.h"

static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);

static bool __read_mostly nested_early_check = 0;
module_param(nested_early_check, bool, S_IRUGO);

/* Shorthand for nested VM-Enter consistency checks (traces the failed check). */
#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK

/*
 * Hyper-V requires all of these, so mark them as supported even though
 * they are just treated the same as all-context.
 */

#define VMX_VPID_EXTENT_SUPPORTED_MASK  \
 (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \
 VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \
 VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \
 VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)

/* Preemption-timer rate advertised to L1 for the emulated timer (VMX_MISC). */
#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5

/* Indices into vmx_bitmap[]: one page-sized bitmap each for VMREAD/VMWRITE. */
enum {
 VMX_VMREAD_BITMAP,
 VMX_VMWRITE_BITMAP,
 VMX_BITMAP_NR
};
/* NOTE(review): allocation of these pages is not visible in this chunk. */
static unsigned long *vmx_bitmap[VMX_BITMAP_NR];

#define vmx_vmread_bitmap                    (vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap                   (vmx_bitmap[VMX_VMWRITE_BITMAP])

/* Maps a VMCS field encoding to the field's offset within struct vmcs12. */
struct shadow_vmcs_field {
 u16 encoding;
 u16 offset;
};
/* Table of shadowed read-only fields, generated from vmcs_shadow_fields.h. */
static struct shadow_vmcs_field shadow_read_only_fields[] = {
#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
/* Trimmed at runtime by init_vmcs_shadow_fields() to supported fields only. */
static int max_shadow_read_only_fields =
 ARRAY_SIZE(shadow_read_only_fields);

/* Table of shadowed read-write fields, generated from vmcs_shadow_fields.h. */
static struct shadow_vmcs_field shadow_read_write_fields[] = {
#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
/* Trimmed at runtime by init_vmcs_shadow_fields() to supported fields only. */
static int max_shadow_read_write_fields =
 ARRAY_SIZE(shadow_read_write_fields);

/*
 * Initialize the shadow VMCS VMREAD/VMWRITE bitmaps and compact the shadow
 * field tables.  All bits start set (every field intercepted); a bit is then
 * cleared for each field that may be accessed through the shadow VMCS.
 * Fields this CPU can't shadow are dropped by compacting the tables in place
 * and shrinking max_shadow_*_fields accordingly.
 */
static void init_vmcs_shadow_fields(void)
{
 int i, j;

 memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
 memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);

 for (i = j = 0; i < max_shadow_read_only_fields; i++) {
  struct shadow_vmcs_field entry = shadow_read_only_fields[i];
  u16 field = entry.encoding;

  /*
   * A 64-bit field must be followed by its high-half sibling
   * (encoding + 1) in the table; complain if it's missing.
   */
  if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
      (i + 1 == max_shadow_read_only_fields ||
       shadow_read_only_fields[i + 1].encoding != field + 1))
   pr_err("Missing field from shadow_read_only_field %x\n",
          field + 1);

  clear_bit(field, vmx_vmread_bitmap);
  /*
   * Odd encodings are the high halves of 64-bit fields.  On 64-bit
   * hosts they're accessed through the even (full) field, so drop
   * the table entry; on 32-bit hosts keep it, pointing at the
   * upper 32 bits of the vmcs12 member.
   */
  if (field & 1)
#ifdef CONFIG_X86_64
   continue;
#else
   entry.offset += sizeof(u32);
#endif
  shadow_read_only_fields[j++] = entry;
 }
 max_shadow_read_only_fields = j;

 for (i = j = 0; i < max_shadow_read_write_fields; i++) {
  struct shadow_vmcs_field entry = shadow_read_write_fields[i];
  u16 field = entry.encoding;

  /* Same 64-bit high-half sibling sanity check as above. */
  if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
      (i + 1 == max_shadow_read_write_fields ||
       shadow_read_write_fields[i + 1].encoding != field + 1))
   pr_err("Missing field from shadow_read_write_field %x\n",
          field + 1);

  WARN_ONCE(field >= GUEST_ES_AR_BYTES &&
     field <= GUEST_TR_AR_BYTES,
     "Update vmcs12_write_any() to drop reserved bits from AR_BYTES");

  /*
   * PML and the preemption timer can be emulated, but the
   * processor cannot vmwrite to fields that don't exist
   * on bare metal.
   */
  switch (field) {
  case GUEST_PML_INDEX:
   if (!cpu_has_vmx_pml())
    continue;
   break;
  case VMX_PREEMPTION_TIMER_VALUE:
   if (!cpu_has_vmx_preemption_timer())
    continue;
   break;
  case GUEST_INTR_STATUS:
   if (!cpu_has_vmx_apicv())
    continue;
   break;
  default:
   break;
  }

  clear_bit(field, vmx_vmwrite_bitmap);
  clear_bit(field, vmx_vmread_bitmap);
  /* See the 64-bit high-half handling in the read-only loop above. */
  if (field & 1)
#ifdef CONFIG_X86_64
   continue;
#else
   entry.offset += sizeof(u32);
#endif
  shadow_read_write_fields[j++] = entry;
 }
 max_shadow_read_write_fields = j;
}

/*
 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
 * set the success or error code of an emulated VMX instruction (as specified
 * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated
 * instruction.
 */

/*
 * Emulate VMsucceed per the SDM's VMX "Conventions": clear all six
 * arithmetic flags, then skip past the emulated instruction.
 */
static int nested_vmx_succeed(struct kvm_vcpu *vcpu)
{
	unsigned long rflags = vmx_get_rflags(vcpu);

	rflags &= ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
		    X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF);
	vmx_set_rflags(vcpu, rflags);

	return kvm_skip_emulated_instruction(vcpu);
}

/*
 * Emulate VMfailInvalid: set CF and clear the other arithmetic flags, then
 * skip past the emulated instruction.
 */
static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
{
	unsigned long rflags = vmx_get_rflags(vcpu);

	rflags &= ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
		    X86_EFLAGS_SF | X86_EFLAGS_OF);
	rflags |= X86_EFLAGS_CF;
	vmx_set_rflags(vcpu, rflags);

	return kvm_skip_emulated_instruction(vcpu);
}

/*
 * Emulate VMfailValid: set ZF and clear the other arithmetic flags, record
 * the error number in the current vmcs12, then skip the emulated instruction.
 */
static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
				u32 vm_instruction_error)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned long rflags = vmx_get_rflags(vcpu);

	rflags &= ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
		    X86_EFLAGS_SF | X86_EFLAGS_OF);
	rflags |= X86_EFLAGS_ZF;
	vmx_set_rflags(vcpu, rflags);

	get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;

	/*
	 * No forced sync to a real shadow VMCS is needed because
	 * VM_INSTRUCTION_ERROR isn't shadowed.  An enlightened VMCS, however,
	 * "shadows" every field and therefore must be synced.
	 */
	if (nested_vmx_is_evmptr12_set(vmx))
		vmx->nested.need_vmcs12_to_shadow_sync = true;

	return kvm_skip_emulated_instruction(vcpu);
}

/*
 * Report an emulated VMX instruction failure to L1: VMfailValid when there
 * is a current VMCS to record the error number in, VMfailInvalid otherwise.
 */
static int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	bool has_current_vmcs = vmx->nested.current_vmptr != INVALID_GPA ||
				nested_vmx_is_evmptr12_valid(vmx);

	if (!has_current_vmcs)
		return nested_vmx_failInvalid(vcpu);

	return nested_vmx_failValid(vcpu, vm_instruction_error);
}

/*
 * Emulate a VMX abort: log the abort indicator and take the vCPU down via a
 * synthesized triple fault.
 * TODO: do something better than simply resetting the guest here.
 */
static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
{
	pr_debug_ratelimited("nested vmx abort, indicator %d\n", indicator);

	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
}

/*
 * Validate a VMX control value against its capability MSR's low/high halves
 * (allowed-0/allowed-1 settings per the SDM); wraps fixed_bits_valid().
 */
static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
{
 return fixed_bits_valid(control, low, high);
}

/* Combine the low/high halves of a VMX capability MSR into its 64-bit value. */
static inline u64 vmx_control_msr(u32 low, u32 high)
{
	return ((u64)high << 32) | low;
}

/*
 * Stop using the shadow VMCS: clear the execution control, invalidate the
 * VMCS link pointer, and drop any pending vmcs12-to-shadow sync.
 */
static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
{
 secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
 vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA);
 vmx->nested.need_vmcs12_to_shadow_sync = false;
}

/*
 * Unmap and forget the enlightened (Hyper-V) VMCS and reset the vCPU's
 * Hyper-V nested state (pa_page_gpa, vm_id, vp_id) if a Hyper-V vCPU
 * context exists.  No-op when CONFIG_KVM_HYPERV is not built in.
 */
static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_HYPERV
 struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 struct vcpu_vmx *vmx = to_vmx(vcpu);

 kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map);
 vmx->nested.hv_evmcs = NULL;
 vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;

 if (hv_vcpu) {
  hv_vcpu->nested.pa_page_gpa = INVALID_GPA;
  hv_vcpu->nested.vm_id = 0;
  hv_vcpu->nested.vp_id = 0;
 }
#endif
}

/*
 * Handle VMCLEAR for a pointer that may reference an enlightened VMCS.
 * Returns true if the VMCLEAR was fully handled here, false if regular
 * VMCS12 VMCLEAR emulation should proceed.
 */
static bool nested_evmcs_handle_vmclear(struct kvm_vcpu *vcpu, gpa_t vmptr)
{
#ifdef CONFIG_KVM_HYPERV
 struct vcpu_vmx *vmx = to_vmx(vcpu);
 /*
  * When Enlightened VMEntry is enabled on the calling CPU we treat
  * memory area pointer by vmptr as Enlightened VMCS (as there's no good
  * way to distinguish it from VMCS12) and we must not corrupt it by
  * writing to the non-existent 'launch_state' field. The area doesn't
  * have to be the currently active EVMCS on the calling CPU and there's
  * nothing KVM has to do to transition it from 'active' to 'non-active'
  * state. It is possible that the area will stay mapped as
  * vmx->nested.hv_evmcs but this shouldn't be a problem.
  */
 if (!guest_cpu_cap_has_evmcs(vcpu) ||
     !evmptr_is_valid(nested_get_evmptr(vcpu)))
  return false;

 if (nested_vmx_evmcs(vmx) && vmptr == vmx->nested.hv_evmcs_vmptr)
  nested_release_evmcs(vcpu);

 return true;
#else
 return false;
#endif
}

/*
 * Copy the cached host segment state from @prev (the previously loaded VMCS)
 * into the newly loaded VMCS's cache so the two stay coherent.  Nothing to
 * do unless guest state (and thus host state) has actually been loaded.
 */
static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
         struct loaded_vmcs *prev)
{
 struct vmcs_host_state *dest, *src;

 if (unlikely(!vmx->vt.guest_state_loaded))
  return;

 src = &prev->host_state;
 dest = &vmx->loaded_vmcs->host_state;

 vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
 dest->ldt_sel = src->ldt_sel;
#ifdef CONFIG_X86_64
 dest->ds_sel = src->ds_sel;
 dest->es_sel = src->es_sel;
#endif
}

/*
 * Make @vmcs the vCPU's loaded VMCS, carrying over the cached host state.
 * The load runs under get_cpu()/put_cpu() so per-CPU VMCS loading can't race
 * with migration to another CPU.
 */
static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
 struct vcpu_vmx *vmx = to_vmx(vcpu);
 struct loaded_vmcs *prev;
 int cpu;

 if (WARN_ON_ONCE(vmx->loaded_vmcs == vmcs))
  return;

 cpu = get_cpu();
 prev = vmx->loaded_vmcs;
 vmx->loaded_vmcs = vmcs;
 vmx_vcpu_load_vmcs(vcpu, cpu);
 vmx_sync_vmcs_host_state(vmx, prev);
 put_cpu();

 /* Invalidate all lazily-loaded register caches for the new VMCS. */
 vcpu->arch.regs_avail = ~VMX_REGS_LAZY_LOAD_SET;

 /*
  * All lazily updated registers will be reloaded from VMCS12 on both
  * vmentry and vmexit.
  */
 vcpu->arch.regs_dirty = 0;
}

/*
 * Release the guest-page mappings grabbed for vmcs12 (APIC-access page,
 * virtual-APIC page, posted-interrupt descriptor) and drop the cached
 * PI descriptor pointer.
 */
static void nested_put_vmcs12_pages(struct kvm_vcpu *vcpu)
{
 struct vcpu_vmx *vmx = to_vmx(vcpu);

 kvm_vcpu_unmap(vcpu, &vmx->nested.apic_access_page_map);
 kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map);
 kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map);
 vmx->nested.pi_desc = NULL;
}

/*
 * Free whatever needs to be freed from vmx->nested when L1 goes down, or
 * just stops using VMX.
 */

static void free_nested(struct kvm_vcpu *vcpu)
{
 struct vcpu_vmx *vmx = to_vmx(vcpu);

 /* vmcs01 must be loaded before vmcs02 is freed below; fix up if not. */
 if (WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01))
  vmx_switch_vmcs(vcpu, &vmx->vmcs01);

 /* Nothing to free unless VMX is on (possibly latched across SMM). */
 if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
  return;

 kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);

 vmx->nested.vmxon = false;
 vmx->nested.smm.vmxon = false;
 vmx->nested.vmxon_ptr = INVALID_GPA;
 free_vpid(vmx->nested.vpid02);
 vmx->nested.posted_intr_nv = -1;
 vmx->nested.current_vmptr = INVALID_GPA;
 if (enable_shadow_vmcs) {
  vmx_disable_shadow_vmcs(vmx);
  vmcs_clear(vmx->vmcs01.shadow_vmcs);
  free_vmcs(vmx->vmcs01.shadow_vmcs);
  vmx->vmcs01.shadow_vmcs = NULL;
 }
 kfree(vmx->nested.cached_vmcs12);
 vmx->nested.cached_vmcs12 = NULL;
 kfree(vmx->nested.cached_shadow_vmcs12);
 vmx->nested.cached_shadow_vmcs12 = NULL;

 nested_put_vmcs12_pages(vcpu);

 /* Drop all roots built for L2 (the nested/guest MMU). */
 kvm_mmu_free_roots(vcpu->kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);

 nested_release_evmcs(vcpu);

 free_loaded_vmcs(&vmx->nested.vmcs02);
}

/*
 * Ensure that the current vmcs of the logical processor is the
 * vmcs01 of the vcpu before calling free_nested().
 */

/*
 * vCPU teardown path: load the vCPU so its VMCS state can be manipulated,
 * then leave nested mode (which tears down nested state — see
 * vmx_leave_nested(), not visible in this chunk).
 */
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
 vcpu_load(vcpu);
 vmx_leave_nested(vcpu);
 vcpu_put(vcpu);
}

#define EPTP_PA_MASK   GENMASK_ULL(51, 12)

/*
 * Return true if the cached root is valid and its EPTP addresses the same
 * root table as @eptp (comparing only the physical-address bits).
 */
static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
{
	if (!VALID_PAGE(root_hpa))
		return false;

	return (root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK);
}

/*
 * Invalidate mappings for @addr in every cached previous root whose EPTP
 * shares the same physical address (EP4TA) as @eptp.
 */
static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
				       gpa_t addr)
{
	struct kvm_mmu_root_info *root;
	unsigned long root_mask = 0;
	uint idx;

	WARN_ON_ONCE(!mmu_is_nested(vcpu));

	for (idx = 0; idx < KVM_MMU_NUM_PREV_ROOTS; idx++) {
		root = &vcpu->arch.mmu->prev_roots[idx];

		if (nested_ept_root_matches(root->hpa, root->pgd, eptp))
			root_mask |= KVM_MMU_ROOT_PREVIOUS(idx);
	}

	if (root_mask)
		kvm_mmu_invalidate_addr(vcpu, vcpu->arch.mmu, addr, root_mask);
}

/*
 * Reflect an emulated EPT fault into L1 as a nested VM-Exit: PML Full when a
 * PML-full condition is pending, otherwise EPT Misconfig or EPT Violation
 * depending on the fault's error code.  The fault's GPA is reported in the
 * vmcs12 guest-physical-address field.
 */
static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
  struct x86_exception *fault)
{
 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 struct vcpu_vmx *vmx = to_vmx(vcpu);
 unsigned long exit_qualification;
 u32 vm_exit_reason;

 if (vmx->nested.pml_full) {
  vm_exit_reason = EXIT_REASON_PML_FULL;
  vmx->nested.pml_full = false;

  /*
   * It should be impossible to trigger a nested PML Full VM-Exit
   * for anything other than an EPT Violation from L2.  KVM *can*
   * trigger nEPT page fault injection in response to an EPT
   * Misconfig, e.g. if the MMIO SPTE was stale and L1's EPT
   * tables also changed, but KVM should not treat EPT Misconfig
   * VM-Exits as writes.
   */
  WARN_ON_ONCE(vmx->vt.exit_reason.basic != EXIT_REASON_EPT_VIOLATION);

  /*
   * PML Full and EPT Violation VM-Exits both use bit 12 to report
   * "NMI unblocking due to IRET", i.e. the bit can be propagated
   * as-is from the original EXIT_QUALIFICATION.
   */
  exit_qualification = vmx_get_exit_qual(vcpu) & INTR_INFO_UNBLOCK_NMI;
 } else {
  if (fault->error_code & PFERR_RSVD_MASK) {
   vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
   exit_qualification = 0;
  } else {
   exit_qualification = fault->exit_qualification;
   exit_qualification |= vmx_get_exit_qual(vcpu) &
           (EPT_VIOLATION_GVA_IS_VALID |
            EPT_VIOLATION_GVA_TRANSLATED);
   vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
  }

  /*
   * Although the caller (kvm_inject_emulated_page_fault) would
   * have already synced the faulting address in the shadow EPT
   * tables for the current EPTP12, we also need to sync it for
   * any other cached EPTP02s based on the same EP4TA, since the
   * TLB associates mappings to the EP4TA rather than the full EPTP.
   */
  nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
        fault->address);
 }

 nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
 vmcs12->guest_physical_address = fault->address;
}

static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
{
 struct vcpu_vmx *vmx = to_vmx(vcpu);
 bool execonly = vmx->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT;
 int ept_lpage_level = ept_caps_to_lpage_level(vmx->nested.msrs.ept_caps);

 kvm_init_shadow_ept_mmu(vcpu, execonly, ept_lpage_level,
    nested_ept_ad_enabled(vcpu),
    nested_ept_get_eptp(vcpu));
}

/*
 * Switch the vCPU to the nested-EPT MMU: guest_mmu becomes the active MMU
 * (with EPT-specific hooks installed) and nested_mmu is used for walks.
 */
static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
 WARN_ON(mmu_is_nested(vcpu));

 vcpu->arch.mmu = &vcpu->arch.guest_mmu;
 nested_ept_new_eptp(vcpu);
 /* Hooks so generic MMU code emulates EPT semantics for L2 faults. */
 vcpu->arch.mmu->get_guest_pgd     = nested_ept_get_eptp;
 vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
 vcpu->arch.mmu->get_pdptr         = kvm_pdptr_read;

 vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
}

/* Restore the normal (non-nested) MMU as both the active and the walk MMU. */
static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
}

/*
 * Apply the vmcs12 #PF error-code mask/match filter.  Per the SDM, a match
 * result that agrees with the #PF bit in the exception bitmap means L1 gets
 * the exit; mask/match effectively inverts delivery when they disagree.
 */
static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
         u16 error_code)
{
	bool want_pf_vmexit = vmcs12->exception_bitmap & (1u << PF_VECTOR);
	bool code_matches = (error_code & vmcs12->page_fault_error_code_mask) ==
			    vmcs12->page_fault_error_code_match;

	return code_matches == want_pf_vmexit;
}

/*
 * Return true if L1 wants a VM-Exit for an exception with @vector and
 * @error_code, applying the dedicated #PF mask/match filter for page faults.
 */
static bool nested_vmx_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector,
        u32 error_code)
{
 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

 /*
  * Drop bits 31:16 of the error code when performing the #PF mask+match
  * check.  All VMCS fields involved are 32 bits, but Intel CPUs never
  * set bits 31:16 and VMX disallows setting bits 31:16 in the injected
  * error code.  Including the to-be-dropped bits in the check might
  * result in an "impossible" or missed exit from L1's perspective.
  */
 if (vector == PF_VECTOR)
  return nested_vmx_is_page_fault_vmexit(vmcs12, (u16)error_code);

 return (vmcs12->exception_bitmap & (1u << vector));
}

/*
 * When L1 uses I/O bitmaps, both bitmap addresses must pass
 * page_address_valid(); nothing to check otherwise.
 */
static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
					       struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)))
		return -EINVAL;

	if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b)))
		return -EINVAL;

	return 0;
}

/* The MSR bitmap address must pass page_address_valid() when in use. */
static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
						struct vmcs12 *vmcs12)
{
	if (nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) &&
	    CC(!page_address_valid(vcpu, vmcs12->msr_bitmap)))
		return -EINVAL;

	return 0;
}

/* The virtual-APIC page address must pass page_address_valid() when in use. */
static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
						struct vmcs12 *vmcs12)
{
	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW) &&
	    CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)))
		return -EINVAL;

	return 0;
}

/*
 * For x2APIC MSRs, ignore the vmcs01 bitmap.  L1 can enable x2APIC without L1
 * itself utilizing x2APIC.  All MSRs were previously set to be intercepted,
 * only the "disable intercept" case needs to be handled.
 */

/*
 * Clear interception of an x2APIC MSR in the merged (L0) bitmap for each
 * requested access type that L1 itself does not intercept.
 */
static void nested_vmx_disable_intercept_for_x2apic_msr(unsigned long *msr_bitmap_l1,
							unsigned long *msr_bitmap_l0,
							u32 msr, int type)
{
	if ((type & MSR_TYPE_W) && !vmx_test_msr_bitmap_write(msr_bitmap_l1, msr))
		vmx_clear_msr_bitmap_write(msr_bitmap_l0, msr);

	if ((type & MSR_TYPE_R) && !vmx_test_msr_bitmap_read(msr_bitmap_l1, msr))
		vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr);
}

/*
 * Intercept the entire x2APIC MSR range (0x800-0x8ff): fill the covering
 * words of the bitmap with all-ones, both in place and in the matching
 * words 0x800 bytes later (the write half of the bitmap).
 */
static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
{
	unsigned int word;

	for (word = 0x800 / BITS_PER_LONG; word <= 0x8ff / BITS_PER_LONG;
	     word++) {
		msr_bitmap[word] = ~0;
		msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
	}
}

/*
 * Generate nested_vmx_set_msr_{read,write}_intercept(): the merged (vmcs02)
 * bitmap must intercept an MSR if either KVM (vmcs01) or L1 wants to
 * intercept it; only when neither does is the access passed through.
 */
#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw)     \
static inline         \
void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx,   \
      unsigned long *msr_bitmap_l1,  \
      unsigned long *msr_bitmap_l0, u32 msr) \
{          \
 if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) ||  \
     vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr))   \
  vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr);   \
 else         \
  vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr);   \
}
BUILD_NVMX_MSR_INTERCEPT_HELPER(read)
BUILD_NVMX_MSR_INTERCEPT_HELPER(write)

/*
 * Merge vmcs01's and L1's intercept settings for @msr into the vmcs02 bitmap
 * for the requested access types (MSR_TYPE_R and/or MSR_TYPE_W).
 */
static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
          unsigned long *msr_bitmap_l1,
          unsigned long *msr_bitmap_l0,
          u32 msr, int types)
{
 if (types & MSR_TYPE_R)
  nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
        msr_bitmap_l0, msr);
 if (types & MSR_TYPE_W)
  nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
         msr_bitmap_l0, msr);
}

/*
 * Merge L0's and L1's MSR bitmap, return false to indicate that
 * we do not use the hardware.
 */

static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
       struct vmcs12 *vmcs12)
{
 struct vcpu_vmx *vmx = to_vmx(vcpu);
 int msr;
 unsigned long *msr_bitmap_l1;
 unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
 struct kvm_host_map map;

 /* Nothing to do if the MSR bitmap is not in use.  */
 if (!cpu_has_vmx_msr_bitmap() ||
     !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
  return false;

 /*
  * MSR bitmap update can be skipped when:
  * - MSR bitmap for L1 hasn't changed.
  * - Nested hypervisor (L1) is attempting to launch the same L2 as
  *   before.
  * - Nested hypervisor (L1) has enabled 'Enlightened MSR Bitmap' feature
  *   and tells KVM (L0) there were no changes in MSR bitmap for L2.
  */
 if (!vmx->nested.force_msr_bitmap_recalc) {
  struct hv_enlightened_vmcs *evmcs = nested_vmx_evmcs(vmx);

  if (evmcs && evmcs->hv_enlightenments_control.msr_bitmap &&
      evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP)
   return true;
 }

 /* Map L1's bitmap; fall back to intercepting everything on failure. */
 if (kvm_vcpu_map_readonly(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), &map))
  return false;

 msr_bitmap_l1 = (unsigned long *)map.hva;

 /*
  * To keep the control flow simple, pay eight 8-byte writes (sixteen
  * 4-byte writes on 32-bit systems) up front to enable intercepts for
  * the x2APIC MSR range and selectively toggle those relevant to L2.
  */
 enable_x2apic_msr_intercepts(msr_bitmap_l0);

 if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
  if (nested_cpu_has_apic_reg_virt(vmcs12)) {
   /*
    * L0 need not intercept reads for MSRs between 0x800
    * and 0x8ff, it just lets the processor take the value
    * from the virtual-APIC page; take those 256 bits
    * directly from the L1 bitmap.
    */
   for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
    unsigned word = msr / BITS_PER_LONG;

    msr_bitmap_l0[word] = msr_bitmap_l1[word];
   }
  }

  nested_vmx_disable_intercept_for_x2apic_msr(
   msr_bitmap_l1, msr_bitmap_l0,
   X2APIC_MSR(APIC_TASKPRI),
   MSR_TYPE_R | MSR_TYPE_W);

  if (nested_cpu_has_vid(vmcs12)) {
   nested_vmx_disable_intercept_for_x2apic_msr(
    msr_bitmap_l1, msr_bitmap_l0,
    X2APIC_MSR(APIC_EOI),
    MSR_TYPE_W);
   nested_vmx_disable_intercept_for_x2apic_msr(
    msr_bitmap_l1, msr_bitmap_l0,
    X2APIC_MSR(APIC_SELF_IPI),
    MSR_TYPE_W);
  }
 }

 /*
  * Always check vmcs01's bitmap to honor userspace MSR filters and any
  * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
  */
#ifdef CONFIG_X86_64
 nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
      MSR_FS_BASE, MSR_TYPE_RW);

 nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
      MSR_GS_BASE, MSR_TYPE_RW);

 nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
      MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
#endif
 nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
      MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);

 nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
      MSR_IA32_PRED_CMD, MSR_TYPE_W);

 nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
      MSR_IA32_FLUSH_CMD, MSR_TYPE_W);

 nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
      MSR_IA32_APERF, MSR_TYPE_R);

 nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
      MSR_IA32_MPERF, MSR_TYPE_R);

 kvm_vcpu_unmap(vcpu, &map);

 vmx->nested.force_msr_bitmap_recalc = false;

 return true;
}

/*
 * Snapshot the shadow vmcs12 from guest memory into KVM's local cache.
 * No-op if L1 doesn't use VMCS shadowing or the link pointer is invalid.
 * NOTE(review): cache-init and read failures are silently ignored.
 */
static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
           struct vmcs12 *vmcs12)
{
 struct vcpu_vmx *vmx = to_vmx(vcpu);
 struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;

 if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
     vmcs12->vmcs_link_pointer == INVALID_GPA)
  return;

 /* (Re)initialize the cache if the link pointer changed. */
 if (ghc->gpa != vmcs12->vmcs_link_pointer &&
     kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
          vmcs12->vmcs_link_pointer, VMCS12_SIZE))
  return;

 kvm_read_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
         VMCS12_SIZE);
}

/*
 * Write KVM's cached shadow vmcs12 back out to guest memory.  No-op if L1
 * doesn't use VMCS shadowing or the link pointer is invalid.
 * NOTE(review): cache-init and write failures are silently ignored.
 */
static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
           struct vmcs12 *vmcs12)
{
 struct vcpu_vmx *vmx = to_vmx(vcpu);
 struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;

 if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
     vmcs12->vmcs_link_pointer == INVALID_GPA)
  return;

 /* (Re)initialize the cache if the link pointer changed. */
 if (ghc->gpa != vmcs12->vmcs_link_pointer &&
     kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
          vmcs12->vmcs_link_pointer, VMCS12_SIZE))
  return;

 kvm_write_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
          VMCS12_SIZE);
}

/*
 * In nested virtualization, check if L1 has set
 * VM_EXIT_ACK_INTR_ON_EXIT
 */

static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
{
 return get_vmcs12(vcpu)->vm_exit_controls &
  VM_EXIT_ACK_INTR_ON_EXIT;
}

/* The APIC-access address must pass page_address_valid() when virtualized. */
static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->apic_access_addr)))
		return -EINVAL;

	return 0;
}

/*
 * Validate the vmcs12 APICv-related controls: virtualize-x2APIC mode, APIC
 * register virtualization, virtual-interrupt delivery and posted interrupts.
 */
static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
        struct vmcs12 *vmcs12)
{
 /* Fast path: none of the APICv features is enabled. */
 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
     !nested_cpu_has_apic_reg_virt(vmcs12) &&
     !nested_cpu_has_vid(vmcs12) &&
     !nested_cpu_has_posted_intr(vmcs12))
  return 0;

 /*
  * If virtualize x2apic mode is enabled,
  * virtualize apic access must be disabled.
  */
 if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) &&
        nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)))
  return -EINVAL;

 /*
  * If virtual interrupt delivery is enabled,
  * we must exit on external interrupts.
  */
 if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu)))
  return -EINVAL;

 /*
  * bits 15:8 should be zero in posted_intr_nv,
  * the descriptor address has been already checked
  * in nested_get_vmcs12_pages.
  *
  * bits 5:0 of posted_intr_desc_addr should be zero.
  */
 if (nested_cpu_has_posted_intr(vmcs12) &&
    (CC(!nested_cpu_has_vid(vmcs12)) ||
     CC(!nested_exit_intr_ack_set(vcpu)) ||
     CC((vmcs12->posted_intr_nv & 0xff00)) ||
     CC(!kvm_vcpu_is_legal_aligned_gpa(vcpu, vmcs12->posted_intr_desc_addr, 64))))
  return -EINVAL;

 /* tpr shadow is needed by all apicv features. */
 if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)))
  return -EINVAL;

 return 0;
}

static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
{
 struct vcpu_vmx *vmx = to_vmx(vcpu);
 u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
           vmx->nested.msrs.misc_high);

 return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
}

/*
 * Validate an atomic MSR switch list: a zero count is always legal; a
 * non-zero count must not exceed the IA32_VMX_MISC-derived limit, and the
 * list must start 16-byte aligned and end at a legal GPA.
 */
static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
           u32 count, u64 addr)
{
 if (count == 0)
  return 0;

 /*
  * Exceeding the limit results in architecturally _undefined_ behavior,
  * i.e. KVM is allowed to do literally anything in response to a bad
  * limit.  Immediately generate a consistency check so that code that
  * consumes the count doesn't need to worry about extreme edge cases.
  */
 if (count > nested_vmx_max_atomic_switch_msrs(vcpu))
  return -EINVAL;

 if (!kvm_vcpu_is_legal_aligned_gpa(vcpu, addr, 16) ||
     !kvm_vcpu_is_legal_gpa(vcpu, (addr + count * sizeof(struct vmx_msr_entry) - 1)))
  return -EINVAL;

 return 0;
}

static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu,
           struct vmcs12 *vmcs12)
{
 if (CC(nested_vmx_check_msr_switch(vcpu,
        vmcs12->vm_exit_msr_load_count,
        vmcs12->vm_exit_msr_load_addr)) ||
     CC(nested_vmx_check_msr_switch(vcpu,
        vmcs12->vm_exit_msr_store_count,
        vmcs12->vm_exit_msr_store_addr)))
  return -EINVAL;

 return 0;
}

static int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu,
                                                      struct vmcs12 *vmcs12)
{
 if (CC(nested_vmx_check_msr_switch(vcpu,
        vmcs12->vm_entry_msr_load_count,
        vmcs12->vm_entry_msr_load_addr)))
                return -EINVAL;

 return 0;
}

/* PML for L2 requires EPT and a valid PML log address. */
static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
					 struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_pml(vmcs12))
		return 0;

	if (CC(!nested_cpu_has_ept(vmcs12)))
		return -EINVAL;

	if (CC(!page_address_valid(vcpu, vmcs12->pml_address)))
		return -EINVAL;

	return 0;
}

/* Unrestricted guest requires EPT. */
static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu,
							struct vmcs12 *vmcs12)
{
	return CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
		  !nested_cpu_has_ept(vmcs12)) ? -EINVAL : 0;
}

/* Mode-based EPT execute control requires EPT. */
static int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu,
							 struct vmcs12 *vmcs12)
{
	return CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
		  !nested_cpu_has_ept(vmcs12)) ? -EINVAL : 0;
}

/*
 * VMCS shadowing requires valid addresses for both the VMREAD and the
 * VMWRITE bitmaps.
 */
static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_shadow_vmcs(vmcs12))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)))
		return -EINVAL;

	if (CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap)))
		return -EINVAL;

	return 0;
}

/*
 * Checks common to VM-Entry MSR-load and VM-Exit MSR-store list entries:
 * no x2APIC MSRs (0x8xx while x2APIC is enabled), no microcode MSRs, and
 * the reserved field must be zero.
 */
static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
           struct vmx_msr_entry *e)
{
 /* x2APIC MSR accesses are not allowed */
 if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8))
  return -EINVAL;
 if (CC(e->index == MSR_IA32_UCODE_WRITE) || /* SDM Table 35-2 */
     CC(e->index == MSR_IA32_UCODE_REV))
  return -EINVAL;
 if (CC(e->reserved != 0))
  return -EINVAL;
 return 0;
}

/*
 * Validate one VM-Entry MSR-load list entry.  FS/GS base are switched via
 * dedicated VMCS fields, not the MSR list, and SMM MSRs aren't supported.
 */
static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
         struct vmx_msr_entry *e)
{
 if (CC(e->index == MSR_FS_BASE) ||
     CC(e->index == MSR_GS_BASE) ||
     CC(e->index == MSR_IA32_SMM_MONITOR_CTL) || /* SMM is not supported */
     nested_vmx_msr_check_common(vcpu, e))
  return -EINVAL;
 return 0;
}

/* Validate one VM-Exit MSR-store list entry. */
static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
          struct vmx_msr_entry *e)
{
 if (CC(e->index == MSR_IA32_SMBASE) || /* SMM is not supported */
     nested_vmx_msr_check_common(vcpu, e))
  return -EINVAL;
 return 0;
}

/*
 * Load guest's/host's msr at nested entry/exit.
 * return 0 for success, entry index for failure.
 *
 * One of the failure modes for MSR load/store is when a list exceeds the
 * virtual hardware's capacity. To maintain compatibility with hardware inasmuch
 * as possible, process all valid entries before failing rather than precheck
 * for a capacity violation.
 */

static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
{
 u32 i;
 struct vmx_msr_entry e;
 u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);

 for (i = 0; i < count; i++) {
  /* Count should've been capped by nested_vmx_check_msr_switch(). */
  if (WARN_ON_ONCE(i >= max_msr_list_size))
   goto fail;

  if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
     &e, sizeof(e))) {
   pr_debug_ratelimited(
    "%s cannot read MSR entry (%u, 0x%08llx)\n",
    __func__, i, gpa + i * sizeof(e));
   goto fail;
  }
  if (nested_vmx_load_msr_check(vcpu, &e)) {
   pr_debug_ratelimited(
    "%s check failed (%u, 0x%x, 0x%x)\n",
    __func__, i, e.index, e.reserved);
   goto fail;
  }
  if (kvm_set_msr_with_filter(vcpu, e.index, e.value)) {
   pr_debug_ratelimited(
    "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
    __func__, i, e.index, e.value);
   goto fail;
  }
 }
 return 0;
fail:
 /* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. */
 return i + 1;
}

/*
 * Fetch the value of @msr_index to be stored into the vmcs12 VM-Exit
 * MSR-store list.  Returns false if the MSR couldn't be read (e.g. denied
 * by a userspace MSR filter or unsupported).
 */
static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
         u32 msr_index,
         u64 *data)
{
 struct vcpu_vmx *vmx = to_vmx(vcpu);

 /*
  * If the L0 hypervisor stored a more accurate value for the TSC that
  * does not include the time taken for emulation of the L2->L1
  * VM-exit in L0, use the more accurate value.
  */
 if (msr_index == MSR_IA32_TSC) {
  int i = vmx_find_loadstore_msr_slot(&vmx->msr_autostore.guest,
          MSR_IA32_TSC);

  if (i >= 0) {
   u64 val = vmx->msr_autostore.guest.val[i].value;

   *data = kvm_read_l1_tsc(vcpu, val);
   return true;
  }
 }

 if (kvm_get_msr_with_filter(vcpu, msr_index, data)) {
  pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
   msr_index);
  return false;
 }
 return true;
}

/*
 * Read and validate entry @i of a VM-Exit MSR-store list.  Only 'index' and
 * 'reserved' (the first two u32s) are read from guest memory; the 64-bit
 * 'value' field isn't needed here.
 */
static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i,
         struct vmx_msr_entry *e)
{
 if (kvm_vcpu_read_guest(vcpu,
    gpa + i * sizeof(*e),
    e, 2 * sizeof(u32))) {
  pr_debug_ratelimited(
   "%s cannot read MSR entry (%u, 0x%08llx)\n",
   __func__, i, gpa + i * sizeof(*e));
  return false;
 }
 if (nested_vmx_store_msr_check(vcpu, e)) {
  pr_debug_ratelimited(
   "%s check failed (%u, 0x%x, 0x%x)\n",
   __func__, i, e->index, e->reserved);
  return false;
 }
 return true;
}

/*
 * Emulate the VM-Exit MSR-store: for every list entry, read the MSR's
 * current value and write it into the 'value' field of the guest's list.
 * Returns 0 on success, -EINVAL on any read/check/write failure.
 */
static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
{
 u64 data;
 u32 i;
 struct vmx_msr_entry e;
 u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);

 for (i = 0; i < count; i++) {
  /* Count should've been capped by nested_vmx_check_msr_switch(). */
  if (WARN_ON_ONCE(i >= max_msr_list_size))
   return -EINVAL;

  if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
   return -EINVAL;

  if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data))
   return -EINVAL;

  if (kvm_vcpu_write_guest(vcpu,
      gpa + i * sizeof(e) +
          offsetof(struct vmx_msr_entry, value),
      &data, sizeof(data))) {
   pr_debug_ratelimited(
    "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
    __func__, i, e.index, data);
   return -EINVAL;
  }
 }
 return 0;
}

/*
 * Return true if @msr_index appears in L1's VM-Exit MSR-store list.  Bails
 * out (returning false) on the first unreadable or invalid entry.
 */
static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	u64 gpa = vmcs12->vm_exit_msr_store_addr;
	u32 nr_entries = vmcs12->vm_exit_msr_store_count;
	struct vmx_msr_entry entry;
	u32 slot;

	for (slot = 0; slot < nr_entries; slot++) {
		if (!read_and_check_msr_entry(vcpu, gpa, slot, &entry))
			break;

		if (entry.index == msr_index)
			return true;
	}

	return false;
}

/*
 * Keep KVM's internal vmcs02 MSR "autostore" list in sync with L1's VM-Exit
 * MSR-store list: add @msr_index if L1 wants it stored but it isn't tracked
 * yet, remove it (swap with the last slot) if L1 no longer wants it.
 */
static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
        u32 msr_index)
{
 struct vcpu_vmx *vmx = to_vmx(vcpu);
 struct vmx_msrs *autostore = &vmx->msr_autostore.guest;
 bool in_vmcs12_store_list;
 int msr_autostore_slot;
 bool in_autostore_list;
 int last;

 msr_autostore_slot = vmx_find_loadstore_msr_slot(autostore, msr_index);
 in_autostore_list = msr_autostore_slot >= 0;
 in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);

 if (in_vmcs12_store_list && !in_autostore_list) {
  if (autostore->nr == MAX_NR_LOADSTORE_MSRS) {
   /*
    * Emulated VMEntry does not fail here.  Instead a less
    * accurate value will be returned by
    * nested_vmx_get_vmexit_msr_value() by reading KVM's
    * internal MSR state instead of reading the value from
    * the vmcs02 VMExit MSR-store area.
    */
   pr_warn_ratelimited(
    "Not enough msr entries in msr_autostore. Can't add msr %x\n",
    msr_index);
   return;
  }
  last = autostore->nr++;
  autostore->val[last].index = msr_index;
 } else if (!in_vmcs12_store_list && in_autostore_list) {
  /* Remove by swapping with the last entry. */
  last = --autostore->nr;
  autostore->val[msr_autostore_slot] = autostore->val[last];
 }
}

/*
 * Load guest's/host's cr3 at nested entry/exit.  @nested_ept is true if we are
 * emulating VM-Entry into a guest with EPT enabled.  On failure, the expected
 * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to
 * @entry_failure_code.
 *
 * Returns 0 on success, -EINVAL on a failed consistency check (illegal CR3,
 * or unreadable PDPTEs when PAE paging is active without EPT).
 */
static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
          bool nested_ept, bool reload_pdptrs,
          enum vm_entry_failure_code *entry_failure_code)
{
 if (CC(!kvm_vcpu_is_legal_cr3(vcpu, cr3))) {
  *entry_failure_code = ENTRY_FAIL_DEFAULT;
  return -EINVAL;
 }

 /*
  * If PAE paging and EPT are both on, CR3 is not used by the CPU and
  * must not be dereferenced.
  */
 if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) &&
     CC(!load_pdptrs(vcpu, cr3))) {
  *entry_failure_code = ENTRY_FAIL_PDPTE;
  return -EINVAL;
 }

 vcpu->arch.cr3 = cr3;
 kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);

 /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
 kvm_init_mmu(vcpu);

 /* With nested EPT, the guest PGD is the EPTP, not CR3 — skip. */
 if (!nested_ept)
  kvm_mmu_new_pgd(vcpu, cr3);

 return 0;
}

/*
 * Returns if KVM is able to config CPU to tag TLB entries
 * populated by L2 differently than TLB entries populated
 * by L1.
 *
 * If L0 uses EPT, L1 and L2 run with different EPTP because
 * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries
 * are tagged with different EPTP.
 *
 * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
 * with different VPID (L1 entries are tagged with vmx->vpid
 * while L2 entries are tagged with vmx->nested.vpid02).
 */

static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
{
 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

 return enable_ept ||
        (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
}

/*
 * Request the TLB flush (if any) mandated by an emulated nested transition.
 * @is_vmenter distinguishes VM-Enter from VM-Exit, as a vpid12 change only
 * requires a flush on VM-Enter.
 */
static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
         struct vmcs12 *vmcs12,
         bool is_vmenter)
{
 struct vcpu_vmx *vmx = to_vmx(vcpu);

 /* Handle pending Hyper-V TLB flush requests */
 kvm_hv_nested_transtion_tlb_flush(vcpu, enable_ept);

 /*
  * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the
  * same VPID as the host, and so architecturally, linear and combined
  * mappings for VPID=0 must be flushed at VM-Enter and VM-Exit.  KVM
  * emulates L2 sharing L1's VPID=0 by using vpid01 while running L2,
  * and so KVM must also emulate TLB flush of VPID=0, i.e. vpid01.  This
  * is required if VPID is disabled in KVM, as a TLB flush (there are no
  * VPIDs) still occurs from L1's perspective, and KVM may need to
  * synchronize the MMU in response to the guest TLB flush.
  *
  * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use.
  * EPT is a special snowflake, as guest-physical mappings aren't
  * flushed on VPID invalidations, including VM-Enter or VM-Exit with
  * VPID disabled.  As a result, KVM _never_ needs to sync nEPT
  * entries on VM-Enter because L1 can't rely on VM-Enter to flush
  * those mappings.
  */
 if (!nested_cpu_has_vpid(vmcs12)) {
  kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
  return;
 }

 /* L2 should never have a VPID if VPID is disabled. */
 WARN_ON(!enable_vpid);

 /*
  * VPID is enabled and in use by vmcs12.  If vpid12 is changing, then
  * emulate a guest TLB flush as KVM does not track vpid12 history nor
  * is the VPID incorporated into the MMU context.  I.e. KVM must assume
  * that the new vpid12 has never been used and thus represents a new
  * guest ASID that cannot have entries in the TLB.
  */
 if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
  vmx->nested.last_vpid = vmcs12->virtual_processor_id;
  kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
  return;
 }

 /*
  * If VPID is enabled, used by vmc12, and vpid12 is not changing but
  * does not have a unique TLB tag (ASID), i.e. EPT is disabled and
  * KVM was unable to allocate a VPID for L2, flush the current context
  * as the effective ASID is common to both L1 and L2.
  */
 if (!nested_has_guest_tlb_tag(vcpu))
  kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}

/*
 * Return true if, considering only the bits selected by @mask, @subset sets
 * no bit that @superset does not also set, i.e. (subset & mask) is a bitwise
 * subset of (superset & mask).
 */
static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
{
 /* A subset adds no new bits: nothing in subset&mask outside superset. */
 return !(subset & mask & ~superset);
}

/*
 * Validate and install a userspace-provided value for MSR_IA32_VMX_BASIC.
 * The new value may only remove (never add) feature bits relative to what
 * KVM supports, and must keep the VMCS revision ID and not shrink the
 * advertised VMCS size.  Returns 0 on success, -EINVAL on rejection.
 */
static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
{
 const u64 feature_bits = VMX_BASIC_DUAL_MONITOR_TREATMENT |
     VMX_BASIC_INOUT |
     VMX_BASIC_TRUE_CTLS;

 const u64 reserved_bits = GENMASK_ULL(63, 56) |
      GENMASK_ULL(47, 45) |
      BIT_ULL(31);

 u64 vmx_basic = vmcs_config.nested.basic;

 /* A bit must not be both a feature and reserved. */
 BUILD_BUG_ON(feature_bits & reserved_bits);

 /*
  * Except for 32BIT_PHYS_ADDR_ONLY, which is an anti-feature bit (has
  * inverted polarity), the incoming value must not set feature bits or
  * reserved bits that aren't allowed/supported by KVM.  Fields, i.e.
  * multi-bit values, are explicitly checked below.
  */
 if (!is_bitwise_subset(vmx_basic, data, feature_bits | reserved_bits))
  return -EINVAL;

 /*
  * KVM does not emulate a version of VMX that constrains physical
  * addresses of VMX structures (e.g. VMCS) to 32-bits.
  */
 if (data & VMX_BASIC_32BIT_PHYS_ADDR_ONLY)
  return -EINVAL;

 if (vmx_basic_vmcs_revision_id(vmx_basic) !=
     vmx_basic_vmcs_revision_id(data))
  return -EINVAL;

 /* The restored VMCS size may not be smaller than KVM's. */
 if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
  return -EINVAL;

 vmx->nested.msrs.basic = data;
 return 0;
}

/*
 * Return, via @low/@high, pointers to the storage in @msrs backing the two
 * 32-bit halves of the given VMX control capability MSR.  BUG()s on any
 * other MSR index; callers only pass the five handled values.
 */
static void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index,
    u32 **low, u32 **high)
{
 switch (msr_index) {
 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
  *low = &msrs->pinbased_ctls_low;
  *high = &msrs->pinbased_ctls_high;
  break;
 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
  *low = &msrs->procbased_ctls_low;
  *high = &msrs->procbased_ctls_high;
  break;
 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
  *low = &msrs->exit_ctls_low;
  *high = &msrs->exit_ctls_high;
  break;
 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
  *low = &msrs->entry_ctls_low;
  *high = &msrs->entry_ctls_high;
  break;
 case MSR_IA32_VMX_PROCBASED_CTLS2:
  *low = &msrs->secondary_ctls_low;
  *high = &msrs->secondary_ctls_high;
  break;
 default:
  BUG();
 }
}

/*
 * Validate and install a userspace-provided value for one of the VMX
 * control capability MSRs.  The low word encodes must-be-1 (allowed-0)
 * bits, the high word must-be-0 (allowed-1) bits; the restored value may
 * not weaken either constraint relative to what KVM supports.
 *
 * Returns 0 on success, -EINVAL on rejection.
 */
static int
vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
{
 u32 *low, *high;
 u64 supported;

 vmx_get_control_msr(&vmcs_config.nested, msr_index, &low, &high);
 supported = vmx_control_msr(*low, *high);

 /* Check must-be-1 bits are still 1. */
 if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
  return -EINVAL;

 /* Check must-be-0 bits are still 0. */
 if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
  return -EINVAL;

 /* Split the accepted value into the vCPU's low/high halves. */
 vmx_get_control_msr(&vmx->nested.msrs, msr_index, &low, &high);
 *low = data;
 *high = data >> 32;

 return 0;
}

/*
 * Validate and install a userspace-provided value for MSR_IA32_VMX_MISC.
 * Single feature bits may only be cleared relative to KVM's supported value;
 * the multi-bit fields (preemption timer rate, CR3-target count, max MSR
 * list size, MSEG revision) are checked explicitly below.  Returns 0 on
 * success, -EINVAL on rejection.
 */
static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
{
 const u64 feature_bits = VMX_MISC_SAVE_EFER_LMA |
     VMX_MISC_ACTIVITY_HLT |
     VMX_MISC_ACTIVITY_SHUTDOWN |
     VMX_MISC_ACTIVITY_WAIT_SIPI |
     VMX_MISC_INTEL_PT |
     VMX_MISC_RDMSR_IN_SMM |
     VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
     VMX_MISC_VMXOFF_BLOCK_SMI |
     VMX_MISC_ZERO_LEN_INS;

 const u64 reserved_bits = BIT_ULL(31) | GENMASK_ULL(13, 9);

 u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low,
           vmcs_config.nested.misc_high);

 /* A bit must not be both a feature and reserved. */
 BUILD_BUG_ON(feature_bits & reserved_bits);

 /*
  * The incoming value must not set feature bits or reserved bits that
  * aren't allowed/supported by KVM.  Fields, i.e. multi-bit values, are
  * explicitly checked below.
  */
 if (!is_bitwise_subset(vmx_misc, data, feature_bits | reserved_bits))
  return -EINVAL;

 /*
  * The preemption timer rate is fixed if the preemption timer is
  * supported at all.
  */
 if ((vmx->nested.msrs.pinbased_ctls_high &
      PIN_BASED_VMX_PREEMPTION_TIMER) &&
     vmx_misc_preemption_timer_rate(data) !=
     vmx_misc_preemption_timer_rate(vmx_misc))
  return -EINVAL;

 if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
  return -EINVAL;

 if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
  return -EINVAL;

 if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
  return -EINVAL;

 vmx->nested.msrs.misc_low = data;
 vmx->nested.msrs.misc_high = data >> 32;

 return 0;
}

/*
 * Validate and install a userspace-provided value for
 * MSR_IA32_VMX_EPT_VPID_CAP (EPT caps in the low word, VPID caps in the
 * high word).  Returns 0 on success, -EINVAL on rejection.
 */
static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
{
 u64 supported = vmx_control_msr(vmcs_config.nested.ept_caps,
            vmcs_config.nested.vpid_caps);

 /* Every bit is either reserved or a feature bit. */
 if (!is_bitwise_subset(supported, data, -1ULL))
  return -EINVAL;

 vmx->nested.msrs.ept_caps = data;
 vmx->nested.msrs.vpid_caps = data >> 32;

 return 0;
}

/*
 * Return a pointer to the storage in @msrs backing the given CR0/CR4
 * FIXED0 capability MSR.  BUG()s on any other index; callers only pass
 * the two handled values.
 */
static u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index)
{
 if (msr_index == MSR_IA32_VMX_CR0_FIXED0)
  return &msrs->cr0_fixed0;

 if (msr_index == MSR_IA32_VMX_CR4_FIXED0)
  return &msrs->cr4_fixed0;

 BUG();
}

/*
 * Validate and install a userspace-provided value for a CR0/CR4 FIXED0
 * capability MSR.  Returns 0 on success, -EINVAL on rejection.
 */
static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
{
 const u64 *supported = vmx_get_fixed0_msr(&vmcs_config.nested, msr_index);

 /*
  * 1 bits (which indicates bits which "must-be-1" during VMX operation)
  * must be 1 in the restored value.
  */
 if (!is_bitwise_subset(data, *supported, -1ULL))
  return -EINVAL;

 *vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index) = data;

 return 0;
}

/*
 * Called when userspace is restoring VMX MSRs.
 *
 * Returns 0 on success, non-0 otherwise.
 */
int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
 struct vcpu_vmx *vmx = to_vmx(vcpu);

 /*
  * Don't allow changes to the VMX capability MSRs while the vCPU
  * is in VMX operation.
  */
 if (vmx->nested.vmxon)
  return -EBUSY;

 switch (msr_index) {
 case MSR_IA32_VMX_BASIC:
  return vmx_restore_vmx_basic(vmx, data);
 case MSR_IA32_VMX_PINBASED_CTLS:
 case MSR_IA32_VMX_PROCBASED_CTLS:
 case MSR_IA32_VMX_EXIT_CTLS:
 case MSR_IA32_VMX_ENTRY_CTLS:
  /*
   * The "non-true" VMX capability MSRs are generated from the
   * "true" MSRs, so we do not support restoring them directly.
   *
   * If userspace wants to emulate VMX_BASIC[55]=0, userspace
   * should restore the "true" MSRs with the must-be-1 bits
   * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
   * DEFAULT SETTINGS".
   */
  return -EINVAL;
 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
 case MSR_IA32_VMX_PROCBASED_CTLS2:
  return vmx_restore_control_msr(vmx, msr_index, data);
 case MSR_IA32_VMX_MISC:
  return vmx_restore_vmx_misc(vmx, data);
 case MSR_IA32_VMX_CR0_FIXED0:
 case MSR_IA32_VMX_CR4_FIXED0:
  return vmx_restore_fixed0_msr(vmx, msr_index, data);
 case MSR_IA32_VMX_CR0_FIXED1:
 case MSR_IA32_VMX_CR4_FIXED1:
  /*
   * These MSRs are generated based on the vCPU's CPUID, so we
   * do not support restoring them directly.
   */
  return -EINVAL;
 case MSR_IA32_VMX_EPT_VPID_CAP:
  return vmx_restore_vmx_ept_vpid_cap(vmx, data);
 case MSR_IA32_VMX_VMCS_ENUM:
  /* No constraints beyond "not in VMX operation". */
  vmx->nested.msrs.vmcs_enum = data;
  return 0;
 case MSR_IA32_VMX_VMFUNC:
  /* Only VM functions supported by KVM may be enabled. */
  if (data & ~vmcs_config.nested.vmfunc_controls)
   return -EINVAL;
  vmx->nested.msrs.vmfunc_controls = data;
  return 0;
 default:
  /*
   * The rest of the VMX capability MSRs do not support restore.
   */
  return -EINVAL;
 }
}

/*
 * Read the value of a VMX capability MSR from @msrs into *@pdata.  The
 * "non-true" control MSRs are synthesized from the "true" MSRs by OR-ing in
 * the default1 (always-on without TRUE MSR) control bits.
 *
 * Returns 0 on success, 1 if @msr_index is not a VMX capability MSR.
 */
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
{
 switch (msr_index) {
 case MSR_IA32_VMX_BASIC:
  *pdata = msrs->basic;
  break;
 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
 case MSR_IA32_VMX_PINBASED_CTLS:
  *pdata = vmx_control_msr(
   msrs->pinbased_ctls_low,
   msrs->pinbased_ctls_high);
  if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
   *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
  break;
 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
 case MSR_IA32_VMX_PROCBASED_CTLS:
  *pdata = vmx_control_msr(
   msrs->procbased_ctls_low,
   msrs->procbased_ctls_high);
  if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
   *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
  break;
 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
 case MSR_IA32_VMX_EXIT_CTLS:
  *pdata = vmx_control_msr(
   msrs->exit_ctls_low,
   msrs->exit_ctls_high);
  if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
   *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
  break;
 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
 case MSR_IA32_VMX_ENTRY_CTLS:
  *pdata = vmx_control_msr(
   msrs->entry_ctls_low,
   msrs->entry_ctls_high);
  if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
   *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
  break;
 case MSR_IA32_VMX_MISC:
  *pdata = vmx_control_msr(
   msrs->misc_low,
   msrs->misc_high);
  break;
 case MSR_IA32_VMX_CR0_FIXED0:
  *pdata = msrs->cr0_fixed0;
  break;
 case MSR_IA32_VMX_CR0_FIXED1:
  *pdata = msrs->cr0_fixed1;
  break;
 case MSR_IA32_VMX_CR4_FIXED0:
  *pdata = msrs->cr4_fixed0;
  break;
 case MSR_IA32_VMX_CR4_FIXED1:
  *pdata = msrs->cr4_fixed1;
  break;
 case MSR_IA32_VMX_VMCS_ENUM:
  *pdata = msrs->vmcs_enum;
  break;
 case MSR_IA32_VMX_PROCBASED_CTLS2:
  *pdata = vmx_control_msr(
   msrs->secondary_ctls_low,
   msrs->secondary_ctls_high);
  break;
 case MSR_IA32_VMX_EPT_VPID_CAP:
  *pdata = msrs->ept_caps |
   ((u64)msrs->vpid_caps << 32);
  break;
 case MSR_IA32_VMX_VMFUNC:
  *pdata = msrs->vmfunc_controls;
  break;
 default:
  return 1;
 }

 return 0;
}

/*
 * Copy the writable VMCS shadow fields back to the VMCS12, in case they have
 * been modified by the L1 guest.  Note, "writable" in this context means
 * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of
 * fields tagged SHADOW_FIELD_RO may or may not align with the "read-only"
 * VM-exit information fields (which are actually writable if the vCPU is
 * configured to support "VMWRITE to any supported field in the VMCS").
 */
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
{
 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
 struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
 struct shadow_vmcs_field field;
 unsigned long val;
 int i;

 if (WARN_ON(!shadow_vmcs))
  return;

 /*
  * Preemption is disabled across the temporary vmcs_load() of the
  * shadow VMCS so this task cannot be scheduled out while the wrong
  * VMCS is current on this CPU.
  */
 preempt_disable();

 vmcs_load(shadow_vmcs);

 for (i = 0; i < max_shadow_read_write_fields; i++) {
  field = shadow_read_write_fields[i];
  val = __vmcs_readl(field.encoding);
  vmcs12_write_any(vmcs12, field.encoding, field.offset, val);
 }

 /* Restore the previously loaded VMCS as the current VMCS. */
 vmcs_clear(shadow_vmcs);
 vmcs_load(vmx->loaded_vmcs->vmcs);

 preempt_enable();
}

/*
 * Propagate the cached vmcs12 values into the shadow VMCS so that VMREADs
 * (and, for RW fields, VMWRITEs) by L1 observe current state.  Both the
 * guest-writable (RW) and guest-read-only (RO) shadow field sets are copied.
 *
 * NOTE(review): unlike copy_shadow_to_vmcs12(), preemption is not disabled
 * around the vmcs_load() here — presumably the caller runs in a context
 * where migration is already impossible; confirm against callers.
 */
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
{
 const struct shadow_vmcs_field *fields[] = {
  shadow_read_write_fields,
  shadow_read_only_fields
 };
 const int max_fields[] = {
  max_shadow_read_write_fields,
  max_shadow_read_only_fields
 };
 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
 struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
 struct shadow_vmcs_field field;
 unsigned long val;
 int i, q;

 if (WARN_ON(!shadow_vmcs))
  return;

 vmcs_load(shadow_vmcs);

 for (q = 0; q < ARRAY_SIZE(fields); q++) {
  for (i = 0; i < max_fields[q]; i++) {
   field = fields[q][i];
   val = vmcs12_read_any(vmcs12, field.encoding,
           field.offset);
   __vmcs_writel(field.encoding, val);
  }
 }

 /* Restore the previously loaded VMCS as the current VMCS. */
 vmcs_clear(shadow_vmcs);
 vmcs_load(vmx->loaded_vmcs->vmcs);
}

/*
 * Copy state from the guest's enlightened VMCS (eVMCS) into the cached
 * vmcs12.  @hv_clean_fields is the guest-provided bitmask of eVMCS field
 * groups that are "clean" (unmodified since the last copy); groups whose
 * clean bit is set are skipped.  tpr_threshold and guest_rip belong to the
 * CLEAN_FIELD_NONE group and are therefore copied unconditionally.
 *
 * BUG()s if reached with CONFIG_KVM_HYPERV disabled.
 */
static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields)
{
#ifdef CONFIG_KVM_HYPERV
 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
 struct hv_enlightened_vmcs *evmcs = nested_vmx_evmcs(vmx);
 struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(&vmx->vcpu);

 /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
 vmcs12->tpr_threshold = evmcs->tpr_threshold;
 vmcs12->guest_rip = evmcs->guest_rip;

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL))) {
  hv_vcpu->nested.pa_page_gpa = evmcs->partition_assist_page;
  hv_vcpu->nested.vm_id = evmcs->hv_vm_id;
  hv_vcpu->nested.vp_id = evmcs->hv_vp_id;
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
  vmcs12->guest_rsp = evmcs->guest_rsp;
  vmcs12->guest_rflags = evmcs->guest_rflags;
  vmcs12->guest_interruptibility_info =
   evmcs->guest_interruptibility_info;
  /*
   * Not present in struct vmcs12:
   * vmcs12->guest_ssp = evmcs->guest_ssp;
   */
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
  vmcs12->cpu_based_vm_exec_control =
   evmcs->cpu_based_vm_exec_control;
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
  vmcs12->exception_bitmap = evmcs->exception_bitmap;
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
  vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
  vmcs12->vm_entry_intr_info_field =
   evmcs->vm_entry_intr_info_field;
  vmcs12->vm_entry_exception_error_code =
   evmcs->vm_entry_exception_error_code;
  vmcs12->vm_entry_instruction_len =
   evmcs->vm_entry_instruction_len;
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
  vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
  vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
  vmcs12->host_cr0 = evmcs->host_cr0;
  vmcs12->host_cr3 = evmcs->host_cr3;
  vmcs12->host_cr4 = evmcs->host_cr4;
  vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
  vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
  vmcs12->host_rip = evmcs->host_rip;
  vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
  vmcs12->host_es_selector = evmcs->host_es_selector;
  vmcs12->host_cs_selector = evmcs->host_cs_selector;
  vmcs12->host_ss_selector = evmcs->host_ss_selector;
  vmcs12->host_ds_selector = evmcs->host_ds_selector;
  vmcs12->host_fs_selector = evmcs->host_fs_selector;
  vmcs12->host_gs_selector = evmcs->host_gs_selector;
  vmcs12->host_tr_selector = evmcs->host_tr_selector;
  vmcs12->host_ia32_perf_global_ctrl = evmcs->host_ia32_perf_global_ctrl;
  /*
   * Not present in struct vmcs12:
   * vmcs12->host_ia32_s_cet = evmcs->host_ia32_s_cet;
   * vmcs12->host_ssp = evmcs->host_ssp;
   * vmcs12->host_ia32_int_ssp_table_addr = evmcs->host_ia32_int_ssp_table_addr;
   */
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
  vmcs12->pin_based_vm_exec_control =
   evmcs->pin_based_vm_exec_control;
  vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
  vmcs12->secondary_vm_exec_control =
   evmcs->secondary_vm_exec_control;
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
  vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
  vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
  vmcs12->msr_bitmap = evmcs->msr_bitmap;
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
  vmcs12->guest_es_base = evmcs->guest_es_base;
  vmcs12->guest_cs_base = evmcs->guest_cs_base;
  vmcs12->guest_ss_base = evmcs->guest_ss_base;
  vmcs12->guest_ds_base = evmcs->guest_ds_base;
  vmcs12->guest_fs_base = evmcs->guest_fs_base;
  vmcs12->guest_gs_base = evmcs->guest_gs_base;
  vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
  vmcs12->guest_tr_base = evmcs->guest_tr_base;
  vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
  vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
  vmcs12->guest_es_limit = evmcs->guest_es_limit;
  vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
  vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
  vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
  vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
  vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
  vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
  vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
  vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
  vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
  vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
  vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
  vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
  vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
  vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
  vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
  vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
  vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
  vmcs12->guest_es_selector = evmcs->guest_es_selector;
  vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
  vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
  vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
  vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
  vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
  vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
  vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
  vmcs12->tsc_offset = evmcs->tsc_offset;
  vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
  vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
  vmcs12->encls_exiting_bitmap = evmcs->encls_exiting_bitmap;
  vmcs12->tsc_multiplier = evmcs->tsc_multiplier;
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
  vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
  vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
  vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
  vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
  vmcs12->guest_cr0 = evmcs->guest_cr0;
  vmcs12->guest_cr3 = evmcs->guest_cr3;
  vmcs12->guest_cr4 = evmcs->guest_cr4;
  vmcs12->guest_dr7 = evmcs->guest_dr7;
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
  vmcs12->host_fs_base = evmcs->host_fs_base;
  vmcs12->host_gs_base = evmcs->host_gs_base;
  vmcs12->host_tr_base = evmcs->host_tr_base;
  vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
  vmcs12->host_idtr_base = evmcs->host_idtr_base;
  vmcs12->host_rsp = evmcs->host_rsp;
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
  vmcs12->ept_pointer = evmcs->ept_pointer;
  vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
 }

 if (unlikely(!(hv_clean_fields &
         HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
  vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
  vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
  vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
  vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
  vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
  vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
  vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
  vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
  vmcs12->guest_pending_dbg_exceptions =
   evmcs->guest_pending_dbg_exceptions;
  vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
  vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
  vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
  vmcs12->guest_activity_state = evmcs->guest_activity_state;
  vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
  vmcs12->guest_ia32_perf_global_ctrl = evmcs->guest_ia32_perf_global_ctrl;
  /*
   * Not present in struct vmcs12:
   * vmcs12->guest_ia32_s_cet = evmcs->guest_ia32_s_cet;
   * vmcs12->guest_ia32_lbr_ctl = evmcs->guest_ia32_lbr_ctl;
   * vmcs12->guest_ia32_int_ssp_table_addr = evmcs->guest_ia32_int_ssp_table_addr;
   */
 }

 /*
  * Not used?
  * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
  * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
  * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
  * vmcs12->page_fault_error_code_mask =
  *  evmcs->page_fault_error_code_mask;
  * vmcs12->page_fault_error_code_match =
  *  evmcs->page_fault_error_code_match;
  * vmcs12->cr3_target_count = evmcs->cr3_target_count;
  * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count;
  * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count;
  * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count;
  */

 /*
  * Read only fields:
  * vmcs12->guest_physical_address = evmcs->guest_physical_address;
  * vmcs12->vm_instruction_error = evmcs->vm_instruction_error;
  * vmcs12->vm_exit_reason = evmcs->vm_exit_reason;
  * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info;
  * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code;
  * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field;
  * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code;
  * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len;
  * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info;
  * vmcs12->exit_qualification = evmcs->exit_qualification;
  * vmcs12->guest_linear_address = evmcs->guest_linear_address;
  *
  * Not present in struct vmcs12:
  * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx;
  * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi;
  * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi;
  * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
  */

 return;
#else /* CONFIG_KVM_HYPERV */
 KVM_BUG_ON(1, vmx->vcpu.kvm);
#endif /* CONFIG_KVM_HYPERV */
}

static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
{
#ifdef CONFIG_KVM_HYPERV
 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
 struct hv_enlightened_vmcs *evmcs = nested_vmx_evmcs(vmx);

 /*
 * Should not be changed by KVM:
 *
 * evmcs->host_es_selector = vmcs12->host_es_selector;
 * evmcs->host_cs_selector = vmcs12->host_cs_selector;
 * evmcs->host_ss_selector = vmcs12->host_ss_selector;
 * evmcs->host_ds_selector = vmcs12->host_ds_selector;
 * evmcs->host_fs_selector = vmcs12->host_fs_selector;
 * evmcs->host_gs_selector = vmcs12->host_gs_selector;
 * evmcs->host_tr_selector = vmcs12->host_tr_selector;
 * evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
 * evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
 * evmcs->host_cr0 = vmcs12->host_cr0;
 * evmcs->host_cr3 = vmcs12->host_cr3;
 * evmcs->host_cr4 = vmcs12->host_cr4;
 * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
 * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
 * evmcs->host_rip = vmcs12->host_rip;
 * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
 * evmcs->host_fs_base = vmcs12->host_fs_base;
 * evmcs->host_gs_base = vmcs12->host_gs_base;
 * evmcs->host_tr_base = vmcs12->host_tr_base;
 * evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
 * evmcs->host_idtr_base = vmcs12->host_idtr_base;
 * evmcs->host_rsp = vmcs12->host_rsp;
 * sync_vmcs02_to_vmcs12() doesn't read these:
 * evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
 * evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
 * evmcs->msr_bitmap = vmcs12->msr_bitmap;
 * evmcs->ept_pointer = vmcs12->ept_pointer;
 * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
 * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
 * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
 * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
 * evmcs->tpr_threshold = vmcs12->tpr_threshold;
 * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
 * evmcs->exception_bitmap = vmcs12->exception_bitmap;
 * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
 * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
 * evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
 * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
 * evmcs->page_fault_error_code_mask =
 * vmcs12->page_fault_error_code_mask;
 * evmcs->page_fault_error_code_match =
 * vmcs12->page_fault_error_code_match;
 * evmcs->cr3_target_count = vmcs12->cr3_target_count;
 * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
 * evmcs->tsc_offset = vmcs12->tsc_offset;
 * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
 * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
 * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
 * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
 * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
 * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
 * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
 * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
 * evmcs->guest_ia32_perf_global_ctrl = vmcs12->guest_ia32_perf_global_ctrl;
 * evmcs->host_ia32_perf_global_ctrl = vmcs12->host_ia32_perf_global_ctrl;
 * evmcs->encls_exiting_bitmap = vmcs12->encls_exiting_bitmap;
 * evmcs->tsc_multiplier = vmcs12->tsc_multiplier;
 *
 * Not present in struct vmcs12:
 * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
 * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
 * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
 * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
 * evmcs->host_ia32_s_cet = vmcs12->host_ia32_s_cet;
 * evmcs->host_ssp = vmcs12->host_ssp;
 * evmcs->host_ia32_int_ssp_table_addr = vmcs12->host_ia32_int_ssp_table_addr;
 * evmcs->guest_ia32_s_cet = vmcs12->guest_ia32_s_cet;
 * evmcs->guest_ia32_lbr_ctl = vmcs12->guest_ia32_lbr_ctl;
 * evmcs->guest_ia32_int_ssp_table_addr = vmcs12->guest_ia32_int_ssp_table_addr;
 * evmcs->guest_ssp = vmcs12->guest_ssp;
 */


 evmcs->guest_es_selector = vmcs12->guest_es_selector;
 evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
 evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
 evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
 evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
 evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
 evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
 evmcs->guest_tr_selector = vmcs12->guest_tr_selector;

 evmcs->guest_es_limit = vmcs12->guest_es_limit;
 evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
 evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
 evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
 evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
 evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
 evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
 evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
 evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
 evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;

 evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
 evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
 evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
 evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
 evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
 evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
 evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
 evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;

 evmcs->guest_es_base = vmcs12->guest_es_base;
 evmcs->guest_cs_base = vmcs12->guest_cs_base;
 evmcs->guest_ss_base = vmcs12->guest_ss_base;
 evmcs->guest_ds_base = vmcs12->guest_ds_base;
 evmcs->guest_fs_base = vmcs12->guest_fs_base;
 evmcs->guest_gs_base = vmcs12->guest_gs_base;
 evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
 evmcs->guest_tr_base = vmcs12->guest_tr_base;
 evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
 evmcs->guest_idtr_base = vmcs12->guest_idtr_base;

 evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
 evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;

 evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
 evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
 evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
 evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;

 evmcs->guest_pending_dbg_exceptions =
  vmcs12->guest_pending_dbg_exceptions;
 evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
 evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;

 evmcs->guest_activity_state = vmcs12->guest_activity_state;
 evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;

 evmcs->guest_cr0 = vmcs12->guest_cr0;
 evmcs->guest_cr3 = vmcs12->guest_cr3;
 evmcs->guest_cr4 = vmcs12->guest_cr4;
 evmcs->guest_dr7 = vmcs12->guest_dr7;

 evmcs->guest_physical_address = vmcs12->guest_physical_address;

 evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
 evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
 evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
 evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
 evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
 evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
 evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
 evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;

 evmcs->exit_qualification = vmcs12->exit_qualification;

 evmcs->guest_linear_address = vmcs12->guest_linear_address;
 evmcs->guest_rsp = vmcs12->guest_rsp;
 evmcs->guest_rflags = vmcs12->guest_rflags;

 evmcs->guest_interruptibility_info =
--> --------------------

--> maximum size reached

--> --------------------

Messung V0.5
C=97 H=96 G=96

¤ Dauer der Verarbeitung: 0.8 Sekunden  (vorverarbeitet)  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.