/*
 * As per Hyper-V TLFS, extended hypercalls start from 0x8001
 * (HvExtCallQueryCapabilities). The response to this hypercall is a 64-bit
 * value where each bit tells which extended hypercall is available besides
 * HvExtCallQueryCapabilities.
 *
 * 0x8001 - First extended hypercall, HvExtCallQueryCapabilities, no bit
 * assigned.
 *
 * 0x8002 - Bit 0
 * 0x8003 - Bit 1
 * ..
 * 0x8041 - Bit 63
 *
 * Therefore, HV_EXT_CALL_MAX = 0x8001 + 64
 */
#define HV_EXT_CALL_MAX (HV_EXT_CALL_QUERY_CAPABILITIES + 64)
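/*
 * Illustrative sketch, not KVM code: given the 64-bit capabilities mask
 * returned by HvExtCallQueryCapabilities, the availability bit for an
 * extended hypercall code can be derived as below. hv_ext_call_available()
 * is a hypothetical helper, not an existing KVM function.
 */
static inline bool hv_ext_call_available(u64 caps, u16 code)
{
	/* 0x8002 maps to bit 0, 0x8003 to bit 1, ..., 0x8041 to bit 63 */
	if (code <= HV_EXT_CALL_QUERY_CAPABILITIES || code > HV_EXT_CALL_MAX)
		return false;

	return caps & BIT_ULL(code - HV_EXT_CALL_QUERY_CAPABILITIES - 1);
}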
	if (auto_eoi_new)
		hv->synic_auto_eoi_used++;
	else
		hv->synic_auto_eoi_used--;
	/*
	 * Inhibit APICv if any vCPU is using SynIC's AutoEOI, which relies on
	 * the hypervisor to manually inject IRQs.
	 */
	__kvm_set_or_clear_apicv_inhibit(vcpu->kvm,
					 APICV_INHIBIT_REASON_HYPERV,
					 !!hv->synic_auto_eoi_used);

	up_write(&vcpu->kvm->arch.apicv_update_lock);
}
static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
			  u64 data, bool host)
{
	int vector, old_vector;
	bool masked;

	vector = data & HV_SYNIC_SINT_VECTOR_MASK;
	masked = data & HV_SYNIC_SINT_MASKED;

	/*
	 * Valid vectors are 16-255, however, nested Hyper-V attempts to write
	 * default '0x10000' value on boot and this should not #GP. We need to
	 * allow zero-initing the register from host as well.
	 */
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
		return 1;

	/*
	 * Guest may configure multiple SINTs to use the same vector, so
	 * we maintain a bitmap of vectors handled by synic, and a
	 * bitmap of vectors with auto-eoi behavior. The bitmaps are
	 * updated here, and atomically queried on fast paths.
	 */
	old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;
	ret = 0;
	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		synic->control = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SVERSION:
		if (!host) {
			ret = 1;
			break;
		}
		synic->version = data;
		break;
	case HV_X64_MSR_SIEFP:
		if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->evt_page = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SIMP:
		if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->msg_page = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_EOM: {
		int i;

		if (!synic->active)
			break;

		for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
			kvm_hv_notify_acked_sint(vcpu, i);
		break;
	}
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
		break;
	default:
		ret = 1;
		break;
	}
	return ret;
}
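/*
 * Illustrative sketch, not KVM code: how a SINTx value written through
 * synic_set_msr()/synic_set_sint() above decomposes per the TLFS layout.
 * Note that the default boot value 0x10000 is just "masked, vector 0",
 * which is why writing it must not #GP even though vector 0 is below
 * HV_SYNIC_FIRST_VALID_VECTOR. hv_sint_decode() is a hypothetical helper.
 */
static void hv_sint_decode(u64 data)
{
	unsigned int vector = data & HV_SYNIC_SINT_VECTOR_MASK;	/* bits 7:0 */
	bool masked = data & HV_SYNIC_SINT_MASKED;		/* bit 16 */
	bool auto_eoi = data & HV_SYNIC_SINT_AUTO_EOI;		/* bit 17 */

	pr_debug("SINT: vector %u, masked %d, auto-eoi %d\n",
		 vector, masked, auto_eoi);
}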
	/*
	 * Fall back to get_kvmclock_ns() when TSC page hasn't been set up,
	 * is broken, disabled or being updated.
	 */
	if (hv->hv_tsc_page_status != HV_TSC_PAGE_SET)
		return div_u64(get_kvmclock_ns(kvm), 100);
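/*
 * Sketch, for illustration only: the Hyper-V time reference counter ticks
 * in 100ns units, hence the div_u64(..., 100) above. One second of
 * kvmclock time (10^9 ns) reads back as 10,000,000 reference ticks.
 * ns_to_hv_ref_ticks() is a hypothetical helper, not an existing function.
 */
static inline u64 ns_to_hv_ref_ticks(u64 ns)
{
	return div_u64(ns, 100);
}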
/*
 * stimer_start() assumptions:
 * a) stimer->count is not equal to 0
 * b) stimer->config has HV_STIMER_ENABLE flag
 */
static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
{
	u64 time_now;
	ktime_t ktime_now;

	time_now = get_time_ref_counter(hv_stimer_to_vcpu(stimer)->kvm);
	ktime_now = ktime_get();

	hrtimer_start(&stimer->timer,
		      ktime_add_ns(ktime_now,
				   100 * (stimer->exp_time - time_now)),
		      HRTIMER_MODE_ABS);
	return 0;
}
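/*
 * Worked example for the arithmetic above, with hypothetical values:
 * stimer->exp_time and time_now are in 100ns reference units, so a timer
 * due in 10^5 reference ticks is armed with an hrtimer expiring
 * 100 * 10^5 = 10^7 ns (10 ms) after 'ktime_now'.
 */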
	stimer->exp_time = stimer->count;

	if (time_now >= stimer->count) {
		/*
		 * Expire timer according to Hypervisor Top-Level Functional
		 * Specification v4(15.3.1):
		 * "If a one shot is enabled and the specified count is in
		 * the past, it will expire immediately."
		 */
		stimer_mark_pending(stimer, false);
		return 0;
	}
	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
		return -ENOENT;

	msg_page_gfn = synic->msg_page >> PAGE_SHIFT;

	/*
	 * Strictly following the spec-mandated ordering would assume setting
	 * .msg_pending before checking .message_type. However, this function
	 * is only called in vcpu context so the entire update is atomic from
	 * guest POV and thus the exact order here doesn't matter.
	 */
	r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type,
				     msg_off + offsetof(struct hv_message,
							header.message_type),
				     sizeof(hv_hdr.message_type));
	if (r < 0)
		return r;

	if (hv_hdr.message_type != HVMSG_NONE) {
		if (no_retry)
			return 0;
	/*
	 * To avoid piling up periodic ticks, don't retry message
	 * delivery for them (within "lazy" lost ticks policy).
	 */
	bool no_retry = stimer->config.periodic;
	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
			stimer = &hv_vcpu->stimer[i];
			if (stimer->config.enable) {
				exp_time = stimer->exp_time;

				if (exp_time) {
					time_now =
						get_time_ref_counter(vcpu->kvm);
					if (time_now >= exp_time)
						stimer_expiration(stimer);
				}

				if ((stimer->config.enable) &&
				    stimer->count) {
					if (!stimer->msg_pending)
						stimer_start(stimer);
				} else
					stimer_cleanup(stimer);
			}
		}
}
	bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);

	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		stimer_init(&hv_vcpu->stimer[i], i);

	hv_vcpu->vp_index = vcpu->vcpu_idx;

	for (i = 0; i < HV_NR_TLB_FLUSH_FIFOS; i++) {
		INIT_KFIFO(hv_vcpu->tlb_flush_fifo[i].entries);
		spin_lock_init(&hv_vcpu->tlb_flush_fifo[i].write_lock);
	}

	return 0;
}
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
{
	struct kvm_vcpu_hv_synic *synic;
	int r;
static bool kvm_hv_msr_partition_wide(u32 msr)
{
	bool r = false;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
	case HV_X64_MSR_HYPERCALL:
	case HV_X64_MSR_REFERENCE_TSC:
	case HV_X64_MSR_TIME_REF_COUNT:
	case HV_X64_MSR_CRASH_CTL:
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
	case HV_X64_MSR_RESET:
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_STATUS:
	case HV_X64_MSR_TSC_INVARIANT_CONTROL:
	case HV_X64_MSR_SYNDBG_OPTIONS:
	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		r = true;
		break;
	}
/*
 * The kvmclock and Hyper-V TSC page use similar formulas, and converting
 * between them is possible:
 *
 * kvmclock formula:
 *    nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
 *           + system_time
 *
 * Hyper-V formula:
 *    nsec/100 = ticks * scale / 2^64 + offset
 *
 * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
 * By dividing the kvmclock formula by 100 and equating what's left we get:
 *    ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *            scale / 2^64 =         tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *            scale        =         tsc_to_system_mul * 2^(32+tsc_shift) / 100
 *
 * Now expand the kvmclock formula and divide by 100:
 *    nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
 *           - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
 *           + system_time
 *    nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *               - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
 *               + system_time / 100
 *
 * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
 *    nsec/100 = ticks * scale / 2^64
 *               - tsc_timestamp * scale / 2^64
 *               + system_time / 100
 *
 * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
 *    offset = system_time / 100 - tsc_timestamp * scale / 2^64
 *
 * These two equivalencies are implemented in this function.
 */
static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
					struct ms_hyperv_tsc_page *tsc_ref)
{
	u64 max_mul;

	if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
		return false;

	/*
	 * check if scale would overflow, if so we use the time ref counter
	 *    tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
	 *    tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
	 *    tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
	 */
	max_mul = 100ull << (32 - hv_clock->tsc_shift);
	if (hv_clock->tsc_to_system_mul >= max_mul)
		return false;

	/*
	 * Otherwise compute the scale and offset according to the formulas
	 * derived above.
	 */
	tsc_ref->tsc_scale =
		mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
				hv_clock->tsc_to_system_mul,
				100);
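/*
 * Worked example for the derivation above, with hypothetical kvmclock
 * parameters: for a 1 GHz guest TSC one valid encoding is
 * tsc_to_system_mul = 2^31, tsc_shift = 1 (nsec = ticks * 2^31 * 2^(1-32)
 * = ticks). The overflow check passes (2^31 < 100 * 2^31) and the
 * computation yields tsc_scale = 2^31 * 2^33 / 100 = 2^64 / 100, so the
 * TSC page formula nsec/100 = ticks * scale / 2^64 gives ticks / 100,
 * i.e. 10^9 ticks (one second) map to 10^7 hundred-ns units as expected.
 */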
/*
 * Don't touch TSC page values if the guest has opted for TSC emulation after
 * migration. KVM doesn't fully support reenlightenment notifications and TSC
 * access emulation and Hyper-V is known to expect the values in TSC page to
 * stay constant before TSC access emulation is disabled from guest side
 * (HV_X64_MSR_TSC_EMULATION_STATUS). KVM userspace is expected to preserve TSC
 * frequency and guest visible TSC value across migration (and prevent it when
 * TSC scaling is unsupported).
 */
static inline bool tsc_page_update_unsafe(struct kvm_hv *hv)
{
	return (hv->hv_tsc_page_status != HV_TSC_PAGE_GUEST_CHANGED) &&
		hv->hv_tsc_emulation_control;
}
	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
		goto out_unlock;

	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;

	/*
	 * Because the TSC parameters only vary when there is a
	 * change in the master clock, do not bother with caching.
	 */
	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
				    &tsc_seq, sizeof(tsc_seq))))
		goto out_err;

	if (tsc_seq && tsc_page_update_unsafe(hv)) {
		if (kvm_read_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
			goto out_err;

		goto out_unlock;
	}

	/*
	 * While we're computing and writing the parameters, force the
	 * guest to use the time reference count MSR.
	 */
	hv->tsc_ref.tsc_sequence = 0;
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
		goto out_err;

	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
		goto out_err;

	/* Ensure sequence is zero before writing the rest of the struct.  */
	smp_wmb();
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
		goto out_err;

	/*
	 * Now switch to the TSC page mechanism by writing the sequence.
	 */
	tsc_seq++;
	if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
		tsc_seq = 1;

	/* Write the struct entirely before the non-zero sequence.  */
	smp_wmb();

	hv->tsc_ref.tsc_sequence = tsc_seq;
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
		goto out_err;

	if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET &&
	    !tsc_page_update_unsafe(hv))
		hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;

	mutex_unlock(&hv->hv_lock);
}
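/*
 * For reference, a sketch (modeled on a Hyper-V guest's TSC page reader,
 * not KVM code) of the sequence protocol the writes above maintain: a
 * reader must observe either a consistent <scale, offset> pair or a zero
 * sequence telling it to fall back to the time reference count MSR.
 * hv_read_tsc_page_sketch() is a hypothetical helper.
 */
static u64 hv_read_tsc_page_sketch(struct ms_hyperv_tsc_page *tsc_pg)
{
	u64 scale, offset, tsc;
	u32 seq;

	do {
		seq = READ_ONCE(tsc_pg->tsc_sequence);
		if (!seq)
			return U64_MAX;	/* disabled, use the MSR instead */
		smp_rmb();	/* pairs with the second smp_wmb() above */

		scale = READ_ONCE(tsc_pg->tsc_scale);
		offset = READ_ONCE(tsc_pg->tsc_offset);
		tsc = rdtsc_ordered();

		smp_rmb();	/* re-check the sequence after reading */
	} while (READ_ONCE(tsc_pg->tsc_sequence) != seq);

	/* 100ns units: ticks * scale / 2^64 + offset */
	return mul_u64_u64_shr(tsc, scale, 64) + offset;
}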
static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
{
	if (!hv_vcpu->enforce_cpuid)
		return true;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
	case HV_X64_MSR_HYPERCALL:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_HYPERCALL_AVAILABLE;
	case HV_X64_MSR_VP_RUNTIME:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_VP_RUNTIME_AVAILABLE;
	case HV_X64_MSR_TIME_REF_COUNT:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_TIME_REF_COUNT_AVAILABLE;
	case HV_X64_MSR_VP_INDEX:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_VP_INDEX_AVAILABLE;
	case HV_X64_MSR_RESET:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_RESET_AVAILABLE;
	case HV_X64_MSR_REFERENCE_TSC:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_REFERENCE_TSC_AVAILABLE;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_SYNIC_AVAILABLE;
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG:
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_SYNTIMER_AVAILABLE;
	case HV_X64_MSR_EOI:
	case HV_X64_MSR_ICR:
	case HV_X64_MSR_TPR:
	case HV_X64_MSR_VP_ASSIST_PAGE:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_APIC_ACCESS_AVAILABLE;
	case HV_X64_MSR_TSC_FREQUENCY:
	case HV_X64_MSR_APIC_FREQUENCY:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_ACCESS_FREQUENCY_MSRS;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_ACCESS_REENLIGHTENMENT;
	case HV_X64_MSR_TSC_INVARIANT_CONTROL:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_ACCESS_TSC_INVARIANT;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
	case HV_X64_MSR_CRASH_CTL:
		return hv_vcpu->cpuid_cache.features_edx &
			HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
	case HV_X64_MSR_SYNDBG_OPTIONS:
	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		return hv_vcpu->cpuid_cache.features_edx &
			HV_FEATURE_DEBUG_MSRS_AVAILABLE;
	default:
		break;
	}

	return false;
}
#define KVM_HV_WIN2016_GUEST_ID 0x1040a00003839
#define KVM_HV_WIN2016_GUEST_ID_MASK (~GENMASK_ULL(23, 16)) /* mask out the service version */

/*
 * Hyper-V enabled Windows Server 2016 SMP VMs fail to boot in !XSAVES && XSAVEC
 * configuration. Such configuration can result from, for example, AMD Erratum
 * 1386 workaround.
 *
 * Print a notice so users aren't left wondering what's suddenly gone wrong.
 */
static void __kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = to_kvm_hv(kvm);

	/* Check again under the hv_lock.  */
	if (hv->xsaves_xsavec_checked)
		return;

	if ((hv->hv_guest_os_id & KVM_HV_WIN2016_GUEST_ID_MASK) !=
	    KVM_HV_WIN2016_GUEST_ID)
		return;

	hv->xsaves_xsavec_checked = true;

	/* UP configurations aren't affected */
	if (atomic_read(&kvm->online_vcpus) < 2)
		return;

	if (guest_cpuid_has(vcpu, X86_FEATURE_XSAVES) ||
	    !guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVEC))
		return;

	pr_notice_ratelimited("Booting SMP Windows KVM VM with !XSAVES && XSAVEC. "
			      "If it fails to boot try disabling XSAVEC in the VM config.\n");
}
	if (unlikely(!host && !hv_check_msr_access(to_hv_vcpu(vcpu), msr)))
		return 1;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		hv->hv_guest_os_id = data;
		/* setting guest os id to zero disables hypercall page */
		if (!hv->hv_guest_os_id)
			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
		break;
	case HV_X64_MSR_HYPERCALL: {
		u8 instructions[9];
		int i = 0;
		u64 addr;

		/* if guest os id is not set hypercall should remain disabled */
		if (!hv->hv_guest_os_id)
			break;
		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
			hv->hv_hypercall = data;
			break;
		}

		/*
		 * If Xen and Hyper-V hypercalls are both enabled, disambiguate
		 * the same way Xen itself does, by setting bit 31 of EAX,
		 * which is RsvdZ in the 32-bit Hyper-V hypercall ABI and just
		 * going to be clobbered on 64-bit.
		 */
		if (kvm_xen_hypercall_enabled(kvm)) {
			/* orl $0x80000000, %eax */
			instructions[i++] = 0x0d;
			instructions[i++] = 0x00;
			instructions[i++] = 0x00;
			instructions[i++] = 0x00;
			instructions[i++] = 0x80;
		}

		/* vmcall/vmmcall */
		kvm_x86_call(patch_hypercall)(vcpu, instructions + i);
		i += 3;

		/* ret */
		((unsigned char *)instructions)[i++] = 0xc3;

		addr = data & HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK;
		if (kvm_vcpu_write_guest(vcpu, addr, instructions, i))
			return 1;

		hv->hv_hypercall = data;
		break;
	}
	case HV_X64_MSR_REFERENCE_TSC:
		hv->hv_tsc_page = data;
		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
			if (!host)
				hv->hv_tsc_page_status = HV_TSC_PAGE_GUEST_CHANGED;
			else
				hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;
			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
		} else {
			hv->hv_tsc_page_status = HV_TSC_PAGE_UNSET;
		}
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_set_crash_data(kvm,
						 msr - HV_X64_MSR_CRASH_P0,
						 data);
	case HV_X64_MSR_CRASH_CTL:
		if (host)
			return kvm_hv_msr_set_crash_ctl(kvm, data);

		if (data & HV_CRASH_CTL_CRASH_NOTIFY) {
			/* Send notification about crash to user space */
			kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
		}
		break;
	case HV_X64_MSR_RESET:
		if (data == 1) {
			vcpu_debug(vcpu, "hyper-v reset requested\n");
			kvm_make_request(KVM_REQ_HV_RESET, vcpu);
		}
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		hv->hv_reenlightenment_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		hv->hv_tsc_emulation_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		if (data && !host)
			return 1;

		hv->hv_tsc_emulation_status = data;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	case HV_X64_MSR_TSC_INVARIANT_CONTROL:
		/* Only bit 0 is supported */
		if (data & ~HV_EXPOSE_INVARIANT_TSC)
			return 1;

		/* The feature can't be disabled from the guest */
		if (!host && hv->hv_invtsc_control && !data)
			return 1;
		if (!host || new_vp_index >= KVM_MAX_VCPUS)
			return 1;

		if (new_vp_index == hv_vcpu->vp_index)
			return 0;

		/*
		 * The VP index is initialized to vcpu_idx by
		 * kvm_hv_vcpu_postcreate so they initially match. Now the
		 * VP index is changing, adjust num_mismatched_vp_indexes if
		 * it now matches or no longer matches vcpu_idx.
		 */
		if (hv_vcpu->vp_index == vcpu->vcpu_idx)
			atomic_inc(&hv->num_mismatched_vp_indexes);
		else if (new_vp_index == vcpu->vcpu_idx)
			atomic_dec(&hv->num_mismatched_vp_indexes);
	case HV_X64_MSR_VP_ASSIST_PAGE: {
		u64 gfn;
		unsigned long addr;

		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
			hv_vcpu->hv_vapic = data;
			if (kvm_lapic_set_pv_eoi(vcpu, 0, 0))
				return 1;
			break;
		}

		gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;

		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
		if (kvm_is_error_hva(addr))
			return 1;

		/*
		 * Clear apic_assist portion of struct hv_vp_assist_page
		 * only, there can be valuable data in the rest which needs
		 * to be preserved e.g. on migration.
		 */
		if (__put_user(0, (u32 __user *)addr))
			return 1;

		hv_vcpu->hv_vapic = data;
		kvm_vcpu_mark_page_dirty(vcpu, gfn);
		if (kvm_lapic_set_pv_eoi(vcpu,
					 gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
					 sizeof(struct hv_vp_assist_page)))
			return 1;
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
	case HV_X64_MSR_VP_RUNTIME:
		if (!host)
			return 1;

		hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_set_msr(to_hv_synic(vcpu), msr, data, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG) / 2;

		return stimer_set_config(to_hv_stimer(vcpu, timer_index),
					 data, host);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT) / 2;

		return stimer_set_count(to_hv_stimer(vcpu, timer_index),
					data, host);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
	case HV_X64_MSR_APIC_FREQUENCY:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	default:
		kvm_pr_unimpl_wrmsr(vcpu, msr, data);
		return 1;
	}
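/*
 * For reference, a sketch of the bytes the HV_X64_MSR_HYPERCALL handler
 * above writes to the hypercall page on an Intel host when Xen hypercalls
 * are also enabled (on AMD, patch_hypercall emits vmmcall, 0f 01 d9,
 * instead of vmcall):
 *
 *	0d 00 00 00 80	orl	$0x80000000, %eax
 *	0f 01 c1	vmcall
 *	c3		ret
 */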
	if (unlikely(!host && !hv_check_msr_access(to_hv_vcpu(vcpu), msr)))
		return 1;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		data = hv->hv_guest_os_id;
		break;
	case HV_X64_MSR_HYPERCALL:
		data = hv->hv_hypercall;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		data = get_time_ref_counter(kvm);
		break;
	case HV_X64_MSR_REFERENCE_TSC:
		data = hv->hv_tsc_page;
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_get_crash_data(kvm,
						 msr - HV_X64_MSR_CRASH_P0,
						 pdata);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_get_crash_ctl(kvm, pdata);
	case HV_X64_MSR_RESET:
		data = 0;
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		data = hv->hv_reenlightenment_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		data = hv->hv_tsc_emulation_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		data = hv->hv_tsc_emulation_status;
		break;
	case HV_X64_MSR_TSC_INVARIANT_CONTROL:
		data = hv->hv_invtsc_control;
		break;
	case HV_X64_MSR_SYNDBG_OPTIONS:
	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		return syndbg_get_msr(vcpu, msr, pdata, host);
	default:
		kvm_pr_unimpl_rdmsr(vcpu, msr);
		return 1;
	}
	if (unlikely(!host && !hv_check_msr_access(hv_vcpu, msr)))
		return 1;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX:
		data = hv_vcpu->vp_index;
		break;
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
	case HV_X64_MSR_VP_ASSIST_PAGE:
		data = hv_vcpu->hv_vapic;
		break;
	case HV_X64_MSR_VP_RUNTIME:
		data = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_get_msr(to_hv_synic(vcpu), msr, pdata, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG) / 2;

		return stimer_get_config(to_hv_stimer(vcpu, timer_index),
					 pdata);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT) / 2;

		return stimer_get_count(to_hv_stimer(vcpu, timer_index),
					pdata);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
		data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
		break;
	case HV_X64_MSR_APIC_FREQUENCY:
		data = div64_u64(1000000000ULL,
				 vcpu->kvm->arch.apic_bus_cycle_ns);
		break;
	default:
		kvm_pr_unimpl_rdmsr(vcpu, msr);
		return 1;
	}

	*pdata = data;
	return 0;
}
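/*
 * Worked example for the two frequency MSRs above: a guest with
 * virtual_tsc_khz = 2500000 reads HV_X64_MSR_TSC_FREQUENCY as
 * 2,500,000,000 Hz, and with a hypothetical apic_bus_cycle_ns of 1 the
 * reported HV_X64_MSR_APIC_FREQUENCY is 10^9 / 1 = 1 GHz.
 */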
	/*
	 * If vp_index == vcpu_idx for all vCPUs, fill vcpu_mask directly, else
	 * fill a temporary buffer and manually test each vCPU's VP index.
	 */
	if (likely(!has_mismatch))
		bitmap = (u64 *)vcpu_mask;
	else
		bitmap = vp_bitmap;

	/*
	 * Each set of 64 VPs is packed into sparse_banks, with valid_bank_mask
	 * having a '1' for each bank that exists in sparse_banks. Sets must
	 * be in ascending order, i.e. bank0..bankN.
	 */
	memset(bitmap, 0, sizeof(vp_bitmap));
	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
		bitmap[bank] = sparse_banks[sbank++];
	if (!test_bit(valid_bit_nr, (unsigned long *)&valid_bank_mask))
		return false;

	/*
	 * The index into the sparse bank is the number of preceding bits in
	 * the valid mask. Optimize for VMs with <64 vCPUs by skipping the
	 * fancy math if there can't possibly be preceding bits.
	 */
	if (valid_bit_nr)
		sbank = hweight64(valid_bank_mask & GENMASK_ULL(valid_bit_nr - 1, 0));
	else
		sbank = 0;
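/*
 * Worked example for the index math above: with valid_bank_mask = 0b1010
 * (banks 1 and 3 present), a VP index of 200 falls in bank 3 (200 / 64),
 * so valid_bit_nr = 3 and hweight64(0b1010 & GENMASK_ULL(2, 0)) = 1,
 * i.e. bank 3's 64-bit chunk lives at sparse_banks[1].
 */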
	/*
	 * Current read offset when KVM reads hypercall input data gradually,
	 * either offset in bytes from 'ingpa' for regular hypercalls or the
	 * number of already consumed 'XMM halves' for 'fast' hypercalls.
	 */
	union {
		gpa_t data_offset;
		int consumed_xmm_halves;
	};
};
static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
			      u16 orig_cnt, u16 cnt_cap, u64 *data)
{
	/*
	 * Preserve the original count when ignoring entries via a "cap", KVM
	 * still needs to validate the guest input (though the non-XMM path
	 * punts on the checks).
	 */
	u16 cnt = min(orig_cnt, cnt_cap);
	int i, j;

	if (hc->fast) {
		/*
		 * Each XMM holds two sparse banks, but do not count halves that
		 * have already been consumed for hypercall parameters.
		 */
		if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - hc->consumed_xmm_halves)
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		for (i = 0; i < cnt; i++) {
			j = i + hc->consumed_xmm_halves;
			if (j % 2)
				data[i] = sse128_hi(hc->xmm[j / 2]);
			else
				data[i] = sse128_lo(hc->xmm[j / 2]);
		}
		return 0;
	}
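/*
 * Sketch of the XMM indexing above, with hypothetical register contents:
 * for a 'fast' hypercall whose fixed header consumed all of XMM0
 * (consumed_xmm_halves == 2), data[0] = sse128_lo(xmm[1]),
 * data[1] = sse128_hi(xmm[1]), data[2] = sse128_lo(xmm[2]), and so on,
 * for at most 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - 2 entries.
 */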
	/*
	 * All entries should fit on the fifo leaving one free for 'flush all'
	 * entry in case another request comes in. In case there's not enough
	 * space, just put 'flush all' entry there.
	 */
	if (count && entries && count < kfifo_avail(&tlb_flush_fifo->entries)) {
		WARN_ON(kfifo_in(&tlb_flush_fifo->entries, entries, count) != count);
		goto out_unlock;
	}

	/*
	 * Note: full fifo always contains 'flush all' entry, no need to check the
	 * return value.
	 */
	kfifo_in(&tlb_flush_fifo->entries, &flush_all_entry, 1);
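/*
 * Sizing note, assuming a hypothetical KVM_HV_TLB_FLUSH_FIFO_SIZE of 16:
 * callers queue at most KVM_HV_TLB_FLUSH_FIFO_SIZE - 1 = 15 precise
 * entries (see __tlb_flush_entries[] below), so the kfifo_avail() check
 * above always leaves at least one slot for the 'flush all' fallback.
 */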
static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	unsigned long *vcpu_mask = hv_vcpu->vcpu_mask;
	u64 *sparse_banks = hv_vcpu->sparse_banks;
	struct kvm *kvm = vcpu->kvm;
	struct hv_tlb_flush_ex flush_ex;
	struct hv_tlb_flush flush;
	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
	/*
	 * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE'
	 * entries on the TLB flush fifo. The last entry, however, needs to be
	 * always left free for 'flush all' entry which gets placed when
	 * there is not enough space to put all the requested entries.
	 */
	u64 __tlb_flush_entries[KVM_HV_TLB_FLUSH_FIFO_SIZE - 1];
	u64 *tlb_flush_entries;
	u64 valid_bank_mask;
	struct kvm_vcpu *v;
	unsigned long i;
	bool all_cpus;

	/*
	 * The Hyper-V TLFS doesn't allow more than HV_MAX_SPARSE_VCPU_BANKS
	 * sparse banks. Fail the build if KVM's max allowed number of
	 * vCPUs (>4096) exceeds this limit.
	 */
	BUILD_BUG_ON(KVM_HV_MAX_SPARSE_VCPU_SET_BITS > HV_MAX_SPARSE_VCPU_BANKS);

	/*
	 * 'Slow' hypercall's first parameter is the address in guest's memory
	 * where hypercall parameters are placed. This is either a GPA or a
	 * nested GPA when KVM is handling the call from L2 ('direct' TLB
	 * flush). Translate the address here so the memory can be uniformly
	 * read with kvm_read_guest().
	 */
	if (!hc->fast && is_guest_mode(vcpu)) {
		hc->ingpa = translate_nested_gpa(vcpu, hc->ingpa, 0, NULL);
		if (unlikely(hc->ingpa == INVALID_GPA))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}
		/*
		 * Work around possible WS2012 bug: it sends hypercalls
		 * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
		 * while also expecting us to flush something and crashing if
		 * we don't. Let's treat processor_mask == 0 same as
		 * HV_FLUSH_ALL_PROCESSORS.
		 */
		all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
			flush.processor_mask == 0;
	} else {
		if (hc->fast) {
			flush_ex.address_space = hc->ingpa;
			flush_ex.flags = hc->outgpa;
			memcpy(&flush_ex.hv_vp_set,
			       &hc->xmm[0], sizeof(hc->xmm[0]));
			hc->consumed_xmm_halves = 2;
		} else {
			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
						    sizeof(flush_ex))))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			hc->data_offset = sizeof(flush_ex);
		}
	/*
	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
	 * analyze it here, flush TLB regardless of the specified address space.
	 */
	if (all_cpus && !is_guest_mode(vcpu)) {
		kvm_for_each_vcpu(i, v, kvm) {
			tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, false);

			hv_tlb_flush_enqueue(v, tlb_flush_fifo,
					     tlb_flush_entries, hc->rep_cnt);
		}
		/*
		 * The following check races with nested vCPUs entering/exiting
		 * and/or migrating between L1's vCPUs, however the only case when
		 * KVM *must* flush the TLB is when the target L2 vCPU keeps
		 * running on the same L1 vCPU from the moment of the request until
		 * kvm_hv_flush_tlb() returns. TLB is fully flushed in all other
		 * cases, e.g. when the target L2 vCPU migrates to a different L1
		 * vCPU or when the corresponding L1 vCPU temporarily switches to a
		 * different L2 vCPU while the request is being processed.
		 */
		if (!hv_v || hv_v->nested.vm_id != hv_vcpu->nested.vm_id)
			continue;

		if (!all_cpus &&
		    !hv_is_vp_in_sparse_set(hv_v->nested.vp_id, valid_bank_mask,
					    sparse_banks))
			continue;
	if (!hv_vcpu) {
		/*
		 * KVM should have already allocated kvm_vcpu_hv if Hyper-V is
		 * enabled in CPUID.
		 */
		WARN_ON_ONCE(vcpu->arch.hyperv_enabled);
		return;
	}