/* 14 is the version for Xeon and Pentium 8.4.8*/ #define APIC_VERSION 0x14UL #define LAPIC_MMIO_LENGTH (1 << 12)
/* * Enable local APIC timer advancement (tscdeadline mode only) with adaptive * tuning. When enabled, KVM programs the host timer event to fire early, i.e. * before the deadline expires, to account for the delay between taking the * VM-Exit (to inject the guest event) and the subsequent VM-Enter to resume * the guest, i.e. so that the interrupt arrives in the guest with minimal * latency relative to the deadline programmed by the guest.
*/ staticbool lapic_timer_advance __read_mostly = true;
module_param(lapic_timer_advance, bool, 0444);
/* * For simplicity, KVM always allocates enough space for all possible * xAPIC IDs. Yell, but don't kill the VM, as KVM can continue on * without the optimized map.
*/ if (WARN_ON_ONCE(xapic_id > new->max_apic_id)) return -EINVAL;
/* * Bail if a vCPU was added and/or enabled its APIC between allocating * the map and doing the actual calculations for the map. Note, KVM * hardcodes the x2APIC ID to vcpu_id, i.e. there's no TOCTOU bug if * the compiler decides to reload x2apic_id after this check.
*/ if (x2apic_id > new->max_apic_id) return -E2BIG;
/* * Deliberately truncate the vCPU ID when detecting a mismatched APIC * ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a * 32-bit value. Any unwanted aliasing due to truncation results will * be detected below.
*/ if (!apic_x2apic_mode(apic) && xapic_id != (u8)vcpu->vcpu_id)
*xapic_id_mismatch = true;
/* * Apply KVM's hotplug hack if userspace has enable 32-bit APIC IDs. * Allow sending events to vCPUs by their x2APIC ID even if the target * vCPU is in legacy xAPIC mode, and silently ignore aliased xAPIC IDs * (the x2APIC ID is truncated to 8 bits, causing IDs > 0xff to wrap * and collide). * * Honor the architectural (and KVM's non-optimized) behavior if * userspace has not enabled 32-bit x2APIC IDs. Each APIC is supposed * to process messages independently. If multiple vCPUs have the same * effective APIC ID, e.g. due to the x2APIC wrap or because the guest * manually modified its xAPIC IDs, events targeting that ID are * supposed to be recognized by all vCPUs with said ID.
*/ if (vcpu->kvm->arch.x2apic_format) { /* See also kvm_apic_match_physical_addr(). */ if (apic_x2apic_mode(apic) || x2apic_id > 0xff)
new->phys_map[x2apic_id] = apic;
if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
new->phys_map[xapic_id] = apic;
} else { /* * Disable the optimized map if the physical APIC ID is already * mapped, i.e. is aliased to multiple vCPUs. The optimized * map requires a strict 1:1 mapping between IDs and vCPUs.
*/ if (apic_x2apic_mode(apic))
physical_id = x2apic_id; else
physical_id = xapic_id;
/* * To optimize logical mode delivery, all software-enabled APICs must * be configured for the same mode.
*/ if (new->logical_mode == KVM_APIC_MODE_SW_DISABLED) {
new->logical_mode = logical_mode;
} elseif (new->logical_mode != logical_mode) {
new->logical_mode = KVM_APIC_MODE_MAP_DISABLED; return;
}
/* * In x2APIC mode, the LDR is read-only and derived directly from the * x2APIC ID, thus is guaranteed to be addressable. KVM reuses * kvm_apic_map.phys_map to optimize logical mode x2APIC interrupts by * reversing the LDR calculation to get cluster of APICs, i.e. no * additional work is required.
*/ if (apic_x2apic_mode(apic)) return;
/*
 * States of kvm->arch.apic_map_dirty, tracking whether the optimized APIC
 * destination map needs to be recomputed.
 *
 * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
 *
 * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
 * apic_map_lock_held.
 */
enum {
	CLEAN,			/* map is up to date */
	UPDATE_IN_PROGRESS,	/* a task is recomputing the map under the lock */
	DIRTY			/* map is stale and must be recalculated */
};
staticvoid kvm_recalculate_apic_map(struct kvm *kvm)
{ struct kvm_apic_map *new, *old = NULL; struct kvm_vcpu *vcpu; unsignedlong i;
u32 max_id = 255; /* enough space for any xAPIC ID */ bool xapic_id_mismatch; int r;
/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */ if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN) return;
WARN_ONCE(!irqchip_in_kernel(kvm), "Dirty APIC map without an in-kernel local APIC");
mutex_lock(&kvm->arch.apic_map_lock);
retry: /* * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean) * or the APIC registers (if dirty). Note, on retry the map may have * not yet been marked dirty by whatever task changed a vCPU's x2APIC * ID, i.e. the map may still show up as in-progress. In that case * this task still needs to retry and complete its calculation.
*/ if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
DIRTY, UPDATE_IN_PROGRESS) == CLEAN) { /* Someone else has updated the map. */
mutex_unlock(&kvm->arch.apic_map_lock); return;
}
/* * Reset the mismatch flag between attempts so that KVM does the right * thing if a vCPU changes its xAPIC ID, but do NOT reset max_id, i.e. * keep max_id strictly increasing. Disallowing max_id from shrinking * ensures KVM won't get stuck in an infinite loop, e.g. if the vCPU * with the highest x2APIC ID is toggling its APIC on and off.
*/
xapic_id_mismatch = false;
kvm_for_each_vcpu(i, vcpu, kvm) if (kvm_apic_present(vcpu))
max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
kvm_for_each_vcpu(i, vcpu, kvm) { if (!kvm_apic_present(vcpu)) continue;
r = kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch); if (r) {
kvfree(new); new = NULL; if (r == -E2BIG) {
cond_resched(); goto retry;
}
goto out;
}
kvm_recalculate_logical_map(new, vcpu);
}
out: /* * The optimized map is effectively KVM's internal version of APICv, * and all unwanted aliasing that results in disabling the optimized * map also applies to APICv.
*/ if (!new)
kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED); else
kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED);
if (xapic_id_mismatch)
kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED); else
kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
old = rcu_dereference_protected(kvm->arch.apic_map,
lockdep_is_held(&kvm->arch.apic_map_lock));
rcu_assign_pointer(kvm->arch.apic_map, new); /* * Write kvm->arch.apic_map before clearing apic->apic_map_dirty. * If another update has come in, leave it DIRTY.
*/
atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
UPDATE_IN_PROGRESS, CLEAN);
mutex_unlock(&kvm->arch.apic_map_lock);
/* Check if there are APF page ready requests pending */ if (enabled) {
kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
kvm_xen_sw_enable_lapic(apic->vcpu);
}
}
v = APIC_VERSION | ((apic->nr_lvt_entries - 1) << 16);
/* * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation) * which doesn't have EOI register; Some buggy OSes (e.g. Windows with * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC * version first and level-triggered interrupts never get EOIed in * IOAPIC.
*/ if (guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) &&
!ioapic_in_kernel(vcpu->kvm))
v |= APIC_LVR_DIRECTED_EOI;
kvm_lapic_set_reg(apic, APIC_LVR, v);
}
/*
 * React to a change in the vCPU's MCG_CAP: the supported LVT entry count may
 * have grown, in which case any newly exposed LVT registers must start out
 * masked and the version register (which reports the count) refreshed.
 */
void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu)
{
	int new_nr_lvts = kvm_apic_calc_nr_lvt_entries(vcpu);
	struct kvm_lapic *apic = vcpu->arch.apic;
	int idx;

	if (!lapic_in_kernel(vcpu) || new_nr_lvts == apic->nr_lvt_entries)
		return;

	/* Initialize/mask any "new" LVT entries. */
	for (idx = apic->nr_lvt_entries; idx < new_nr_lvts; idx++)
		kvm_lapic_set_reg(apic, APIC_LVTx(idx), APIC_LVT_MASKED);

	apic->nr_lvt_entries = new_nr_lvts;

	/* The number of LVT entries is reflected in the version register. */
	kvm_apic_set_version(vcpu);
}
/* * With APIC virtualization enabled, all caching is disabled * because the processor can modify ISR under the hood. Instead * just set SVI.
*/ if (unlikely(apic->apicv_active))
kvm_x86_call(hwapic_isr_update)(apic->vcpu, vec); else {
++apic->isr_count;
BUG_ON(apic->isr_count > MAX_APIC_VECTOR); /* * ISR (in service register) bit is set when injecting an interrupt. * The highest vector is injected. Thus the latest bit set matches * the highest bit in ISR.
*/
apic->highest_isr_cache = vec;
}
}
/*
 * Return the highest in-service vector, or -1 if no interrupt is in service.
 *
 * Note that isr_count is always 1, and highest_isr_cache is always -1, with
 * APIC virtualization enabled (the CPU manages the ISR behind KVM's back).
 */
static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
	int result;

	if (!apic->isr_count)
		return -1;
	if (likely(apic->highest_isr_cache != -1))
		return apic->highest_isr_cache;

	result = apic_find_highest_vector(apic->regs + APIC_ISR);
	/* Vectors 0-15 are architecturally reserved and can't be in service. */
	ASSERT(result == -1 || result >= 16);

	/*
	 * NOTE(review): the return and closing brace were missing in the
	 * mangled original (non-void function fell off its end); restored.
	 */
	return result;
}
/* * We do get here for APIC virtualization enabled if the guest * uses the Hyper-V APIC enlightenment. In this case we may need * to trigger a new interrupt delivery by writing the SVI field; * on the other hand isr_count and highest_isr_cache are unused * and must be left alone.
*/ if (unlikely(apic->apicv_active))
kvm_x86_call(hwapic_isr_update)(apic->vcpu, apic_find_highest_isr(apic)); else {
--apic->isr_count;
BUG_ON(apic->isr_count < 0);
apic->highest_isr_cache = -1;
}
}
/*
 * Return the highest pending vector in the vCPU's IRR, or -1 if none.
 *
 * This may race with setting of irr in __apic_accept_irq() and the value
 * returned may be stale, but kvm_vcpu_kick() in __apic_accept_irq will cause
 * an immediate vmexit and the value will be recalculated on the next vmentry.
 */
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return apic_find_highest_irr(apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
/* Forward declaration: __apic_accept_irq() is defined later in this file. */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode,
			     struct dest_map *dest_map);
if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) returnfalse;
/* * Clear pending bit in any case: it will be set again on vmentry. * While this might not be ideal from performance point of view, * this makes sure pv eoi is only enabled when we know it's safe.
*/
__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
/* * Hotplug hack: Accept interrupts for vCPUs in xAPIC mode as if they * were in x2APIC mode if the target APIC ID can't be encoded as an * xAPIC ID. This allows unique addressing of hotplugged vCPUs (which * start in xAPIC mode) with an APIC ID that is unaddressable in xAPIC * mode. Match the x2APIC ID if and only if the target APIC ID can't * be encoded in xAPIC to avoid spurious matches against a vCPU that * changed its (addressable) xAPIC ID (which is writable).
*/ if (apic_x2apic_mode(apic) || mda > 0xff) return mda == kvm_x2apic_id(apic);
/* The KVM local APIC implementation has two quirks: * * - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs * in xAPIC mode if the "destination & 0xff" matches its xAPIC ID. * KVM doesn't do that aliasing. * * - in-kernel IOAPIC messages have to be delivered directly to * x2APIC, because the kernel does not support interrupt remapping. * In order to support broadcast without interrupt remapping, x2APIC * rewrites the destination of non-IPI messages from APIC_BROADCAST * to X2APIC_BROADCAST. * * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API. This is * important when userspace wants to use x2APIC-format MSIs, because * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
*/ static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsignedint dest_id, struct kvm_lapic *source, struct kvm_lapic *target)
{ bool ipi = source != NULL;
/* Return true if the interrupt can be handled by using *bitmap as index mask * for valid destinations in *dst array. * Return false if kvm_apic_map_get_dest_lapic did nothing useful. * Note: we may have zero kvm_lapic destinations when we return true, which * means that the interrupt should be dropped. In this case, *bitmap would be * zero and *dst undefined.
*/ staticinlinebool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic **src, struct kvm_lapic_irq *irq, struct kvm_apic_map *map, struct kvm_lapic ***dst, unsignedlong *bitmap)
{ int i, lowest;
ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap); if (ret) {
*r = 0;
for_each_set_bit(i, &bitmap, 16) { if (!dst[i]) continue;
*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
}
}
rcu_read_unlock(); return ret;
}
/* * This routine tries to handle interrupts in posted mode, here is how * it deals with different cases: * - For single-destination interrupts, handle it in posted mode * - Else if vector hashing is enabled and it is a lowest-priority * interrupt, handle it in posted mode and use the following mechanism * to find the destination vCPU. * 1. For lowest-priority interrupts, store all the possible * destination vCPUs in an array. * 2. Use "guest vector % max number of destination vCPUs" to find * the right destination vCPU in the array for the lowest-priority * interrupt. * - Otherwise, use remapped mode to inject the interrupt.
*/ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu)
{ struct kvm_apic_map *map; unsignedlong bitmap; struct kvm_lapic **dst = NULL; bool ret = false;
if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
hweight16(bitmap) == 1) { unsignedlong i = find_first_bit(&bitmap, 16);
if (dst[i]) {
*dest_vcpu = dst[i]->vcpu;
ret = true;
}
}
rcu_read_unlock(); return ret;
}
/* * Add a pending IRQ into lapic. * Return 1 if successfully added and 0 if discarded.
*/ staticint __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, int vector, int level, int trig_mode, struct dest_map *dest_map)
{ int result = 0; struct kvm_vcpu *vcpu = apic->vcpu;
trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
trig_mode, vector); switch (delivery_mode) { case APIC_DM_LOWEST:
vcpu->arch.apic_arb_prio++;
fallthrough; case APIC_DM_FIXED: if (unlikely(trig_mode && !level)) break;
/* FIXME add logic for vcpu on reset */ if (unlikely(!apic_enabled(apic))) break;
result = 1;
if (dest_map) {
__set_bit(vcpu->vcpu_id, dest_map->map);
dest_map->vectors[vcpu->vcpu_id] = vector;
}
if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { if (trig_mode)
apic_set_vector(vector, apic->regs + APIC_TMR); else
apic_clear_vector(vector, apic->regs + APIC_TMR);
}
case APIC_DM_REMRD:
result = 1;
vcpu->arch.pv.pv_unhalted = 1;
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu); break;
case APIC_DM_SMI: if (!kvm_inject_smi(vcpu)) {
kvm_vcpu_kick(vcpu);
result = 1;
} break;
case APIC_DM_NMI:
result = 1;
kvm_inject_nmi(vcpu);
kvm_vcpu_kick(vcpu); break;
case APIC_DM_INIT: if (!trig_mode || level) {
result = 1; /* assumes that there are only KVM_APIC_INIT/SIPI */
apic->pending_events = (1UL << KVM_APIC_INIT);
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
} break;
case APIC_DM_STARTUP:
result = 1;
apic->sipi_vector = vector; /* make sure sipi_vector is visible for the receiver */
smp_wmb();
set_bit(KVM_APIC_SIPI, &apic->pending_events);
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu); break;
case APIC_DM_EXTINT: /* * Should only be called by kvm_apic_local_deliver() with LVT0, * before NMI watchdog was enabled. Already handled by * kvm_apic_accept_pic_intr().
*/ break;
/* * This routine identifies the destination vcpus mask meant to receive the * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find * out the destination vcpus array and set the bitmap or it traverses to * each available vcpu to identify the same.
*/ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, unsignedlong *vcpu_bitmap)
{ struct kvm_lapic **dest_vcpu = NULL; struct kvm_lapic *src = NULL; struct kvm_apic_map *map; struct kvm_vcpu *vcpu; unsignedlong bitmap, i; int vcpu_idx; bool ret;
/*
 * Propagate a guest EOI for @vector to the I/O APIC when the I/O APIC owns
 * the vector: either by requesting a userspace exit (split irqchip) or — in
 * the in-kernel IOAPIC path — by updating the IOAPIC directly.
 *
 * NOTE(review): "staticvoid" is missing a space, and the in-kernel tail of
 * this function (the path that would use trigger_mode with
 * kvm_ioapic_update_eoi()) appears truncated in this chunk.  Code left
 * byte-identical; flagged for repair.
 */
staticvoid kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
	int __maybe_unused trigger_mode;

	/* Eoi the ioapic only if the ioapic doesn't own the vector. */
	if (!kvm_ioapic_handles_vector(apic, vector))
		return;

	/*
	 * If the intercepted EOI is for an IRQ that was pending from previous
	 * routing, then re-scan the I/O APIC routes as EOIs for the IRQ likely
	 * no longer need to be intercepted.
	 */
	if (apic->vcpu->arch.highest_stale_pending_ioapic_eoi == vector)
		kvm_make_request(KVM_REQ_SCAN_IOAPIC, apic->vcpu);

	/* Request a KVM exit to inform the userspace IOAPIC. */
	if (irqchip_split(apic->vcpu->kvm)) {
		apic->vcpu->arch.pending_ioapic_eoi = vector;
		kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
		return;
	}
/* * this interface assumes a trap-like exit, which has already finished * desired side effect including vISR and vPPR update.
*/ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
{ struct kvm_lapic *apic = vcpu->arch.apic;
case APIC_TMCCT: /* Timer CCR */ if (apic_lvtt_tscdeadline(apic)) return 0;
val = apic_get_tmcct(apic); break; case APIC_PROCPRI:
apic_update_ppr(apic);
val = kvm_lapic_get_reg(apic, offset); break; case APIC_TASKPRI:
report_tpr_access(apic, false);
fallthrough; default:
val = kvm_lapic_get_reg(apic, offset); break;
}
if (kvm_lapic_lvt_supported(apic, LVT_CMCI))
valid_reg_mask |= APIC_REG_MASK(APIC_LVTCMCI);
/* ARBPRI, DFR, and ICR2 are not valid in x2APIC mode. */ if (!apic_x2apic_mode(apic))
valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI) |
APIC_REG_MASK(APIC_DFR) |
APIC_REG_MASK(APIC_ICR2);
/* * WARN if KVM reads ICR in x2APIC mode, as it's an 8-byte register in * x2APIC and needs to be manually handled by the caller.
*/
WARN_ON_ONCE(apic_x2apic_mode(apic) && offset == APIC_ICR);
if (alignment + len > 4) return 1;
if (offset > 0x3f0 ||
!(kvm_lapic_readable_reg_mask(apic) & APIC_REG_MASK(offset))) return 1;
result = __apic_read(apic, offset & ~0xf);
trace_kvm_apic_read(offset, result);
switch (len) { case 1: case 2: case 4:
memcpy(data, (char *)&result + alignment, len); break; default:
printk(KERN_ERR "Local APIC read with len = %x, " "should be 1,2, or 4 instead\n", len); break;
} return 0;
}
/*
 * Clamp a guest-programmed periodic APIC timer to the host's minimum period.
 *
 * Do not allow the guest to program periodic timers with small interval,
 * since the hrtimers are not throttled by the host scheduler.
 */
static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
{
	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
		s64 min_period = min_timer_period_us * 1000LL;

		if (apic->lapic_timer.period < min_period) {
			pr_info_once(
			    "vcpu %i: requested %lld ns "
			    "lapic timer period limited to %lld ns\n",
			    apic->vcpu->vcpu_id,
			    apic->lapic_timer.period, min_period);
			apic->lapic_timer.period = min_period;
		}
	}
}
/* * Assume a timer IRQ was "injected" if the APIC is protected. KVM's * copy of the vIRR is bogus, it's the responsibility of the caller to * precisely check whether or not a timer IRQ is pending.
*/ if (apic->guest_apic_protected) returntrue;
reg = kvm_lapic_get_reg(apic, APIC_LVTT); if (kvm_apic_hw_enabled(apic)) { int vec = reg & APIC_VECTOR_MASK; void *bitmap = apic->regs + APIC_ISR;
if (apic->apicv_active)
bitmap = apic->regs + APIC_IRR;
if (apic_test_vector(vec, bitmap)) returntrue;
} returnfalse;
}
/* * If the guest TSC is running at a different ratio than the host, then * convert the delay to nanoseconds to achieve an accurate delay. Note * that __delay() uses delay_tsc whenever the hardware has TSC, thus * always for VMX enabled hardware.
*/ if (vcpu->arch.tsc_scaling_ratio == kvm_caps.default_tsc_scaling_ratio) {
__delay(min(guest_cycles,
nsec_to_cycles(vcpu, timer_advance_ns)));
} else {
u64 delay_ns = guest_cycles * 1000000ULL;
do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
ndelay(min_t(u32, delay_ns, timer_advance_ns));
}
}
/* Do not adjust for tiny fluctuations or large random spikes. */ if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN) return;
/* too early */ if (advance_expire_delta < 0) {
ns = -advance_expire_delta * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
} else { /* too late */
ns = advance_expire_delta * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
}
/* * If the timer fired early, reread the TSC to account for the overhead * of the above adjustment to avoid waiting longer than is necessary.
*/ if (guest_tsc < tsc_deadline)
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
if (guest_tsc < tsc_deadline)
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
}
if (kvm_use_posted_timer_interrupt(apic->vcpu)) { /* * Ensure the guest's timer has truly expired before posting an * interrupt. Open code the relevant checks to avoid querying * lapic_timer_int_injected(), which will be false since the * interrupt isn't yet injected. Waiting until after injecting * is not an option since that won't help a posted interrupt.
*/ if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
vcpu->arch.apic->lapic_timer.timer_advance_ns)
__kvm_wait_lapic_expire(vcpu);
kvm_apic_inject_pending_timer_irqs(apic); return;
}
atomic_inc(&apic->lapic_timer.pending);
kvm_make_request(KVM_REQ_UNBLOCK, vcpu); if (from_timer_fn)
kvm_vcpu_kick(vcpu);
}
/* * Synchronize both deadlines to the same time source or * differences in the periods (caused by differences in the * underlying clocks or numerical approximation errors) will * cause the two to drift apart over time as the errors * accumulate.
*/
apic->lapic_timer.target_expiration =
ktime_add_ns(apic->lapic_timer.target_expiration,
apic->lapic_timer.period);
delta = ktime_sub(apic->lapic_timer.target_expiration, now);
apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
nsec_to_cycles(apic->vcpu, delta);
}
staticvoid start_sw_period(struct kvm_lapic *apic)
{ if (!apic->lapic_timer.period) return;
if (ktime_after(ktime_get(),
apic->lapic_timer.target_expiration)) {
apic_timer_expired(apic, false);
/* * To simplify handling the periodic timer, leave the hv timer running * even if the deadline timer has expired, i.e. rely on the resulting * VM-Exit to recompute the periodic timer's target expiration.
*/ if (!apic_lvtt_period(apic)) { /* * Cancel the hv timer if the sw timer fired while the hv timer * was being programmed, or if the hv timer itself expired.
*/ if (atomic_read(&ktimer->pending)) {
cancel_hv_timer(apic);
} elseif (expired) {
apic_timer_expired(apic, false);
cancel_hv_timer(apic);
}
}
WARN_ON(preemptible()); if (apic->lapic_timer.hv_timer_in_use)
cancel_hv_timer(apic); if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) return;
preempt_disable(); /* If the preempt notifier has already run, it also called apic_timer_expired */ if (!apic->lapic_timer.hv_timer_in_use) goto out;
WARN_ON(kvm_vcpu_is_blocking(vcpu));
apic_timer_expired(apic, false);
cancel_hv_timer(apic);
preempt_disable(); /* Possibly the TSC deadline timer is not enabled yet */ if (apic->lapic_timer.hv_timer_in_use)
start_sw_timer(apic);
preempt_enable();
}
/*
 * Emulate a 32-bit write to an APIC register.  Returns 0 on success, 1 if
 * the register is reserved/invalid for the current mode (xAPIC vs. x2APIC).
 */
static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
	int ret = 0;

	trace_kvm_apic_write(reg, val);

	switch (reg) {
	case APIC_ID:		/* Local APIC ID */
		if (!apic_x2apic_mode(apic)) {
			kvm_apic_set_xapic_id(apic, val >> 24);
		} else {
			/* APIC ID is read-only in x2APIC mode. */
			ret = 1;
		}
		break;

	case APIC_TASKPRI:
		report_tpr_access(apic, true);
		apic_set_tpr(apic, val & 0xff);
		break;

	case APIC_EOI:
		apic_set_eoi(apic);
		break;

	case APIC_LDR:
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
		else
			ret = 1;
		break;

	case APIC_DFR:
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
		else
			ret = 1;
		break;

	case APIC_SPIV: {
		u32 mask = 0x3ff;

		if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
			mask |= APIC_SPIV_DIRECTED_EOI;
		apic_set_spiv(apic, val & mask);
		if (!(val & APIC_SPIV_APIC_ENABLED)) {
			int i;

			/* Software-disabling the APIC masks all LVT entries. */
			for (i = 0; i < apic->nr_lvt_entries; i++) {
				kvm_lapic_set_reg(apic, APIC_LVTx(i),
					kvm_lapic_get_reg(apic, APIC_LVTx(i)) | APIC_LVT_MASKED);
			}
			apic_update_lvtt(apic);
			atomic_set(&apic->lapic_timer.pending, 0);
		}
		break;
	}

	case APIC_ICR:
		WARN_ON_ONCE(apic_x2apic_mode(apic));

		/* No delay here, so we always clear the pending bit */
		val &= ~APIC_ICR_BUSY;
		kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
		kvm_lapic_set_reg(apic, APIC_ICR, val);
		break;

	case APIC_ICR2:
		if (apic_x2apic_mode(apic))
			ret = 1;
		else
			kvm_lapic_set_reg(apic, APIC_ICR2, val & 0xff000000);
		break;

	case APIC_LVT0:
		apic_manage_nmi_watchdog(apic, val);
		fallthrough;
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT1:
	case APIC_LVTERR:
	case APIC_LVTCMCI: {
		u32 index = get_lvt_index(reg);

		if (!kvm_lapic_lvt_supported(apic, index)) {
			ret = 1;
			break;
		}
		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;
		val &= apic_lvt_mask[index];
		kvm_lapic_set_reg(apic, reg, val);
		break;
	}

	case APIC_LVTT:
		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;
		val &= (apic_lvt_mask[LVT_TIMER] | apic->lapic_timer.timer_mode_mask);
		kvm_lapic_set_reg(apic, APIC_LVTT, val);
		apic_update_lvtt(apic);
		break;

	case APIC_TMICT:
		/* TMICT is ignored in TSC-deadline mode. */
		if (apic_lvtt_tscdeadline(apic))
			break;

		/*
		 * NOTE(review): the three statements below were missing in
		 * the mangled original, which fell through into APIC_TDCR;
		 * restored per the documented flow — confirm against the
		 * canonical tree.
		 */
		cancel_apic_timer(apic);
		kvm_lapic_set_reg(apic, APIC_TMICT, val);
		start_apic_timer(apic);
		break;

	case APIC_TDCR: {
		uint32_t old_divisor = apic->divide_count;

		kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
		update_divide_count(apic);
		if (apic->divide_count != old_divisor &&
		    apic->lapic_timer.period) {
			hrtimer_cancel(&apic->lapic_timer.timer);
			update_target_expiration(apic, old_divisor);
			restart_apic_timer(apic);
		}
		break;
	}

	case APIC_ESR:
		if (apic_x2apic_mode(apic) && val != 0)
			ret = 1;
		break;

	case APIC_SELF_IPI:
		/*
		 * Self-IPI exists only when x2APIC is enabled. Bits 7:0 hold
		 * the vector, everything else is reserved.
		 */
		if (!apic_x2apic_mode(apic) || (val & ~APIC_VECTOR_MASK))
			ret = 1;
		else
			kvm_apic_send_ipi(apic, APIC_DEST_SELF | val, 0);
		break;

	default:
		ret = 1;
		break;
	}

	/*
	 * Recalculate APIC maps if necessary, e.g. if the software enable bit
	 * was toggled, the APIC ID changed, etc...  The maps are marked dirty
	 * on relevant changes, i.e. this is a nop for most writes.
	 */
	kvm_recalculate_apic_map(apic->vcpu->kvm);

	/* NOTE(review): return/closing brace were truncated; restored. */
	return ret;
}
/*
 * Emulate a write to the x2APIC ICR (a single 64-bit MSR in x2APIC mode).
 * Returns 1 if reserved bits are set.
 *
 * NOTE(review): this function is truncated in this chunk — the actual IPI
 * send and register write after clearing the BUSY bit are missing.  Code
 * left byte-identical; flagged for repair.
 */
int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
{
	if (data & X2APIC_ICR_RESERVED_BITS)
		return 1;

	/*
	 * The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but
	 * only AMD requires it to be zero, Intel essentially just ignores the
	 * bit.  And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled,
	 * the CPU performs the reserved bits checks, i.e. the underlying CPU
	 * behavior will "win".  Arbitrarily clear the BUSY bit, as there is no
	 * sane way to provide consistent behavior with respect to hardware.
	 */
	data &= ~APIC_ICR_BUSY;
/*
 * NOTE(review): the following German website boilerplate is stray non-code
 * text appended to the file (in English: "The information on this website was
 * carefully compiled to the best of our knowledge.  However, neither
 * completeness, nor correctness, nor quality of the provided information is
 * guaranteed.  Remark: the colored syntax rendering and the measurement are
 * still experimental.").  It is preserved here as a comment so the file can
 * compile, but it should simply be deleted:
 *
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig
 * zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereitgestellten Informationen zugesichert.
 * Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind noch
 * experimentell.
 */