	if (vcpu->arch.hfscr & HFSCR_EBB) {
		if (current->thread.ebbhr != vcpu->arch.ebbhr)
			mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
		if (current->thread.ebbrr != vcpu->arch.ebbrr)
			mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
		if (current->thread.bescr != vcpu->arch.bescr)
			mtspr(SPRN_BESCR, vcpu->arch.bescr);
	}
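	/*
	 * Load the remaining guest OS-level SPRs. Each mtspr is skipped when
	 * the guest value already matches the current host/thread value,
	 * presumably because SPR writes are comparatively expensive.
	 */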
	if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
			current->thread.tidr != vcpu->arch.tid)
		mtspr(SPRN_TIDR, vcpu->arch.tid);
	if (host_os_sprs->iamr != vcpu->arch.iamr)
		mtspr(SPRN_IAMR, vcpu->arch.iamr);
	if (host_os_sprs->amr != vcpu->arch.amr)
		mtspr(SPRN_AMR, vcpu->arch.amr);
	if (vcpu->arch.uamor != 0)
		mtspr(SPRN_UAMOR, vcpu->arch.uamor);
	if (current->thread.fscr != vcpu->arch.fscr)
		mtspr(SPRN_FSCR, vcpu->arch.fscr);
	if (current->thread.dscr != vcpu->arch.dscr)
		mtspr(SPRN_DSCR, vcpu->arch.dscr);
	if (vcpu->arch.pspb != 0)
		mtspr(SPRN_PSPB, vcpu->arch.pspb);
	/*
	 * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
	 * clear (or hstate set appropriately to catch those registers
	 * being clobbered if we take a MCE or SRESET), so those are done
	 * later.
	 */

	if (!(vcpu->arch.ctrl & 1))
		mtspr(SPRN_CTRLT, 0);
}
/* Returns true if current MSR and/or guest MSR may have changed */
bool load_vcpu_state(struct kvm_vcpu *vcpu,
		     struct p9_host_os_sprs *host_os_sprs)
{
	bool ret = false;
/* vcpu guest regs must already be saved */
void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
			     struct p9_host_os_sprs *host_os_sprs)
{
	/*
	 * current->thread.xxx registers must all be restored to host
	 * values before a potential context switch, otherwise the context
	 * switch itself will overwrite current->thread.xxx with the values
	 * from the guest SPRs.
	 */
	if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
			current->thread.tidr != vcpu->arch.tid)
		mtspr(SPRN_TIDR, current->thread.tidr);
	if (host_os_sprs->iamr != vcpu->arch.iamr)
		mtspr(SPRN_IAMR, host_os_sprs->iamr);
	if (vcpu->arch.uamor != 0)
		mtspr(SPRN_UAMOR, 0);
	if (host_os_sprs->amr != vcpu->arch.amr)
		mtspr(SPRN_AMR, host_os_sprs->amr);
	if (current->thread.fscr != vcpu->arch.fscr)
		mtspr(SPRN_FSCR, current->thread.fscr);
	if (current->thread.dscr != vcpu->arch.dscr)
		mtspr(SPRN_DSCR, current->thread.dscr);
	if (vcpu->arch.pspb != 0)
		mtspr(SPRN_PSPB, 0);
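	/*
	 * UAMOR and PSPB have no saved host copy here; the host values are
	 * evidently assumed to be zero, so they are only written back (to 0)
	 * when the guest left them non-zero.
	 */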
	/* Save guest CTRL register, set runlatch to 1 */
	if (!(vcpu->arch.ctrl & 1))
		mtspr(SPRN_CTRLT, 1);
#ifdef CONFIG_ALTIVEC
	if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
	    vcpu->arch.vrsave != current->thread.vrsave)
		mtspr(SPRN_VRSAVE, current->thread.vrsave);
#endif

	if (vcpu->arch.hfscr & HFSCR_EBB) {
		if (vcpu->arch.bescr != current->thread.bescr)
			mtspr(SPRN_BESCR, current->thread.bescr);
		if (vcpu->arch.ebbhr != current->thread.ebbhr)
			mtspr(SPRN_EBBHR, current->thread.ebbhr);
		if (vcpu->arch.ebbrr != current->thread.ebbrr)
			mtspr(SPRN_EBBRR, current->thread.ebbrr);

		if (!vcpu->arch.nested) {
			/*
			 * This is like load_fp in context switching: turn the
			 * facility off once the u8 counter wraps, to try to
			 * avoid saving and restoring these registers on every
			 * partition switch.
			 */
			vcpu->arch.load_ebb++;
			if (!vcpu->arch.load_ebb)
				vcpu->arch.hfscr &= ~HFSCR_EBB;
		}
	}
	if (vcpu->arch.tar != current->thread.tar)
		mtspr(SPRN_TAR, current->thread.tar);
}
EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs);
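/*
 * Rough sketch of how these helpers pair up around a guest run (inferred
 * from the function names and comments in this excerpt, not verbatim from
 * the caller): the entry path first saves host SPRs into a p9_host_os_sprs,
 * then loads the guest thread/OS SPRs and vcpu state, runs the guest, stores
 * the guest state back out, and finally calls restore_p9_host_os_sprs() so
 * that current->thread.* and the host SPRs are consistent again before any
 * context switch can occur.
 */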
/*
 * Malicious or buggy radix guests may have inserted SLB entries
 * (only 0..3 because radix always runs with UPRT=1), so these must
 * be cleared here to avoid side-channels. slbmte is used rather
 * than slbia, as it won't clear cached translations.
 */
static void radix_clear_slb(void)
{
	int i;
	/*
	 * Prior memory accesses to host PID Q3 must be completed before we
	 * start switching, and stores must be drained to avoid not-my-LPAR
	 * logic (see switch_mmu_to_host).
	 */
	asm volatile("hwsync" ::: "memory");
	isync();
	mtspr(SPRN_LPID, lpid);
	mtspr(SPRN_LPCR, lpcr);
	mtspr(SPRN_PID, pid);
	/*
	 * isync not required here because we are HRFID'ing to guest before
	 * any guest context access, which is context synchronising.
	 */
}
	/*
	 * See switch_mmu_to_guest_radix. ptesync should not be required here
	 * even if the host is in HPT mode because speculative accesses would
	 * not cause RC updates (we are in real mode).
	 */
	asm volatile("hwsync" ::: "memory");
	isync();
	mtspr(SPRN_LPID, lpid);
	mtspr(SPRN_LPCR, lpcr);
	mtspr(SPRN_PID, pid);

	for (i = 0; i < vcpu->arch.slb_max; i++)
		mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
	/*
	 * isync not required here, see switch_mmu_to_guest_radix.
	 */
}
	/*
	 * The guest has exited, so guest MMU context is no longer being
	 * non-speculatively accessed, but a hwsync is needed before the
	 * mtLPIDR / mtPIDR switch, in order to ensure all stores are drained,
	 * so the not-my-LPAR tlbie logic does not overlook them.
	 */
	asm volatile("hwsync" ::: "memory");
	isync();
	mtspr(SPRN_PID, pid);
	mtspr(SPRN_LPID, lpid);
	mtspr(SPRN_LPCR, lpcr);
	/*
	 * isync is not required after the switch, because mtmsrd with L=0
	 * is performed after this switch, which is context synchronising.
	 */

	if (!radix_enabled())
		slb_restore_bolted_realmode();
}
static void save_clear_host_mmu(struct kvm *kvm)
{
	if (!radix_enabled()) {
		/*
		 * Hash host could save and restore host SLB entries to
		 * reduce SLB fault overheads of VM exits, but for now the
		 * existing code clears all entries and restores just the
		 * bolted ones when switching back to host.
		 */
		slb_clear_invalidate_partition();
	}
}
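/*
 * The bolted host SLB entries cleared here are reinstalled on the way back
 * by slb_restore_bolted_realmode() in switch_mmu_to_host() above.
 */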
static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
	if (kvm_is_radix(kvm)) {
		radix_clear_slb();
	} else {
		int i;
		int nr = 0;

		/*
		 * This must run before switching to host (radix host can't
		 * access all SLBs).
		 */
		for (i = 0; i < vcpu->arch.slb_nr; i++) {
			u64 slbee, slbev;
static void check_need_tlb_flush(struct kvm *kvm, int pcpu,
				 struct kvm_nested_guest *nested)
{
	cpumask_t *need_tlb_flush;
	bool all_set = true;
	int i;

	if (nested)
		need_tlb_flush = &nested->need_tlb_flush;
	else
		need_tlb_flush = &kvm->arch.need_tlb_flush;

	if (likely(!cpumask_test_cpu(pcpu, need_tlb_flush)))
		return;

	/*
	 * Individual threads can come in here, but the TLB is shared between
	 * the 4 threads in a core, hence invalidating on one thread
	 * invalidates for all, so only invalidate the first time (if all bits
	 * were set). The others must still execute a ptesync.
	 *
	 * If a race occurs and two threads do the TLB flush, that is not a
	 * problem, just sub-optimal.
	 */
	for (i = cpu_first_tlb_thread_sibling(pcpu);
	     i <= cpu_last_tlb_thread_sibling(pcpu);
	     i += cpu_tlb_thread_sibling_step()) {
		if (!cpumask_test_cpu(i, need_tlb_flush)) {
			all_set = false;
			break;
		}
	}
	if (all_set)
		flush_guest_tlb(kvm);
	else
		asm volatile("ptesync" ::: "memory");

	/* Clear the bit after the TLB flush */
	cpumask_clear_cpu(pcpu, need_tlb_flush);
}
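/*
 * flush_guest_tlb() (not part of this excerpt) invalidates the TLB for the
 * current LPIDR, which is why the caller only runs this check after LPIDR
 * has been switched to the guest LPID (see the comment at the call site
 * further down).
 */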
	/* MSR bits may have been cleared by context switch so must recheck */
	if (IS_ENABLED(CONFIG_PPC_FPU))
		msr_needed |= MSR_FP;
	if (cpu_has_feature(CPU_FTR_ALTIVEC))
		msr_needed |= MSR_VEC;
	if (cpu_has_feature(CPU_FTR_VSX))
		msr_needed |= MSR_VSX;
	if ((cpu_has_feature(CPU_FTR_TM) ||
	     cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
			(vcpu->arch.hfscr & HFSCR_TM))
		msr_needed |= MSR_TM;
	/*
	 * This could be combined with MSR[RI] clearing, but that expands
	 * the unrecoverable window. It would be better to cover unrecoverable
	 * with KVM bad interrupt handling rather than use MSR[RI] at all.
	 *
	 * Much more difficult and less worthwhile to combine with IR/DR
	 * disable.
	 */
	if ((msr & msr_needed) != msr_needed) {
		msr |= msr_needed;
		__mtmsrd(msr, 0);
	} else {
		__hard_irq_disable();
	}
	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
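	/*
	 * Interrupts are hard disabled here, but neither __mtmsrd() nor
	 * __hard_irq_disable() updates the soft-mask accounting, so the hard
	 * disable is recorded in irq_happened by hand above.
	 */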
	/*
	 * On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
	 * Interrupt (HDSI) the HDSISR is not updated at all.
	 *
	 * To work around this we put a canary value into the HDSISR before
	 * returning to a guest and then check for this canary when we take a
	 * HDSI. If we find the canary on a HDSI, we know the hardware didn't
	 * update the HDSISR. In this case we return to the guest to retake the
	 * HDSI, which should correctly update the HDSISR the second time the
	 * HDSI is taken.
	 *
	 * The "radix prefetch bug" test can be used to test for this bug, as
	 * it also exists for DD2.1 and below.
	 */
	if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		mtspr(SPRN_HDSISR, HDSISR_CANARY);
	/*
	 * It might be preferable to load_vcpu_state here, in order to get the
	 * GPR/FP register loads executing in parallel with the previous mtSPR
	 * instructions, but for now that can't be done because the TM handling
	 * in load_vcpu_state can change some SPRs and vcpu state (nip, msr).
	 * But TM could be split out if this would be a significant benefit.
	 */

	/*
	 * MSR[RI] does not need to be cleared (and is not, for radix guests
	 * with no prefetch bug), because in_guest is set. If we take a SRESET
	 * or MCE with in_guest set but still in HV mode, then
	 * kvmppc_p9_bad_interrupt handles the interrupt, which effectively
	 * clears MSR[RI] and doesn't return.
	 */
	WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_HV_P9);
	barrier(); /* Open in_guest critical section */
	/*
	 * Hash host, hash guest, or radix guest with prefetch bug, all have
	 * to disable the MMU before switching to guest MMU state.
	 */
	if (!radix_enabled() || !kvm_is_radix(kvm) ||
			cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);

	save_clear_host_mmu(kvm);

	if (kvm_is_radix(kvm))
		switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
	else
		switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
	/* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
	check_need_tlb_flush(kvm, vc->pcpu, nested);

	/*
	 * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
	 * so set guest LPCR (with HDICE) before writing HDEC.
	 */
	mtspr(SPRN_HDEC, hdec);
	/* XXX: Could get these from r11/12 and paca exsave instead */
	vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
	vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
	vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
	vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);

	/* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
	trap = local_paca->kvm_hstate.scratch0 & ~0x2;
	/*
	 * After reading machine check regs (DAR, DSISR, SRR0/1) and hstate
	 * scratch (which we need to move into exsave to make re-entrant vs
	 * SRESET/MCE), register state is protected from reentrancy. However
	 * timebase, MMU, among other state is still set to guest, so don't
	 * enable MSR[RI] here. It gets enabled at the end, after in_guest
	 * is cleared.
	 *
	 * It is possible an NMI could come in here, which is why it is
	 * important to save the above state early so it can be debugged.
	 */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	/*
	 * Softpatch interrupt for transactional memory emulation cases
	 * on POWER9 DD2.2. This is early in the guest exit path - we
	 * haven't saved registers or done a treclaim yet.
	 */
	} else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) {
		vcpu->arch.emul_inst = mfspr(SPRN_HEIR);

		/*
		 * The cases we want to handle here are those where the guest
		 * is in real suspend mode and is trying to transition to
		 * transactional mode.
		 */
		if (!local_paca->kvm_hstate.fake_suspend &&
				(vcpu->arch.shregs.msr & MSR_TS_S)) {
			if (kvmhv_p9_tm_emulation_early(vcpu)) {
				/*
				 * Go straight back into the guest with the
				 * new NIP/MSR as set by TM emulation.
				 */
				mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
				mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr);
				goto tm_return_to_guest;
			}
		}
#endif
	}
	/* Advance host PURR/SPURR by the amount used by guest */
	purr = mfspr(SPRN_PURR);
	spurr = mfspr(SPRN_SPURR);
	local_paca->kvm_hstate.host_purr += purr - vcpu->arch.purr;
	local_paca->kvm_hstate.host_spurr += spurr - vcpu->arch.spurr;
	vcpu->arch.purr = purr;
	vcpu->arch.spurr = spurr;
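	/*
	 * DPDES still reflects the guest at this point; a non-zero value
	 * means a doorbell was pending when the guest exited, so note it in
	 * doorbell_request so it can be re-issued on the next guest entry.
	 */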
	dpdes = mfspr(SPRN_DPDES);
	if (dpdes)
		vcpu->arch.doorbell_request = 1;

	vc->vtb = mfspr(SPRN_VTB);

	dec = mfspr(SPRN_DEC);
	if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
		dec = (s32) dec;
	*tb = mftb();
	vcpu->arch.dec_expires = dec + *tb;
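	/*
	 * Worked example for the sign extension above: with LPCR[LD] clear
	 * the decrementer is a 32-bit quantity, so an expired DEC read back
	 * as 0xfffffffe must become -2 before the addition, making
	 * dec_expires (the absolute timebase value at which the guest
	 * decrementer expires) land just in the past rather than ~2^32
	 * ticks in the future.
	 */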
	/*
	 * Enable MSR here in order to have facilities enabled to save
	 * guest registers. This enables MMU (if we were in realmode), so
	 * only switch MMU on after the MMU is switched to host, to avoid
	 * the P9_RADIX_PREFETCH_BUG or hash guest context.
	 */
	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
			vcpu->arch.shregs.msr & MSR_TS_MASK)
		msr |= MSR_TS_S;
	__mtmsrd(msr, 0);
	if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
		/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
		mtspr(SPRN_PSSCR, host_hpsscr |
		      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
	}
	mtspr(SPRN_HFSCR, host_hfscr);
	if (vcpu->arch.ciabr != host_ciabr)
		mtspr(SPRN_CIABR, host_ciabr);

	if (dawr_enabled()) {
		if (vcpu->arch.dawr0 != host_dawr0)
			mtspr(SPRN_DAWR0, host_dawr0);
		if (vcpu->arch.dawrx0 != host_dawrx0)
			mtspr(SPRN_DAWRX0, host_dawrx0);
		if (cpu_has_feature(CPU_FTR_DAWR1)) {
			if (vcpu->arch.dawr1 != host_dawr1)
				mtspr(SPRN_DAWR1, host_dawr1);
			if (vcpu->arch.dawrx1 != host_dawrx1)
				mtspr(SPRN_DAWRX1, host_dawrx1);
		}
	}
	if (dpdes)
		mtspr(SPRN_DPDES, 0);
	if (vc->pcr)
		mtspr(SPRN_PCR, PCR_MASK);
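	/*
	 * A non-zero vc->pcr means the guest ran with a modified Processor
	 * Compatibility Register; PCR_MASK is evidently the value the host
	 * normally runs with, so write that back rather than zero.
	 */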
	/* HDEC must be at least as large as DEC, so decrementer_max fits */
	mtspr(SPRN_HDEC, decrementer_max);

	timer_rearm_host_dec(*tb);

	restore_p9_host_os_sprs(vcpu, &host_os_sprs);
	barrier(); /* Close in_guest critical section */
	WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_NONE);
	/* Interrupts are recoverable at this point */

	/*
	 * cp_abort is required if the processor supports local copy-paste
	 * to clear the copy buffer that was under control of the guest.
	 */
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		asm volatile(PPC_CP_ABORT);