/*
 * Perf's "BASE" is wildly misleading, architectural PMUs use bits 31:16 of ECX
 * to encode the "type" of counter to read, i.e. this is not a "base".  And to
 * further confuse things, non-architectural PMUs use bit 31 as a flag for
 * "fast" reads, whereas the "type" is an explicit value.
 */
#define INTEL_RDPMC_GP		0
#define INTEL_RDPMC_FIXED	INTEL_PMC_FIXED_RDPMC_BASE
/* * The encoding of ECX for RDPMC is different for architectural versus * non-architecturals PMUs (PMUs with version '0'). For architectural * PMUs, bits 31:16 specify the PMC type and bits 15:0 specify the PMC * index. For non-architectural PMUs, bit 31 is a "fast" flag, and * bits 30:0 specify the PMC index. * * Yell and reject attempts to read PMCs for a non-architectural PMU, * as KVM doesn't support such PMUs.
*/ if (WARN_ON_ONCE(!pmu->version)) return NULL;
/* * General Purpose (GP) PMCs are supported on all PMUs, and fixed PMCs * are supported on all architectural PMUs, i.e. on all virtual PMUs * supported by KVM. Note, KVM only emulates fixed PMCs for PMU v2+, * but the type itself is still valid, i.e. let RDPMC fail due to * accessing a non-existent counter. Reject attempts to read all other * types, which are unknown/unsupported.
*/ switch (type) { case INTEL_RDPMC_FIXED:
counters = pmu->fixed_counters;
num_counters = pmu->nr_arch_fixed_counters;
bitmask = pmu->counter_bitmask[KVM_PMC_FIXED]; break; case INTEL_RDPMC_GP:
counters = pmu->gp_counters;
num_counters = pmu->nr_arch_gp_counters;
bitmask = pmu->counter_bitmask[KVM_PMC_GP]; break; default: return NULL;
}
idx &= INTEL_RDPMC_INDEX_MASK; if (idx >= num_counters) return NULL;
/* * The perf_event_attr is constructed in the minimum efficient way: * - set 'pinned = true' to make it task pinned so that if another * cpu pinned event reclaims LBR, the event->oncpu will be set to -1; * - set '.exclude_host = true' to record guest branches behavior; * * - set '.config = INTEL_FIXED_VLBR_EVENT' to indicates host perf * schedule the event without a real HW counter but a fake one; * check is_guest_lbr_event() and __intel_get_event_constraints(); * * - set 'sample_type = PERF_SAMPLE_BRANCH_STACK' and * 'branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK | * PERF_SAMPLE_BRANCH_USER' to configure it as a LBR callstack * event, which helps KVM to save/restore guest LBR records * during host context switches and reduces quite a lot overhead, * check branch_user_callstack() and intel_pmu_lbr_sched_task();
*/ struct perf_event_attr attr = {
.type = PERF_TYPE_RAW,
.size = sizeof(attr),
.config = INTEL_FIXED_VLBR_EVENT,
.sample_type = PERF_SAMPLE_BRANCH_STACK,
.pinned = true,
.exclude_host = true,
.branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
PERF_SAMPLE_BRANCH_USER,
};
if (WARN_ON_ONCE(!lbr_desc)) return 0;
if (unlikely(lbr_desc->event)) {
__set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use); return 0;
}
/*
 * It's safe to access LBR msrs from guest when they have not
 * been passthrough since the host would help restore or reset
 * the LBR msrs records when the guest LBR event is scheduled in.
 *
 * Returns true if @msr_info->index is an LBR MSR that was handled here
 * (read value stored in, or write value taken from, @msr_info->data),
 * false if the MSR is not a valid LBR MSR for this vCPU.
 */
static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
					     struct msr_data *msr_info,
					     bool read)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
	u32 index = msr_info->index;

	if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
		return false;

	/* Lazily create the guest LBR perf event on first LBR MSR access. */
	if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu) < 0)
		goto dummy;

	/*
	 * Disable irq to ensure the LBR feature doesn't get reclaimed by the
	 * host at the time the value is read from the msr, and this avoids the
	 * host LBR value to be leaked to the guest. If LBR has been reclaimed,
	 * return 0 on guest reads.
	 */
	local_irq_disable();
	if (lbr_desc->event->state == PERF_EVENT_STATE_ACTIVE) {
		if (read)
			rdmsrq(index, msr_info->data);
		else
			wrmsrq(index, msr_info->data);
		__set_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
		local_irq_enable();
		return true;
	}
	clear_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
	local_irq_enable();

dummy:
	/* LBR was reclaimed (or never created); reads observe zero. */
	if (read)
		msr_info->data = 0;
	return true;
}
if (data != pmc->eventsel) {
pmc->eventsel = data;
kvm_pmu_request_counter_reprogram(pmc);
} break;
} elseif (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) { break;
} /* Not a known PMU MSR. */ return 1;
}
return 0;
}
/*
 * Map fixed counter events to architectural general purpose event encodings.
 * Perf doesn't provide APIs to allow KVM to directly program a fixed counter,
 * and so KVM instead programs the architectural event to effectively request
 * the fixed counter.  Perf isn't guaranteed to use a fixed counter and may
 * instead program the encoding into a general purpose counter, e.g. if a
 * different perf_event is already utilizing the requested counter, but the end
 * result is the same (ignoring the fact that using a general purpose counter
 * will likely exacerbate counter contention).
 *
 * Forcibly inlined to allow asserting on @index at build time, and there should
 * never be more than one user.
 */
static __always_inline u64 intel_get_fixed_pmc_eventsel(unsigned int index)
{
	const enum perf_hw_id fixed_pmc_perf_ids[] = {
		[0] = PERF_COUNT_HW_INSTRUCTIONS,
		[1] = PERF_COUNT_HW_CPU_CYCLES,
		[2] = PERF_COUNT_HW_REF_CPU_CYCLES,
	};
	u64 eventsel;

	/*
	 * Yell if perf reports support for a fixed counter but perf doesn't
	 * have a known encoding for the associated general purpose event.
	 */
	eventsel = perf_get_hw_event_config(fixed_pmc_perf_ids[index]);
	WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed);
	return eventsel;
}
/*
 * Clear the reserved-bit mask for @bits on every supported fixed counter,
 * i.e. allow the guest to set those control bits in FIXED_CTR_CTRL.
 */
static void intel_pmu_enable_fixed_counter_bits(struct kvm_pmu *pmu, u64 bits)
{
	int i;

	for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
		pmu->fixed_ctr_ctrl_rsvd &= ~intel_fixed_bits_by_idx(i, bits);
}
/* * Setting passthrough of LBR MSRs is done only in the VM-Entry loop, * and PMU refresh is disallowed after the vCPU has run, i.e. this code * should never be reached while KVM is passing through MSRs.
*/ if (KVM_BUG_ON(lbr_desc->msr_passthrough, vcpu->kvm)) return;
entry = kvm_find_cpuid_entry(vcpu, 0xa); if (!entry) return;
eax.full = entry->eax;
edx.full = entry->edx;
pmu->version = eax.split.version_id; if (!pmu->version) return;
/* * GLOBAL_STATUS and GLOBAL_OVF_CONTROL (a.k.a. GLOBAL_STATUS_RESET) * share reserved bit definitions. The kernel just happens to use * OVF_CTRL for the names.
*/
pmu->global_status_rsvd = pmu->global_ctrl_rsvd
& ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD); if (vmx_pt_mode_is_host_guest())
pmu->global_status_rsvd &=
~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;
/*
 * Emulate LBR_On_PMI behavior for 1 < pmu.version < 4.
 *
 * If Freeze_LBR_On_PMI = 1, the LBR is frozen on PMI and
 * the KVM emulates to clear the LBR bit (bit 0) in IA32_DEBUGCTL.
 *
 * Guest needs to re-enable LBR to resume branches recording.
 */
static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
{
	u64 data = vmx_guest_debugctl_read();

	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
		data &= ~DEBUGCTLMSR_LBR;
		vmx_guest_debugctl_write(vcpu, data);
	}
}
/*
 * PMI delivery hook: emulate legacy freeze-LBRs-on-PMI behavior for PMU
 * versions 2 and 3 (version 4+ handles freezing architecturally).
 */
static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
	u8 version = vcpu_to_pmu(vcpu)->version;

	if (!intel_pmu_lbr_is_enabled(vcpu))
		return;

	if (version > 1 && version < 4)
		intel_pmu_legacy_freezing_lbrs_on_pmi(vcpu);
}
for (i = 0; i < lbr->nr; i++) {
vmx_set_intercept_for_msr(vcpu, lbr->from + i, MSR_TYPE_RW, set);
vmx_set_intercept_for_msr(vcpu, lbr->to + i, MSR_TYPE_RW, set); if (lbr->info)
vmx_set_intercept_for_msr(vcpu, lbr->info + i, MSR_TYPE_RW, set);
}
/* * Higher priority host perf events (e.g. cpu pinned) could reclaim the * pmu resources (e.g. LBR) that were assigned to the guest. This is * usually done via ipi calls (more details in perf_install_in_context). * * Before entering the non-root mode (with irq disabled here), double * confirm that the pmu features enabled to the guest are not reclaimed * by higher priority host events. Otherwise, disallow vcpu's access to * the reclaimed features.
*/ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
{ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
if (WARN_ON_ONCE(!lbr_desc)) return;
if (!lbr_desc->event) {
vmx_disable_lbr_msrs_passthrough(vcpu); if (vmx_guest_debugctl_read() & DEBUGCTLMSR_LBR) goto warn; if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use)) goto warn; return;
}
/* * A negative index indicates the event isn't mapped to a * physical counter in the host, e.g. due to contention.
*/
hw_idx = pmc->perf_event->hw.idx; if (hw_idx != pmc->idx && hw_idx > -1)
pmu->host_cross_mapped_mask |= BIT_ULL(hw_idx);
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder die Vollständigkeit noch die Richtigkeit
noch die Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.