// SPDX-License-Identifier: GPL-2.0 /* * AMD Encrypted Register State Support * * Author: Joerg Roedel <jroedel@suse.de> * * This file is not compiled stand-alone. It contains code shared * between the pre-decompression boot code and the running Linux kernel * and is included directly into both code-bases.
*/
/*
 * SVSM related information:
 *   During boot, the page tables are set up as identity mapped and later
 *   changed to use kernel virtual addresses. Maintain separate virtual and
 *   physical addresses for the CAA to allow SVSM functions to be used during
 *   early boot, both with identity mapped virtual addresses and proper kernel
 *   virtual addresses.
 */
struct svsm_ca *boot_svsm_caa __ro_after_init;
u64 boot_svsm_caa_pa __ro_after_init;

/*
 * Since feature negotiation related variables are set early in the boot
 * process they must reside in the .data section so as not to be zeroed
 * out when the .bss section is later cleared.
 *
 * GHCB protocol version negotiated with the hypervisor.
 */
static u16 ghcb_version __ro_after_init;

/* Copy of the SNP firmware's CPUID page. */
static struct snp_cpuid_table cpuid_table_copy __ro_after_init;

/*
 * These will be initialized based on CPUID table so that non-present
 * all-zero leaves (for sparse tables) can be differentiated from
 * invalid/out-of-range leaves. This is needed since all-zero leaves
 * still need to be post-processed.
 */
static u32 cpuid_std_range_max __ro_after_init;
static u32 cpuid_hyp_range_max __ro_after_init;
static u32 cpuid_ext_range_max __ro_after_init;
/*
 * NOTE(review): this region appears truncated by text extraction. The first
 * fragment is sev_es_check_cpu_features() (RDRAND availability check, its
 * success path is missing). The GPA-response check that follows presumably
 * belongs to a separate GHCB-page registration routine whose header and
 * locals (`val`, `pfn`) are not present here — verify against the original
 * file; this region cannot compile as-is.
 */
bool __init sev_es_check_cpu_features(void)
{ if (!has_cpuflag(X86_FEATURE_RDRAND)) {
error("RDRAND instruction not supported - no trusted source of randomness available\n"); returnfalse;
}
/* If the response GPA is not ours then abort the guest */ if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
(GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
}
/*
 * NOTE(review): truncated/spliced region. sev_es_negotiate_protocol() below
 * loses its tail after the min/max protocol range check (the negotiated
 * version store and the success return are missing). The code from the
 * `ret = ghcb->save.sw_exit_info_1 ...` line onward references locals
 * (`ret`, `ghcb`, `ctxt`) declared in a different function — it looks like
 * the tail of an exception-info verification helper. Verify against the
 * original file; this region cannot compile as-is.
 */
bool sev_es_negotiate_protocol(void)
{
u64 val;
/* Do the GHCB protocol version negotiation */
sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
VMGEXIT();
val = sev_es_rd_ghcb_msr();
if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) returnfalse;
if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX) returnfalse;
/* NOTE(review): start of the second (unrelated) fragment. */
ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0); if (!ret) return ES_OK;
if (ret == 1) {
u64 info = ghcb->save.sw_exit_info_2; unsignedlong v = info & SVM_EVTINJ_VEC_MASK;
/* Check if exception information from hypervisor is sane. */ if ((info & SVM_EVTINJ_VALID) &&
((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
ctxt->fi.vector = v;
if (info & SVM_EVTINJ_VALID_ERR)
ctxt->fi.error_code = info >> 32;
return ES_EXCEPTION;
}
}
return ES_VMM_ERROR;
}
staticinlineint svsm_process_result_codes(struct svsm_call *call)
{ switch (call->rax_out) { case SVSM_SUCCESS: return 0; case SVSM_ERR_INCOMPLETE: case SVSM_ERR_BUSY: return -EAGAIN; default: return -EINVAL;
}
}
/* * Issue a VMGEXIT to call the SVSM: * - Load the SVSM register state (RAX, RCX, RDX, R8 and R9) * - Set the CA call pending field to 1 * - Issue VMGEXIT * - Save the SVSM return register state (RAX, RCX, RDX, R8 and R9) * - Perform atomic exchange of the CA call pending field * * - See the "Secure VM Service Module for SEV-SNP Guests" specification for * details on the calling convention. * - The calling convention loosely follows the Microsoft X64 calling * convention by putting arguments in RCX, RDX, R8 and R9. * - RAX specifies the SVSM protocol/callid as input and the return code * as output.
*/ static __always_inline void svsm_issue_call(struct svsm_call *call, u8 *pending)
{ registerunsignedlong rax asm("rax") = call->rax; registerunsignedlong rcx asm("rcx") = call->rcx; registerunsignedlong rdx asm("rdx") = call->rdx; registerunsignedlong r8 asm("r8") = call->r8; registerunsignedlong r9 asm("r9") = call->r9;
/*
 * NOTE(review): truncated by extraction. The body of svsm_issue_call() (the
 * pending-flag store, the VMGEXIT asm, and the register save-back) is
 * missing; the two GHCB header assignments below reference a `ghcb` local
 * that belongs to a different function. Verify against the original file.
 */
/* * Fill in protocol and format specifiers. This can be called very early * in the boot, so use rip-relative references as needed.
*/
ghcb->protocol_version = ghcb_version;
ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
/*
 * Fetch a single CPUID register value for function @fn / register @reg_idx
 * from the hypervisor using the GHCB MSR protocol.
 *
 * Returns 0 on success with the value stored in *@reg, -EIO if the
 * hypervisor response code is not a CPUID response.
 */
static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
{
	u64 resp;

	/* Request the register, trap to the hypervisor, read the response. */
	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx));
	VMGEXIT();

	resp = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(resp) != GHCB_MSR_CPUID_RESP)
		return -EIO;

	/* The requested register value is carried in the upper 32 bits. */
	*reg = (u32)(resp >> 32);

	return 0;
}
/*
 * NOTE(review): truncated/spliced region. __sev_cpuid_hv_msr() below loses
 * its tail (presumably `return ret; }`) after the four per-register fetches.
 * The XCR0/GHCB-call code that follows references locals (`cr4`, `ghcb`,
 * `ctxt`, `ret` as an enum) from a different, GHCB-page-based CPUID helper
 * whose header is missing here. Verify against the original file; this
 * region cannot compile as-is.
 */
staticint __sev_cpuid_hv_msr(struct cpuid_leaf *leaf)
{ int ret;
/* * MSR protocol does not support fetching non-zero subfunctions, but is * sufficient to handle current early-boot cases. Should that change, * make sure to report an error rather than ignoring the index and * grabbing random values. If this issue arises in the future, handling * can be added here to use GHCB-page protocol for cases that occur late * enough in boot that GHCB page is available.
*/ if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn) return -EINVAL;
ret = __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax);
ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx);
ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx);
ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx);
/* NOTE(review): start of the second (unrelated) fragment. */
if (cr4 & X86_CR4_OSXSAVE) /* Safe to read xcr0 */
ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK)); else /* xgetbv will cause #UD - use reset value for xcr0 */
ghcb_set_xcr0(ghcb, 1);
ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); if (ret != ES_OK) return ret;
if (!(ghcb_rax_is_valid(ghcb) &&
ghcb_rbx_is_valid(ghcb) &&
ghcb_rcx_is_valid(ghcb) &&
ghcb_rdx_is_valid(ghcb))) return ES_VMM_ERROR;
/* * This may be called early while still running on the initial identity * mapping. Use RIP-relative addressing to obtain the correct address * while running with the initial identity mapping as well as the * switch-over to kernel virtual addresses later.
*/ conststruct snp_cpuid_table *snp_cpuid_get_table(void)
{ return rip_rel_ptr(&cpuid_table_copy);
}
/* * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0 * and 1 based on the corresponding features enabled by a particular * combination of XCR0 and XSS registers so that a guest can look up the * version corresponding to the features currently enabled in its XCR0/XSS * registers. The only values that differ between these versions/table * entries is the enabled XSAVE area size advertised via EBX. * * While hypervisors may choose to make use of this support, it is more * robust/secure for a guest to simply find the entry corresponding to the * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the * XSAVE area size using subfunctions 2 through 64, as documented in APM * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here. * * Since base/legacy XSAVE area size is documented as 0x240, use that value * directly rather than relying on the base size in the CPUID table. * * Return: XSAVE area size on success, 0 otherwise.
*/ static u32 __head snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
{ conststruct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
u64 xfeatures_found = 0;
u32 xsave_size = 0x240; int i;
for (i = 0; i < cpuid_table->count; i++) { conststruct snp_cpuid_fn *e = &cpuid_table->fn[i];
if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64)) continue; if (!(xfeatures_en & (BIT_ULL(e->ecx_in)))) continue; if (xfeatures_found & (BIT_ULL(e->ecx_in))) continue;
/*
 * NOTE(review): the loop body is truncated here by extraction — the lines
 * that record the found feature bit and accumulate the per-feature size
 * into xsave_size (and the loop's closing brace) are missing. Verify
 * against the original file; this function cannot compile as-is.
 */
/* * Either the guest set unsupported XCR0/XSS bits, or the corresponding * entries in the CPUID table were not present. This is not a valid * state to be in.
*/ if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2))) return 0;
/*
 * NOTE(review): spliced region made of several fragments with their
 * enclosing function headers missing: a CPUID-table lookup loop (matching
 * fn/subfn against table entries), followed by leaf-0xD XSAVE-size
 * post-processing and leaf-0x8000001E fix-ups that reference locals
 * (`leaf`, `xcr0`, `xss`, `ghcb`, `ctxt`, `leaf_hv`) declared elsewhere.
 * Verify against the original file; this region cannot compile as-is.
 */
for (i = 0; i < cpuid_table->count; i++) { conststruct snp_cpuid_fn *e = &cpuid_table->fn[i];
if (e->eax_in != leaf->fn) continue;
if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn) continue;
/* * For 0xD subfunctions 0 and 1, only use the entry corresponding * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0). * See the comments above snp_cpuid_calc_xsave_size() for more * details.
*/ if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1)) if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in) continue;
if (leaf->subfn != 0 && leaf->subfn != 1) return 0;
if (native_read_cr4() & X86_CR4_OSXSAVE)
xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); if (leaf->subfn == 1) { /* Get XSS value if XSAVES is enabled. */ if (leaf->eax & BIT(3)) { unsignedlong lo, hi;
/* * The PPR and APM aren't clear on what size should be * encoded in 0xD:0x1:EBX when compaction is not enabled * by either XSAVEC (feature bit 1) or XSAVES (feature * bit 3) since SNP-capable hardware has these feature * bits fixed as 1. KVM sets it to 0 in this case, but * to avoid this becoming an issue it's safer to simply * treat this as unsupported for SNP guests.
*/ if (!(leaf->eax & (BIT(1) | BIT(3)))) return -EINVAL;
compacted = true;
}
xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted); if (!xsave_size) return -EINVAL;
leaf->ebx = xsave_size;
} break; case 0x8000001E:
snp_cpuid_hv(ghcb, ctxt, &leaf_hv);
/* extended APIC ID */
leaf->eax = leaf_hv.eax; /* compute ID */
leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0)); /* node ID */
leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0)); break; default: /* No fix-ups needed, use values as-is. */ break;
}
return 0;
}
/* * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value * should be treated as fatal by caller.
*/ int __head
snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{ conststruct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
if (!cpuid_table->count) return -EOPNOTSUPP;
if (!snp_cpuid_get_validated_func(leaf)) { /* * Some hypervisors will avoid keeping track of CPUID entries * where all values are zero, since they can be handled the * same as out-of-range values (all-zero). This is useful here * as well as it allows virtually all guest configurations to * work using a single SNP CPUID table. * * To allow for this, there is a need to distinguish between * out-of-range entries and in-range zero entries, since the * CPUID table entries are only a template that may need to be * augmented with additional values for things like * CPU-specific information during post-processing. So if it's * not in the table, set the values to zero. Then, if they are * within a valid CPUID range, proceed with post-processing * using zeros as the initial values. Otherwise, skip * post-processing and just return zeros immediately.
*/
leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;
/*
 * NOTE(review): snp_cpuid() is truncated here by extraction — the
 * range-validation, post-processing call, and return paths are missing.
 * Verify against the original file; this function cannot compile as-is.
 */
/*
 * Boot #VC Handler - This is the first #VC handler during boot. There is no
 * GHCB page yet, so it only supports the MSR based communication with the
 * hypervisor and only the CPUID exit-code.
 */
void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
	unsigned int subfn = lower_bits(regs->cx, 32);
	unsigned int fn = lower_bits(regs->ax, 32);
	u16 opcode = *(unsigned short *)regs->ip;
	struct cpuid_leaf leaf;
	int ret;

	/* Only CPUID is supported via MSR protocol */
	if (exit_code != SVM_EXIT_CPUID)
		goto fail;

	/* Is it really a CPUID insn? */
	if (opcode != 0xa20f)
		goto fail;

	leaf.fn = fn;
	leaf.subfn = subfn;

	/* Try the SNP CPUID table first; 0 means the leaf was resolved. */
	ret = snp_cpuid(NULL, NULL, &leaf);
	if (!ret)
		goto cpuid_done;

	/*
	 * NOTE(review): the fallback below (MSR-protocol CPUID via
	 * __sev_cpuid_hv_msr() plus the cpuid_done label and register
	 * write-back) was missing from the extracted text — the original had a
	 * `goto cpuid_done` with no matching label. Restored to complete the
	 * MSR-based #VC flow; verify against the original source.
	 */
	ret = __sev_cpuid_hv_msr(&leaf);
	if (ret)
		goto fail;

cpuid_done:
	regs->ax = leaf.eax;
	regs->bx = leaf.ebx;
	regs->cx = leaf.ecx;
	regs->dx = leaf.edx;

	/*
	 * This is a VC handler and the #VC is only raised when SEV-ES is
	 * active, which means SEV must be active too. Do sanity checks on the
	 * CPUID results to make sure the hypervisor does not trick the kernel
	 * into the no-sev path. This could map sensitive data unencrypted and
	 * make it accessible to the hypervisor.
	 *
	 * In particular, check for:
	 *	- Availability of CPUID leaf 0x8000001f
	 *	- SEV CPUID bit.
	 *
	 * The hypervisor might still report the wrong C-bit position, but this
	 * can't be checked here.
	 */
	if (fn == 0x80000000 && (regs->ax < 0x8000001f))
		/* SEV leaf check */
		goto fail;
	else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
		/* SEV bit */
		goto fail;

	/* Skip over the CPUID two-byte opcode */
	regs->ip += 2;

	return;

fail:
	/* Terminate the guest */
	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}
/* * Search for a Confidential Computing blob passed in as a setup_data entry * via the Linux Boot Protocol.
*/ static __head struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{ struct cc_setup_data *sd = NULL; struct setup_data *hdr;
/*
 * NOTE(review): find_cc_blob_setup_data() is truncated here by extraction —
 * only the declarations survive; the setup_data list walk and return are
 * missing. Verify against the original file.
 */
/* * Initialize the kernel's copy of the SNP CPUID table, and set up the * pointer that will be used to access it. * * Maintaining a direct mapping of the SNP CPUID table used by firmware would * be possible as an alternative, but the approach is brittle since the * mapping needs to be updated in sync with all the changes to virtual memory * layout and related mapping facilities throughout the boot process.
*/ staticvoid __head setup_cpuid_table(conststruct cc_blob_sev_info *cc_info)
{ conststruct snp_cpuid_table *cpuid_table_fw, *cpuid_table; int i;
if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE)
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);
/*
 * NOTE(review): setup_cpuid_table() is truncated here by extraction — the
 * table copy and range-max initialization are missing, and the code below
 * (pvalidate/SVSM page validation plus the cache-eviction mitigation)
 * belongs to a different function whose header, along with its `paddr`,
 * `vaddr`, `validate` and `ret` locals, is not present. Verify against the
 * original file; this region cannot compile as-is.
 */
if (snp_vmpl) {
svsm_pval_4k_page(paddr, validate);
} else {
ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); if (ret)
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
}
/* * If validating memory (making it private) and affected by the * cache-coherency vulnerability, perform the cache eviction mitigation.
*/ if (validate && !has_cpuflag(X86_FEATURE_COHERENCY_SFW_NO))
sev_evict_cache((void *)vaddr, 1);
}
/* * Maintain the GPA of the SVSM Calling Area (CA) in order to utilize the SVSM * services needed when not running in VMPL0.
*/ staticbool __head svsm_setup_ca(conststruct cc_blob_sev_info *cc_info)
{ struct snp_secrets_page *secrets_page; struct snp_cpuid_table *cpuid_table; unsignedint i;
u64 caa;
BUILD_BUG_ON(sizeof(*secrets_page) != PAGE_SIZE);
/* * Check if running at VMPL0. * * Use RMPADJUST (see the rmpadjust() function for a description of what * the instruction does) to update the VMPL1 permissions of a page. If * the guest is running at VMPL0, this will succeed and implies there is * no SVSM. If the guest is running at any other VMPL, this will fail. * Linux SNP guests only ever run at a single VMPL level so permission mask * changes of a lesser-privileged VMPL are a don't-care. * * Use a rip-relative reference to obtain the proper address, since this * routine is running identity mapped when called, both by the decompressor * code and the early kernel code.
*/ if (!rmpadjust((unsignedlong)rip_rel_ptr(&boot_ghcb_page), RMP_PG_SIZE_4K, 1)) returnfalse;
/* * Not running at VMPL0, ensure everything has been properly supplied * for running under an SVSM.
*/ if (!cc_info || !cc_info->secrets_phys || cc_info->secrets_len != PAGE_SIZE)
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECRETS_PAGE);
secrets_page = (struct snp_secrets_page *)cc_info->secrets_phys; if (!secrets_page->svsm_size)
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NO_SVSM);
if (!secrets_page->svsm_guest_vmpl)
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_VMPL0);
snp_vmpl = secrets_page->svsm_guest_vmpl;
caa = secrets_page->svsm_caa;
/* * An open-coded PAGE_ALIGNED() in order to avoid including * kernel-proper headers into the decompressor.
*/ if (caa & (PAGE_SIZE - 1))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CAA);
/* * The CA is identity mapped when this routine is called, both by the * decompressor code and the early kernel code.
*/
boot_svsm_caa = (struct svsm_ca *)caa;
boot_svsm_caa_pa = caa;
/* Advertise the SVSM presence via CPUID. */
cpuid_table = (struct snp_cpuid_table *)snp_cpuid_get_table(); for (i = 0; i < cpuid_table->count; i++) { struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
if (fn->eax_in == 0x8000001f)
fn->eax |= BIT(28);
}
returntrue;
}
Messung V0.5
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.