/*
 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and
 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated
 * across the APs VMSA fields (TSC_SCALE and TSC_OFFSET).
 */
static u64 snp_tsc_scale __ro_after_init;
static u64 snp_tsc_offset __ro_after_init;
static unsigned long snp_tsc_freq_khz __ro_after_init;

/*
 * SVSM related information:
 * When running under an SVSM, the VMPL that Linux is executing at must be
 * non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
 */
u8 snp_vmpl __ro_after_init;
EXPORT_SYMBOL_GPL(snp_vmpl);
mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE); if (!mem) {
pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n"); return 0;
}
/* * This can be called very early in the boot, use native functions in * order to avoid paravirt issues.
*/
flags = native_local_irq_save();
/* * The SVSM calling area (CA) can support processing 510 entries at a * time. Loop through the Page State Change descriptor until the CA is * full or the last entry in the descriptor is reached, at which time * the SVSM is invoked. This repeats until all entries in the descriptor * are processed.
*/
call.caa = svsm_get_caa();
for (i = 0; i <= desc->hdr.end_entry;) {
i = svsm_build_ca_from_psc_desc(desc, i, pc);
do {
ret = svsm_perform_call_protocol(&call); if (!ret) continue;
/* * Check if the entry failed because of an RMP mismatch (a * PVALIDATE at 2M was requested, but the page is mapped in * the RMP as 4K).
*/
if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH &&
pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) { /* Save this entry for post-processing at 4K */
pv_4k[pv_4k_count++] = pc->entry[pc->cur_index];
/* Skip to the next one unless at the end of the list */
pc->cur_index++; if (pc->cur_index < pc->num_entries)
ret = -EAGAIN; else
ret = 0;
}
} while (ret == -EAGAIN);
if (ret)
svsm_pval_terminate(pc, ret, call.rax_out);
}
/* Process any entries that failed to be validated at 2M and validate them at 4K */ for (i = 0; i < pv_4k_count; i++) {
u64 pfn, pfn_end;
if (snp_vmpl)
svsm_pval_pages(desc); else
pval_pages(desc);
/* * If not affected by the cache-coherency vulnerability there is no need * to perform the cache eviction mitigation.
*/ if (cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO)) return;
for (i = 0; i <= desc->hdr.end_entry; i++) {
e = &desc->entries[i];
/* * If validating memory (making it private) perform the cache * eviction mitigation.
*/ if (e->operation == SNP_PAGE_STATE_PRIVATE)
sev_evict_cache(pfn_to_kaddr(e->gfn), e->pagesize ? 512 : 1);
}
}
staticint vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
{ int cur_entry, end_entry, ret = 0; struct snp_psc_desc *data; struct es_em_ctxt ctxt;
vc_ghcb_invalidate(ghcb);
/* Copy the input desc into GHCB shared buffer */
data = (struct snp_psc_desc *)ghcb->shared_buffer;
memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
/* * As per the GHCB specification, the hypervisor can resume the guest * before processing all the entries. Check whether all the entries * are processed. If not, then keep retrying. Note, the hypervisor * will update the data memory directly to indicate the status, so * reference the data->hdr everywhere. * * The strategy here is to wait for the hypervisor to change the page * state in the RMP table before guest accesses the memory pages. If the * page state change was not successful, then later memory access will * result in a crash.
*/
cur_entry = data->hdr.cur_entry;
end_entry = data->hdr.end_entry;
while (data->hdr.cur_entry <= data->hdr.end_entry) {
ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
/* This will advance the shared buffer data points to. */
ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
/* * Page State Change VMGEXIT can pass error code through * exit_info_2.
*/ if (WARN(ret || ghcb->save.sw_exit_info_2, "SNP: PSC failed ret=%d exit_info_2=%llx\n",
ret, ghcb->save.sw_exit_info_2)) {
ret = 1; goto out;
}
/* Verify that reserved bit is not set */ if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
ret = 1; goto out;
}
/* * Sanity check that entry processing is not going backwards. * This will happen only if hypervisor is tricking us.
*/ if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry, "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
ret = 1; goto out;
}
}
/* Page validation must be rescinded before changing to shared */ if (op == SNP_PAGE_STATE_SHARED)
pvalidate_pages(data);
local_irq_save(flags);
if (sev_cfg.ghcbs_initialized)
ghcb = __sev_get_ghcb(&state); else
ghcb = boot_ghcb;
/* Invoke the hypervisor to perform the page state changes */ if (!ghcb || vmgexit_psc(ghcb, data))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
if (sev_cfg.ghcbs_initialized)
__sev_put_ghcb(&state);
local_irq_restore(flags);
/* Page validation must be performed after changing to private */ if (op == SNP_PAGE_STATE_PRIVATE)
pvalidate_pages(data);
local_irq_restore(flags);
} else { /* * If the kernel runs at VMPL0, it can change the VMSA * bit for a page using the RMPADJUST instruction. * However, for the instruction to succeed it must * target the permissions of a lesser privileged (higher * numbered) VMPL level, so use VMPL1.
*/
u64 attrs = 1;
if (make_vmsa)
attrs |= RMPADJUST_VMSA_PAGE_BIT;
ret = rmpadjust((unsignedlong)va, RMP_PG_SIZE_4K, attrs);
}
return ret;
}
staticvoid snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
{ int err;
/* * Ensure that all the per-CPU GHCBs are made private at the * end of the unsharing loop so that the switch to the slower * MSR protocol happens last.
*/
for_each_possible_cpu(cpu) {
data = per_cpu(runtime_data, cpu);
ghcb = (unsignedlong)&data->ghcb_page;
/* Handle the case of a huge page containing the GHCB page */ if (addr <= ghcb && ghcb < addr + size) {
skipped_addr = true; break;
}
}
/* Stop new private<->shared conversions */
void snp_kexec_begin(void)
{
	/* Only relevant for SNP guests on kexec-capable kernels. */
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;
	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	/*
	 * A crash kernel reaches this point with interrupts disabled and
	 * therefore cannot wait for in-flight conversions to finish.  If
	 * the race was lost, just report it and carry on.
	 */
	if (!set_memory_enc_stop_conversion())
		pr_warn("Failed to stop shared<->private conversions\n");
}
/* * Shutdown all APs except the one handling kexec/kdump and clearing * the VMSA tag on AP's VMSA pages as they are not being used as * VMSA page anymore.
*/ staticvoid shutdown_all_aps(void)
{ struct sev_es_save_area *vmsa; int apic_id, this_cpu, cpu;
this_cpu = get_cpu();
/* * APs are already in HLT loop when enc_kexec_finish() callback * is invoked.
*/
for_each_present_cpu(cpu) {
vmsa = per_cpu(sev_vmsa, cpu);
/* * The BSP or offlined APs do not have guest allocated VMSA * and there is no need to clear the VMSA tag for this page.
*/ if (!vmsa) continue;
/* * Cannot clear the VMSA tag for the currently running vCPU.
*/ if (this_cpu == cpu) { unsignedlong pa; struct page *p;
pa = __pa(vmsa); /* * Mark the VMSA page of the running vCPU as offline * so that is excluded and not touched by makedumpfile * while generating vmcore during kdump.
*/
p = pfn_to_online_page(pa >> PAGE_SHIFT); if (p)
__SetPageOffline(p); continue;
}
apic_id = cpuid_to_apicid[cpu];
/* * Issue AP destroy to ensure AP gets kicked out of guest mode * to allow using RMPADJUST to remove the VMSA tag on it's * VMSA page.
*/
vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
snp_cleanup_vmsa(vmsa, apic_id);
}
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return;
if (!IS_ENABLED(CONFIG_KEXEC_CORE)) return;
shutdown_all_aps();
unshare_all_memory();
/* * Switch to using the MSR protocol to change per-CPU GHCBs to * private. All the per-CPU GHCBs have been switched back to private, * so can't do any more GHCB calls to the hypervisor beyond this point * until the kexec'ed kernel starts running.
*/
boot_ghcb = NULL;
sev_cfg.ghcbs_initialized = false;
for_each_possible_cpu(cpu) {
data = per_cpu(runtime_data, cpu);
ghcb = &data->ghcb_page;
pte = lookup_address((unsignedlong)ghcb, &level);
size = page_level_size(level); /* Handle the case of a huge page containing the GHCB page */
addr = (unsignedlong)ghcb & page_level_mask(level);
set_pte_enc(pte, level, (void *)addr);
snp_set_memory_private(addr, (size / PAGE_SIZE));
}
}
/* * Allocate VMSA page to work around the SNP erratum where the CPU will * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB) * collides with the RMP entry of VMSA page. The recommended workaround * is to not use a large page. * * Allocate an 8k page which is also 8k-aligned.
*/
p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1); if (!p) return NULL;
split_page(p, 1);
/* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
__free_page(p);
/* * The hypervisor SNP feature support check has happened earlier, just check * the AP_CREATION one here.
*/ if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION)) return -EOPNOTSUPP;
/* * Verify the desired start IP against the known trampoline start IP * to catch any future new trampolines that may be introduced that * would require a new protected guest entry point.
*/ if (WARN_ONCE(start_ip != real_mode_header->trampoline_start, "Unsupported SNP start_ip: %lx\n", start_ip)) return -EINVAL;
/* Override start_ip with known protected guest start IP */
start_ip = real_mode_header->sev_es_trampoline_start;
cur_vmsa = per_cpu(sev_vmsa, cpu);
/* * A new VMSA is created each time because there is no guarantee that * the current VMSA is the kernels or that the vCPU is not running. If * an attempt was done to use the current VMSA with a running vCPU, a * #VMEXIT of that vCPU would wipe out all of the settings being done * here.
*/
vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu); if (!vmsa) return -ENOMEM;
/* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
caa = per_cpu(svsm_caa, cpu);
/* CR4 should maintain the MCE value */
cr4 = native_read_cr4() & X86_CR4_MCE;
/* Set the CS value based on the start_ip converted to a SIPI vector */
sipi_vector = (start_ip >> 12);
vmsa->cs.base = sipi_vector << 12;
vmsa->cs.limit = AP_INIT_CS_LIMIT;
vmsa->cs.attrib = INIT_CS_ATTRIBS;
vmsa->cs.selector = sipi_vector << 8;
/* Set the RIP value based on start_ip */
vmsa->rip = start_ip & 0xfff;
/* Set AP INIT defaults as documented in the APM */
vmsa->ds.limit = AP_INIT_DS_LIMIT;
vmsa->ds.attrib = INIT_DS_ATTRIBS;
vmsa->es = vmsa->ds;
vmsa->fs = vmsa->ds;
vmsa->gs = vmsa->ds;
vmsa->ss = vmsa->ds;
/* * Set the SNP-specific fields for this VMSA: * VMPL level * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
*/
vmsa->vmpl = snp_vmpl;
vmsa->sev_features = sev_status >> 2;
/* Populate AP's TSC scale/offset to get accurate TSC values. */ if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) {
vmsa->tsc_scale = snp_tsc_scale;
vmsa->tsc_offset = snp_tsc_offset;
}
/* Switch the page over to a VMSA page now that it is initialized */
ret = snp_set_vmsa(vmsa, caa, apic_id, true); if (ret) {
pr_err("set VMSA page failed (%u)\n", ret);
free_page((unsignedlong)vmsa);
return -EINVAL;
}
/* Issue VMGEXIT AP Creation NAE event */
ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id); if (ret) {
snp_cleanup_vmsa(vmsa, apic_id);
vmsa = NULL;
}
/* Free up any previous VMSA page */ if (cur_vmsa)
snp_cleanup_vmsa(cur_vmsa, apic_id);
/* Record the current VMSA page */
per_cpu(sev_vmsa, cpu) = vmsa;
return ret;
}
void __init snp_set_wakeup_secondary_cpu(void)
{
	/*
	 * Install this override whenever SNP is enabled: it becomes the
	 * required method to start APs under SNP.  If the hypervisor lacks
	 * AP-creation support, no APs will be started.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit);
}
/* * This is needed by the OVMF UEFI firmware which will use whatever it finds in * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu * runtime GHCBs used by the kernel are also mapped in the EFI page-table. * * When running under SVSM the CA page is needed too, so map it as well.
*/ int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd)
{ unsignedlong address, pflags, pflags_enc; struct sev_es_runtime_data *data; int cpu;
u64 pfn;
if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) return 0;
data = this_cpu_read(runtime_data);
ghcb = &data->ghcb_page;
snp_register_ghcb_early(__pa(ghcb));
}
void setup_ghcb(void)
{ if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) return;
/* * Check whether the runtime #VC exception handler is active. It uses * the per-CPU GHCB page which is set up by sev_es_init_vc_handling(). * * If SNP is active, register the per-CPU GHCB page so that the runtime * exception handler can use it.
*/ if (initial_vc_handler == (unsignedlong)kernel_exc_vmm_communication) { if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
snp_register_per_cpu_ghcb();
sev_cfg.ghcbs_initialized = true;
return;
}
/* * Make sure the hypervisor talks a supported protocol. * This gets called only in the BSP boot phase.
*/ if (!sev_es_negotiate_protocol())
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
/* * Clear the boot_ghcb. The first exception comes in before the bss * section is cleared.
*/
memset(&boot_ghcb_page, 0, PAGE_SIZE);
/* Alright - Make the boot-ghcb public */
boot_ghcb = &boot_ghcb_page;
/* SNP guest requires that GHCB GPA must be registered. */ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
snp_register_ghcb_early(__pa(&boot_ghcb_page));
}
while (true) {
vc_ghcb_invalidate(ghcb);
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
ghcb_set_sw_exit_info_1(ghcb, 0);
ghcb_set_sw_exit_info_2(ghcb, 0);
sev_es_wr_ghcb_msr(__pa(ghcb));
VMGEXIT();
/* Wakeup signal? */ if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
ghcb->save.sw_exit_info_2) break;
}
__sev_put_ghcb(&state);
}
/* * Play_dead handler when running under SEV-ES. This is needed because * the hypervisor can't deliver an SIPI request to restart the AP. * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the * hypervisor wakes it up again.
*/ staticvoid sev_es_play_dead(void)
{
play_dead_common();
/* IRQs now disabled */
sev_es_ap_hlt_loop();
/* * If we get here, the VCPU was woken up again. Jump to CPU * startup code to get it back online.
*/
soft_restart_cpu();
} #else/* CONFIG_HOTPLUG_CPU */ #define sev_es_play_dead native_play_dead #endif/* CONFIG_HOTPLUG_CPU */
if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) return;
if (!sev_es_check_cpu_features())
panic("SEV-ES CPU Features missing");
/* * SNP is supported in v2 of the GHCB spec which mandates support for HV * features.
*/ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
sev_hv_features = get_hv_features();
if (!(sev_hv_features & GHCB_HV_FT_SNP))
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
/* If running under an SVSM, switch to the per-cpu CA */ if (snp_vmpl) { struct svsm_call call = {}; unsignedlong flags; int ret;
local_irq_save(flags);
/* * SVSM_CORE_REMAP_CA call: * RAX = 0 (Protocol=0, CallID=0) * RCX = New CA GPA
*/
call.caa = svsm_get_caa();
call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
call.rcx = this_cpu_read(svsm_caa_pa);
ret = svsm_perform_call_protocol(&call); if (ret)
panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n",
ret, call.rax_out);
sev_cfg.use_cas = true;
local_irq_restore(flags);
}
sev_es_setup_play_dead();
/* Secondary CPUs use the runtime #VC handler */
initial_vc_handler = (unsignedlong)kernel_exc_vmm_communication;
}
/*
 * SEV-SNP guests may only run dmi_setup() when EFI_CONFIG_TABLES are
 * enabled: the fallback DMI probing logic touches the legacy ROM region,
 * which is not pre-validated and would crash the guest.
 */
void __init snp_dmi_setup(void)
{
	if (!efi_enabled(EFI_CONFIG_TABLES))
		return;

	dmi_setup();
}
staticvoid dump_cpuid_table(void)
{ conststruct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); int i = 0;
/*
 * From an auditing/testing perspective it is useful to give the guest
 * owner an easy indication that the CPUID table was initialized as
 * expected.  That initialization happens too early in boot to print
 * anything there, and there is no better place, so report it from an
 * initcall.  If running as an SNP guest, also report the current VM
 * privilege level (VMPL).
 */
static int __init report_snp_info(void)
{
	const struct snp_cpuid_table *table = snp_cpuid_get_table();

	if (table->count) {
		pr_info("Using SNP CPUID table, %d entries present.\n",
			table->count);

		if (sev_cfg.debug)
			dump_cpuid_table();
	}

	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		pr_info("SNP running at VMPL%u.\n", snp_vmpl);

	return 0;
}
arch_initcall(report_snp_info);
static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input)
{
	/*
	 * The SVSM may return updated buffer lengths in the output
	 * registers; propagate each one up only when it actually changed.
	 */
	if (call->rcx_out != call->rcx)
		input->manifest_buf.len = call->rcx_out;

	if (call->rdx_out != call->rdx)
		input->certificates_buf.len = call->rdx_out;

	if (call->r8_out != call->r8)
		input->report_buf.len = call->r8_out;
}
int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input)
{ struct svsm_attest_call *ac; unsignedlong flags;
u64 attest_call_pa; int ret;
/* * Set input registers for the request and set RDX and R8 to known * values in order to detect length values being returned in them.
*/
call->rax = call_id;
call->rcx = attest_call_pa;
call->rdx = -1;
call->r8 = -1;
ret = svsm_perform_call_protocol(call);
update_attest_input(call, input);
/* * __sev_get_ghcb() needs to run with IRQs disabled because it is using * a per-CPU GHCB.
*/
local_irq_save(flags);
ghcb = __sev_get_ghcb(&state); if (!ghcb) {
ret = -EIO; goto e_restore_irq;
}
vc_ghcb_invalidate(ghcb);
if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
ghcb_set_rax(ghcb, input->data_gpa);
ghcb_set_rbx(ghcb, input->data_npages);
}
ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa); if (ret) goto e_put;
req->exitinfo2 = ghcb->save.sw_exit_info_2; switch (req->exitinfo2) { case 0: break;
case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY):
ret = -EAGAIN; break;
case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN): /* Number of expected pages are returned in RBX */ if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
input->data_npages = ghcb_get_rbx(ghcb);
ret = -ENOSPC; break;
}
fallthrough; default:
ret = -EIO; break;
}
/** * snp_svsm_vtpm_probe() - Probe if SVSM provides a vTPM device * * Check that there is SVSM and that it supports at least TPM_SEND_COMMAND * which is the only request used so far. * * Return: true if the platform provides a vTPM SVSM device, false otherwise.
*/ staticbool snp_svsm_vtpm_probe(void)
{ struct svsm_call call = {};
/* The vTPM device is available only if a SVSM is present */ if (!snp_vmpl) returnfalse;
/** * snp_svsm_vtpm_send_command() - Execute a vTPM operation on SVSM * @buffer: A buffer used to both send the command and receive the response. * * Execute a SVSM_VTPM_CMD call as defined by * "Secure VM Service Module for SEV-SNP Guests" Publication # 58019 Revision: 1.00 * * All command request/response buffers have a common structure as specified by * the following table: * Byte Size In/Out Description * Offset (Bytes) * 0x000 4 In Platform command * Out Platform command response size * * Each command can build upon this common request/response structure to create * a structure specific to the command. See include/linux/tpm_svsm.h for more * details. * * Return: 0 on success, -errno on failure
*/ int snp_svsm_vtpm_send_command(u8 *buffer)
{ struct svsm_call call = {};
page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz)); if (!page) return NULL;
ret = set_memory_decrypted((unsignedlong)page_address(page), npages); if (ret) {
pr_err("failed to mark page shared, ret=%d\n", ret);
__free_pages(page, get_order(sz)); return NULL;
}
int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id)
{ /* Adjust the default VMPCK key based on the executing VMPL level */ if (vmpck_id == -1)
vmpck_id = snp_vmpl;
mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno); if (!mdesc->vmpck) {
pr_err("Invalid VMPCK%d communication key\n", vmpck_id); return -EINVAL;
}
/* Verify that VMPCK is not zero. */ if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
pr_err("Empty VMPCK%d communication key\n", vmpck_id); return -EINVAL;
}
mdesc->vmpck_id = vmpck_id;
mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN); if (!mdesc->ctx) return -ENOMEM;
/* Allocate the shared page used for the request and response message. */
mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg)); if (!mdesc->request) goto e_unmap;
mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg)); if (!mdesc->response) goto e_free_request;
/* Mutex to serialize the shared buffer access and command handling. */ static DEFINE_MUTEX(snp_cmd_mutex);
/*
 * When an error is received from the host or the AMD Secure Processor
 * (ASP) there are only two options: retry the exact same encrypted
 * request, or stop using this VMPCK altogether.
 *
 * The current GHCB v2 encryption scheme uses AES-GCM with the message
 * sequence number as the IV, and GCM cannot tolerate IV reuse.  ASP
 * firmware v1.51 increments the sequence number only on a successful
 * guest<->ASP round trip and accepts messages only at the exact expected
 * sequence number.
 *
 * So reusing a sequence number would make the scheme vulnerable, while
 * incrementing it for a fresh IV would make the ASP reject the request.
 */
static void snp_disable_vmpck(struct snp_msg_desc *mdesc)
{
	pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n",
		 mdesc->vmpck_id);

	memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN);
	mdesc->vmpck = NULL;
}
/* Read the current message sequence counter from secrets pages */
count = *mdesc->os_area_msg_seqno;
return count + 1;
}
/* Return a non-zero on success */
static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc)
{
	u64 count = __snp_get_msg_seqno(mdesc);

	/*
	 * The SNP guest request sequence counter is 64-bit, but GHCB spec
	 * version 2 only provides 32 bits of storage for it.  Return zero
	 * on overflow: callers should check for it, and the firmware
	 * treats zero as invalid and fails the request anyway.
	 */
	if (count < UINT_MAX)
		return count;

	pr_err("request message sequence counter overflow\n");
	return 0;
}
static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc)
{
	/*
	 * The PSP bumps the counter as well, so advance by 2 when saving
	 * it back to the secrets page.
	 */
	*mdesc->os_area_msg_seqno += 2;
}
pr_debug("response [seqno %lld type %d version %d sz %d]\n",
resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version,
resp_msg_hdr->msg_sz);
/* Copy response from shared memory to encrypted memory. */
memcpy(resp_msg, mdesc->response, sizeof(*resp_msg));
/* Verify that the sequence counter is incremented by 1 */ if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1))) return -EBADMSG;
/* Verify response message type and version number. */ if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) ||
resp_msg_hdr->msg_version != req_msg_hdr->msg_version) return -EBADMSG;
/* * If the message size is greater than our buffer length then return * an error.
*/ if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz)) return -EBADMSG;
/* Decrypt the payload */
memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno))); if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz,
&resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag)) return -EBADMSG;
retry_request: /* * Call firmware to process the request. In this function the encrypted * message enters shared memory with the host. So after this call the * sequence number must be incremented or the VMPCK must be deleted to * prevent reuse of the IV.
*/
rc = snp_issue_guest_request(req); switch (rc) { case -ENOSPC: /* * If the extended guest request fails due to having too * small of a certificate data buffer, retry the same * guest request without the extended data request in * order to increment the sequence number and thus avoid * IV reuse.
*/
override_npages = req->input.data_npages;
req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;
/* * Override the error to inform callers the given extended * request buffer size was too small and give the caller the * required buffer size.
*/
override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN);
/* * If this call to the firmware succeeds, the sequence number can * be incremented allowing for continued use of the VMPCK. If * there is an error reflected in the return value, this value * is checked further down and the result will be the deletion * of the VMPCK and the error code being propagated back to the * user as an ioctl() return code.
*/ goto retry_request;
/* * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been * throttled. Retry in the driver to avoid returning and reusing the * message sequence number on a different message.
*/ case -EAGAIN: if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) {
rc = -ETIMEDOUT; break;
}
schedule_timeout_killable(SNP_REQ_RETRY_DELAY); goto retry_request;
}
/* * Increment the message sequence number. There is no harm in doing * this now because decryption uses the value stored in the response * structure and any failure will wipe the VMPCK, preventing further * use anyway.
*/
snp_inc_msg_seqno(mdesc);
if (override_err) {
req->exitinfo2 = override_err;
/* * If an extended guest request was issued and the supplied certificate * buffer was not large enough, a standard guest request was issued to * prevent IV reuse. If the standard request was successful, return -EIO * back to the caller as would have originally been returned.
*/ if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
rc = -EIO;
}
if (override_npages)
req->input.data_npages = override_npages;
return rc;
}
int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
u64 seqno; int rc;
/* * enc_payload() calls aesgcm_encrypt(), which can potentially offload to HW. * The offload's DMA SG list of data to encrypt has to be in linear mapping.
*/ if (!virt_addr_valid(req->req_buf) || !virt_addr_valid(req->resp_buf)) {
pr_warn("AES-GSM buffers must be in linear mapping"); return -EINVAL;
}
guard(mutex)(&snp_cmd_mutex);
/* Check if the VMPCK is not empty */ if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
pr_err_ratelimited("VMPCK is disabled\n"); return -ENOTTY;
}
/* Get message sequence and verify that its a non-zero */
seqno = snp_get_msg_seqno(mdesc); if (!seqno) return -EIO;
/* Clear shared memory's response for the host to populate. */
memset(mdesc->response, 0, sizeof(struct snp_guest_msg));
/* Encrypt the userspace provided payload in mdesc->secret_request. */
rc = enc_payload(mdesc, seqno, req); if (rc) return rc;
/* * Write the fully encrypted request to the shared unencrypted * request page.
*/
memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request));
tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL); if (!tsc_req) return rc;
/* * The intermediate response buffer is used while decrypting the * response payload. Make sure that it has enough space to cover * the authtag.
*/
tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL); if (!tsc_resp) goto e_free_tsc_req;
mdesc = snp_msg_alloc(); if (IS_ERR_OR_NULL(mdesc)) goto e_free_tsc_resp;
rc = snp_msg_init(mdesc, snp_vmpl); if (rc) goto e_free_mdesc;
if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) return;
mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE); if (!mem) {
pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n");
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.