/* * Architecture neutral utility routines for interacting with * Hyper-V. This file is specifically for code that must be * built-in to the kernel image when CONFIG_HYPERV is set * (vs. being in a module) because it is called from architecture * specific code under arch/. * * Copyright (C) 2021, Microsoft, Inc. * * Author : Michael Kelley <mikelley@microsoft.com>
*/
/*
 * ms_hyperv and hv_nested are defined here with other Hyper-V specific
 * globals so they are shared across all architectures and are built only
 * when CONFIG_HYPERV is defined. But on x86, ms_hyperv_init_platform() is
 * built even when CONFIG_HYPERV is not defined, and it uses these
 * variables. So mark them as __weak here, allowing for an overriding
 * definition in the module containing ms_hyperv_init_platform().
 */
bool __weak hv_nested;
EXPORT_SYMBOL_GPL(hv_nested);
/*
 * Per-cpu array holding the tail pointer for the SynIC event ring buffer
 * for each SINT.
 *
 * We cannot maintain this in the mshv driver because the tail pointer
 * should persist even if the mshv driver is unloaded.
 *
 * The per-cpu storage is allocated with alloc_percpu() in hv_common_init()
 * and each CPU's slot is populated in hv_common_cpu_init(); both happen
 * only when hv_root_partition() is true.
 */
u8 * __percpu *hv_synic_eventring_tail;
EXPORT_SYMBOL_GPL(hv_synic_eventring_tail);
/* * Hyper-V specific initialization and shutdown code that is * common across all architectures. Called from architecture * specific initialization functions.
*/
/*
 * Boolean to control whether to report panic messages over Hyper-V.
 * Defaults to 1 (enabled); hv_common_init() clears it when
 * hv_is_isolation_supported() is true.
 *
 * It can be set via /proc/sys/kernel/hyperv_record_panic_msg
 */
static int sysctl_record_panic_msg = 1;
/* * sysctl option to allow the user to control whether kmsg data should be * reported to Hyper-V on panic.
*/ staticconststruct ctl_table hv_ctl_table[] = {
{
.procname = "hyperv_record_panic_msg",
.data = &sysctl_record_panic_msg,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE
},
};
/* * The following callback works both as die and panic notifier; its * goal is to provide panic information to the hypervisor unless the * kmsg dumper is used [see hv_kmsg_dump()], which provides more * information but isn't always available. * * Notice that both the panic/die report notifiers are registered only * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set.
*/ staticint hv_die_panic_notify_crash(struct notifier_block *self, unsignedlong val, void *args)
{ struct pt_regs *regs; bool is_die;
/* Don't notify Hyper-V unless we have a die oops event or panic. */ if (self == &hyperv_panic_report_block) {
is_die = false;
regs = current_pt_regs();
} else { /* die event */ if (val != DIE_OOPS) return NOTIFY_DONE;
/* * Hyper-V should be notified only once about a panic/die. If we will * be calling hv_kmsg_dump() later with kmsg data, don't do the * notification here.
*/ if (!sysctl_record_panic_msg || !hv_panic_page)
hyperv_report_panic(regs, val, is_die);
return NOTIFY_DONE;
}
/*
 * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
 * buffer and call into Hyper-V to transfer the data.
 *
 * @dumper: the registered dumper (unused here).
 * @detail: dump details; only detail->reason == KMSG_DUMP_PANIC is handled.
 *
 * Does nothing when panic-message recording is disabled through
 * sysctl_record_panic_msg or when no kmsg bytes could be copied.
 */
static void hv_kmsg_dump(struct kmsg_dumper *dumper,
			 struct kmsg_dump_detail *detail)
{
	struct kmsg_dump_iter iter;
	size_t bytes_written;

	/* We are only interested in panics. */
	if (detail->reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg)
		return;

	/*
	 * Write dump contents to the page. No need to synchronize; panic should
	 * be single-threaded.
	 */
	kmsg_dump_rewind(&iter);
	kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
			     &bytes_written);
	if (!bytes_written)
		return;

	/*
	 * P3 to contain the physical address of the panic page & P4 to
	 * contain the size of the panic data in that page. Rest of the
	 * registers are no-op when the NOTIFY_MSG flag is set.
	 */
	hv_set_msr(HV_MSR_CRASH_P0, 0);
	hv_set_msr(HV_MSR_CRASH_P1, 0);
	hv_set_msr(HV_MSR_CRASH_P2, 0);
	hv_set_msr(HV_MSR_CRASH_P3, virt_to_phys(hv_panic_page));
	hv_set_msr(HV_MSR_CRASH_P4, bytes_written);

	/*
	 * Let Hyper-V know there is crash data available along with
	 * the panic message.
	 */
	hv_set_msr(HV_MSR_CRASH_CTL,
		   (HV_CRASH_CTL_CRASH_NOTIFY |
		    HV_CRASH_CTL_CRASH_NOTIFY_MSG));
}
ret = hv_do_hypercall(control, input, output); if (hv_result_success(ret)) {
ret = output->values[0].reg8 & HV_VTL_MASK;
} else {
pr_err("Failed to get VTL(error: %lld) exiting...\n", ret);
BUG();
}
local_irq_restore(flags); return ret;
} #endif
int __init hv_common_init(void)
{ int i; union hv_hypervisor_version_info version;
/* Get information about the Hyper-V host version */ if (!hv_get_hypervisor_version(&version))
pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n",
version.major_version, version.minor_version,
version.build_number, version.service_number,
version.service_pack, version.service_branch);
if (hv_is_isolation_supported())
sysctl_record_panic_msg = 0;
/* * Hyper-V expects to get crash register data or kmsg when * crash enlightenment is available and the system crashes. Set * crash_kexec_post_notifiers to true to make sure that the * crash enlightenment interface is called before running the kdump * kernel.
*/ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
u64 hyperv_crash_ctl;
/* * Panic message recording (sysctl_record_panic_msg) * is enabled by default in non-isolated guests and * disabled by default in isolated guests; the panic * message recording won't be available in isolated * guests should the following registration fail.
*/
hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table); if (!hv_ctl_table_hdr)
pr_err("Hyper-V: sysctl table register error");
/* * Register for panic kmsg callback only if the right * capability is supported by the hypervisor.
*/
hyperv_crash_ctl = hv_get_msr(HV_MSR_CRASH_CTL); if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG)
hv_kmsg_dump_register();
/* * Allocate the per-CPU state for the hypercall input arg. * If this allocation fails, we will not be able to setup * (per-CPU) hypercall input page and thus this failure is * fatal on Hyper-V.
*/
hyperv_pcpu_input_arg = alloc_percpu(void *);
BUG_ON(!hyperv_pcpu_input_arg);
/* Allocate the per-CPU state for output arg for root */ if (hv_output_page_exists()) {
hyperv_pcpu_output_arg = alloc_percpu(void *);
BUG_ON(!hyperv_pcpu_output_arg);
}
if (hv_root_partition()) {
hv_synic_eventring_tail = alloc_percpu(u8 *);
BUG_ON(!hv_synic_eventring_tail);
}
/* * Seed the Linux random number generator with entropy provided by * the Hyper-V host in ACPI table OEM0.
*/ if (!IS_ENABLED(CONFIG_ACPI)) return;
status = acpi_get_table("OEM0", 0, &header); if (ACPI_FAILURE(status) || !header) return;
/* * Since the "OEM0" table name is for OEM specific usage, verify * that what we're seeing purports to be from Microsoft.
*/ if (strncmp(header->oem_table_id, "MICROSFT", 8)) goto error;
/* * Ensure the length is reasonable. Requiring at least 8 bytes and * no more than 4K bytes is somewhat arbitrary and just protects * against a malformed table. Hyper-V currently provides 64 bytes, * but allow for a change in a later version.
*/ if (header->length < sizeof(*header) + 8 ||
header->length > sizeof(*header) + SZ_4K) goto error;
pr_debug("Hyper-V: Seeding rng with %d random bytes from ACPI table OEM0\n",
length);
add_bootloader_randomness(randomdata, length);
/* * To prevent the seed data from being visible in /sys/firmware/acpi, * zero out the random data in the ACPI table and fixup the checksum. * The zero'ing is done out of an abundance of caution in avoiding * potential security risks to the rng. Similarly, reset the table * length to just the header size so that a subsequent kexec doesn't * try to use the zero'ed out random data.
*/ for (i = 0; i < length; i++) {
header->checksum += randomdata[i];
randomdata[i] = 0;
}
for (i = 0; i < sizeof(header->length); i++)
header->checksum += ((u8 *)&header->length)[i];
header->length = sizeof(*header); for (i = 0; i < sizeof(header->length); i++)
header->checksum -= ((u8 *)&header->length)[i];
error:
acpi_put_table(header);
}
/* * Hyper-V specific initialization and die code for * individual CPUs that is common across all architectures. * Called by the CPU hotplug mechanism.
*/
int hv_common_cpu_init(unsignedint cpu)
{ void **inputarg, **outputarg;
u8 **synic_eventring_tail;
u64 msr_vp_index;
gfp_t flags; constint pgcount = hv_output_page_exists() ? 2 : 1; void *mem; int ret = 0;
/* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
/* * The per-cpu memory is already allocated if this CPU was previously * online and then taken offline
*/ if (!*inputarg) {
mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags); if (!mem) return -ENOMEM;
if (!ms_hyperv.paravisor_present &&
(hv_isolation_type_snp() || hv_isolation_type_tdx())) {
ret = set_memory_decrypted((unsignedlong)mem, pgcount); if (ret) { /* It may be unsafe to free 'mem' */ return ret;
}
memset(mem, 0x00, pgcount * HV_HYP_PAGE_SIZE);
}
/* * In a fully enlightened TDX/SNP VM with more than 64 VPs, if * hyperv_pcpu_input_arg is not NULL, set_memory_decrypted() -> * ... -> cpa_flush()-> ... -> __send_ipi_mask_ex() tries to * use hyperv_pcpu_input_arg as the hypercall input page, which * must be a decrypted page in such a VM, but the page is still * encrypted before set_memory_decrypted() returns. Fix this by * setting *inputarg after the above set_memory_decrypted(): if * hyperv_pcpu_input_arg is NULL, __send_ipi_mask_ex() returns * HV_STATUS_INVALID_PARAMETER immediately, and the function * hv_send_ipi_mask() falls back to orig_apic.send_IPI_mask(), * which may be slightly slower than the hypercall, but still * works correctly in such a VM.
*/
*inputarg = mem;
}
msr_vp_index = hv_get_msr(HV_MSR_VP_INDEX);
hv_vp_index[cpu] = msr_vp_index;
if (msr_vp_index > hv_max_vp_index)
hv_max_vp_index = msr_vp_index;
if (hv_root_partition()) {
synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
*synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT, sizeof(u8), flags); /* No need to unwind any of the above on failure here */ if (unlikely(!*synic_eventring_tail))
ret = -ENOMEM;
}
return ret;
}
int hv_common_cpu_die(unsignedint cpu)
{
u8 **synic_eventring_tail; /* * The hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory * is not freed when the CPU goes offline as the hyperv_pcpu_input_arg * may be used by the Hyper-V vPCI driver in reassigning interrupts * as part of the offlining process. The interrupt reassignment * happens *after* the CPUHP_AP_HYPERV_ONLINE state has run and * called this function. * * If a previously offlined CPU is brought back online again, the * originally allocated memory is reused in hv_common_cpu_init().
*/
/*
 * Query whether the hypervisor reports an extended capability.
 *
 * @cap_query: bit mask of the extended capability to query: see
 *             HV_EXT_CAPABILITY_xxx
 *
 * Returns true if any bit of @cap_query is set in the capabilities
 * reported by the hypervisor. Returns false when extended hypercalls are
 * not enabled for the partition or when the query hypercall fails. The
 * hypercall is issued at most once; later calls use the cached result.
 */
bool hv_query_ext_cap(u64 cap_query)
{
	/*
	 * The address of the 'hv_extended_cap' variable will be used as an
	 * output parameter to the hypercall below and so it should be
	 * compatible with 'virt_to_phys'. Which means, its address should be
	 * directly mapped. Use 'static' to keep it compatible; stack variables
	 * can be virtually mapped, making them incompatible with
	 * 'virt_to_phys'.
	 * Hypercall input/output addresses should also be 8-byte aligned.
	 */
	static u64 hv_extended_cap __aligned(8);
	static bool hv_extended_cap_queried;
	u64 status;

	/*
	 * Querying extended capabilities is an extended hypercall. Check if the
	 * partition supports extended hypercall, first.
	 */
	if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
		return false;

	/* Extended capabilities do not change at runtime. */
	if (hv_extended_cap_queried)
		return hv_extended_cap & cap_query;

	status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
				 &hv_extended_cap);

	/*
	 * The query extended capabilities hypercall should not fail under
	 * any normal circumstances. Avoid repeatedly making the hypercall, on
	 * error.
	 */
	hv_extended_cap_queried = true;
	if (!hv_result_success(status)) {
		pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
		       status);
		return false;
	}

	/* Same expression as the cached path above. */
	return hv_extended_cap & cap_query;
}
/*
 * Fallback reader for the Hyper-V reference counter. It works whether or
 * not the Hyper-V enlightened clocks/timers are in use; on architectures
 * that use them, the enlightenment code in hyperv_timer.c may override
 * this function.
 *
 * Returns the value read from the HV_MSR_TIME_REF_COUNT register.
 */
static u64 __hv_read_ref_counter(void)
{
	const u64 ref_count = hv_get_msr(HV_MSR_TIME_REF_COUNT);

	return ref_count;
}
/* These __weak functions provide default "no-op" behavior and * may be overridden by architecture specific versions. Architectures * for which the default "no-op" behavior is sufficient can leave * them unimplemented and not be cluttered with a bunch of stub * functions in arch-specific code. */
void hv_identify_partition_type(void) { /* Assume guest role */ hv_curr_partition_type = HV_PARTITION_TYPE_GUEST; /* * Check partition creation and cpu management privileges * * Hyper-V should never specify running as root and as a Confidential * VM. But to protect against a compromised/malicious Hyper-V trying * to exploit root behavior to expose Confidential VM memory, ignore * the root partition setting if also a Confidential VM. */ if ((ms_hyperv.priv_high & HV_CREATE_PARTITIONS) && (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) && !(ms_hyperv.priv_high & HV_ISOLATION)) { pr_info("Hyper-V: running as root partition\n"); if (IS_ENABLED(CONFIG_MSHV_ROOT)) hv_curr_partition_type = HV_PARTITION_TYPE_ROOT; else pr_crit("Hyper-V: CONFIG_MSHV_ROOT not enabled!\n"); } }
/* One row of the hypercall status table: name, errno mapping, and code. */
struct hv_status_info {
	/* Stringified HV_STATUS_* identifier. */
	char *string;
	/* Linux errno value the status is mapped to (0 or negative). */
	int errno;
	/* The HV_STATUS_* value itself. */
	u16 code;
};
/* * Note on the errno mappings: * A failed hypercall is usually only recoverable (or loggable) near * the call site where the HV_STATUS_* code is known. So the errno * it gets converted to is not too useful further up the stack. * Provide a few mappings that could be useful, and revert to -EIO * as a fallback. */ static const struct hv_status_info hv_status_infos[] = { #define _STATUS_INFO(status, errno) { #status, (errno), (status) } _STATUS_INFO(HV_STATUS_SUCCESS, 0), _STATUS_INFO(HV_STATUS_INVALID_HYPERCALL_CODE, -EINVAL), _STATUS_INFO(HV_STATUS_INVALID_HYPERCALL_INPUT, -EINVAL), _STATUS_INFO(HV_STATUS_INVALID_ALIGNMENT, -EIO), _STATUS_INFO(HV_STATUS_INVALID_PARAMETER, -EINVAL), _STATUS_INFO(HV_STATUS_ACCESS_DENIED, -EIO), _STATUS_INFO(HV_STATUS_INVALID_PARTITION_STATE, -EIO), _STATUS_INFO(HV_STATUS_OPERATION_DENIED, -EIO), _STATUS_INFO(HV_STATUS_UNKNOWN_PROPERTY, -EIO), _STATUS_INFO(HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE, -EIO), _STATUS_INFO(HV_STATUS_INSUFFICIENT_MEMORY, -ENOMEM), _STATUS_INFO(HV_STATUS_INVALID_PARTITION_ID, -EINVAL), _STATUS_INFO(HV_STATUS_INVALID_VP_INDEX, -EINVAL), _STATUS_INFO(HV_STATUS_NOT_FOUND, -EIO), _STATUS_INFO(HV_STATUS_INVALID_PORT_ID, -EINVAL), _STATUS_INFO(HV_STATUS_INVALID_CONNECTION_ID, -EINVAL), _STATUS_INFO(HV_STATUS_INSUFFICIENT_BUFFERS, -EIO), _STATUS_INFO(HV_STATUS_NOT_ACKNOWLEDGED, -EIO), _STATUS_INFO(HV_STATUS_INVALID_VP_STATE, -EIO), _STATUS_INFO(HV_STATUS_NO_RESOURCES, -EIO), _STATUS_INFO(HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED, -EIO), _STATUS_INFO(HV_STATUS_INVALID_LP_INDEX, -EINVAL), _STATUS_INFO(HV_STATUS_INVALID_REGISTER_VALUE, -EINVAL), _STATUS_INFO(HV_STATUS_INVALID_LP_INDEX, -EIO), _STATUS_INFO(HV_STATUS_INVALID_REGISTER_VALUE, -EIO), _STATUS_INFO(HV_STATUS_OPERATION_FAILED, -EIO), _STATUS_INFO(HV_STATUS_TIME_OUT, -EIO), _STATUS_INFO(HV_STATUS_CALL_PENDING, -EIO), _STATUS_INFO(HV_STATUS_VTL_ALREADY_ENABLED, -EIO), #undef _STATUS_INFO };
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert. 0.5 Bemerkung:
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.