// SPDX-License-Identifier: GPL-2.0
/*
 * This code is used on x86_64 to create page table identity mappings on
 * demand by building up a new set of page tables (or appending to the
 * existing ones), and then switching over to them when ready.
 *
 * Copyright (C) 2015-2016  Yinghai Lu
 * Copyright (C)      2016  Kees Cook
 */
/* No MITIGATION_PAGE_TABLE_ISOLATION support needed either: */
#undef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION

#include "error.h"
#include "misc.h"

/* These actually do the work of building the kernel identity maps. */
#include <linux/pgtable.h>
#include <asm/cmpxchg.h>
#include <asm/trap_pf.h>
#include <asm/trapnr.h>
#include <asm/init.h>
/* Use the static base for this part of the boot process */
#undef __PAGE_OFFSET
#define __PAGE_OFFSET __PAGE_OFFSET_BASE
#include "../../mm/ident_map.c"

#define _SETUP
#include <asm/setup.h>	/* For COMMAND_LINE_SIZE */
#undef _SETUP

extern unsigned long get_cmd_line_ptr(void);
/* Used by PAGE_KERN* macros: */
pteval_t __default_kernel_pte_mask __read_mostly = ~0;
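/* ~0 means no PTE bits are filtered out as unsupported at this boot stage. */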
/* Used to track our page table allocation area. */
struct alloc_pgt_data {
	unsigned char *pgt_buf;
	unsigned long pgt_buf_size;
	unsigned long pgt_buf_offset;
};
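/*
 * Note: allocation below is a simple bump allocator over pgt_buf. Each call
 * hands out one PAGE_SIZE chunk and advances pgt_buf_offset; nothing is
 * ever freed during this stage of boot.
 */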
/*
 * Allocates space for a page table entry, using struct alloc_pgt_data
 * above. Besides the local callers, this is used as the allocation
 * callback in mapping_info below.
 */
static void *alloc_pgt_page(void *context)
{
	struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
	unsigned char *entry;

	/* Validate there is space available for a new page. */
	if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
debug_putaddr(pages->pgt_buf_offset);
		debug_putaddr(pages->pgt_buf_size);
		return NULL;
}
	/* Consumed more tables than expected? */
	if (pages->pgt_buf_offset == BOOT_PGT_SIZE_WARN) {
debug_putstr("pgt_buf running low in " __FILE__ "\n");
debug_putstr("Need to raise BOOT_PGT_SIZE?\n");
debug_putaddr(pages->pgt_buf_offset);
debug_putaddr(pages->pgt_buf_size);
	}

	entry = pages->pgt_buf + pages->pgt_buf_offset;
	pages->pgt_buf_offset += PAGE_SIZE;

	return entry;
}

/* Used to track our allocated page tables. */
static struct alloc_pgt_data pgt_data;

/* The top level page table entry pointer. */
static unsigned long top_level_pgt;

phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
/*
 * Mapping information structure passed to kernel_ident_mapping_init().
 * Due to relocation, pointers must be assigned at run time not build time.
 */
static struct x86_mapping_info mapping_info;
/*
 * Adds the specified range to the identity mappings.
 */
void kernel_add_identity_map(unsigned long start, unsigned long end)
{
	int ret;
/* Align boundary to 2M. */
start = round_down(start, PMD_SIZE);
	end = round_up(end, PMD_SIZE);
	if (start >= end)
		return;
/* Build the mapping. */
	ret = kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt,
					start, end);
	if (ret)
error("Error: kernel_ident_mapping_init() failed\n");
}
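/*
 * Note the 2M granularity above: e.g. asking for the single byte at
 * 0x1000000 identity-maps the whole [0x1000000, 0x1200000) PMD range.
 */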
/* Locates and clears a region for a new top level page table. */
void initialize_identity_maps(void *rmode)
{
	unsigned long cmdline;
	struct setup_data *sd;
/* Exclude the encryption mask from __PHYSICAL_MASK */
	physical_mask &= ~sme_me_mask;

	/* Init mapping_info with run-time function/buffer pointers. */
	mapping_info.alloc_pgt_page = alloc_pgt_page;
	mapping_info.context = &pgt_data;
	mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
	mapping_info.kernpg_flag = _KERNPG_TABLE;
	/*
	 * It should be impossible for this not to already be true,
	 * but since calling this a second time would rewind the other
	 * counters, let's just make sure this is reset too.
*/
pgt_data.pgt_buf_offset = 0;
	/*
	 * If we came here via startup_32(), cr3 will be _pgtable already
	 * and we must append to the existing area instead of entirely
	 * overwriting it.
	 *
	 * With 5-level paging, we use '_pgtable' to allocate the p4d page
	 * table, the top-level page table is allocated separately.
	 *
	 * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
	 * cases. On 4-level paging it's equal to 'top_level_pgt'.
*/
	top_level_pgt = read_cr3_pa();
	if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
} else {
pgt_data.pgt_buf = _pgtable;
pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
		top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
}
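	/*
	 * At this point pgt_data describes our scratch area, and
	 * top_level_pgt points either at the table startup_32() already
	 * built in the first BOOT_INIT_PGT_SIZE bytes of '_pgtable', or at
	 * a freshly allocated, zeroed page.
	 */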
	/*
	 * New page-table is set up - map the kernel image, boot_params and the
	 * command line. The uncompressed kernel requires boot_params and the
	 * command line to be mapped in the identity mapping. Map them
	 * explicitly here in case the compressed kernel does not touch them,
	 * or does not touch all the pages covering them.
*/
	kernel_add_identity_map((unsigned long)_head, (unsigned long)_end);
boot_params_ptr = rmode;
	kernel_add_identity_map((unsigned long)boot_params_ptr,
				(unsigned long)(boot_params_ptr + 1));
cmdline = get_cmd_line_ptr();
kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
	/*
	 * Also map the setup_data entries passed via boot_params in case they
	 * need to be accessed by uncompressed kernel via the identity mapping.
*/
	sd = (struct setup_data *)boot_params_ptr->hdr.setup_data;
	while (sd) {
		unsigned long sd_addr = (unsigned long)sd;

		kernel_add_identity_map(sd_addr, sd_addr + sizeof(*sd) + sd->len);
		sd = (struct setup_data *)sd->next;
	}

	sev_prep_identity_maps(top_level_pgt);
/* Load the new page-table. */
write_cr3(top_level_pgt);
	/*
	 * Now that the required page table mappings are established and a
	 * GHCB can be used, check for SNP guest/HV feature compatibility.
*/
snp_check_features();
}
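/*
 * Split a large (2M) PMD mapping into PAGE_SIZE-granular PTE mappings, so
 * that attributes of individual 4K pages can be changed afterwards.
 */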
static pte_t *split_large_pmd(struct x86_mapping_info *info,
			      pmd_t *pmdp, unsigned long __address)
{
	unsigned long page_flags;
	unsigned long address;
	pte_t *pte;
	pmd_t pmd;
	int i;

	pte = (pte_t *)info->alloc_pgt_page(info->context);
	if (!pte)
		return NULL;
	address = __address & PMD_MASK;

	/* No large page - clear PSE flag */
page_flags = info->page_flag & ~_PAGE_PSE;
	/* Populate the PTEs */
	for (i = 0; i < PTRS_PER_PMD; i++) {
set_pte(&pte[i], __pte(address | page_flags));
address += PAGE_SIZE;
}
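	/*
	 * The PTRS_PER_PMD (512) PTEs above re-create the original 2M mapping
	 * at 4K granularity (512 * 4K == 2M), with identical flags except for
	 * the cleared _PAGE_PSE bit.
	 */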
	/*
	 * Ideally we need to clear the large PMD first and do a TLB
	 * flush before we write the new PMD. But the 2M range of the
	 * PMD might contain the code we execute and/or the stack
	 * we are on, so we can't do that. But that should be safe here
	 * because we are going from large to small mappings and we are
	 * also the only user of the page-table, so there is no chance
	 * of a TLB multihit.
*/
	pmd = __pmd((unsigned long)pte | info->kernpg_flag);
	set_pmd(pmdp, pmd);

	/* Flush TLB to establish the new PMD */
	write_cr3(top_level_pgt);

	return pte + pte_index(__address);
}

static void clflush_page(unsigned long address)
{
	unsigned int flush_size;
	char *cl, *start, *end;
	/*
	 * Hardcode cl-size to 64 - CPUID can't be used here because that might
	 * cause another #VC exception and the GHCB is not ready to use yet.
*/
flush_size = 64;
start = (char *)(address & PAGE_MASK);
end = start + PAGE_SIZE;
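	/* With 64-byte lines this is PAGE_SIZE / 64 == 64 flushes per page. */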
	/*
	 * First make sure there are no pending writes on the cache-lines to
	 * flush.
	 */
	asm volatile("mfence" : : : "memory");

	for (cl = start; cl != end; cl += flush_size)
		clflush(cl);
}

static int set_clr_page_flags(struct x86_mapping_info *info,
			      unsigned long address,
			      pteval_t set, pteval_t clr)
{
	pgd_t *pgdp = (pgd_t *)top_level_pgt;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep, pte;
	/*
	 * First make sure there is a PMD mapping for 'address'.
	 * It should already exist, but keep things generic.
	 *
	 * To map the page just read from it and fault it in if there is no
	 * mapping yet. kernel_add_identity_map() can't be called here because
	 * that would unconditionally map the address on PMD level, destroying
	 * any PTE-level mappings that might already exist. Use assembly here
	 * so the access won't be optimized away.
	 */
	asm volatile("mov %[address], %%r9"
		     :: [address] "g" (*(unsigned long *)address)
: "r9", "memory");
	/*
	 * The page is mapped at least with PMD size - so skip checks and walk
	 * directly to the PMD.
*/
p4dp = p4d_offset(pgdp, address);
pudp = pud_offset(p4dp, address);
	pmdp = pmd_offset(pudp, address);

	if (pmd_leaf(*pmdp))
		ptep = split_large_pmd(info, pmdp, address);
	else
		ptep = pte_offset_kernel(pmdp, address);

	if (!ptep)
		return -ENOMEM;
	/*
	 * Changing encryption attributes of a page requires to flush it from
	 * the caches.
	 */
	if ((set | clr) & _PAGE_ENC) {
clflush_page(address);
		/*
		 * If the encryption attribute is being cleared, change the page
		 * state to shared in the RMP table.
		 */
		if (clr)
snp_set_page_shared(__pa(address & PAGE_MASK));
	}

	/* Update PTE */
	pte = *ptep;
	pte = pte_set_flags(pte, set);
	pte = pte_clear_flags(pte, clr);
	set_pte(ptep, pte);
	/*
	 * If the encryption attribute is being set, then change the page state
	 * to private in the RMP entry. The page state change must be done after
	 * the PTE is updated.
	 */
	if (set & _PAGE_ENC)
snp_set_page_private(__pa(address & PAGE_MASK));
/* Flush TLB after changing encryption attribute */
write_cr3(top_level_pgt);
return 0;
}
int set_page_decrypted(unsigned long address)
{
	return set_clr_page_flags(&mapping_info, address, 0, _PAGE_ENC);
}
int set_page_encrypted(unsigned long address)
{
	return set_clr_page_flags(&mapping_info, address, _PAGE_ENC, 0);
}
int set_page_non_present(unsigned long address)
{
	return set_clr_page_flags(&mapping_info, address, 0, _PAGE_PRESENT);
}
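/*
 * Example use of these wrappers: the SEV support code maps the GHCB page
 * decrypted via set_page_decrypted() so it can be shared with the
 * hypervisor.
 */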