/*
 * The booting CPU updates the failed status @__early_cpu_boot_status,
 * with MMU turned off.
 */
long __section(".mmuoff.data.write") __early_cpu_boot_status;
/*
 * Empty_zero_page is a special page that is used for zero-initialized data
 * and COW.
 */
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
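/*
 * For reference (editor's addition; the exact definition is an assumption
 * based on the usual arm64 headers): the generic ZERO_PAGE() helper is
 * expected to resolve to this symbol, roughly as
 *
 *   #define ZERO_PAGE(vaddr)	phys_to_page(__pa_symbol(empty_zero_page))
 *
 * so read faults on untouched anonymous mappings can all share this one
 * page until a write triggers COW.
 */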
/*
 * Don't bother with the fixmap if swapper_pg_dir is still mapped
 * writable in the kernel mapping.
 */
if (rodata_is_rw) {
WRITE_ONCE(*pgdp, pgd);
dsb(ishst);
isb();
return;
}
spin_lock(&swapper_pgdir_lock);
fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp));
WRITE_ONCE(*fixmap_pgdp, pgd);
/*
 * We need dsb(ishst) here to ensure the page-table-walker sees
 * our new entry before set_p?d() returns. The fixmap's
 * flush_tlb_kernel_range() via clear_fixmap() does this for us.
*/
pgd_clear_fixmap();
spin_unlock(&swapper_pgdir_lock);
}
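/*
 * For context (editor's addition; paraphrased from the arm64 pgtable
 * headers, so treat the details as an assumption): set_pgd() is expected
 * to route updates to swapper_pg_dir through the function above, roughly
 *
 *   if (in_swapper_pgdir(pgdp)) {
 *           set_swapper_pgd(pgdp, pgd);
 *           return;
 *   }
 *   WRITE_ONCE(*pgdp, pgd);
 *   dsb(ishst);
 *   isb();
 */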
phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0,
MEMBLOCK_ALLOC_NOLEAKTRACE);
if (!phys)
panic("Failed to allocate page table page\n");
return phys;
}
bool pgattr_change_is_safe(pteval_t old, pteval_t new)
{
/*
 * The following mapping attributes may be updated in live
 * kernel mappings without the need for break-before-make.
*/
pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG |
PTE_SWBITS_MASK;
/* creating or taking down mappings is always safe */
if (!pte_valid(__pte(old)) || !pte_valid(__pte(new)))
return true;
/* A live entry's pfn should not change */
if (pte_pfn(__pte(old)) != pte_pfn(__pte(new)))
return false;
/* live contiguous mappings may not be manipulated at all */
if ((old | new) & PTE_CONT)
return false;
/* Transitioning from Non-Global to Global is unsafe */
if (old & ~new & PTE_NG)
return false;
/*
 * Changing the memory type between Normal and Normal-Tagged is safe
 * since Tagged is considered a permission attribute from the
 * mismatched attribute aliases perspective.
 */
if (((old & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL) ||
(old & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_TAGGED)) &&
((new & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL) ||
(new & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_TAGGED)))
mask |= PTE_ATTRINDX_MASK;
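/*
 * Illustrative examples (editor's addition, derived from the checks above):
 * flipping a live kernel mapping from RW to RO changes only
 * PTE_RDONLY/PTE_WRITE and is safe, whereas changing the output pfn,
 * touching a PTE_CONT entry, or turning a non-global entry into a global
 * one requires break-before-make and is rejected.
 */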
/*
 * Required barriers to make this visible to the table walker
 * are deferred to the end of alloc_init_cont_pte().
*/
__set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot));
/*
 * After the PTE entry has been populated once, we
 * only allow updates to the permission attributes.
*/
BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
pte_val(__ptep_get(ptep))));
/* use a contiguous mapping if the range is suitably aligned */
if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) &&
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
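/*
 * Worked example (editor's addition; assumes a 4K granule where
 * CONT_PTE_SIZE is 16 pages = 64KiB): addr, next and phys must all be
 * 64KiB aligned for the OR of the three to have no bits set below
 * CONT_PTE_MASK, and only then is PTE_CONT applied to this run of entries.
 */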
init_pte(ptep, addr, next, phys, __prot);
ptep += pte_index(next) - pte_index(addr);
phys += next - addr;
} while (addr = next, addr != end);
/*
 * Note: barriers and maintenance necessary to clear the fixmap slot
 * ensure that all previous pgtable writes are visible to the table
 * walker.
*/
pte_clear_fixmap();
}
/* try section mapping first */
if (((addr | next | phys) & ~PMD_MASK) == 0 &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
pmd_set_huge(pmdp, phys, prot);
/*
 * After the PMD entry has been populated once, we
 * only allow updates to the permission attributes.
*/
BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
READ_ONCE(pmd_val(*pmdp))));
} else {
alloc_init_cont_pte(pmdp, addr, next, phys, prot,
pgtable_alloc, flags);
/* use a contiguous mapping if the range is suitably aligned */
if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) &&
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
/*
 * For 4K granule only, attempt to put down a 1GB block
 */
if (pud_sect_supported() &&
((addr | next | phys) & ~PUD_MASK) == 0 &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
pud_set_huge(pudp, phys, prot);
/*
 * After the PUD entry has been populated once, we
 * only allow updates to the permission attributes.
*/
BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
READ_ONCE(pud_val(*pudp))));
} else {
alloc_init_cont_pmd(pudp, addr, next, phys, prot,
pgtable_alloc, flags);
/*
 * If the virtual and physical address don't have the same offset
 * within a page, we cannot map the region as the caller expects.
 */
if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
return;
/*
 * This function can only be used to modify existing table entries,
 * without allocating new levels of table. Note that this permits the
 * creation of new section or page entries.
 */
void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot)
{
if (virt < PAGE_OFFSET) {
pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
&phys, virt);
return;
}
__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
NO_CONT_MAPPINGS);
}
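/*
 * Usage note (editor's addition; the exact caller is an assumption): a
 * typical user of create_mapping_noalloc() is the early fixmap-based FDT
 * mapping, which runs before the page-table allocator is available and
 * therefore must only populate entries in already-existing tables.
 */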
void __init mark_linear_text_alias_ro(void)
{
/*
 * Remove the write permissions from the linear alias of .text/.rodata
*/
update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
(unsigned long)__init_begin - (unsigned long)_text,
PAGE_KERNEL_RO);
}
/*
 * Setting hierarchical PXNTable attributes on table entries covering
 * the linear region is only possible if it is guaranteed that no table
 * entries at any level are being shared between the linear region and
 * the vmalloc region. Check whether this is true for the PGD level, in
 * which case it is guaranteed to be true for all other levels as well.
 * (Unless we are running with support for LPA2, in which case the
 * entire reduced VA space is covered by a single pgd_t which will have
 * been populated without the PXNTable attribute by the time we get here.)
*/
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end) &&
pgd_index(_PAGE_OFFSET(VA_BITS_MIN)) != PTRS_PER_PGD - 1);
early_kfence_pool = arm64_kfence_alloc_pool();
if (can_set_direct_map())
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
 * Take care not to create a writable alias for the
 * read-only text and rodata sections of the kernel image.
 * So temporarily mark them as NOMAP to skip mappings in
 * the following for-loop
*/
memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
/* map all the memory banks */
for_each_mem_range(i, &start, &end) {
if (start >= end)
break;
/*
 * The linear map must allow allocation tags reading/writing
 * if MTE is present. Otherwise, it has the same attributes as
 * PAGE_KERNEL.
*/
__map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL),
flags);
}
/*
 * Map the linear alias of the [_text, __init_begin) interval
 * as non-executable now, and remove the write permission in
 * mark_linear_text_alias_ro() below (which will be called after
 * alternative patching has completed). This makes the contents
 * of the region accessible to subsystems such as hibernate,
 * but protects it from inadvertent modification or execution.
 * Note that contiguous mappings cannot be remapped in this way,
 * so we should avoid them here.
*/
__map_memblock(pgdp, kernel_start, kernel_end,
PAGE_KERNEL, NO_CONT_MAPPINGS);
memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
arm64_kfence_map_pool(early_kfence_pool, pgdp);
}
/*
 * mark .rodata as read only. Use __init_begin rather than __end_rodata
 * to cover NOTES and EXCEPTION_TABLE.
*/
section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
WRITE_ONCE(rodata_is_rw, false);
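/*
 * Cross-reference (editor's addition): clearing rodata_is_rw here is what
 * makes set_swapper_pgd() above stop using the direct WRITE_ONCE() fast
 * path and go through the fixmap instead, since swapper_pg_dir is no
 * longer writable through the kernel mapping.
 */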
update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
section_size, PAGE_KERNEL_RO);
/* mark the range between _text and _stext as read only. */
update_mapping_prot(__pa_symbol(_text), (unsigned long)_text,
(unsigned long)_stext - (unsigned long)_text,
PAGE_KERNEL_RO);
}
/* The trampoline is always mapped and can therefore be global */
pgprot_val(prot) &= ~PTE_NG;
/* Map only the text into the trampoline page table */
memset(tramp_pg_dir, 0, PGD_SIZE);
__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS,
entry_tramp_text_size(), prot,
pgd_pgtable_alloc_init_mm, NO_BLOCK_MAPPINGS);
/* Map both the text and data into the kernel page table */
for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++)
__set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
pa_start + i * PAGE_SIZE, prot);
if (IS_ENABLED(CONFIG_RELOCATABLE))
__set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO);
if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && !arm64_use_ng_mappings) {
extern u32 __idmap_kpti_flag;
u64 pa = __pa_symbol(&__idmap_kpti_flag);
/*
 * The KPTI G-to-nG conversion code needs a read-write mapping
 * of its synchronization flag in the ID map.
*/
ptep = __pa_symbol(kpti_ptes);
__pi_map_range(&ptep, pa, pa + sizeof(u32), pa, PAGE_KERNEL,
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
__phys_to_virt(ptep) - ptep);
}
}
do {
next = pmd_addr_end(addr, end);
pmdp = pmd_offset(pudp, addr);
pmd = READ_ONCE(*pmdp);
if (pmd_none(pmd))
continue;
WARN_ON(!pmd_present(pmd));
if (pmd_sect(pmd)) {
pmd_clear(pmdp);
/*
 * One TLBI should be sufficient here as the PMD_SIZE
 * range is mapped with a single block entry.
*/
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
if (free_mapped)
free_hotplug_page_range(pmd_page(pmd),
PMD_SIZE, altmap);
continue;
}
WARN_ON(!pmd_table(pmd));
unmap_hotplug_pte_range(pmdp, addr, next, free_mapped, altmap);
} while (addr = next, addr < end);
}
do {
next = pud_addr_end(addr, end);
pudp = pud_offset(p4dp, addr);
pud = READ_ONCE(*pudp);
if (pud_none(pud))
continue;
WARN_ON(!pud_present(pud));
if (pud_sect(pud)) {
pud_clear(pudp);
/*
 * One TLBI should be sufficient here as the PUD_SIZE
 * range is mapped with a single block entry.
*/
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
if (free_mapped)
free_hotplug_page_range(pud_page(pud),
PUD_SIZE, altmap);
continue;
}
WARN_ON(!pud_table(pud));
unmap_hotplug_pmd_range(pudp, addr, next, free_mapped, altmap);
} while (addr = next, addr < end);
}
/*
 * altmap can only be used as vmemmap mapping backing memory.
 * In case the backing memory itself is not being freed, then
 * altmap is irrelevant. Warn about this inconsistency when
 * encountered.
*/
WARN_ON(!free_mapped && altmap);
do {
next = pgd_addr_end(addr, end);
pgdp = pgd_offset_k(addr);
pgd = READ_ONCE(*pgdp);
if (pgd_none(pgd))
continue;
do {
ptep = pte_offset_kernel(pmdp, addr);
pte = __ptep_get(ptep);
/*
 * This is just a sanity check here which verifies that
 * pte clearing has been done by earlier unmap loops.
*/
WARN_ON(!pte_none(pte));
} while (addr += PAGE_SIZE, addr < end);
if (!pgtable_range_aligned(start, end, floor, ceiling, PMD_MASK))
return;
/*
 * Check whether we can free the pte page if the rest of the
 * entries are empty. Overlap with other regions has been
 * handled by the floor/ceiling check.
*/
ptep = pte_offset_kernel(pmdp, 0UL);
for (i = 0; i < PTRS_PER_PTE; i++) {
if (!pte_none(__ptep_get(&ptep[i])))
return;
}
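/*
 * Worked example (editor's addition): with 4K pages a pte page maps one
 * PMD_SIZE (2MiB) span, so it may only be freed when that entire aligned
 * span lies within the floor/ceiling limits per the check above and every
 * one of its PTRS_PER_PTE entries is none; otherwise a neighbouring
 * region may still be relying on this table.
 */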
if (!pgtable_range_aligned(start, end, floor, ceiling, PUD_MASK))
return;
/*
 * Check whether we can free the pmd page if the rest of the
 * entries are empty. Overlap with other regions has been
 * handled by the floor/ceiling check.
*/
pmdp = pmd_offset(pudp, 0UL);
for (i = 0; i < PTRS_PER_PMD; i++) {
if (!pmd_none(READ_ONCE(pmdp[i])))
return;
}
if (!pgtable_range_aligned(start, end, floor, ceiling, P4D_MASK))
return;
/*
 * Check whether we can free the pud page if the rest of the
 * entries are empty. Overlap with other regions has been
 * handled by the floor/ceiling check.
*/
pudp = pud_offset(p4dp, 0UL);
for (i = 0; i < PTRS_PER_PUD; i++) {
if (!pud_none(READ_ONCE(pudp[i])))
return;
}
if (!pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK))
return;
/*
 * Check whether we can free the p4d page if the rest of the
 * entries are empty. Overlap with other regions has been
 * handled by the floor/ceiling check.
*/
p4dp = p4d_offset(pgdp, 0UL);
for (i = 0; i < PTRS_PER_P4D; i++) {
if (!p4d_none(READ_ONCE(p4dp[i])))
return;
}
table = pmd_offset(pudp, addr);
pmdp = table;
next = addr;
end = addr + PUD_SIZE;
do {
if (pmd_present(pmdp_get(pmdp)))
pmd_free_pte_page(pmdp, next);
} while (pmdp++, next += PMD_SIZE, next != end);
struct range arch_get_mappable_range(void)
{
struct range mhp_range;
u64 start_linear_pa = __pa(_PAGE_OFFSET(vabits_actual));
u64 end_linear_pa = __pa(PAGE_END - 1);
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
/*
 * Check for a wrap: because of the randomized linear mapping, the
 * start physical address can actually be bigger than the end
 * physical address. In this case set start to zero, because the
 * [0, end_linear_pa] range must still be able to cover all
 * addressable physical addresses.
 */
if (start_linear_pa > end_linear_pa)
start_linear_pa = 0;
}
WARN_ON(start_linear_pa > end_linear_pa);
/*
 * Linear mapping region is the range [PAGE_OFFSET..(PAGE_END - 1)],
 * accommodating both its ends but excluding PAGE_END. The maximum
 * physical range which can be mapped inside this linear mapping range
 * must also be derived from its end points.
*/
mhp_range.start = start_linear_pa;
mhp_range.end = end_linear_pa;
return mhp_range;
}
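/*
 * Example (editor's addition): with no wrap from randomization the
 * hot-pluggable physical window is simply __pa(PAGE_OFFSET) ..
 * __pa(PAGE_END - 1), i.e. whatever physical span the linear map can
 * still cover; arch_add_memory() below VM_BUG_ON()s requests that
 * mhp_range_allowed() rejects.
 */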
int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params)
{
int ret, flags = NO_EXEC_MAPPINGS;
VM_BUG_ON(!mhp_range_allowed(start, size, true));
if (can_set_direct_map())
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
 * This memory hotplug notifier helps prevent boot memory from being
 * inadvertently removed as it blocks pfn range offlining process in
 * __offline_pages(). Hence this prevents both offlining as well as
 * removal process for boot memory which is initially always online.
 * In future if and when boot memory could be removed, this notifier
 * should be dropped and free_hotplug_page_range() should handle any
 * reserved pages allocated during boot.
 */
static int prevent_bootmem_remove_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct mem_section *ms;
struct memory_notify *arg = data;
unsigned long end_pfn = arg->start_pfn + arg->nr_pages;
unsigned long pfn = arg->start_pfn;
if ((action != MEM_GOING_OFFLINE) && (action != MEM_OFFLINE))
return NOTIFY_OK;
ms = __pfn_to_section(pfn);
if (!early_section(ms))
continue;
if (action == MEM_GOING_OFFLINE) {
/*
 * Boot memory removal is not supported. Prevent
 * it via blocking any attempted offline request
 * for the boot memory and just report it.
*/
pr_warn("Boot memory [%lx %lx] offlining attempted\n", start, end); return NOTIFY_BAD;
} elseif (action == MEM_OFFLINE) { /* * This should have never happened. Boot memory * offlining should have been prevented by this * very notifier. Probably some memory removal * procedure might have changed which would then * require further debug.
*/
pr_err("Boot memory [%lx %lx] offlined\n", start, end);
/*
 * Core memory hotplug does not process a return
 * code from the notifier for MEM_OFFLINE events.
 * The error condition has been reported. Return
 * from here as if ignored.
 */
return NOTIFY_DONE;
}
}
return NOTIFY_OK;
}
/*
 * This ensures that boot memory sections on the platform are online
 * from early boot. Memory sections cannot be prevented from being
 * offlined if, for some reason, they are not online to begin with.
 * This helps validate the basic assumption on which the above memory
 * event notifier works to prevent boot memory section offlining and
 * its possible removal.
 */
static void validate_bootmem_online(void)
{
phys_addr_t start, end, addr;
struct mem_section *ms;
u64 i;
/*
 * Scanning across all memblock might be expensive
 * on some big memory systems. Hence enable this
 * validation only with DEBUG_VM.
 */
if (!IS_ENABLED(CONFIG_DEBUG_VM))
return;
for_each_mem_range(i, &start, &end) {
for (addr = start; addr < end; addr += (1UL << PA_SECTION_SHIFT)) {
ms = __pfn_to_section(PHYS_PFN(addr));
/*
 * All memory ranges in the system at this point
 * should have been marked as early sections.
*/
WARN_ON(!early_section(ms));
/*
 * The memory notifier mechanism used here to prevent boot memory
 * offlining depends on each early memory section on the system being
 * initially online. Otherwise a given memory section which is already
 * offline will be overlooked and can be removed completely. Call out
 * such sections.
 */
if (!online_section(ms))
pr_err("Boot memory [%llx %llx] is offline, can be removed\n",
addr, addr + (1UL << PA_SECTION_SHIFT));
}
}
}
static int __init prevent_bootmem_remove_init(void)
{
int ret = 0;
if (!IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
return ret;
validate_bootmem_online();
ret = register_memory_notifier(&prevent_bootmem_remove_nb);
if (ret)
pr_err("%s: Notifier registration failed %d\n", __func__, ret);
if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) {
/*
 * Break-before-make (BBM) is required for all user space mappings
 * when the permission changes from executable to non-executable
 * on CPUs affected by erratum #2645198.
 */
if (pte_accessible(vma->vm_mm, pte) && pte_user_exec(pte))
__flush_tlb_range(vma, addr, nr * PAGE_SIZE,
PAGE_SIZE, true, 3);
}
/*
 * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
 * avoiding the possibility of conflicting TLB entries being allocated.
 */
void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp)
{
typedef void (ttbr_replace_func)(phys_addr_t);
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
ttbr_replace_func *replace_phys;
unsigned long daif;
/* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
/*
 * We really don't want to take *any* exceptions while TTBR1 is
 * in the process of being replaced so mask everything.
*/
daif = local_daif_save();
replace_phys(ttbr1);
local_daif_restore(daif);
cpu_uninstall_idmap();
}
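/*
 * Note (editor's addition; inferred from the extern declaration above and
 * the trailing cpu_uninstall_idmap(), so treat the details as an
 * assumption): replace_phys is expected to point at the physical alias of
 * idmap_cpu_replace_ttbr1 and to be called while the ID map is installed,
 * so the TTBR1 switch itself runs from an identity-mapped routine.
 */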
#ifdef CONFIG_ARCH_HAS_PKEYS
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val)
{
u64 new_por;
u64 old_por;
if (!system_supports_poe())
return -ENOSPC;
/*
 * This code should only be called with valid 'pkey'
 * values originating from in-kernel users. Complain
 * if a bad value is observed.
 */
if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
return -EINVAL;
/* Set the bits we need in POR: */
new_por = POE_RWX;
if (init_val & PKEY_DISABLE_WRITE)
new_por &= ~POE_W;
if (init_val & PKEY_DISABLE_ACCESS)
new_por &= ~POE_RW;
if (init_val & PKEY_DISABLE_READ)
new_por &= ~POE_R;
if (init_val & PKEY_DISABLE_EXECUTE)
new_por &= ~POE_X;
/* Shift the bits in to the correct place in POR for pkey: */
new_por = POR_ELx_PERM_PREP(pkey, new_por);
/* Get old POR and mask off any old bits in place: */
old_por = read_sysreg_s(SYS_POR_EL0);
old_por &= ~(POE_MASK << POR_ELx_PERM_SHIFT(pkey));
/* Write old part along with new part: */
write_sysreg_s(old_por | new_por, SYS_POR_EL0);
return 0;
}
#endif
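/*
 * Worked example (editor's addition; assumes the usual 4-bit POR fields
 * where POR_ELx_PERM_SHIFT(pkey) == pkey * 4): for pkey == 1 and
 * init_val == PKEY_DISABLE_WRITE, new_por becomes POE_R | POE_X and is
 * placed in bits [7:4] of POR_EL0, while the old_por masking above keeps
 * every other pkey's field unchanged.
 */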