/*
 * empty_zero_page is a special page that is used for
 * zero-initialized data and COW.
 *
 * The pointer is exported through EXPORT_SYMBOL() right below.
 */
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);
/*
 * The pmd table for the upper-most set of pages.
 * paging_init() assigns it from pmd_off_k(0xffff0000).
 */
pmd_t *top_pmd;
/*
 * Initialise the cache_policy variable with the initial state specified
 * via the "pmd" value.  This is used to ensure that on ARMv6 and later,
 * the C code sets the page tables up with the same policy as the head
 * assembly code, which avoids an illegal state where the TLBs can get
 * confused.  See comments in early_cachepolicy() for more information.
 *
 * @pmd: initial section descriptor value.  It is stored unmodified in
 *       initial_pmd_value; only the bits selected by PMD_SECT_CACHE_MASK
 *       are used to look up the matching entry in cache_policies[].
 *
 * On a match, cachepolicy is set to the index of the matching entry.
 * If no entry matches, cachepolicy is left untouched and an error is
 * logged.
 */
void __init init_default_cache_policy(unsigned long pmd)
{
	int i;

	/* Keep the full value; later setup code consults bits outside the mask. */
	initial_pmd_value = pmd;

	pmd &= PMD_SECT_CACHE_MASK;

	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
		if (cache_policies[i].pmd == pmd) {
			cachepolicy = i;
			break;
		}
	}

	/* The loop ran to completion, so nothing matched. */
	if (i == ARRAY_SIZE(cache_policies))
		pr_err("ERROR: could not find cache policy\n");
}
/* * These are useful for identifying cache coherency problems by allowing * the cache or the cache and writebuffer to be turned off. (Note: the * write buffer should not be on and the cache off).
*/ staticint __init early_cachepolicy(char *p)
{ int i, selected = -1;
for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { int len = strlen(cache_policies[i].policy);
if (selected == -1)
pr_err("ERROR: unknown or unsupported cache policy\n");
/* * This restriction is partly to do with the way we boot; it is * unpredictable to have memory mapped using two different sets of * memory attributes (shared, type, and cache attribs). We can not * change these attributes once the initial assembly has setup the * page tables.
*/ if (cpu_architecture() >= CPU_ARCH_ARMv6 && selected != cachepolicy) {
pr_warn("Only cachepolicy=%s supported on ARMv6 and later\n",
cache_policies[cachepolicy].policy); return 0;
}
/* * The early fixmap range spans multiple pmds, for which * we are not prepared:
*/
BUILD_BUG_ON((__fix_to_virt(__end_of_early_ioremap_region) >> PMD_SHIFT)
!= FIXADDR_TOP >> PMD_SHIFT);
/* * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range(). * As a result, this can only be called with preemption disabled, as under * stop_machine().
*/ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{ unsignedlong vaddr = __fix_to_virt(idx);
pte_t *pte = pte_offset_fixmap(pmd_off_k(vaddr), vaddr);
/* Make sure fixmap region does not exceed available allocation. */
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) < FIXADDR_START);
BUG_ON(idx >= __end_of_fixed_addresses);
/* We support only device mappings before pgprot_kernel is set. */ if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) &&
pgprot_val(prot) && pgprot_val(pgprot_kernel) == 0)) return;
/* * Adjust the PMD section entries according to the CPU in use.
*/ staticvoid __init build_mem_type_table(void)
{ struct cachepolicy *cp; unsignedint cr = get_cr();
pteval_t user_pgprot, kern_pgprot, vecs_pgprot; int cpu_arch = cpu_architecture(); int i;
if (cpu_arch < CPU_ARCH_ARMv6) { #ifdefined(CONFIG_CPU_DCACHE_DISABLE) if (cachepolicy > CPOLICY_BUFFERED)
cachepolicy = CPOLICY_BUFFERED; #elifdefined(CONFIG_CPU_DCACHE_WRITETHROUGH) if (cachepolicy > CPOLICY_WRITETHROUGH)
cachepolicy = CPOLICY_WRITETHROUGH; #endif
} if (cpu_arch < CPU_ARCH_ARMv5) { if (cachepolicy >= CPOLICY_WRITEALLOC)
cachepolicy = CPOLICY_WRITEBACK;
ecc_mask = 0;
}
if (is_smp()) { if (cachepolicy != CPOLICY_WRITEALLOC) {
pr_warn("Forcing write-allocate cache policy for SMP\n");
cachepolicy = CPOLICY_WRITEALLOC;
} if (!(initial_pmd_value & PMD_SECT_S)) {
pr_warn("Forcing shared mappings for SMP\n");
initial_pmd_value |= PMD_SECT_S;
}
}
/* * Strip out features not present on earlier architectures. * Pre-ARMv5 CPUs don't have TEX bits. Pre-ARMv6 CPUs or those * without extended page tables don't have the 'Shared' bit.
*/ if (cpu_arch < CPU_ARCH_ARMv5) for (i = 0; i < ARRAY_SIZE(mem_types); i++)
mem_types[i].prot_sect &= ~PMD_SECT_TEX(7); if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3()) for (i = 0; i < ARRAY_SIZE(mem_types); i++)
mem_types[i].prot_sect &= ~PMD_SECT_S;
/* * ARMv5 and lower, bit 4 must be set for page tables (was: cache * "update-able on write" bit on ARM610). However, Xscale and * Xscale3 require this bit to be cleared.
*/ if (cpu_is_xscale_family()) { for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
mem_types[i].prot_sect &= ~PMD_BIT4;
mem_types[i].prot_l1 &= ~PMD_BIT4;
}
} elseif (cpu_arch < CPU_ARCH_ARMv6) { for (i = 0; i < ARRAY_SIZE(mem_types); i++) { if (mem_types[i].prot_l1)
mem_types[i].prot_l1 |= PMD_BIT4; if (mem_types[i].prot_sect)
mem_types[i].prot_sect |= PMD_BIT4;
}
}
/* * Mark the device areas according to the CPU/architecture.
*/ if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) { if (!cpu_is_xsc3()) { /* * Mark device regions on ARMv6+ as execute-never * to prevent speculative instruction fetches.
*/
mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
/* Also setup NX memory mapping */
mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN;
mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN;
} if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { /* * For ARMv7 with TEX remapping, * - shared device is SXCB=1100 * - nonshared device is SXCB=0100 * - write combine device mem is SXCB=0001 * (Uncached Normal memory)
*/
mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
} elseif (cpu_is_xsc3()) { /* * For Xscale3, * - shared device is TEXCB=00101 * - nonshared device is TEXCB=01000 * - write combine device mem is TEXCB=00100 * (Inner/Outer Uncacheable in xsc3 parlance)
*/
mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
} else { /* * For ARMv6 and ARMv7 without TEX remapping, * - shared device is TEXCB=00001 * - nonshared device is TEXCB=01000 * - write combine device mem is TEXCB=00100 * (Uncached Normal in ARMv6 parlance).
*/
mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
}
} else { /* * On others, write combining is "Uncached/Buffered"
*/
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
}
/* * Now deal with the memory-type mappings
*/
cp = &cache_policies[cachepolicy];
vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
#ifndef CONFIG_ARM_LPAE /* * We don't use domains on ARMv6 (since this causes problems with * v6/v7 kernels), so we must use a separate memory type for user * r/o, kernel r/w to map the vectors page.
*/ if (cpu_arch == CPU_ARCH_ARMv6)
vecs_pgprot |= L_PTE_MT_VECTORS;
/* * Check whether the CPU supports the PXN bit * in the Short-descriptor translation table format descriptors.
*/ if (cpu_arch == CPU_ARCH_ARMv7 &&
(read_cpuid_ext(CPUID_EXT_MMFR0) & 0xF) >= 4) {
user_pmd_table |= PMD_PXNTABLE;
} #endif
/* * ARMv6 and above have extended page tables.
*/ if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { #ifndef CONFIG_ARM_LPAE /* * Mark cache clean areas and XIP ROM read only * from SVC mode and no access from userspace.
*/
mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; #endif
/* * If the initial page tables were created with the S bit * set, then we need to do the same here for the same * reasons given in early_cachepolicy().
*/ if (initial_pmd_value & PMD_SECT_S) {
user_pgprot |= L_PTE_SHARED;
kern_pgprot |= L_PTE_SHARED;
vecs_pgprot |= L_PTE_SHARED;
mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;
}
}
/* * Non-cacheable Normal - intended for memory areas that must * not cause dirty cache line writebacks when used
*/ if (cpu_arch >= CPU_ARCH_ARMv6) { if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { /* Non-cacheable Normal is XCB = 001 */
mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
PMD_SECT_BUFFERED;
} else { /* For both ARMv6 and non-TEX-remapping ARMv7 */
mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
PMD_SECT_TEX(1);
}
} else {
mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
}
#ifdef CONFIG_ARM_LPAE /* * Do not generate access flag faults for the kernel mappings.
*/ for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
mem_types[i].prot_pte |= PTE_EXT_AF; if (mem_types[i].prot_sect)
mem_types[i].prot_sect |= PMD_SECT_AF;
}
kern_pgprot |= PTE_EXT_AF;
vecs_pgprot |= PTE_EXT_AF;
/* * Set PXN for user mappings
*/
user_pgprot |= PTE_EXT_PXN; #endif
for (i = 0; i < 16; i++) {
pteval_t v = pgprot_val(protection_map[i]);
protection_map[i] = __pgprot(v | user_pgprot);
}
#ifndef CONFIG_ARM_LPAE /* * In classic MMU format, puds and pmds are folded in to * the pgds. pmd_offset gives the PGD entry. PGDs refer to a * group of L1 entries making up one logical pointer to * an L2 table (2MB), where as PMDs refer to the individual * L1 entries (1MB). Hence increment to get the correct * offset for odd 1MB sections. * (See arch/arm/include/asm/pgtable-2level.h)
*/ if (addr & SECTION_SIZE)
pmd++; #endif do {
*pmd = __pmd(phys | type->prot_sect | (ng ? PMD_SECT_nG : 0));
phys += SECTION_SIZE;
} while (pmd++, addr += SECTION_SIZE, addr != end);
if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
pr_err("MM: CPU does not support supersection mapping for 0x%08llx at 0x%08lx\n",
(longlong)__pfn_to_phys((u64)md->pfn), addr); return;
}
/* N.B. ARMv6 supersections are only defined to work with domain 0. * Since domain assignments can in fact be arbitrary, the * 'domain == 0' check below is required to ensure that ARMv6 * supersections are only allocated for domain 0 regardless * of the actual domain assignments in use.
*/ if (type->domain) {
pr_err("MM: invalid domain in supersection mapping for 0x%08llx at 0x%08lx\n",
(longlong)__pfn_to_phys((u64)md->pfn), addr); return;
}
if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
pr_err("MM: cannot create mapping for 0x%08llx at 0x%08lx invalid alignment\n",
(longlong)__pfn_to_phys((u64)md->pfn), addr); return;
}
/* * Shift bits [35:32] of address into bits [23:20] of PMD * (See ARMv6 spec).
*/
phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);
pgd = pgd_offset(mm, addr);
end = addr + length; do {
p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud = pud_offset(p4d, addr);
pmd_t *pmd = pmd_offset(pud, addr); int i;
for (i = 0; i < 16; i++)
*pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER |
(ng ? PMD_SECT_nG : 0));
if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
pr_warn("BUG: map for 0x%08llx at 0x%08lx can not be mapped using pages, ignoring.\n",
(longlong)__pfn_to_phys(md->pfn), addr); return;
}
pgd = pgd_offset(mm, addr);
end = addr + length; do { unsignedlong next = pgd_addr_end(addr, end);
phys += next - addr;
addr = next;
} while (pgd++, addr != end);
}
/* * Create the page directory entries and any necessary * page tables for the mapping specified by `md'. We * are able to cope here with varying sizes and address * offsets, and we take full advantage of sections and * supersections.
*/ staticvoid __init create_mapping(struct map_desc *md)
{ if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
pr_warn("BUG: not creating mapping for 0x%08llx at 0x%08lx in user region\n",
(longlong)__pfn_to_phys((u64)md->pfn), md->virtual); return;
}
if (md->type == MT_DEVICE &&
md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START &&
(md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n",
(longlong)__pfn_to_phys((u64)md->pfn), md->virtual);
}
/* * The Linux PMD is made of two consecutive section entries covering 2MB * (see definition in include/asm/pgtable-2level.h). However a call to * create_mapping() may optimize static mappings by using individual * 1MB section mappings. This leaves the actual PMD potentially half * initialized if the top or bottom section entry isn't used, leaving it * open to problems if a subsequent ioremap() or vmalloc() tries to use * the virtual space left free by that unused section entry. * * Let's avoid the issue by inserting dummy vm entries covering the unused * PMD halves once the static mappings are in place.
*/
list_for_each_entry(svm, &static_vmlist, list) {
vm = &svm->vm;
addr = (unsignedlong)vm->addr; if (addr < next) continue;
/* * Check if this vm starts on an odd section boundary. * If so and the first section entry for this PMD is free * then we block the corresponding virtual address.
*/ if ((addr & ~PMD_MASK) == SECTION_SIZE) {
pmd = pmd_off_k(addr); if (pmd_none(*pmd))
pmd_empty_section_gap(addr & PMD_MASK);
}
/* * Then check if this vm ends on an odd section boundary. * If so and the second section entry for this PMD is empty * then we block the corresponding virtual address.
*/
addr += vm->size; if ((addr & ~PMD_MASK) == SECTION_SIZE) {
pmd = pmd_off_k(addr) + 1; if (pmd_none(*pmd))
pmd_empty_section_gap(addr);
}
/* no need to look at any vm entry until we hit the next PMD */
next = (addr + PMD_SIZE - 1) & PMD_MASK;
}
}
#else #define fill_pmd_gaps() do { } while (0) #endif
/* * vmalloc=size forces the vmalloc area to be exactly 'size' * bytes. This can be used to increase (or decrease) the vmalloc * area - the default is 240MiB.
*/ staticint __init early_vmalloc(char *arg)
{ unsignedlong vmalloc_reserve = memparse(arg, NULL); unsignedlong vmalloc_max;
if (vmalloc_reserve < SZ_16M) {
vmalloc_reserve = SZ_16M;
pr_warn("vmalloc area is too small, limiting to %luMiB\n",
vmalloc_reserve >> 20);
}
vmalloc_max = VMALLOC_END - (PAGE_OFFSET + SZ_32M + VMALLOC_OFFSET); if (vmalloc_reserve > vmalloc_max) {
vmalloc_reserve = vmalloc_max;
pr_warn("vmalloc area is too big, limiting to %luMiB\n",
vmalloc_reserve >> 20);
}
/* * Let's use our own (unoptimized) equivalent of __pa() that is * not affected by wrap-arounds when sizeof(phys_addr_t) == 4. * The result is used as the upper bound on physical memory address * and may itself be outside the valid range for which phys_addr_t * and therefore __pa() is defined.
*/
vmalloc_limit = (u64)VMALLOC_END - vmalloc_size - VMALLOC_OFFSET -
PAGE_OFFSET + PHYS_OFFSET;
/* * The first usable region must be PMD aligned. Mark its start * as MEMBLOCK_NOMAP if it isn't
*/
for_each_mem_range(i, &block_start, &block_end) { if (!IS_ALIGNED(block_start, PMD_SIZE)) {
phys_addr_t len;
for_each_mem_range(i, &block_start, &block_end) { if (block_start < vmalloc_limit) { if (block_end > lowmem_limit) /* * Compare as u64 to ensure vmalloc_limit does * not get truncated. block_end should always * fit in phys_addr_t so there should be no * issue with assignment.
*/
lowmem_limit = min_t(u64,
vmalloc_limit,
block_end);
/* * Find the first non-pmd-aligned page, and point * memblock_limit at it. This relies on rounding the * limit down to be pmd-aligned, which happens at the * end of this function. * * With this algorithm, the start or end of almost any * bank can be non-pmd-aligned. The only exception is * that the start of the bank 0 must be section- * aligned, since otherwise memory would need to be * allocated when mapping the start of bank 0, which * occurs before any free memory is mapped.
*/ if (!memblock_limit) { if (!IS_ALIGNED(block_start, PMD_SIZE))
memblock_limit = block_start; elseif (!IS_ALIGNED(block_end, PMD_SIZE))
memblock_limit = lowmem_limit;
}
}
}
arm_lowmem_limit = lowmem_limit;
high_memory = __va(arm_lowmem_limit - 1) + 1;
if (!memblock_limit)
memblock_limit = arm_lowmem_limit;
/* * Round the memblock limit down to a pmd size. This * helps to ensure that we will allocate memory from the * last full pmd, which should be mapped.
*/
memblock_limit = round_down(memblock_limit, PMD_SIZE);
if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) { if (memblock_end_of_DRAM() > arm_lowmem_limit) {
phys_addr_t end = memblock_end_of_DRAM();
pr_notice("Ignoring RAM at %pa-%pa\n",
&memblock_limit, &end);
pr_notice("Consider using a HIGHMEM enabled kernel.\n");
memblock_remove(memblock_limit, end - memblock_limit);
}
}
/* * Clear out all the mappings below the kernel image.
*/ #ifdef CONFIG_KASAN /* * KASan's shadow memory inserts itself between the TASK_SIZE * and MODULES_VADDR. Do not clear the KASan shadow memory mappings.
*/ for (addr = 0; addr < KASAN_SHADOW_START; addr += PMD_SIZE)
pmd_clear(pmd_off_k(addr)); /* * Skip over the KASan shadow area. KASAN_SHADOW_END is sometimes * equal to MODULES_VADDR and then we exit the pmd clearing. If we * are using a thumb-compiled kernel, there there will be 8MB more * to clear as KASan always offset to 16 MB below MODULES_VADDR.
*/ for (addr = KASAN_SHADOW_END; addr < MODULES_VADDR; addr += PMD_SIZE)
pmd_clear(pmd_off_k(addr)); #else for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
pmd_clear(pmd_off_k(addr)); #endif
#ifdef CONFIG_XIP_KERNEL /* The XIP kernel is mapped in the module area -- skip over it */
addr = ((unsignedlong)_exiprom + PMD_SIZE - 1) & PMD_MASK; #endif for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
pmd_clear(pmd_off_k(addr));
/* * Find the end of the first block of lowmem.
*/
end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; if (end >= arm_lowmem_limit)
end = arm_lowmem_limit;
/* * Clear out all the kernel space mappings, except for the first * memory bank, up to the vmalloc region.
*/ for (addr = __phys_to_virt(end);
addr < VMALLOC_START; addr += PMD_SIZE)
pmd_clear(pmd_off_k(addr));
}
/*
 * Size in bytes of the swapper page directory; used by
 * arm_mm_memblock_reserve() when reserving swapper_pg_dir.
 */
#ifdef CONFIG_ARM_LPAE
/* the first page is reserved for pgd */
#define SWAPPER_PG_DIR_SIZE	(PAGE_SIZE + \
				 PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t))
#else
#define SWAPPER_PG_DIR_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
#endif
/*
 * Reserve the special regions of memory.
 *
 * Always reserves SWAPPER_PG_DIR_SIZE bytes at the physical address of
 * swapper_pg_dir.  With CONFIG_SA1111 it additionally reserves everything
 * from PHYS_OFFSET up to the start of swapper_pg_dir.
 */
void __init arm_mm_memblock_reserve(void)
{
	/*
	 * Reserve the page tables.  These are already in use,
	 * and can only be in node 0.
	 */
	memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);

#ifdef CONFIG_SA1111
	/*
	 * Because of the SA1111 DMA bug, we want to preserve our
	 * precious DMA-able memory...
	 */
	memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
#endif
}
/* * Set up the device mappings. Since we clear out the page tables for all * mappings above VMALLOC_START, except early fixmap, we might remove debug * device mappings. This means earlycon can be used to debug this function. * Any other function or debugging method which may touch any device _will_ * crash the kernel.
*/ staticvoid __init devicemaps_init(conststruct machine_desc *mdesc)
{ struct map_desc map; unsignedlong addr; void *vectors;
/* * Create a mapping for the machine vectors at the high-vectors * location (0xffff0000). If we aren't using high-vectors, also * create a mapping at the low-vectors virtual address.
*/
map.pfn = __phys_to_pfn(virt_to_phys(vectors));
map.virtual = 0xffff0000;
map.length = PAGE_SIZE; #ifdef CONFIG_KUSER_HELPERS
map.type = MT_HIGH_VECTORS; #else
map.type = MT_LOW_VECTORS; #endif
create_mapping(&map);
/* * Ask the machine support to map in the statically mapped devices.
*/ if (mdesc->map_io)
mdesc->map_io(); else
debug_ll_io_init();
fill_pmd_gaps();
/* Reserve fixed i/o space in VMALLOC region */
pci_reserve_io();
/* * Finally flush the caches and tlb to ensure that we're in a * consistent state wrt the writebuffer. This also ensures that * any write-allocated cache lines in the vector page are written * back. After this point, we can start to touch devices again.
*/
local_flush_tlb_all();
flush_cache_all();
/* Map all the lowmem memory banks. */
for_each_mem_range(i, &start, &end) { struct map_desc map;
pr_debug("map lowmem start: 0x%08llx, end: 0x%08llx\n",
(longlong)start, (longlong)end); if (end > arm_lowmem_limit)
end = arm_lowmem_limit; if (start >= end) break;
/* * If our kernel image is in the VMALLOC area we need to remove * the kernel physical memory from lowmem since the kernel will * be mapped separately. * * The kernel will typically be at the very start of lowmem, * but any placement relative to memory ranges is possible. * * If the memblock contains the kernel, we have to chisel out * the kernel memory from it and map each part separately. We * get 6 different theoretical cases: * * +--------+ +--------+ * +-- start --+ +--------+ | Kernel | | Kernel | * | | | Kernel | | case 2 | | case 5 | * | | | case 1 | +--------+ | | +--------+ * | Memory | +--------+ | | | Kernel | * | range | +--------+ | | | case 6 | * | | | Kernel | +--------+ | | +--------+ * | | | case 3 | | Kernel | | | * +-- end ----+ +--------+ | case 4 | | | * +--------+ +--------+
*/
/* Case 5: kernel covers range, don't map anything, should be rare */ if ((start > kernel_sec_start) && (end < kernel_sec_end)) break;
/* Cases where the kernel is starting inside the range */ if ((kernel_sec_start >= start) && (kernel_sec_start <= end)) { /* Case 6: kernel is embedded in the range, we need two mappings */ if ((start < kernel_sec_start) && (end > kernel_sec_end)) { /* Map memory below the kernel */
map.pfn = __phys_to_pfn(start);
map.virtual = __phys_to_virt(start);
map.length = kernel_sec_start - start;
map.type = MT_MEMORY_RW;
create_mapping(&map); /* Map memory above the kernel */
map.pfn = __phys_to_pfn(kernel_sec_end);
map.virtual = __phys_to_virt(kernel_sec_end);
map.length = end - kernel_sec_end;
map.type = MT_MEMORY_RW;
create_mapping(&map); break;
} /* Case 1: kernel and range start at the same address, should be common */ if (kernel_sec_start == start)
start = kernel_sec_end; /* Case 3: kernel and range end at the same address, should be rare */ if (kernel_sec_end == end)
end = kernel_sec_start;
} elseif ((kernel_sec_start < start) && (kernel_sec_end > start) && (kernel_sec_end < end)) { /* Case 2: kernel ends inside range, starts below it */
start = kernel_sec_end;
} elseif ((kernel_sec_start > start) && (kernel_sec_start < end) && (kernel_sec_end > end)) { /* Case 4: kernel starts inside range, ends above it */
end = kernel_sec_start;
}
map.pfn = __phys_to_pfn(start);
map.virtual = __phys_to_virt(start);
map.length = end - start;
map.type = MT_MEMORY_RW;
create_mapping(&map);
}
}
/*
 * Create the section mappings for the kernel image via create_mapping().
 *
 * With CONFIG_XIP_KERNEL the XIP ROM is mapped at MODULES_VADDR as MT_ROM
 * and the whole [kernel_sec_start, kernel_sec_end) range is mapped as
 * MT_MEMORY_RW.  Otherwise the range is split at the section-rounded
 * __init_end: the lower part is mapped MT_MEMORY_RWX, the remainder (if
 * any) MT_MEMORY_RW.
 */
static void __init map_kernel(void)
{
	/*
	 * We use the well known kernel section start and end and split the area in the
	 * middle like this:
	 *  .                .
	 *  | RW memory      |
	 *  +----------------+ kernel_x_start
	 *  | Executable     |
	 *  | kernel memory  |
	 *  +----------------+ kernel_x_end / kernel_nx_start
	 *  | Non-executable |
	 *  | kernel memory  |
	 *  +----------------+ kernel_nx_end
	 *  | RW memory      |
	 *  .                .
	 *
	 * Notice that we are dealing with section sized mappings here so all of this
	 * will be bumped to the closest section boundary. This means that some of the
	 * non-executable part of the kernel memory is actually mapped as executable.
	 * This will only persist until we turn on proper memory management later on
	 * and we remap the whole kernel with page granularity.
	 */
#ifdef CONFIG_XIP_KERNEL
	phys_addr_t kernel_nx_start = kernel_sec_start;
#else
	phys_addr_t kernel_x_start = kernel_sec_start;
	phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
	phys_addr_t kernel_nx_start = kernel_x_end;
#endif
	phys_addr_t kernel_nx_end = kernel_sec_end;
	struct map_desc map;

	/*
	 * Map the kernel if it is XIP.
	 * It is always first in the module area.
	 */
#ifdef CONFIG_XIP_KERNEL
	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
	map.virtual = MODULES_VADDR;
	/* Round the ROM image length up to a whole number of sections. */
	map.length = ((unsigned long)_exiprom - map.virtual + ~SECTION_MASK) & SECTION_MASK;
	map.type = MT_ROM;
	create_mapping(&map);
#else
	map.pfn = __phys_to_pfn(kernel_x_start);
	map.virtual = __phys_to_virt(kernel_x_start);
	map.length = kernel_x_end - kernel_x_start;
	map.type = MT_MEMORY_RWX;
	create_mapping(&map);

	/* If the nx part is small it may end up covered by the tail of the RWX section */
	if (kernel_x_end == kernel_nx_end)
		return;
#endif
	map.pfn = __phys_to_pfn(kernel_nx_start);
	map.virtual = __phys_to_virt(kernel_nx_start);
	map.length = kernel_nx_end - kernel_nx_start;
	map.type = MT_MEMORY_RW;
	create_mapping(&map);
}
/*
 * early_paging_init() recreates boot time page table setup, allowing machines
 * to switch over to a high (>4G) address space on LPAE systems.
 *
 * @mdesc: machine descriptor.  Its optional pv_fixup() hook returns the
 *         offset to add to the physical address space.
 *
 * Nothing is done when the machine provides no pv_fixup() hook or when
 * the hook returns 0.
 */
static void __init early_paging_init(const struct machine_desc *mdesc)
{
	pgtables_remap *lpae_pgtables_remap;
	unsigned long pa_pgd;
	u32 cr, ttbcr, tmp;
	long long offset;

	if (!mdesc->pv_fixup)
		return;

	offset = mdesc->pv_fixup();
	if (offset == 0)
		return;

	/*
	 * Offset the kernel section physical offsets so that the kernel
	 * mapping will work out later on.
	 */
	kernel_sec_start += offset;
	kernel_sec_end += offset;

	/*
	 * Get the address of the remap function in the 1:1 identity
	 * mapping setup by the early page table assembly code.  We
	 * must get this prior to the pv update.  The following barrier
	 * ensures that this is complete before we fixup any P:V offsets.
	 */
	lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm);
	pa_pgd = __pa(swapper_pg_dir);
	barrier();

	pr_info("Switching physical address space to 0x%08llx\n",
		(u64)PHYS_OFFSET + offset);

	/* Re-set the phys pfn offset, and the pv offset */
	__pv_offset += offset;
	__pv_phys_pfn_offset += PFN_DOWN(offset);

	/* Run the patch stub to update the constants */
	fixup_pv_table(&__pv_table_begin,
		(&__pv_table_end - &__pv_table_begin) << 2);

	/*
	 * We are changing not only the virtual to physical mapping, but also
	 * the physical addresses used to access memory.  We need to flush
	 * all levels of cache in the system with caching disabled to
	 * ensure that all data is written back, and nothing is prefetched
	 * into the caches.  We also need to prevent the TLB walkers
	 * allocating into the caches too.  Note that this is ARMv7 LPAE
	 * specific.
	 */
	cr = get_cr();
	set_cr(cr & ~(CR_I | CR_C));
	ttbcr = cpu_get_ttbcr();
	/* Disable all kind of caching of the translation table */
	tmp = ttbcr & ~(TTBCR_ORGN0_MASK | TTBCR_IRGN0_MASK);
	cpu_set_ttbcr(tmp);
	flush_cache_all();

	/*
	 * Fixup the page tables - this must be in the idmap region as
	 * we need to disable the MMU to do this safely, and hence it
	 * needs to be assembly.  It's fairly simple, as we're using the
	 * temporary tables setup by the initial assembly code.
	 */
	lpae_pgtables_remap(offset, pa_pgd);

	/* Re-enable the caches and cacheable TLB walks */
	cpu_set_ttbcr(ttbcr);
	set_cr(cr);
}
offset = mdesc->pv_fixup(); if (offset == 0) return;
pr_crit("Physical address space modification is only to support Keystone2.\n");
pr_crit("Please enable ARM_LPAE and ARM_PATCH_PHYS_VIRT support to use this\n");
pr_crit("feature. Your kernel may crash now, have a good day.\n");
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
}
#endif
staticvoid __init early_fixmap_shutdown(void)
{ int i; unsignedlong va = fix_to_virt(__end_of_permanent_fixed_addresses - 1);
/* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables.
*/ void __init paging_init(conststruct machine_desc *mdesc)
{ void *zero_page;
#ifdef CONFIG_XIP_KERNEL /* Store the kernel RW RAM region start/end in these variables */
kernel_sec_start = CONFIG_PHYS_OFFSET & SECTION_MASK;
kernel_sec_end = round_up(__pa(_end), SECTION_SIZE); #endif
pr_debug("physical kernel sections: 0x%08llx-0x%08llx\n",
kernel_sec_start, kernel_sec_end);
prepare_page_table();
map_lowmem();
memblock_set_current_limit(arm_lowmem_limit);
pr_debug("lowmem limit is %08llx\n", (longlong)arm_lowmem_limit); /* * After this point early_alloc(), i.e. the memblock allocator, can * be used
*/
map_kernel();
dma_contiguous_remap();
early_fixmap_shutdown();
devicemaps_init(mdesc);
kmap_init();
tcm_init();
top_pmd = pmd_off_k(0xffff0000);
/* allocate the zero page. */
zero_page = early_alloc(PAGE_SIZE);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.