#if CONFIG_PGTABLE_LEVELS > 2
/*
 * Free a PMD page via the mmu_gather batching API, notifying paravirt
 * first so a hypervisor can unpin/release the page.
 */
void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
	paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
	/*
	 * NOTE! For PAE, any changes to the top page-directory-pointer-table
	 * entries need a full cr3 reload to flush.
	 */
#ifdef CONFIG_X86_PAE
	tlb->need_flush_all = 1;
#endif
	tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd));
}
/*
 * List of all pgd's needed for non-PAE so it can invalidate entries
 * in both cached and uncached pgd's; not needed for PAE since the
 * kernel pmd is shared. If PAE were not to share the pmd a similar
 * tactic would be needed. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * -- nyc
 */
#ifdef CONFIG_X86_PAE
/*
 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
 * updating the top-level pagetable entries to guarantee the
 * processor notices the update. Since this is expensive, and
 * all 4 top-level entries are used almost immediately in a
 * new process's life, we just pre-populate them here.
 */
#define PREALLOCATED_PMDS	PTRS_PER_PGD

/*
 * "USER_PMDS" are the PMDs for the user copy of the page tables when
 * PTI is enabled. They do not exist when PTI is disabled. Note that
 * this is distinct from the user _portion_ of the kernel page tables
 * which always exists.
 *
 * We allocate separate PMDs for the kernel part of the user page-table
 * when PTI is enabled. We need them to map the per-process LDT into the
 * user-space page-table.
 */
#define PREALLOCATED_USER_PMDS	 (boot_cpu_has(X86_FEATURE_PTI) ? \
					KERNEL_PGD_PTRS : 0)
#define MAX_PREALLOCATED_USER_PMDS	KERNEL_PGD_PTRS

/*
 * NOTE(review): the function owning the statements below was lost in
 * extraction -- only its tail survives (it installs a PMD into a PAE
 * PDPT entry and flushes). The stray '}' below closes that missing
 * function; this is not free-standing code. TODO: restore from the
 * original file.
 */
	/*
	 * Note: almost everything apart from _PAGE_PRESENT is
	 * reserved at the pmd (PDPT) level.
	 */
	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));

	/*
	 * According to Intel App note "TLBs, Paging-Structure Caches,
	 * and Their Invalidation", April 2007, document 317080-001,
	 * section 8.1: in PAE mode we explicitly have to flush the
	 * TLB via cr3 if the top-level pgd is changed...
	 */
	flush_tlb_mm(mm);
}
#else  /* !CONFIG_X86_PAE */

/* No need to prepopulate any pagetable entries in non-PAE modes. */
#define PREALLOCATED_PMDS	0
#define PREALLOCATED_USER_PMDS	0
#define MAX_PREALLOCATED_USER_PMDS	0
#endif	/* CONFIG_X86_PAE */
/*
 * free_pmds() - release a batch of (pre)allocated PMD pages.
 *
 * NOTE(review): this definition is truncated by extraction -- the loop
 * body ends after the virt_to_ptdesc() call (the destructor/free and the
 * mm_dec_nr_pmds() accounting, plus the closing braces, are missing).
 * The fused "staticvoid" is also an extraction artifact; left byte-
 * identical here. TODO: restore from the original file.
 */
staticvoid free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
{
	int i;
	struct ptdesc *ptdesc;

	for (i = 0; i < count; i++)
		if (pmds[i]) {
			ptdesc = virt_to_ptdesc(pmds[i]);
/*
 * preallocate_pmds() - allocate @count PMD pages up front for a new pgd.
 * @mm:    mm the PMDs are accounted against (init_mm is not memcg-charged)
 * @pmds:  output array; on success all @count slots hold valid PMD pages
 * @count: number of PMD pages to allocate
 *
 * Returns 0 on success. On any allocation/ctor failure every partial
 * allocation is released via free_pmds() and -ENOMEM is returned.
 */
static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
{
	int i;
	bool failed = false;
	gfp_t gfp = GFP_PGTABLE_USER;

	/* Kernel page tables are never charged to a memcg. */
	if (mm == &init_mm)
		gfp &= ~__GFP_ACCOUNT;
	gfp &= ~__GFP_HIGHMEM;

	for (i = 0; i < count; i++) {
		pmd_t *pmd = NULL;
		struct ptdesc *ptdesc = pagetable_alloc(gfp, 0);

		if (!ptdesc)
			failed = true;
		if (ptdesc && !pagetable_pmd_ctor(mm, ptdesc)) {
			pagetable_free(ptdesc);
			ptdesc = NULL;
			failed = true;
		}
		if (ptdesc) {
			mm_inc_nr_pmds(mm);
			pmd = ptdesc_address(ptdesc);
		}
		/* NULL on failure: free_pmds() skips empty slots. */
		pmds[i] = pmd;
	}

	if (failed) {
		free_pmds(mm, pmds, count);
		return -ENOMEM;
	}

	return 0;
}
/*
 * Mop up any pmd pages which may still be attached to the pgd.
 * Normally they will be freed by munmap/exit_mmap, but any pmd we
 * preallocate which never got a corresponding vma will need to be
 * freed manually.
 *
 * NOTE(review): truncated by extraction -- the body ends after computing
 * the pmd pointer (the pgd_clear()/pmd_free() tail and closing braces are
 * missing). "staticvoid" is a fused-token artifact, left byte-identical.
 * TODO: restore from the original file.
 */
staticvoid mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp)
{
	pgd_t pgd = *pgdp;

	if (pgd_val(pgd) != 0) {
		pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
staticinline pgd_t *_pgd_alloc(struct mm_struct *mm)
{ /* * PTI and Xen need a whole page for the PAE PGD * even though the hardware only needs 32 bytes. * * For simplicity, allocate a page for all users.
*/ return __pgd_alloc(mm, pgd_allocation_order());
}
/*
 * NOTE(review): fragment -- the enclosing function (the pgd allocator that
 * preallocates pmds/u_pmds and registers the pgd) lost its header, its
 * error-label tail (out_free_user_pmds) and the matching spin_unlock()
 * during extraction. Do not treat this as free-standing code.
 * TODO: restore from the original file.
 */
	if (paravirt_pgd_alloc(mm) != 0)
		goto out_free_user_pmds;

	/*
	 * Make sure that pre-populating the pmds is atomic with
	 * respect to anything walking the pgd_list, so that they
	 * never see a partially populated pgd.
	 */
	spin_lock(&pgd_lock);

	pgd_ctor(mm, pgd);
	/* Compile-time no-ops when the PREALLOCATED_* arrays are empty. */
	if (sizeof(pmds) != 0)
		pgd_prepopulate_pmd(mm, pgd, pmds);
	if (sizeof(u_pmds) != 0)
		pgd_prepopulate_user_pmd(mm, pgd, u_pmds);
/*
 * Used to set accessed or dirty bits in the page table entries
 * on other architectures. On x86, the accessed and dirty bits
 * are tracked by hardware. However, do_wp_page calls this function
 * to also make the pte writeable at the same time the dirty bit is
 * set. In that case we do actually need to write the PTE.
 */
int ptep_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pte_t *ptep,
			  pte_t entry, int dirty)
{
	int changed = !pte_same(*ptep, entry);

	if (changed && dirty)
		set_pte(ptep, entry);

	return changed;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* As ptep_set_access_flags(), for a huge (PMD-level) mapping. */
int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	int changed = !pmd_same(*pmdp, entry);

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	if (changed && dirty) {
		set_pmd(pmdp, entry);
		/*
		 * We had a write-protection fault here and changed the pmd
		 * to be more permissive. No need to flush the TLB for that,
		 * #PF is architecturally guaranteed to do that and in the
		 * worst-case we'll generate a spurious fault.
		 */
	}

	return changed;
}
int pudp_set_access_flags(struct vm_area_struct *vma, unsignedlong address,
pud_t *pudp, pud_t entry, int dirty)
{ int changed = !pud_same(*pudp, entry);
VM_BUG_ON(address & ~HPAGE_PUD_MASK);
if (changed && dirty) {
set_pud(pudp, entry); /* * We had a write-protection fault here and changed the pud * to to more permissive. No need to flush the TLB for that, * #PF is architecturally guaranteed to do that and in the * worst-case we'll generate a spurious fault.
*/
}
return changed;
} #endif
/*
 * Atomically clear the Accessed bit of a PTE; returns nonzero iff the
 * bit was set. No TLB flush is done here (see ptep_clear_flush_young()).
 */
int ptep_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pte_t *ptep)
{
	int ret = 0;

	if (pte_young(*ptep))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *) &ptep->pte);

	return ret;
}
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
/* As ptep_test_and_clear_young(), for a PMD entry. */
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pmd_t *pmdp)
{
	int ret = 0;

	if (pmd_young(*pmdp))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *)pmdp);

	return ret;
}
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* As ptep_test_and_clear_young(), for a PUD entry. */
int pudp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pud_t *pudp)
{
	int ret = 0;

	if (pud_young(*pudp))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *)pudp);

	return ret;
}
#endif
int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep)
{
	/*
	 * On x86 CPUs, clearing the accessed bit without a TLB flush
	 * doesn't cause data corruption. [ It could cause incorrect
	 * page aging and the (mistaken) reclaim of hot pages, but the
	 * chance of that should be relatively low. ]
	 *
	 * So as a performance optimization don't flush the TLB when
	 * clearing the accessed bit, it will eventually be flushed by
	 * a context switch or a VM operation anyway. [ In the rare
	 * event of it not getting flushed for a long time the delay
	 * shouldn't really matter because there's no real memory
	 * pressure for swapout to react to. ]
	 */
	return ptep_test_and_clear_young(vma, address, ptep);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * NOTE(review): in the extracted text these two functions were fused into
 * one (an int function ending in "return pmdp_establish(...)", a pmd_t,
 * with `young` never returned). Split restored to match the upstream
 * structure -- verify against the original file.
 */
int pmdp_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pmd_t *pmdp)
{
	int young;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	young = pmdp_test_and_clear_young(vma, address, pmdp);
	if (young)
		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);

	return young;
}

pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
			 pmd_t *pmdp)
{
	/*
	 * No flush is necessary. Once an invalid PTE is established, the PTE's
	 * access and dirty bits cannot be updated.
	 */
	return pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp));
}
#endif
/**
 * reserve_top_address - Reserve a hole in the top of the kernel address space
 * @reserve: Size of hole to reserve
 *
 * Can be used to relocate the fixmap area and poke a hole in the top
 * of the kernel address space to make room for a hypervisor.
 */
void __init reserve_top_address(unsigned long reserve)
{
#ifdef CONFIG_X86_32
	/* Must run before any fixmap entries are installed. */
	BUG_ON(fixmaps_set > 0);
	__FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE;
	printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n",
	       -reserve, __FIXADDR_TOP + PAGE_SIZE);
#endif
}
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP #if CONFIG_PGTABLE_LEVELS > 4 /** * p4d_set_huge - Set up kernel P4D mapping * @p4d: Pointer to the P4D entry * @addr: Virtual address associated with the P4D entry * @prot: Protection bits to use * * No 512GB pages yet -- always return 0
*/ int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{ return 0;
}
/** * p4d_clear_huge - Clear kernel P4D mapping when it is set * @p4d: Pointer to the P4D entry to clear * * No 512GB pages yet -- do nothing
*/ void p4d_clear_huge(p4d_t *p4d)
{
} #endif
/**
 * pud_set_huge - Set up kernel PUD mapping
 * @pud: Pointer to the PUD entry
 * @addr: Virtual address associated with the PUD entry
 * @prot: Protection bits to use
 *
 * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this
 * function sets up a huge page only if the complete range has the same MTRR
 * caching mode.
 *
 * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger
 * page mapping attempt fails.
 *
 * Returns 1 on success and 0 on failure.
 *
 * NOTE(review): truncated by extraction -- the tail that actually installs
 * the huge mapping (a set_pte() of a PSE entry) and the "return 1" are
 * missing, as is the closing brace. TODO: restore from the original file.
 */
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	u8 uniform;

	mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
	if (!uniform)
		return 0;

	/* Bail out if we are on a populated non-leaf entry: */
	if (pud_present(*pud) && !pud_leaf(*pud))
		return 0;
/**
 * pmd_set_huge - Set up kernel PMD mapping
 * @pmd: Pointer to the PMD entry
 * @addr: Virtual address associated with the PMD entry
 * @prot: Protection bits to use
 *
 * See text over pud_set_huge() above.
 *
 * Returns 1 on success and 0 on failure.
 *
 * NOTE(review): truncated by extraction -- the tail that installs the
 * huge mapping and the "return 1" are missing, as is the closing brace.
 * TODO: restore from the original file.
 */
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	u8 uniform;

	mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
	if (!uniform) {
		pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
			     __func__, addr, addr + PMD_SIZE);
		return 0;
	}

	/* Bail out if we are on a populated non-leaf entry: */
	if (pmd_present(*pmd) && !pmd_leaf(*pmd))
		return 0;
/**
 * pud_clear_huge - Clear kernel PUD mapping when it is set
 * @pud: Pointer to the PUD entry to clear.
 *
 * Returns 1 on success and 0 on failure (no PUD map is found).
 */
int pud_clear_huge(pud_t *pud)
{
	if (pud_leaf(*pud)) {
		pud_clear(pud);
		return 1;
	}

	return 0;
}
/**
 * pmd_clear_huge - Clear kernel PMD mapping when it is set
 * @pmd: Pointer to the PMD entry to clear.
 *
 * Returns 1 on success and 0 on failure (no PMD map is found).
 */
int pmd_clear_huge(pmd_t *pmd)
{
	if (pmd_leaf(*pmd)) {
		pmd_clear(pmd);
		return 1;
	}

	return 0;
}
#ifdef CONFIG_X86_64
/**
 * pud_free_pmd_page - Clear PUD entry and free PMD page
 * @pud: Pointer to a PUD
 * @addr: Virtual address associated with PUD
 *
 * Context: The PUD range has been unmapped and TLB purged.
 * Return: 1 if clearing the entry succeeded. 0 otherwise.
 *
 * NOTE: Callers must allow a single page allocation.
 */
int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
	pmd_t *pmd, *pmd_sv;
	pte_t *pte;
	int i;

	/*
	 * NOTE(review): in the extracted text pmd/pmd_sv were used without
	 * ever being initialized -- the lines below were restored from the
	 * upstream implementation; verify against the original file.
	 */
	pmd = pud_pgtable(*pud);
	pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
	if (!pmd_sv)
		return 0;

	/* Save the entries, then detach them all from the live table. */
	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd_sv[i] = pmd[i];
		if (!pmd_none(pmd[i]))
			pmd_clear(&pmd[i]);
	}

	pud_clear(pud);

	/* INVLPG to clear all paging-structure caches */
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);

	/* Now that nothing references them, free the saved PTE pages. */
	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (!pmd_none(pmd_sv[i])) {
			pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
			pte_free_kernel(&init_mm, pte);
		}
	}

	free_page((unsigned long)pmd_sv);

	pmd_free(&init_mm, pmd);

	return 1;
}
/**
 * pmd_free_pte_page - Clear PMD entry and free PTE page.
 * @pmd: Pointer to the PMD
 * @addr: Virtual address associated with PMD
 *
 * Context: The PMD range has been unmapped and TLB purged.
 * Return: 1 if clearing the entry succeeded. 0 otherwise.
 */
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	pte_t *pte;

	/*
	 * NOTE(review): in the extracted text `pte` was freed without ever
	 * being assigned -- the pmd_none() check and the assignment below
	 * were restored from the upstream implementation; verify against
	 * the original file.
	 */
	if (pmd_none(*pmd))
		return 1;

	pte = (pte_t *)pmd_page_vaddr(*pmd);
	pmd_clear(pmd);

	/* INVLPG to clear all paging-structure caches */
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);

	pte_free_kernel(&init_mm, pte);

	return 1;
}
#else /* !CONFIG_X86_64 */

/*
 * Disable free page handling on x86-PAE. This assures that ioremap()
 * does not update sync'd PMD entries. See vmalloc_sync_one().
 */
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	return pmd_none(*pmd);
}
void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte)
{
	/*
	 * Hardware before shadow stack can (rarely) set Dirty=1 on a
	 * Write=0 PTE, so a shadow-stack-looking PTE only indicates a
	 * software bug when the HW supports shadow stacks. That check
	 * is covered in pte_shstk().
	 */
	VM_WARN_ON_ONCE(pte_shstk(pte) &&
			!(vma->vm_flags & VM_SHADOW_STACK));
}
void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd)
{
	/* See note in arch_check_zapped_pte() */
	VM_WARN_ON_ONCE(pmd_shstk(pmd) &&
			!(vma->vm_flags & VM_SHADOW_STACK));
}
void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud)
{
	/* See note in arch_check_zapped_pte() */
	VM_WARN_ON_ONCE(pud_shstk(pud) &&
			!(vma->vm_flags & VM_SHADOW_STACK));
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.