#define _PAGE_PTE		0x4000000000000000UL	/* distinguishes PTEs from pointers */
#define _PAGE_PRESENT		0x8000000000000000UL	/* pte contains a translation */
/*
 * We need to mark a pmd pte invalid while splitting. We can do that by clearing
 * the _PAGE_PRESENT bit. But then that will be taken as a swap pte. In order to
 * differentiate between the two, use a SW field when invalidating.
 *
 * We do the same temporary invalidation for a regular pte entry in
 * ptep_set_access_flags.
 *
 * This is used only when _PAGE_PRESENT is cleared.
 */
#define _PAGE_INVALID		_RPAGE_SW0
/*
 * Top and bottom bits of RPN which can be used by hash
 * translation mode, because we expect them to be zero
 * otherwise.
 */
#define _RPAGE_RPN0		0x01000
#define _RPAGE_RPN1		0x02000
#define _RPAGE_RPN43		0x0080000000000000UL
#define _RPAGE_RPN42		0x0040000000000000UL
#define _RPAGE_RPN41		0x0020000000000000UL
/* Highest physical address bit representable in a radix page table entry. */
#define _RPAGE_PA_MAX		56
/*
 * Highest physical address bit Linux will use for now.
 *
 * This is mostly a hardware limitation; Power9, for instance, has a
 * 51 bit limit.
 *
 * Do not confuse this with MAX_PHYSMEM_BITS, the number of physical bits
 * required to address the last byte of memory. MAX_PHYSMEM_BITS is a Linux
 * limitation imposed by the maximum number of sections we can support
 * (SECTIONS_SHIFT).
 *
 * It is also distinct from the radix page table limit (_RPAGE_PA_MAX) and
 * must always stay below it, so that the bits between _RPAGE_PA_MAX and
 * _PAGE_PA_MAX can be overloaded for hash Linux page table specific bits.
 *
 * To stay compatible with future hardware generations some headroom is
 * kept, and the limit is set to 53 for now.
 */
#define _PAGE_PA_MAX		53
/*
 * Drivers request a cache inhibited pte mapping using _PAGE_NO_CACHE.
 * Instead of fixing all of them, add an alternate define which
 * maps to the CI pte mapping.
 */
#define _PAGE_NO_CACHE		_PAGE_TOLERANT
/*
 * We support _RPAGE_PA_MAX bit real address in pte. On the linux side
 * we are limited by _PAGE_PA_MAX. Clear everything above _PAGE_PA_MAX
 * and everything below PAGE_SHIFT.
 */
#define PTE_RPN_MASK	(((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK))
#define PTE_RPN_SHIFT	PAGE_SHIFT
/*
 * Set of bits not changed in pmd_modify. Even though we have hash specific
 * bits in here, on radix we expect them to be zero.
 */
#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
			 _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
			 _PAGE_SOFT_DIRTY)
/*
 * user access blocked by key
 */
#define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
#define _PAGE_KERNEL_RO		(_PAGE_PRIVILEGED | _PAGE_READ)
#define _PAGE_KERNEL_ROX	(_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC)
#define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
/*
 * _PAGE_CHG_MASK masks of bits that are to be preserved across
 * pgprot changes
 */
#define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
			 _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
			 _PAGE_SOFT_DIRTY)
/*
 * We define 2 sets of base prot bits, one for basic pages (ie,
 * cacheable kernel and user pages) and one for non cacheable
 * pages. We always set _PAGE_COHERENT when SMP is enabled or
 * the processor might need it for DMA coherency.
 */
#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED)
#define _PAGE_BASE	(_PAGE_BASE_NC)
/* PMD_SHIFT determines what a second-level page table entry can map */
#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE-1))

/* PUD_SHIFT determines what a third-level page table entry can map */
#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
#define PUD_SIZE	(1UL << PUD_SHIFT)
#define PUD_MASK	(~(PUD_SIZE-1))

/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE-1))
/* Bits to mask out from a PMD to get to the PTE page */
#define PMD_MASKED_BITS		0xc0000000000000ffUL
/* Bits to mask out from a PUD to get to the PMD page */
#define PUD_MASKED_BITS		0xc0000000000000ffUL
/* Bits to mask out from a PGD to get to the PUD page */
#define P4D_MASKED_BITS		0xc0000000000000ffUL
/*
 * Page table level identifiers, used as an indicator for rcu callback
 * functions.
 */
enum pgtable_index {
	PTE_INDEX = 0,
	PMD_INDEX,
	PUD_INDEX,
	PGD_INDEX,
	/*
	 * The entries below are used with 4k page size and hugetlb.
	 */
	HTLB_16M_INDEX,
	HTLB_16G_INDEX,
};
/* hash 4k can't share hugetlb and also doesn't support THP */ #ifdef CONFIG_PPC_64K_PAGES #include <asm/book3s/64/pgtable-64k.h> #endif
#include <asm/barrier.h>
/*
 * IO space itself carved into the PIO region (ISA and PHB IO space) and
 * the ioremap space
 *
 *  ISA_IO_BASE = KERN_IO_START, 64K reserved area
 *  PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
 * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
 */
#define FULL_IO_SIZE	0x80000000ul
#define  ISA_IO_BASE	(KERN_IO_START)
#define  ISA_IO_END	(KERN_IO_START + 0x10000ul)
#define  PHB_IO_BASE	(ISA_IO_END)
#define  PHB_IO_END	(KERN_IO_START + FULL_IO_SIZE)
#define IOREMAP_BASE	(PHB_IO_END)
#define IOREMAP_START	(ioremap_bot)
#define IOREMAP_END	(KERN_IO_END - FIXADDR_SIZE)
#define FIXADDR_SIZE	SZ_32M
#define FIXADDR_TOP	(IOREMAP_END + FIXADDR_SIZE)
#ifndef __ASSEMBLY__
/*
 * Dispatch a pte update to the MMU-specific implementation: the radix
 * variant when radix_enabled() is true, the hash variant otherwise.
 * All arguments are forwarded unchanged and the callee's return value
 * is returned as-is.
 */
static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr,
				       pte_t *ptep, unsigned long clr,
				       unsigned long set, int huge)
{
	if (radix_enabled())
		return radix__pte_update(mm, addr, ptep, clr, set, huge);
	return hash__pte_update(mm, addr, ptep, clr, set, huge);
}
/*
 * For hash even if we have _PAGE_ACCESSED = 0, we do a pte_update.
 * We currently remove entries from the hashtable regardless of whether
 * the entry was young or dirty.
 *
 * We should be more intelligent about this but for the moment we override
 * these functions and force a tlb flush unconditionally
 * For radix: H_PAGE_HASHPTE should be zero. Hence we can use the same
 * function for both hash and radix.
*/ staticinlineint __ptep_test_and_clear_young(struct mm_struct *mm, unsignedlong addr, pte_t *ptep)
{ unsignedlong old;
/*
 * On Book3S CPUs, clearing the accessed bit without a TLB flush
 * doesn't cause data corruption. [ It could cause incorrect
 * page aging and the (mistaken) reclaim of hot pages, but the
 * chance of that should be relatively low. ]
 *
 * So as a performance optimization don't flush the TLB when
 * clearing the accessed bit, it will eventually be flushed by
 * a context switch or a VM operation anyway. [ In the rare
 * event of it not getting flushed for a long time the delay
 * shouldn't really matter because there's no real memory
 * pressure for swapout to react to. ]
 *
 * Note: this optimisation also exists in pte_needs_flush() and
 * huge_pmd_needs_flush().
 */
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young ptep_test_and_clear_young
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
/*
 * When @full is non-zero and radix is enabled, hand the request to
 * radix__ptep_get_and_clear_full(); in every other case fall back to
 * ptep_get_and_clear(). The chosen callee's result is returned unchanged.
 */
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
					    unsigned long addr,
					    pte_t *ptep, int full)
{
	if (full && radix_enabled()) {
		/*
		 * We know that this is a full mm pte clear and
		 * hence can be sure there is no parallel set_pte.
		 */
		return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
	}
	return ptep_get_and_clear(mm, addr, ptep);
}
staticinlineint pte_present(pte_t pte)
{ /* * A pte is considerent present if _PAGE_PRESENT is set. * We also need to consider the pte present which is marked * invalid during ptep_set_access_flags. Hence we look for _PAGE_INVALID * if we find _PAGE_PRESENT cleared.
*/
#define pte_access_permitted pte_access_permitted staticinlinebool pte_access_permitted(pte_t pte, bool write)
{ /* * _PAGE_READ is needed for any access and will be cleared for * PROT_NONE. Execute-only mapping via PROT_EXEC also returns false.
*/ if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte)) returnfalse;
/* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. * * Even if PTEs can be unsigned long long, a PFN is always an unsigned * long for now.
*/ staticinline pte_t pfn_pte(unsignedlong pfn, pgprot_t pgprot)
{
VM_BUG_ON(pfn >> (64 - PAGE_SHIFT));
VM_BUG_ON((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK);
staticinline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{ /* FIXME!! check whether this need to be a conditional */ return __pte_raw((pte_raw(pte) & cpu_to_be64(_PAGE_CHG_MASK)) |
cpu_to_be64(pgprot_val(newprot)));
}
/* Encode and de-code a swap entry */
#define MAX_SWAPFILES_CHECK() do { \
	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
	/*							\
	 * Don't have overlapping bits with _PAGE_HPTEFLAGS	\
	 * We filter HPTEFLAGS on set_pte.			\
	 */							\
	BUILD_BUG_ON(_PAGE_HPTEFLAGS & SWP_TYPE_MASK);		\
	BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY);	\
	BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_EXCLUSIVE);	\
	} while (0)
#define SWP_TYPE_BITS 5
#define SWP_TYPE_MASK		((1UL << SWP_TYPE_BITS) - 1)
#define __swp_type(x)		((x).val & SWP_TYPE_MASK)
#define __swp_offset(x)		(((x).val & PTE_RPN_MASK) >> PAGE_SHIFT)
#define __swp_entry(type, offset)	((swp_entry_t) { \
				(type) | (((offset) << PAGE_SHIFT) & PTE_RPN_MASK)})
/*
 * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
 * swap type and offset we get from swap and convert that to pte to find a
 * matching pte in linux page table.
 * Clear bits not found in swap entries here.
 */
#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
#define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
#define __pmd_to_swp_entry(pmd)	(__pte_to_swp_entry(pmd_pte(pmd)))
#define __swp_entry_to_pmd(x)	(pte_pmd(__swp_entry_to_pte(x)))
staticinlinebool check_pte_access(unsignedlong access, unsignedlong ptev)
{ /* * This check for _PAGE_RWX and _PAGE_PRESENT bits
*/ if (access & ~ptev) returnfalse; /* * This check for access to privilege space
*/ if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED)) returnfalse;
returntrue;
} /* * Generic functions with hash/radix callbacks
*/
VM_WARN_ON(!(pte_raw(pte) & cpu_to_be64(_PAGE_PTE))); /* * Keep the _PAGE_PTE added till we are sure we handle _PAGE_PTE * in all the callers.
*/
pte = __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
/*
 * Returns true when the raw pmd value has _PAGE_PRESENT or _PAGE_INVALID
 * set, false otherwise.
 */
static inline int pmd_present(pmd_t pmd)
{
	/*
	 * A pmd is considered present if _PAGE_PRESENT is set.
	 * We also need to consider the pmd present which is marked
	 * invalid during a split. Hence we look for _PAGE_INVALID
	 * if we find _PAGE_PRESENT cleared.
	 */
	if (pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID))
		return true;

	return false;
}
staticinlineint pmd_is_serializing(pmd_t pmd)
{ /* * If the pmd is undergoing a split, the _PAGE_PRESENT bit is clear * and _PAGE_INVALID is set (see pmd_present, pmdp_invalidate). * * This condition may also occur when flushing a pmd while flushing * it (see ptep_modify_prot_start), so callers must ensure this * case is fine as well.
*/ if ((pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)) ==
cpu_to_be64(_PAGE_INVALID)) returntrue;
#define pmd_access_permitted pmd_access_permitted staticinlinebool pmd_access_permitted(pmd_t pmd, bool write)
{ /* * pmdp_invalidate sets this combination (which is not caught by * !pte_present() check in pte_access_permitted), to prevent * lock-free lookups, as part of the serialize_against_pte_lookup() * synchronisation. * * This also catches the case where the PTE's hardware PRESENT bit is * cleared while TLB is flushed, which is suboptimal but should not * be frequent.
*/ if (pmd_is_serializing(pmd)) returnfalse;
/* * For radix we should always find H_PAGE_HASHPTE zero. Hence * the below will work for radix too
*/ staticinlineint __pmdp_test_and_clear_young(struct mm_struct *mm, unsignedlong addr, pmd_t *pmdp)
{ unsignedlong old;
/*
 * Only returns true for a THP. False for pmd migration entry.
 * We also need to return true when we come across a pmd that is
 * in the middle of a THP split. While splitting THP, we mark the pmd
 * invalid (pmdp_invalidate()) before we set it with pte page
 * address. A pmd_trans_huge() check against a pmd entry during that time
 * should return true.
 * We should not call this on a hugetlb entry. We should check for HugeTLB
 * entry using vma->vm_flags
 * The page table walk rule is explained in Documentation/mm/transhuge.rst
 */
static inline int pmd_trans_huge(pmd_t pmd)
{
	if (!pmd_present(pmd))
		return false;

	if (radix_enabled())
		return radix__pmd_trans_huge(pmd);
	return hash__pmd_trans_huge(pmd);
}
/*
 * Returns false for a pud that is not present. For a present pud the
 * answer comes from radix__pud_trans_huge() when radix is enabled;
 * without radix this always returns 0.
 */
static inline int pud_trans_huge(pud_t pud)
{
	if (!pud_present(pud))
		return false;

	if (radix_enabled())
		return radix__pud_trans_huge(pud);
	return 0;
}
/* * Returns true for a R -> RW upgrade of pte
*/ staticinlinebool is_pte_rw_upgrade(unsignedlong old_val, unsignedlong new_val)
{ if (!(old_val & _PAGE_READ)) returnfalse;
if ((!(old_val & _PAGE_WRITE)) && (new_val & _PAGE_WRITE)) returntrue;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.