/* * Xen leaves the responsibility for maintaining p2m mappings to the * guests themselves, but it must also access and update the p2m array * during suspend/resume when all the pages are reallocated. * * The logical flat p2m table is mapped to a linear kernel memory area. * For accesses by Xen a three-level tree linked via mfns only is set up to * allow the address space to be sparse. * * Xen * | * p2m_top_mfn * / \ * p2m_mid_mfn p2m_mid_mfn * / / * p2m p2m p2m ... * * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. * * The p2m_top_mfn level is limited to 1 page, so the maximum representable * pseudo-physical address space is: * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages * * P2M_PER_PAGE depends on the architecture, as a mfn is always * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to * 512 and 1024 entries respectively. * * In short, these structures contain the Machine Frame Number (MFN) of the PFN. * * However not all entries are filled with MFNs. Specifically for all other * leaf entries, or for the top root, or middle one, for which there is a void * entry, we assume it is "missing". So (for example) * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. * We have a dedicated page p2m_missing with all entries being * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns. * * We also have the possibility of setting 1-1 mappings on certain regions, so * that: * pfn_to_mfn(0xc0000)=0xc0000 * * The benefit of this is, that we can assume for non-RAM regions (think * PCI BARs, or ACPI spaces), we can create mappings easily because we * get the PFN value to match the MFN. * * For this to work efficiently we have one new page p2m_identity. All entries * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only * recognizes that and MFNs, no other fancy value). 
* * On lookup we spot that the entry points to p2m_identity and return the * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. * If the entry points to an allocated page, we just proceed as before and * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in * appropriate functions (pfn_to_mfn). * * The reason for having the IDENTITY_FRAME_BIT instead of just returning the * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a * non-identity pfn. To protect ourselves against this, we elect to set (and get) the * IDENTITY_FRAME_BIT on all identity mapped PFNs.
*/
/*
 * Hint at last populated PFN.
 *
 * Used to set HYPERVISOR_shared_info->arch.max_pfn so the toolstack
 * can avoid scanning the whole P2M (which may be sized to account for
 * hotplugged memory).
 */
static unsigned long xen_p2m_last_pfn;
/* * Build the parallel p2m_top_mfn and p2m_mid_mfn structures * * This is called both at boot time, and after resuming from suspend: * - At boot time we're called rather early, and must use alloc_bootmem*() * to allocate memory. * * - After resume we're called from within stop_machine, but the mfn * tree should already be completely allocated.
 */ void __ref xen_build_mfn_list_list(void)
{ unsignedlong pfn, mfn;
pte_t *ptep; unsignedint level, topidx, mididx; unsignedlong *mid_mfn_p;
/* Nothing to do if the toolstack uses the virtual p2m map directly. */
if (xen_start_info->flags & SIF_VIRT_P2M_4TOOLS) return;
/* Pre-initialize p2m_top_mfn to be completely missing */ if (p2m_top_mfn == NULL) {
p2m_mid_missing_mfn = alloc_p2m_page();
p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
/*
 * NOTE(review): this block appears truncated/garbled.  From here on the
 * code reads like the interior of the per-pfn loop of this function, but
 * the loop header that would initialize pfn, topidx, mididx, ptep and
 * mid_mfn_p is not visible, and tokens such as "unsignedlong" above are
 * missing spaces.  Restore this function from the upstream file rather
 * than patching it in place.
 */
/* Don't bother allocating any mfn mid levels if * they're just missing, just update the stored mfn, * since all could have changed over a migrate.
 */ if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) {
BUG_ON(mididx);
BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
/* Skip the rest of the mid page - it is all missing. */
pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; continue;
}
if (mid_mfn_p == p2m_mid_missing_mfn) {
/* Separate mid page needed - allocate and pre-fill it as missing. */
mid_mfn_p = alloc_p2m_page();
p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
/*
 * NOTE(review): fragment without a visible enclosing function header.
 * It allocates the shared "missing" and "identity" pte pages and maps
 * every slot read-only to the common p2m_missing/p2m_identity pages, so
 * untouched p2m ranges can share one backing page.  Looks like part of a
 * p2m tree setup routine - confirm against the upstream file.
 */
p2m_missing_pte = alloc_p2m_page();
paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT);
p2m_identity_pte = alloc_p2m_page();
/* Point all ptes of both pages at the shared backing pages, read-only. */
paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT); for (i = 0; i < PTRS_PER_PTE; i++) {
set_pte(p2m_missing_pte + i,
pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO));
set_pte(p2m_identity_pte + i,
pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO));
}
/*
 * NOTE(review): fragment fusing two different routines - a chunked scan
 * of the p2m (rebuild of the linear p2m list) and, at the end, a lookup
 * return path (the "return IDENTITY_FRAME(pfn);" cannot belong to the
 * same function as the loop above it).  The enclosing function headers
 * and the code between them are missing; restore from upstream.
 */
for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) { /* * Try to map missing/identity PMDs or p2m-pages if possible. * We have to respect the structure of the mfn_list_list * which will be built just afterwards. * Chunk size to test is one p2m page if we are in the middle * of a mfn_list_list mid page and the complete mid page area * if we are at index 0 of the mid page. Please note that a * mid page might cover more than one PMD, e.g. on 32 bit PAE * kernels.
 */
chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ?
P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE;
type = xen_p2m_elem_type(pfn);
/* Use the large chunk only if all entries in it share the same type. */
i = 0; if (type != P2M_TYPE_PFN) for (i = 1; i < chunk; i++) if (xen_p2m_elem_type(pfn + i) != type) break; if (i < chunk) /* Reset to minimal chunk size. */
chunk = P2M_PER_PAGE;
/* * The INVALID_P2M_ENTRY is filled in both p2m_*identity * and in p2m_*missing, so returning the INVALID_P2M_ENTRY * would be wrong.
 */ if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) return IDENTITY_FRAME(pfn);
/*
 * Allocate new pmd(s). It is checked whether the old pmd is still in place.
 * If not, nothing is changed. This is okay as the only reason for allocating
 * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by an
 * individual pmd.
 *
 * Returns the pte mapping @addr on success, NULL if page allocation failed.
 */
static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
{
	pte_t *ptechk;
	pte_t *pte_newpg[PMDS_PER_MID_PAGE];
	pmd_t *pmdp;
	unsigned int level;
	unsigned long flags;
	unsigned long vaddr;
	int i;

	/* Do all allocations first to bail out in error case. */
	for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
		pte_newpg[i] = alloc_p2m_page();
		if (!pte_newpg[i]) {
			/* Undo the allocations done so far. */
			for (i--; i >= 0; i--)
				free_p2m_page(pte_newpg[i]);

			return NULL;
		}
	}

	/* Base address of the mid page area the new pmd(s) will cover. */
	vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1);

	for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
		/* Seed the new pte page from the shared missing/identity one. */
		copy_page(pte_newpg[i], pte_pg);
		paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT);

		pmdp = lookup_pmd_address(vaddr);
		BUG_ON(!pmdp);

		spin_lock_irqsave(&p2m_update_lock, flags);

		/* Install only if the old shared pmd is still in place. */
		ptechk = lookup_address(vaddr, &level);
		if (ptechk == pte_pg) {
			HYPERVISOR_shared_info->arch.p2m_generation++;
			wmb(); /* Tools are synchronizing via p2m_generation. */
			set_pmd(pmdp,
				__pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
			wmb(); /* Tools are synchronizing via p2m_generation. */
			HYPERVISOR_shared_info->arch.p2m_generation++;
			pte_newpg[i] = NULL;
		}

		spin_unlock_irqrestore(&p2m_update_lock, flags);

		/* Lost the race (or pmd already replaced): drop our page. */
		if (pte_newpg[i]) {
			paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT);
			free_p2m_page(pte_newpg[i]);
		}

		vaddr += PMD_SIZE;
	}

	return lookup_address(addr, &level);
}
/* * Fully allocate the p2m structure for a given pfn. We need to check * that both the top and mid levels are allocated, and make sure the * parallel mfn tree is kept in sync. We may race with other cpus, so * the new pages are installed with cmpxchg; if we lose the race then * simply free the page we allocated and use the one that's there.
 */ int xen_alloc_p2m_entry(unsignedlong pfn)
{ unsigned topidx; unsignedlong *top_mfn_p, *mid_mfn;
pte_t *ptep, *pte_pg; unsignedint level; unsignedlong flags; unsignedlong addr = (unsignedlong)(xen_p2m_addr + pfn); unsignedlong p2m_pfn;
/*
 * NOTE(review): this body appears to fuse fragments of two functions.
 * The first part matches the advertised xen_alloc_p2m_entry() (the PMD
 * allocation path), but pte_pg is tested before any visible assignment.
 * From the xen_p2m_size check downward the code uses an undeclared
 * "mfn" and returns true/false from a function declared int - it reads
 * like the body of a bool __set_phys_to_machine(pfn, mfn).  Keywords
 * are also fused ("returntrue", "unsignedlong").  Restore both
 * functions from the upstream file instead of patching here.
 */
if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) { /* PMD level is missing, allocate a new one */
ptep = alloc_p2m_pmd(addr, pte_pg); if (!ptep) return -ENOMEM;
}
/* Only invalid entries allowed above the highest p2m covered frame. */ if (unlikely(pfn >= xen_p2m_size)) return mfn == INVALID_P2M_ENTRY;
/* * The interface requires atomic updates on p2m elements. * xen_safe_write_ulong() is using an atomic store via asm().
 */ if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn))) returntrue;
if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing))) return mfn == INVALID_P2M_ENTRY;
if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) return mfn == IDENTITY_FRAME(pfn);
returnfalse;
}
bool set_phys_to_machine(unsignedlong pfn, unsignedlong mfn)
{ if (unlikely(!__set_phys_to_machine(pfn, mfn))) { int ret;
ret = xen_alloc_p2m_entry(pfn); if (ret < 0) returnfalse;
return __set_phys_to_machine(pfn, mfn);
}
returntrue;
}
/*
 * NOTE(review): truncated function - the loop body below breaks off right
 * after the status checks and the function is never closed (the next
 * visible line is another function definition).  The part that would
 * compute the foreign mfn, install it via set_phys_to_machine() and
 * handle failures is missing; restore from the upstream file.
 */
int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsignedint count)
{ int i, ret = 0;
pte_t *pte;
/* Nothing to track when the hypervisor translates for us. */
if (xen_feature(XENFEAT_auto_translated_physmap)) return 0;
if (kmap_ops) {
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
kmap_ops, count); if (ret) goto out;
}
for (i = 0; i < count; i++) { unsignedlong mfn, pfn; struct gnttab_unmap_grant_ref unmap[2]; int rc;
/* Do not add to override if the map failed. */ if (map_ops[i].status != GNTST_okay ||
(kmap_ops && kmap_ops[i].status != GNTST_okay)) continue;
int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_unmap_grant_ref *kunmap_ops, struct page **pages, unsignedint count)
{ int i, ret = 0;
if (xen_feature(XENFEAT_auto_translated_physmap)) return 0;
for (i = 0; i < count; i++) { unsignedlong mfn = __pfn_to_mfn(page_to_pfn(pages[i])); unsignedlong pfn = page_to_pfn(pages[i]);
if (mfn != INVALID_P2M_ENTRY && (mfn & FOREIGN_FRAME_BIT))
set_phys_to_machine(pfn, INVALID_P2M_ENTRY); else
ret = -EINVAL;
} if (kunmap_ops)
ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
kunmap_ops, count) ?: ret;
/*
 * Do the real remapping of non-RAM regions as specified in the
 * xen_nonram_remap[] array.
 * In case of an error just crash the system.
 */
void __init xen_do_remap_nonram(void)
{
	unsigned int i;
	unsigned int remapped = 0;
	const struct nonram_remap *remap = xen_nonram_remap;
	unsigned long pfn, mfn, end_pfn;

	for (i = 0; i < nr_nonram_remap; i++) {
		end_pfn = PFN_UP(remap->paddr + remap->size);
		pfn = PFN_DOWN(remap->paddr);
		mfn = PFN_DOWN(remap->maddr);
		while (pfn < end_pfn) {
			if (!set_phys_to_machine(pfn, mfn))
				panic("Failed to set p2m mapping for pfn=%lx mfn=%lx\n",
				      pfn, mfn);

			/* Advance both frames; without this the loop never ends. */
			pfn++;
			mfn++;
			remapped++;
		}

		remap++;
	}

	pr_info("Remapped %u non-RAM page(s)\n", remapped);
}
#ifdef CONFIG_ACPI /* * Xen variant of acpi_os_ioremap() taking potentially remapped non-RAM * regions into account. * Any attempt to map an area crossing a remap boundary will produce a * WARN() splat. * phys is related to remap->maddr on input and will be rebased to remap->paddr.
 */ staticvoid __iomem *xen_acpi_os_ioremap(acpi_physical_address phys,
acpi_size size)
{ unsignedint i; conststruct nonram_remap *remap = xen_nonram_remap;
/*
 * NOTE(review): the body of this function is missing - only the local
 * declarations survive, keywords are fused ("staticvoid", "conststruct"),
 * and there is no closing brace before the next function.  The loop that
 * rebases phys from remap->maddr to remap->paddr and the final
 * acpi_os_ioremap() call must be restored from the upstream file.
 */
/*
 * Add a new non-RAM remap entry.
 * In case of no free entry found, just crash the system.
 */
void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr,
				 unsigned long size)
{
	/* Machine and pseudo-physical addresses must share the page offset. */
	BUG_ON((maddr & ~PAGE_MASK) != (paddr & ~PAGE_MASK));
	if (nr_nonram_remap == NR_NONRAM_REMAP) {
		xen_raw_console_write("Number of required E820 entry remapping actions exceed maximum value\n");
		BUG();
	}

#ifdef CONFIG_ACPI
	/* Switch to the Xen acpi_os_ioremap() variant. */
	if (nr_nonram_remap == 0)
		acpi_os_ioremap = xen_acpi_os_ioremap;
#endif

	/* Record the new remap entry for xen_do_remap_nonram(). */
	xen_nonram_remap[nr_nonram_remap].maddr = maddr;
	xen_nonram_remap[nr_nonram_remap].paddr = paddr;
	xen_nonram_remap[nr_nonram_remap].size = size;

	nr_nonram_remap++;
}
/*
 * NOTE(review): trailing boilerplate from a web page (German disclaimer),
 * not part of the source; commented out so the file is not left with bare
 * prose.  Translation: "The information on this website has been carefully
 * compiled to the best of our knowledge.  However, no guarantee is given of
 * the completeness, correctness, or quality of the information provided.
 * Note: the colored syntax display and the measurement are still
 * experimental."
 */