/*
 * NOTE(review): whitespace/newlines were collapsed in this extraction.
 * The table below holds one node id per memory section; it is u8 when
 * node ids fit in a byte (MAX_NUMNODES <= 256), else u16. The matching
 * #endif for NODE_NOT_IN_PAGE_FLAGS is outside this view.
 */
#ifdef NODE_NOT_IN_PAGE_FLAGS /* * If we did not store the node number in the page then we have to * do a lookup in the section_to_node_table in order to find which * node the page belongs to.
 */ #if MAX_NUMNODES <= 256 static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; #else static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; #endif
int page_to_nid(conststruct page *page)
{ return section_to_node_table[page_to_section(page)];
}
EXPORT_SYMBOL(page_to_nid);
/*
 * NOTE(review): fragment of sparse_index_init() — the function header,
 * local declarations and closing lines are missing from this extraction.
 * Confirm against the full file before editing.
 */
/* * An existing section is possible in the sub-section hotplug * case. First hot-add instantiates, follow-on hot-add reuses * the existing section. * * The mem_hotplug_lock resolves the apparent race below.
 */ if (mem_section[root]) return 0;
/* Allocate the per-root mem_section array for this node; -ENOMEM on failure. */
section = sparse_index_alloc(nid); if (!section) return -ENOMEM;
/* * During early boot, before section_mem_map is used for an actual * mem_map, we use section_mem_map to store the section's NUMA * node. This keeps us from having to use another data structure. The * node information is cleared just before we store the real mem_map.
*/ staticinlineunsignedlong sparse_encode_early_nid(int nid)
{ return ((unsignedlong)nid << SECTION_NID_SHIFT);
}
/*
 * Validate the physical addressing limitations of the model.
 * @start_pfn: in/out — clamped to the SPARSEMEM maximum pfn if too large.
 * @end_pfn:   in/out — clamped likewise.
 *
 * Architectures must not hand SPARSEMEM pfns beyond what the section
 * tables can address; out-of-range values are warned about once and
 * clamped rather than rejected.
 *
 * Fixes: fused tokens "staticvoid", "unsignedlong", "elseif"
 * (collapsed whitespace) restored so the function compiles.
 */
static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
						      unsigned long *end_pfn)
{
	unsigned long max_sparsemem_pfn = (DIRECT_MAP_PHYSMEM_END + 1) >> PAGE_SHIFT;

	/*
	 * Sanity checks - do not allow an architecture to pass
	 * in larger pfns than the maximum scope of sparsemem:
	 */
	if (*start_pfn > max_sparsemem_pfn) {
		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
			*start_pfn, *end_pfn, max_sparsemem_pfn);
		WARN_ON_ONCE(1);
		*start_pfn = max_sparsemem_pfn;
		*end_pfn = max_sparsemem_pfn;
	} else if (*end_pfn > max_sparsemem_pfn) {
		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
			*start_pfn, *end_pfn, max_sparsemem_pfn);
		WARN_ON_ONCE(1);
		*end_pfn = max_sparsemem_pfn;
	}
}
/* * There are a number of times that we loop over NR_MEM_SECTIONS, * looking for section_present() on each. But, when we have very * large physical address spaces, NR_MEM_SECTIONS can also be * very large which makes the loops quite long. * * Keeping track of this gives us an easy way to break out of * those loops early.
 */ unsignedlong __highest_present_section_nr; staticvoid __section_mark_present(struct mem_section *ms, unsignedlong section_nr)
{ if (section_nr > __highest_present_section_nr)
__highest_present_section_nr = section_nr;
/*
 * NOTE(review): from here on this appears to be the loop body of a
 * DIFFERENT function (memory_present()/memblocks_present()) fused onto
 * __section_mark_present by the extraction: "nid" is not a parameter of
 * this function, __section_mark_present calls itself, and there are
 * three closing braces. Do not edit without the upstream file.
 */
ms = __nr_to_section(section_nr); if (!ms->section_mem_map) {
ms->section_mem_map = sparse_encode_early_nid(nid) |
SECTION_IS_ONLINE;
__section_mark_present(ms, section_nr);
}
}
}
/*
 * NOTE(review): memblocks_present() is truncated here — the body of the
 * CONFIG_SPARSEMEM_EXTREME branch (mem_section allocation) and the
 * for_each_mem_pfn_range() loop are missing from this extraction.
 */
/* * Mark all memblocks as present using memory_present(). * This is a convenience function that is useful to mark all of the systems * memory as present during initialization.
 */ staticvoid __init memblocks_present(void)
{ unsignedlong start, end; int i, nid;
#ifdef CONFIG_SPARSEMEM_EXTREME if (unlikely(!mem_section)) { unsignedlong size, align;
/* * Subtle, we encode the real pfn into the mem_map such that * the identity pfn - section_mem_map will return the actual * physical page frame number.
*/ staticunsignedlong sparse_encode_mem_map(struct page *mem_map, unsignedlong pnum)
{ unsignedlong coded_mem_map =
(unsignedlong)(mem_map - (section_nr_to_pfn(pnum)));
BUILD_BUG_ON(SECTION_MAP_LAST_BIT > PFN_SECTION_SHIFT);
BUG_ON(coded_mem_map & ~SECTION_MAP_MASK); return coded_mem_map;
}
#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Decode mem_map from the coded memmap.
 *
 * Inverse of sparse_encode_mem_map(): strip the low flag bits and add
 * the section's first pfn back so indexing with an absolute pfn works.
 *
 * Fix: fused token "unsignedlong" (collapsed whitespace) restored.
 */
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
	/* mask off the extra low bits of information */
	coded_mem_map &= SECTION_MAP_MASK;
	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
staticstruct mem_section_usage * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, unsignedlong size)
{ struct mem_section_usage *usage; unsignedlong goal, limit; int nid; /* * A page may contain usemaps for other sections preventing the * page being freed and making a section unremovable while * other sections referencing the usemap remain active. Similarly, * a pgdat can prevent a section being removed. If section A * contains a pgdat and section B contains the usemap, both * sections become inter-dependent. This allocates usemaps * from the same section as the pgdat where possible to avoid * this problem.
*/
goal = pgdat_to_phys(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
limit = goal + (1UL << PA_SECTION_SHIFT);
nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
again:
usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid); if (!usage && limit) {
limit = MEMBLOCK_ALLOC_ACCESSIBLE; goto again;
} return usage;
}
/*
 * NOTE(review): fragment of check_usemap_section_nr() — the function
 * header, the pgdat_snr/usemap_snr setup and the early-exit checks are
 * missing from this extraction. Confirm against the full file.
 */
usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr)); if (usemap_nid != nid) {
pr_info("node %d must be removed before remove section %ld\n",
nid, usemap_snr); return;
} /* * There is a circular dependency. * Some platforms allow un-removable section because they will just * gather other removable sections for dynamic partitioning. * Just notify un-removable section's number here.
 */
pr_info("Section %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations\n",
usemap_snr, pgdat_snr, nid);
} #else staticstruct mem_section_usage * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, unsignedlong size)
{ return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id);
}
/*
 * Pre-allocate one contiguous buffer from which per-section memmaps are
 * carved during early sparse init; freed back via sparse_buffer_fini().
 *
 * Fix: fused tokens "staticvoid" / "unsignedlong" (collapsed
 * whitespace) restored so the function compiles.
 */
static void __init sparse_buffer_init(unsigned long size, int nid)
{
	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);

	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
	/*
	 * Pre-allocated buffer is mainly used by __populate_section_memmap
	 * and we want it to be properly aligned to the section size - this is
	 * especially the case for VMEMMAP which maps memmap to PMDs
	 */
	sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);
	sparsemap_buf_end = sparsemap_buf + size;
}
/*
 * Helper function that is used for generic section initialization, and
 * can also be used by any hooks added above.
 *
 * Consumes the next slot of the pre-allocated usage buffer for section
 * @pnum and wires @map in as the section's early memmap.
 *
 * Fix: fused token "unsignedlong" (collapsed whitespace) restored.
 */
void __init sparse_init_early_section(int nid, struct page *map,
				      unsigned long pnum, unsigned long flags)
{
	BUG_ON(!sparse_usagebuf || sparse_usagebuf >= sparse_usagebuf_end);
	check_usemap_section_nr(nid, sparse_usagebuf);
	sparse_init_one_section(__nr_to_section(pnum), pnum, map,
				sparse_usagebuf, SECTION_IS_EARLY | flags);
	/* Advance to the next per-section usage slot. */
	sparse_usagebuf = (void *)sparse_usagebuf + mem_section_usage_size();
}
/* * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end) * And number of present sections in this node is map_count.
 */ staticvoid __init sparse_init_nid(int nid, unsignedlong pnum_begin, unsignedlong pnum_end, unsignedlong map_count)
{ unsignedlong pnum; struct page *map; struct mem_section *ms;
/*
 * NOTE(review): lines are missing here in this extraction — upstream
 * calls sparse_usage_init()/sparse_buffer_init() and opens a
 * for_each_present_section_nr(pnum_begin, pnum) loop before this point;
 * "pnum" and "pfn" below are otherwise read uninitialized. Confirm
 * against the full file before editing.
 */
ms = __nr_to_section(pnum); if (!preinited_vmemmap_section(ms)) {
map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
nid, NULL, NULL); if (!map) {
pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
__func__, nid);
/* Remember where allocation failed so cleanup starts here. */
pnum_begin = pnum;
sparse_usage_fini();
sparse_buffer_fini(); goto failed;
}
/* Account the boot-time memmap pages for this section. */
memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page),
PAGE_SIZE));
sparse_init_early_section(nid, map, pnum, 0);
}
}
sparse_usage_fini();
sparse_buffer_fini(); return;
failed: /* * We failed to allocate, mark all the following pnums as not present, * except the ones already initialized earlier.
 */
for_each_present_section_nr(pnum_begin, pnum) { if (pnum >= pnum_end) break;
ms = __nr_to_section(pnum); if (!preinited_vmemmap_section(ms))
ms->section_mem_map = 0;
/*
 * NOTE(review): the unconditional clear below duplicates the guarded
 * clear above and defeats the preinited_vmemmap_section() check —
 * upstream has only the guarded clear; this repeated line looks like
 * an extraction artifact. Verify before removing.
 */
ms->section_mem_map = 0;
}
}
/* * Allocate the accumulated non-linear sections, allocate a mem_map * for each and record the physical to section mapping.
 */ void __init sparse_init(void)
{ unsignedlong pnum_end, pnum_begin, map_count = 1; int nid_begin;
/* see include/linux/mmzone.h 'struct mem_section' definition */
BUILD_BUG_ON(!is_power_of_2(sizeof(struct mem_section)));
memblocks_present();
/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
set_pageblock_order();
/*
 * NOTE(review): lines are missing here in this extraction — upstream
 * initializes pnum_begin = first_present_section_nr() and nid_begin =
 * sparse_early_nid(__nr_to_section(pnum_begin)) before this loop;
 * as shown, both are read uninitialized. Confirm against the full file.
 */
for_each_present_section_nr(pnum_begin + 1, pnum_end) { int nid = sparse_early_nid(__nr_to_section(pnum_end));
if (nid == nid_begin) {
map_count++; continue;
} /* Init node with sections in range [pnum_begin, pnum_end) */
sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
/* Start accumulating sections for the next node. */
nid_begin = nid;
pnum_begin = pnum_end;
map_count = 1;
} /* cover the last node */
sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
vmemmap_populate_print_last();
}
#ifdef CONFIG_MEMORY_HOTPLUG
/* Mark all memory sections within the pfn range as online */ void online_mem_sections(unsignedlong start_pfn, unsignedlong end_pfn)
{ unsignedlong pfn;
/*
 * NOTE(review): the loop header is missing from this extraction —
 * upstream iterates pfn over [start_pfn, end_pfn) in PAGES_PER_SECTION
 * steps and declares section_nr/ms per iteration; "section_nr" and
 * "ms" below have no visible declaration here.
 */
/* onlining code should never touch invalid ranges */ if (WARN_ON(!valid_section_nr(section_nr))) continue;
ms = __nr_to_section(section_nr);
ms->section_mem_map |= SECTION_IS_ONLINE;
}
}
/* Mark all memory sections within the pfn range as offline */ void offline_mem_sections(unsignedlong start_pfn, unsignedlong end_pfn)
{ unsignedlong pfn;
/*
 * NOTE(review): as with online_mem_sections(), the per-section loop
 * header and the section_nr/ms declarations are missing from this
 * extraction; confirm against the full file before editing.
 */
/* * TODO this needs some double checking. Offlining code makes * sure to check pfn_valid but those checks might be just bogus
 */ if (WARN_ON(!valid_section_nr(section_nr))) continue;
ms = __nr_to_section(section_nr);
ms->section_mem_map &= ~SECTION_IS_ONLINE;
}
}
/*
 * NOTE(review): tail fragment of free_map_bootmem() — the function
 * header and the loop computing maps_section_nr/removing_section_nr
 * are missing from this extraction.
 */
/* * When this function is called, the removing section is * logical offlined state. This means all pages are isolated * from page allocator. If removing section's memmap is placed * on the same section, it must not be freed. * If it is freed, page allocator may allocate it which will * be removed physically soon.
 */ if (maps_section_nr != removing_section_nr)
put_page_bootmem(page);
}
}
/* * To deactivate a memory region, there are 3 cases to handle across * two configurations (SPARSEMEM_VMEMMAP={y,n}): * * 1. deactivation of a partial hot-added section (only possible in * the SPARSEMEM_VMEMMAP=y case). * a) section was present at memory init. * b) section was hot-added post memory init. * 2. deactivation of a complete hot-added section. * 3. deactivation of a complete section from memory init. * * For 1, when subsection_map does not empty we will not be freeing the * usage map, but still need to free the vmemmap range. * * For 2 and 3, the SPARSEMEM_VMEMMAP={y,n} cases are unified
*/ staticvoid section_deactivate(unsignedlong pfn, unsignedlong nr_pages, struct vmem_altmap *altmap)
{ struct mem_section *ms = __pfn_to_section(pfn); bool section_is_early = early_section(ms); struct page *memmap = NULL; bool empty;
if (clear_subsection_map(pfn, nr_pages)) return;
empty = is_subsection_map_empty(ms); if (empty) { unsignedlong section_nr = pfn_to_section_nr(pfn);
/* * Mark the section invalid so that valid_section() * return false. This prevents code from dereferencing * ms->usage array.
*/
ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
/* * When removing an early section, the usage map is kept (as the * usage maps of other sections fall into the same page). It * will be re-used when re-adding the section - which is then no * longer an early section. If the usage map is PageReserved, it * was allocated during boot.
*/ if (!PageReserved(virt_to_page(ms->usage))) {
kfree_rcu(ms->usage, rcu);
WRITE_ONCE(ms->usage, NULL);
}
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
}
/* * The memmap of early sections is always fully populated. See * section_activate() and pfn_valid() .
*/ if (!section_is_early) {
memmap_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE)));
depopulate_section_memmap(pfn, nr_pages, altmap);
} elseif (memmap) {
memmap_boot_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page),
PAGE_SIZE)));
free_map_bootmem(memmap);
}
if (empty)
ms->section_mem_map = (unsignedlong)NULL;
}
/*
 * NOTE(review): fragment of section_activate() — the function header,
 * local declarations (ms, usage, rc) and the trailing memmap-population
 * logic are missing from this extraction; the function continues past
 * the last visible line.
 */
if (!ms->usage) {
usage = kzalloc(mem_section_usage_size(), GFP_KERNEL); if (!usage) return ERR_PTR(-ENOMEM);
ms->usage = usage;
}
rc = fill_subsection_map(pfn, nr_pages); if (rc) { if (usage)
/* Roll back the usage allocation made above on subsection conflict. */
ms->usage = NULL;
kfree(usage); return ERR_PTR(rc);
}
/* * The early init code does not consider partially populated * initial sections, it simply assumes that memory will never be * referenced. If we hot-add memory into such a section then we * do not need to populate the memmap and can simply reuse what * is already there.
 */ if (nr_pages < PAGES_PER_SECTION && early_section(ms)) return pfn_to_page(pfn);
/** * sparse_add_section - add a memory section, or populate an existing one * @nid: The node to add section on * @start_pfn: start pfn of the memory range * @nr_pages: number of pfns to add in the section * @altmap: alternate pfns to allocate the memmap backing store * @pgmap: alternate compound page geometry for devmap mappings * * This is only intended for hotplug. * * Note that only VMEMMAP supports sub-section aligned hotplug, * the proper alignment and size are gated by check_pfn_span(). * * * Return: * * 0 - On success. * * -EEXIST - Section has been present. * * -ENOMEM - Out of memory.
 */ int __meminit sparse_add_section(int nid, unsignedlong start_pfn, unsignedlong nr_pages, struct vmem_altmap *altmap, struct dev_pagemap *pgmap)
{ unsignedlong section_nr = pfn_to_section_nr(start_pfn); struct mem_section *ms; struct page *memmap; int ret;
ret = sparse_index_init(section_nr, nid); if (ret < 0) return ret;
/*
 * NOTE(review): lines are missing here in this extraction — upstream
 * assigns memmap = section_activate(nid, start_pfn, nr_pages, altmap,
 * pgmap) with an IS_ERR() check; as shown, memmap is read below
 * without a visible assignment. Confirm against the full file.
 */
/* * Poison uninitialized struct pages in order to catch invalid flags * combinations.
 */ if (!altmap || !altmap->inaccessible)
page_init_poison(memmap, sizeof(struct page) * nr_pages);
ms = __nr_to_section(section_nr);
set_section_nid(section_nr, nid);
__section_mark_present(ms, section_nr);
/* Align memmap to section boundary in the subsection case */ if (section_nr_to_pfn(section_nr) != start_pfn)
memmap = pfn_to_page(section_nr_to_pfn(section_nr));
sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0);
/*
 * NOTE(review): the function's closing "return 0; }" is not present in
 * this extraction — the definition is truncated here.
 */
/*
 * NOTE(review): trailing boilerplate from the web page this file was
 * scraped from, translated from German: "The information on this website
 * was compiled carefully to the best of our knowledge. However, neither
 * completeness, correctness, nor quality of the provided information is
 * guaranteed. Remark: the syntax coloring is still experimental."
 * This text is not part of the original source file.
 */