/*
 * Keep track of memory that is to be preserved across KHO.
 *
 * The serializing side uses two levels of xarrays to manage chunks of per-order
 * 512 byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order of a
 * 1TB system would fit inside a single 512 byte bitmap. For order 0 allocations
 * each bitmap will cover 16M of address space. Thus, for 16G of memory at most
 * 512K of bitmap memory will be needed for order 0.
 *
 * This approach is fully incremental, as the serialization progresses folios
 * can continue to be aggregated to the tracker. The final step, immediately
 * prior to kexec, would serialize the xarray information into a linked list
 * for the successor kernel to parse.
*/
/* almost as free_reserved_page(), just don't free the page */ staticvoid kho_restore_page(struct page *page, unsignedint order)
{ unsignedint nr_pages = (1 << order);
/* Head page gets refcount of 1. */
set_page_count(page, 1);
/* For higher order folios, tail pages get a page count of zero. */ for (unsignedint i = 1; i < nr_pages; i++)
set_page_count(page + i, 0);
if (order > 0)
prep_compound_page(page, order);
adjust_managed_page_count(page, nr_pages);
}
/** * kho_restore_folio - recreates the folio from the preserved memory. * @phys: physical address of the folio. * * Return: pointer to the struct folio on success, NULL on failure.
*/ struct folio *kho_restore_folio(phys_addr_t phys)
{ struct page *page = pfn_to_online_page(PHYS_PFN(phys)); unsignedlong order;
if (!page) return NULL;
order = page->private; if (order > MAX_PAGE_ORDER) return NULL;
/* Serialize and deserialize struct kho_mem_phys across kexec * * Record all the bitmaps in a linked list of pages for the next kernel to * process. Each chunk holds bitmaps of the same order and each block of bitmaps * starts at a given physical address. This allows the bitmaps to be sparse. The * xarray is used to store them in a tree while building up the data structure, * but the KHO successor kernel only needs to process them once in order. * * All of this memory is normal kmalloc() memory and is not marked for * preservation. The successor kernel will remain isolated to the scratch space * until it completes processing this list. Once processed all the memory * storing these ranges will be marked as free.
*/
for (i = 0; i != chunk->hdr.num_elms; i++)
deserialize_bitmap(chunk->hdr.order,
&chunk->bitmaps[i]);
chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
}
}
/*
 * With KHO enabled, memory can become fragmented because KHO regions may
 * be anywhere in physical address space. The scratch regions give us
 * safe zones that we will never see KHO allocations from. This is where we
 * can later safely load our new kexec images into and then use the scratch
 * area for early allocations that happen before page allocator is
 * initialized.
 */
static struct kho_scratch *kho_scratch;
static unsigned int kho_scratch_cnt;
/*
 * The scratch areas are scaled by default as percent of memory allocated from
 * memblock. A user can override the scale with command line parameter:
 *
 * kho_scratch=N%
 *
 * It is also possible to explicitly define size for a lowmem, a global and
 * per-node scratch areas:
 *
 * kho_scratch=l[KMG],n[KMG],m[KMG]
 *
 * The explicit size definition takes precedence over scale definition.
 */
static unsigned int scratch_scale __initdata = 200;
static phys_addr_t scratch_size_global __initdata;
static phys_addr_t scratch_size_pernode __initdata;
static phys_addr_t scratch_size_lowmem __initdata;
/* parse nn% */ if (p[len - 1] == '%') { /* unsigned int max is 4,294,967,295, 10 chars */ char s_scale[11] = {}; int ret = 0;
if (len > ARRAY_SIZE(s_scale)) return -EINVAL;
memcpy(s_scale, p, len - 1);
ret = kstrtouint(s_scale, 10, &scratch_scale); if (!ret)
pr_notice("scratch scale is %d%%\n", scratch_scale); return ret;
}
/* parse ll[KMG],mm[KMG],nn[KMG] */ for (i = 0; i < ARRAY_SIZE(sizes); i++) { char *endp = p;
if (i > 0) { if (*p != ',') return -EINVAL;
p += 1;
}
sizes[i] = memparse(p, &endp); if (!sizes[i] || endp == p) return -EINVAL;
p = endp;
}
/** * kho_reserve_scratch - Reserve a contiguous chunk of memory for kexec * * With KHO we can preserve arbitrary pages in the system. To ensure we still * have a large contiguous region of memory when we search the physical address * space for target memory, let's make sure we always have a large CMA region * active. This CMA region will only be used for movable pages which are not a * problem for us during KHO because we can just move them somewhere else.
*/ staticvoid __init kho_reserve_scratch(void)
{
phys_addr_t addr, size; int nid, i = 0;
/* * reserve scratch area in low memory for lowmem allocations in the * next kernel
*/
size = scratch_size_lowmem;
addr = memblock_phys_alloc_range(size, CMA_MIN_ALIGNMENT_BYTES, 0,
ARCH_LOW_ADDRESS_LIMIT); if (!addr) goto err_free_scratch_desc;
/* reserve large contiguous area for allocations without nid */
size = scratch_size_global;
addr = memblock_phys_alloc(size, CMA_MIN_ALIGNMENT_BYTES); if (!addr) goto err_free_scratch_areas;
/** * kho_add_subtree - record the physical address of a sub FDT in KHO root tree. * @ser: serialization control object passed by KHO notifiers. * @name: name of the sub tree. * @fdt: the sub tree blob. * * Creates a new child node named @name in KHO root FDT and records * the physical address of @fdt. The pages of @fdt must also be preserved * by KHO for the new kernel to retrieve it after kexec. * * A debugfs blob entry is also created at * ``/sys/kernel/debug/kho/out/sub_fdts/@name``. * * Return: 0 on success, error code on failure
*/ int kho_add_subtree(struct kho_serialization *ser, constchar *name, void *fdt)
{ int err = 0;
u64 phys = (u64)virt_to_phys(fdt); void *root = page_to_virt(ser->fdt);
/**
 * register_kho_notifier - add a notifier to the KHO outgoing chain.
 * @nb: notifier block to register on the chain.
 *
 * Return: 0 on success, error code on failure.
 */
int register_kho_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&kho_out.chain_head, nb);
}
EXPORT_SYMBOL_GPL(register_kho_notifier);
/**
 * unregister_kho_notifier - remove a notifier from the KHO outgoing chain.
 * @nb: notifier block previously added with register_kho_notifier().
 *
 * Return: 0 on success, error code on failure.
 */
int unregister_kho_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&kho_out.chain_head, nb);
}
EXPORT_SYMBOL_GPL(unregister_kho_notifier);
/** * kho_preserve_folio - preserve a folio across kexec. * @folio: folio to preserve. * * Instructs KHO to preserve the whole folio across kexec. The order * will be preserved as well. * * Return: 0 on success, error code on failure
*/ int kho_preserve_folio(struct folio *folio)
{ constunsignedlong pfn = folio_pfn(folio); constunsignedint order = folio_order(folio); struct kho_mem_track *track = &kho_out.ser.track;
/** * kho_retrieve_subtree - retrieve a preserved sub FDT by its name. * @name: the name of the sub FDT passed to kho_add_subtree(). * @phys: if found, the physical address of the sub FDT is stored in @phys. * * Retrieve a preserved sub FDT named @name and store its physical * address in @phys. * * Return: 0 on success, error code on failure
*/ int kho_retrieve_subtree(constchar *name, phys_addr_t *phys)
{ constvoid *fdt = kho_get_fdt(); const u64 *val; int offset, len;
err = kho_out_debugfs_init(); if (err) goto err_free_fdt;
if (fdt) {
err = kho_in_debugfs_init(fdt); /* * Failure to create /sys/kernel/debug/kho/in does not prevent * reviving state from KHO and setting up KHO for the next * kexec.
*/ if (err)
pr_err("failed exposing handover FDT in debugfs: %d\n",
err);
return 0;
}
for (int i = 0; i < kho_scratch_cnt; i++) { unsignedlong base_pfn = PHYS_PFN(kho_scratch[i].addr); unsignedlong count = kho_scratch[i].size >> PAGE_SHIFT; unsignedlong pfn;
/* * Mark scratch mem as CMA before we return it. That way we * ensure that no kernel allocations happen on it. That means * we can reuse it as scratch memory again later.
*/
__for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
MEMBLOCK_KHO_SCRATCH, &start, &end, NULL) {
ulong start_pfn = pageblock_start_pfn(PFN_DOWN(start));
ulong end_pfn = pageblock_align(PFN_UP(end));
ulong pfn;
/*
 * We pass safe contiguous blocks of memory to use for early boot
 * purposes from the previous kernel so that we can resize the
 * memblock array as needed.
*/ for (int i = 0; i < scratch_cnt; i++) { struct kho_scratch *area = &scratch[i];
u64 size = area->size;
memblock_add(area->addr, size);
err = memblock_mark_kho_scratch(area->addr, size); if (WARN_ON(err)) {
pr_warn("failed to mark the scratch region 0x%pa+0x%pa: %d",
&area->addr, &size, err); goto out;
}
pr_debug("Marked 0x%pa+0x%pa as scratch", &area->addr, &size);
}
memblock_reserve(scratch_phys, scratch_len);
/* * Now that we have a viable region of scratch memory, let's tell * the memblocks allocator to only use that for any allocations. * That way we ensure that nothing scribbles over in use data while * we initialize the page tables which we will need to ingest all * memory reservations from the previous kernel.
*/
memblock_set_kho_scratch_only();
kho_in.fdt_phys = fdt_phys;
kho_in.scratch_phys = scratch_phys;
kho_scratch_cnt = scratch_cnt;
pr_info("found kexec handover data. Will skip init for some devices\n");
out: if (fdt)
early_memunmap(fdt, fdt_len); if (scratch)
early_memunmap(scratch, scratch_len); if (err)
pr_warn("disabling KHO revival: %d\n", err);
}
/* Helper functions for kexec_file_load */
int kho_fill_kimage(struct kimage *image)
{
ssize_t scratch_size; int err = 0; struct kexec_buf scratch;
/*
 * NOTE(review): the text below is website boilerplate (a German content
 * disclaimer) that leaked into this file during extraction; it is not part
 * of the original source. English translation: "The information on this web
 * page was carefully compiled to the best of our knowledge. However, neither
 * completeness, nor correctness, nor quality of the provided information is
 * guaranteed. Remark: the colored syntax highlighting is still experimental."
 */