/*
 * kernel_kexec() sets this to true on the path that does not preserve the
 * current context, just before kernel_restart_prepare("kexec reboot").
 */
/* Flag to indicate we are going to kexec a new kernel */ bool kexec_in_progress = false;
/*
 * NOTE(review): presumably enables debug printing for file-based kexec
 * loading; it is not referenced in the code visible here -- confirm
 * against its users.
 */
bool kexec_file_dbg_print;
/*
 * When kexec transitions to the new kernel there is a one-to-one
 * mapping between physical and virtual addresses. On processors
 * where you can disable the MMU this is trivial, and easy. For
 * others it is still a simple predictable page table to set up.
 *
 * In that environment kexec copies the new kernel to its final
 * resting place. This means I can only support memory whose
 * physical address can fit in an unsigned long. In particular
 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
 * If the assembly stub has more restrictive requirements
 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
 * defined more restrictively in <asm/kexec.h>.
 *
 * The code for the transition from the current kernel to the
 * new kernel is placed in the control_code_buffer, whose size
 * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
 * page of memory is necessary, but some architectures require more.
 * Because this memory must be identity mapped in the transition from
 * virtual to physical addresses it must live in the range
 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
 * modifiable.
 *
 * The assembly stub in the control code buffer is passed a linked list
 * of descriptor pages detailing the source pages of the new kernel,
 * and the destination addresses of those source pages. As this data
 * structure is not used in the context of the current OS, it must
 * be self-contained.
 *
 * The code has been made to work with highmem pages and will use a
 * destination page in its final resting place (if it happens
 * to allocate it). The end product of this is that most of the
 * physical address space, and most of RAM can be used.
 *
 * Future directions include:
 *  - allocating a page table with the control code buffer identity
 *    mapped, to simplify machine_kexec and make kexec_on_panic more
 *    reliable.
 */
/*
 * KIMAGE_NO_DEST is an impossible destination address, used when
 * allocating pages whose destination address we do not care about.
 */
#define KIMAGE_NO_DEST (-1UL)

/*
 * PAGE_COUNT(x) - number of pages needed to hold x bytes: x is rounded
 * up by PAGE_SIZE - 1 and shifted right by PAGE_SHIFT (this assumes
 * PAGE_SIZE == 1UL << PAGE_SHIFT).
 *
 * Each directive must sit on its own line; when both were fused onto a
 * single line the second "#define" became part of KIMAGE_NO_DEST's
 * replacement list and PAGE_COUNT was never defined.
 */
#define PAGE_COUNT(x) (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
/*
 * sanity_check_segment_list - validate the segment list of a kexec image.
 *
 * In the visible code, -EINVAL is returned when two destination ranges
 * overlap, when a segment's bufsz exceeds its memsz, or when a single
 * segment needs more than half of totalram_pages(). With
 * CONFIG_CRASH_DUMP, a KEXEC_TYPE_CRASH image with a segment outside
 * crashk_res yields -EADDRNOTAVAIL. The last visible step calls
 * accept_memory() on every destination range.
 *
 * NOTE(review): this listing looks incomplete. mstart/mend are never
 * assigned in the first two loops, pstart/pend and the loop over j have
 * no visible declaration or loop header, total_pages is never used, the
 * half-of-memory loop is never closed, and neither a final return nor
 * the closing brace of the function is visible. Verify against the
 * complete source before relying on this description.
 */
int sanity_check_segment_list(struct kimage *image)
{ int i; unsignedlong nr_segments = image->nr_segments; unsignedlong total_pages = 0; unsignedlong nr_pages = totalram_pages();
/*
 * Verify we have good destination addresses. The caller is
 * responsible for making certain we don't attempt to load
 * the new image into invalid or reserved areas of RAM. This
 * just verifies it is an address we can use.
 *
 * Since the kernel does everything in page size chunks ensure
 * the destination addresses are page aligned. Too many
 * special cases crop up when we don't do this. The most
 * insidious is getting overlapping destination addresses
 * simply because addresses are changed to page size
 * granularity.
*/ for (i = 0; i < nr_segments; i++) { unsignedlong mstart, mend;
/* Verify our destination addresses do not overlap.
 * If we allowed overlapping destination addresses
 * through, very weird things can happen with no
 * easy explanation as one segment stomps on another.
*/ for (i = 0; i < nr_segments; i++) { unsignedlong mstart, mend; unsignedlong j;
pstart = image->segment[j].mem;
pend = pstart + image->segment[j].memsz; /* Do the segments overlap ? */ if ((mend > pstart) && (mstart < pend)) return -EINVAL;
}
}
/* Ensure our buffer sizes do not exceed
 * our memory sizes. This should always be the case,
 * and it is easier to check up front than to be surprised
 * later on.
*/ for (i = 0; i < nr_segments; i++) { if (image->segment[i].bufsz > image->segment[i].memsz) return -EINVAL;
}
/*
 * Verify that no more than half of memory will be consumed. If the
 * request from userspace is too large, a large amount of time will be
 * wasted allocating pages, which can cause a soft lockup.
*/ for (i = 0; i < nr_segments; i++) { if (PAGE_COUNT(image->segment[i].memsz) > nr_pages / 2) return -EINVAL;
#ifdef CONFIG_CRASH_DUMP /*
 * Verify we have good destination addresses. Normally
 * the caller is responsible for making certain we don't
 * attempt to load the new image into invalid or reserved
 * areas of RAM. But crash kernels are preloaded into a
 * reserved area of RAM. We must ensure the addresses
 * are in the reserved area otherwise preloading the
 * kernel could corrupt things.
*/
if (image->type == KEXEC_TYPE_CRASH) { for (i = 0; i < nr_segments; i++) { unsignedlong mstart, mend;
mstart = image->segment[i].mem;
mend = mstart + image->segment[i].memsz - 1; /* Ensure we are within the crash kernel limits */ if ((mstart < phys_to_boot_phys(crashk_res.start)) ||
(mend > phys_to_boot_phys(crashk_res.end))) return -EADDRNOTAVAIL;
}
} #endif
/*
 * The destination addresses are searched from system RAM rather than
 * being allocated from the buddy allocator, so they are not guaranteed
 * to be accepted by the current kernel. Accept the destination
 * addresses before kexec swaps their content with the segments' source
 * pages to avoid accessing memory before it is accepted.
*/ for (i = 0; i < nr_segments; i++)
accept_memory(image->segment[i].mem, image->segment[i].memsz);
/*
 * kimage_alloc_normal_control_pages - obtain control pages for an image.
 *
 * count is computed as 1 << order. In the visible code a non-NULL
 * result is linked into image->control_pages, everything collected on
 * the local extra_pages list is freed, and pages is returned.
 *
 * NOTE(review): the body of the do loop (the allocation itself and the
 * destination-range test) and its terminating while are not visible in
 * this listing, so pages appears to be read without ever being
 * assigned. Verify against the complete source.
 */
staticstruct page *kimage_alloc_normal_control_pages(struct kimage *image, unsignedint order)
{ /* Control pages are special, they are the intermediaries
 * that are needed while we copy the rest of the pages
 * to their final resting place. As such they must
 * not conflict with either the destination addresses
 * or memory the kernel is already using.
 *
 * The only case where we really need more than one of
 * these are for architectures where we cannot disable
 * the MMU and must instead generate an identity mapped
 * page table for all of the memory.
 *
 * At worst this runs in O(N) of the image size.
*/ struct list_head extra_pages; struct page *pages; unsignedint count;
count = 1 << order;
INIT_LIST_HEAD(&extra_pages);
/* Loop while I can allocate a page and the page allocated
 * is a destination page.
*/ do { unsignedlong pfn, epfn, addr, eaddr;
if (pages) { /* Remember the allocated page... */
list_add(&pages->lru, &image->control_pages);
/* Because the page is already in its destination
 * location we will never allocate another page at
 * that address. Therefore kimage_alloc_pages
 * will not return it (again) and we don't need
 * to give it an entry in image->segment[].
*/
} /* Deal with the destination pages I have inadvertently allocated.
 *
 * Ideally I would convert multi-page allocations into single
 * page allocations, and add everything to image->dest_pages.
 *
 * For now it is simpler to just free the pages.
*/
kimage_free_page_list(&extra_pages);
return pages;
}
/*
 * kimage_alloc_crash_control_pages - find control pages for a crash image.
 *
 * The visible code examines a candidate hole [hole_start, hole_end]:
 * it gives up once hole_end exceeds KEXEC_CRASH_CONTROL_MEMORY_LIMIT,
 * moves the hole to ALIGN(mend, size) when it overlaps an image
 * segment, and otherwise takes the hole, storing hole_end + 1 in
 * image->control_page. A non-NULL result is then handed to
 * arch_kexec_post_alloc_pages().
 *
 * NOTE(review): the initialisation of size, hole_start and hole_end,
 * the declaration of i, the header of the loop that the break
 * statements leave, and the function's return statement are not
 * visible in this listing. Verify against the complete source.
 */
#ifdef CONFIG_CRASH_DUMP staticstruct page *kimage_alloc_crash_control_pages(struct kimage *image, unsignedint order)
{ /* Control pages are special, they are the intermediaries
 * that are needed while we copy the rest of the pages
 * to their final resting place. As such they must
 * not conflict with either the destination addresses
 * or memory the kernel is already using.
 *
 * Control pages are also the only pages we must allocate
 * when loading a crash kernel. All of the other pages
 * are specified by the segments and we just memcpy
 * into them directly.
 *
 * The only case where we really need more than one of
 * these are for architectures where we cannot disable
 * the MMU and must instead generate an identity mapped
 * page table for all of the memory.
 *
 * Given the low demand this implements a very simple
 * allocator that finds the first hole of the appropriate
 * size in the reserved memory region, and allocates all
 * of the memory up to and including the hole.
*/ unsignedlong hole_start, hole_end, size; struct page *pages;
if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) break; /* See if I overlap any of the segments */ for (i = 0; i < image->nr_segments; i++) { unsignedlong mstart, mend;
mstart = image->segment[i].mem;
mend = mstart + image->segment[i].memsz - 1; if ((hole_end >= mstart) && (hole_start <= mend)) { /* Advance the hole to the end of the segment */
hole_start = ALIGN(mend, size);
hole_end = hole_start + size - 1; break;
}
} /* If I don't overlap any segments I have found my hole! */ if (i == image->nr_segments) {
pages = pfn_to_page(hole_start >> PAGE_SHIFT);
image->control_page = hole_end + 1; break;
}
}
/* Ensure that these pages are decrypted if SME is enabled. */ if (pages)
arch_kexec_post_alloc_pages(page_address(pages), 1 << order, 0);
/*
 * kimage_free_extra_pages - release the spare page lists of an image.
 * @image: image whose dest_pages and unusable_pages lists are freed.
 *
 * Both lists are handed to kimage_free_page_list(); nothing is returned.
 * The storage class and return type are written as two separate
 * keywords ("static void"); the fused spelling "staticvoid" is not a
 * valid declaration.
 */
static void kimage_free_extra_pages(struct kimage *image)
{
	/* Walk through and free any extra destination pages I may have */
	kimage_free_page_list(&image->dest_pages);

	/* Walk through and free any unusable pages I have cached */
	kimage_free_page_list(&image->unusable_pages);
}
/*
 * kimage_terminate - NOTE(review): only the first statement (stepping
 * image->entry forward when the current entry is non-zero) clearly
 * belongs to terminating the entry list. The statements after it free
 * resources owned by the image (vmcoreinfo copy, extra pages, source
 * and indirection entries, control pages, file-load buffers) and use
 * ptr, entry and ind, none of which is declared in the visible code;
 * they may belong to a separate image-freeing routine whose header was
 * lost. No closing brace for the function is visible either. Verify
 * against the complete source.
 */
void kimage_terminate(struct kimage *image)
{ if (*image->entry != 0)
image->entry++;
#ifdef CONFIG_CRASH_DUMP if (image->vmcoreinfo_data_copy) {
crash_update_vmcoreinfo_safecopy(NULL);
vunmap(image->vmcoreinfo_data_copy);
} #endif
kimage_free_extra_pages(image);
for_each_kimage_entry(image, ptr, entry) { if (entry & IND_INDIRECTION) { /* Free the previous indirection page */ if (ind & IND_INDIRECTION)
kimage_free_entry(ind); /* Save this indirection page until we are
 * done with it.
*/
ind = entry;
} elseif (entry & IND_SOURCE)
kimage_free_entry(entry);
} /* Free the final indirection page */ if (ind & IND_INDIRECTION)
kimage_free_entry(ind);
/* Handle any machine specific cleanup */
machine_kexec_cleanup(image);
/* Free the kexec control pages... */
kimage_free_page_list(&image->control_pages);
/*
 * Free up any temporary buffers allocated. This might hit if
 * an error occurred much later after buffer allocation.
*/ if (image->file_mode)
kimage_file_post_load_cleanup(image);
/*
 * kimage_alloc_page - get a page to hold data destined for @destination.
 *
 * A page already parked on image->dest_pages whose address equals
 * @destination is unlinked and returned. Otherwise pages are allocated
 * one at a time with kimage_alloc_pages(gfp_mask, 0); NULL is returned
 * when that fails. Pages above KEXEC_SOURCE_MEMORY_LIMIT are kept on
 * image->unusable_pages, and pages that are some other segment's
 * destination with no source page yet are kept on image->dest_pages.
 *
 * NOTE(review): the statements that assign old_addr and old_page, and
 * the final return and closing brace of the function, are not visible
 * in this listing. Verify against the complete source.
 */
staticstruct page *kimage_alloc_page(struct kimage *image,
gfp_t gfp_mask, unsignedlong destination)
{ /*
 * Here we implement safeguards to ensure that a source page
 * is not copied to its destination page before the data on
 * the destination page is no longer useful.
 *
 * To do this we maintain the invariant that a source page is
 * either its own destination page, or it is not a
 * destination page at all.
 *
 * That is slightly stronger than required, but the proof
 * that no problems will occur is trivial, and the
 * implementation is simple to verify.
 *
 * When allocating all pages normally this algorithm will run
 * in O(N) time, but in the worst case it will run in O(N^2)
 * time. If the runtime is a problem the data structures can
 * be fixed.
*/ struct page *page; unsignedlong addr;
/*
 * Walk through the list of destination pages, and see if I
 * have a match.
*/
list_for_each_entry(page, &image->dest_pages, lru) {
addr = page_to_boot_pfn(page) << PAGE_SHIFT; if (addr == destination) {
list_del(&page->lru); return page;
}
}
page = NULL; while (1) {
kimage_entry_t *old;
/* Allocate a page, if we run out of memory give up */
page = kimage_alloc_pages(gfp_mask, 0); if (!page) return NULL; /* If the page cannot be used file it away */ if (page_to_boot_pfn(page) >
(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
list_add(&page->lru, &image->unusable_pages); continue;
}
addr = page_to_boot_pfn(page) << PAGE_SHIFT;
/* If it is the destination page we want use it */ if (addr == destination) break;
/* If the page is not a destination page use it */ if (!kimage_is_destination_range(image, addr,
addr + PAGE_SIZE - 1)) break;
/*
 * I know that the page is someone's destination page.
 * See if there is already a source page for this
 * destination page. And if so swap the source pages.
*/
old = kimage_dst_used(image, addr); if (old) { /* If so move it */ unsignedlong old_addr; struct page *old_page;
/* The old page I have found cannot be a
 * destination page, so return it if its
 * gfp_flags honor the ones passed in.
*/ if (!(gfp_mask & __GFP_HIGHMEM) &&
PageHighMem(old_page)) {
kimage_free_pages(old_page); continue;
}
page = old_page; break;
} /* Place the page on the destination list, to be used later */
list_add(&page->lru, &image->dest_pages);
}
if (uchunk) { /* For file based kexec, source pages are in kernel memory */ if (image->file_mode)
memcpy(ptr, kbuf, uchunk); else
result = copy_from_user(ptr, buf, uchunk);
ubytes -= uchunk; if (image->file_mode)
kbuf += uchunk; else
buf += uchunk;
}
if (uchunk) { /* For file based kexec, source pages are in kernel memory */ if (image->file_mode)
memcpy(ptr, kbuf, uchunk); else
result = copy_from_user(ptr, buf, uchunk);
ubytes -= uchunk; if (image->file_mode)
kbuf += uchunk; else
buf += uchunk;
}
kunmap_local(ptr); if (result) {
result = -EFAULT; goto out;
}
maddr += mchunk;
mbytes -= mchunk;
cond_resched();
}
out: return result;
}
/*
 * kimage_load_crash_segment - load segment @idx of a crash image.
 *
 * NOTE(review): only the header and local declarations are visible in
 * this listing; the copy loop, the return statement and the closing
 * brace are missing. Verify against the complete source.
 */
#ifdef CONFIG_CRASH_DUMP staticint kimage_load_crash_segment(struct kimage *image, int idx)
{ /* For crash dump kernels we simply copy the data from
 * user space to its destination.
 * We do things a page at a time for the sake of kmap.
*/ struct kexec_segment *segment = &image->segment[idx]; unsignedlong maddr;
size_t ubytes, mbytes; int result; unsignedchar __user *buf = NULL; unsignedchar *kbuf = NULL;
/* * Collect the source pages and map them in a contiguous VA range.
*/
npages = PFN_UP(eaddr) - PFN_DOWN(addr);
src_pages = kmalloc_array(npages, sizeof(*src_pages), GFP_KERNEL); if (!src_pages) {
pr_err("Could not allocate ima pages array.\n"); return NULL;
}
/* * Only the superuser can use the kexec syscall and if it has not * been disabled.
*/ if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) returnfalse;
/* Check limit counter and decrease it.*/
limit = (kexec_image_type == KEXEC_TYPE_CRASH) ?
&load_limit_panic : &load_limit_reboot;
mutex_lock(&limit->mutex); if (!limit->limit) {
mutex_unlock(&limit->mutex); returnfalse;
} if (limit->limit != -1)
limit->limit--;
mutex_unlock(&limit->mutex);
returntrue;
}
/*
 * Move into place and start executing a preloaded standalone
 * executable. If nothing was preloaded return an error.
 *
 * Returns -EBUSY when kexec_trylock() fails; error is set to -EINVAL
 * when no kexec_image is loaded. With CONFIG_KEXEC_JUMP and
 * preserve_context set, processes are frozen and consoles, devices,
 * secondary CPUs, interrupts and syscore are suspended in that order;
 * a failure jumps to the matching label in the resume section so that
 * only the steps already taken are undone. A freeze_processes() failure
 * is always reported as -EBUSY. Otherwise the regular restart
 * preparation and machine_shutdown() are performed.
 *
 * NOTE(review): no call that hands control to the new image is visible
 * between the shutdown branch and the resume section, and the Unlock
 * label, the final return and the closing brace of the function are
 * missing from this listing. Verify against the complete source.
*/ int kernel_kexec(void)
{ int error = 0;
if (!kexec_trylock()) return -EBUSY; if (!kexec_image) {
error = -EINVAL; goto Unlock;
}
#ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { /*
 * This flow is analogous to hibernation flows that occur
 * before creating an image and before jumping from the
 * restore kernel to the image one, so it uses the same
 * device callbacks as those two flows.
*/
pm_prepare_console();
error = freeze_processes(); if (error) {
error = -EBUSY; goto Restore_console;
}
console_suspend_all();
error = dpm_suspend_start(PMSG_FREEZE); if (error) goto Resume_devices; /*
 * dpm_suspend_end() must be called after dpm_suspend_start()
 * to complete the transition, like in the hibernation flows
 * mentioned above.
*/
error = dpm_suspend_end(PMSG_FREEZE); if (error) goto Resume_devices;
error = suspend_disable_secondary_cpus(); if (error) goto Enable_cpus;
local_irq_disable();
error = syscore_suspend(); if (error) goto Enable_irqs;
} else #endif
{
kexec_in_progress = true;
kernel_restart_prepare("kexec reboot");
migrate_to_reboot_cpu();
syscore_shutdown();
/*
 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
 * no further code needs to use CPU hotplug (which is true in
 * the reboot case). However, the kexec path depends on using
 * CPU hotplug again; so re-enable it here.
*/
cpu_hotplug_enable();
pr_notice("Starting new kernel\n");
machine_shutdown();
}
#ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { /*
 * This flow is analogous to hibernation flows that occur after
 * creating an image and after the image kernel has got control
 * back, and in case the devices have been reset or otherwise
 * manipulated in the meantime, it uses the device callbacks
 * used by the latter.
*/
syscore_resume();
Enable_irqs:
local_irq_enable();
Enable_cpus:
suspend_enable_secondary_cpus();
dpm_resume_start(PMSG_RESTORE);
Resume_devices:
dpm_resume_end(PMSG_RESTORE);
console_resume_all();
thaw_processes();
Restore_console:
pm_restore_console();
} #endif
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.