/*
 * NOTE: UML does not have exception tables. As such, this is almost a copy
 * of the code in mm/memory.c, only adjusting the logic to simply check whether
 * we are coming from the kernel instead of doing an additional lookup in the
 * exception table.
 * We can do this simplification because we never get here if the exception was
 * fixable.
 */
/*
 * Take the mmap read lock without risking a deadlock on a kernel-mode
 * fault taken while the lock is already held for writing.
 *
 * Returns true with the lock held; returns false (lock not held) when a
 * kernel fault finds the lock contended, or when a user task receives a
 * fatal signal while sleeping on it.
 */
static inline bool get_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
{
	/* Fast path: lock is uncontended. */
	if (likely(mmap_read_trylock(mm)))
		return true;

	/*
	 * Contended kernel fault: sleeping here could deadlock if the
	 * fault happened while the lock was held for writing, so fail.
	 */
	if (!is_user)
		return false;

	/* User fault: wait for the lock, but let fatal signals through. */
	return !mmap_read_lock_killable(mm);
}
/*
 * Try to atomically upgrade the mmap lock from read to write.
 *
 * We don't have this operation yet.
 *
 * It should be easy enough to do: it's basically a
 *	atomic_long_try_cmpxchg_acquire()
 * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
 * it also needs the proper lockdep magic etc.
 *
 * Until then, always report failure so callers fall back to
 * drop-and-relock.
 */
static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
{
	return false;
}
/* * Helper for page fault handling. * * This is kind of equivalend to "mmap_read_lock()" followed * by "find_extend_vma()", except it's a lot more careful about * the locking (and will drop the lock on failure). * * For example, if we have a kernel bug that causes a page * fault, we don't want to just use mmap_read_lock() to get * the mm lock, because that would deadlock if the bug were * to happen while we're holding the mm lock for writing. * * So this checks the exception tables on kernel faults in * order to only do this all for instructions that are actually * expected to fault. * * We can also actually take the mm lock for writing if we * need to extend the vma, which helps the VM layer a lot.
*/ staticstruct vm_area_struct *
um_lock_mm_and_find_vma(struct mm_struct *mm, unsignedlong addr, bool is_user)
{ struct vm_area_struct *vma;
if (!get_mmap_lock_carefully(mm, is_user)) return NULL;
/* * Well, dang. We might still be successful, but only * if we can extend a vma to do so.
*/ if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
mmap_read_unlock(mm); return NULL;
}
/* * We can try to upgrade the mmap lock atomically, * in which case we can continue to use the vma * we already looked up. * * Otherwise we'll have to drop the mmap lock and * re-take it, and also look up the vma again, * re-checking it.
*/ if (!mmap_upgrade_trylock(mm)) { if (!upgrade_mmap_lock_carefully(mm, is_user)) return NULL;
vma = find_vma(mm, addr); if (!vma) goto fail; if (vma->vm_start <= addr) goto success; if (!(vma->vm_flags & VM_GROWSDOWN)) goto fail;
}
if (expand_stack_locked(vma, addr)) goto fail;
success:
mmap_write_downgrade(mm); return vma;
fail:
mmap_write_unlock(mm); return NULL;
}
/* * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by * segv().
*/ int handle_page_fault(unsignedlong address, unsignedlong ip, int is_write, int is_user, int *code_out)
{ struct mm_struct *mm = current->mm; struct vm_area_struct *vma;
pmd_t *pmd;
pte_t *pte; int err = -EFAULT; unsignedint flags = FAULT_FLAG_DEFAULT;
*code_out = SEGV_MAPERR;
/* * If the fault was with pagefaults disabled, don't take the fault, just * fail.
*/ if (faulthandler_disabled()) goto out_nosemaphore;
if (is_user)
flags |= FAULT_FLAG_USER;
retry:
vma = um_lock_mm_and_find_vma(mm, address, is_user); if (!vma) goto out_nosemaphore;
*code_out = SEGV_ACCERR; if (is_write) { if (!(vma->vm_flags & VM_WRITE)) goto out;
flags |= FAULT_FLAG_WRITE;
} else { /* Don't require VM_READ|VM_EXEC for write faults! */ if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto out;
}
pmd = pmd_off(mm, address);
pte = pte_offset_kernel(pmd, address);
} while (!pte_present(*pte));
err = 0; /* * The below warning was added in place of * pte_mkyoung(); if (is_write) pte_mkdirty(); * If it's triggered, we'd see normally a hang here (a clean pte is * marked read-only to emulate the dirty bit). * However, the generic code can mark a PTE writable but clean on a * concurrent read fault, triggering this harmlessly. So comment it out.
*/ #if 0
WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte))); #endif
out_of_memory: /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed).
*/
mmap_read_unlock(mm); if (!is_user) goto out_nosemaphore;
pagefault_out_of_memory(); return 0;
}
void fatal_sigsegv(void)
{
force_fatal_sig(SIGSEGV);
do_signal(¤t->thread.regs); /* * This is to tell gcc that we're not returning - do_signal * can, in general, return, but in this case, it's not, since * we just got a fatal SIGSEGV queued.
*/
os_dump_core();
}
/** * segv_handler() - the SIGSEGV handler * @sig: the signal number * @unused_si: the signal info struct; unused in this handler * @regs: the ptrace register information * @mc: the mcontext of the signal * * The handler first extracts the faultinfo from the UML ptrace regs struct. * If the userfault did not happen in an UML userspace process, bad_segv is called. * Otherwise the signal did happen in a cloned userspace process, handle it.
*/ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs, void *mc)
{ struct faultinfo * fi = UPT_FAULTINFO(regs);
/* * We give a *copy* of the faultinfo in the regs to segv. * This must be done, since nesting SEGVs could overwrite * the info in the regs. A pointer to the info then would * give us bad data!
*/ unsignedlong segv(struct faultinfo fi, unsignedlong ip, int is_user, struct uml_pt_regs *regs, void *mc)
{ int si_code; int err; int is_write = FAULT_WRITE(fi); unsignedlong address = FAULT_ADDRESS(fi);
if (!is_user && regs)
current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
if (!is_user && init_mm.context.sync_tlb_range_to) { /* * Kernel has pending updates from set_ptes that were not * flushed yet. Syncing them should fix the pagefault (if not * we'll get here again and panic).
*/
err = um_tlb_sync(&init_mm); if (err == -ENOMEM)
report_enomem(); if (err)
panic("Failed to sync kernel TLBs: %d", err); goto out;
} elseif (current->pagefault_disabled) { if (!mc) {
show_regs(container_of(regs, struct pt_regs, regs));
panic("Segfault with pagefaults disabled but no mcontext");
} if (!current->thread.segv_continue) {
show_regs(container_of(regs, struct pt_regs, regs));
panic("Segfault without recovery target");
}
mc_set_rip(mc, current->thread.segv_continue);
current->thread.segv_continue = NULL; goto out;
} elseif (current->mm == NULL) {
show_regs(container_of(regs, struct pt_regs, regs));
panic("Segfault with no mm");
} elseif (!is_user && address > PAGE_SIZE && address < TASK_SIZE) {
show_regs(container_of(regs, struct pt_regs, regs));
panic("Kernel tried to access user memory at addr 0x%lx, ip 0x%lx",
address, ip);
}
if (SEGV_IS_FIXABLE(&fi))
err = handle_page_fault(address, ip, is_write, is_user,
&si_code); else {
err = -EFAULT; /* * A thread accessed NULL, we get a fault, but CR2 is invalid. * This code is used in __do_copy_from_user() of TT mode. * XXX tt mode is gone, so maybe this isn't needed any more
*/
address = 0;
}
void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs, void *mc)
{ int code, err; if (!UPT_IS_USER(regs)) { if (sig == SIGBUS)
printk(KERN_ERR "Bus error - the host /dev/shm or /tmp " "mount likely just ran out of space\n");
panic("Kernel mode signal %d", sig);
}
arch_examine_signal(sig, regs);
/* Is the signal layout for the signal known? * Signal data must be scrubbed to prevent information leaks.
*/
code = si->si_code;
err = si->si_errno; if ((err == 0) && (siginfo_layout(sig, code) == SIL_FAULT)) { struct faultinfo *fi = UPT_FAULTINFO(regs);
current->thread.arch.faultinfo = *fi;
force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi));
} else {
printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d) with errno %d\n",
sig, code, err);
force_sig(sig);
}
}
/*
 * Trailing website-disclaimer text left over from the page this file was
 * extracted from (translated from German, kept as a comment so the file
 * stays valid C):
 * "The information on this website has been carefully compiled to the
 * best of our knowledge. However, no guarantee is made as to the
 * completeness, correctness, or quality of the information provided.
 * Note: the colored syntax display and the measurement are still
 * experimental."
 */