// SPDX-License-Identifier: GPL-2.0-only
/*
 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
 * Copyright (c) 2014-2015 Andrew Lutomirski
 *
 * This is a series of tests that exercises the sigreturn(2) syscall and
 * the IRET / SYSRET paths in the kernel.
 *
 * For now, this focuses on the effects of unusual CS and SS values,
 * and it has a bunch of tests to make sure that ESP/RSP is restored
 * properly.
 *
 * The basic idea behind these tests is to raise(SIGUSR1) to create a
 * sigcontext frame, plug in the values to be tested, and then return,
 * which implicitly invokes sigreturn(2) and programs the user context
 * as desired.
 *
 * For tests for which we expect sigreturn and the subsequent return to
 * user mode to succeed, we return to a short trampoline that generates
 * SIGTRAP so that the meat of the tests can be ordinary C code in a
 * SIGTRAP handler.
 *
 * The inner workings of each test is documented below.
 *
 * Do not run on outdated, unpatched kernels at risk of nasty crashes.
 */
/*
 * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the
 * glibc headers.
 */
#ifdef __x86_64__
/*
 * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
 * kernels that save SS in the sigcontext.  All kernels that set
 * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
 * regardless of SS (i.e. they implement espfix).
 *
 * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
 * when delivering a signal that came from 64-bit code.
 *
 * Sigreturn restores SS as follows:
 *
 * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
 *     saved CS is not 64-bit)
 *         new SS = saved SS  (will fail IRET and signal if invalid)
 * else
 *         new SS = a flat 32-bit data segment
 */
#define UC_SIGCONTEXT_SS 0x2
#define UC_STRICT_RESTORE_SS 0x4
#endif

/*
 * In principle, this test can run on Linux emulation layers (e.g.
 * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
 * entries 0-5 for their own internal purposes, so start our LDT
 * allocations above that reservation.  (The tests don't pass on LX
 * branded zones, but at least this lets them run.)
 */
#define LDT_OFFSET 6

/* An aligned stack accessible through some of our segments. */
static unsigned char stack16[65536] __attribute__((aligned(4096)));
/*
 * An aligned int3 instruction used as a trampoline.  Some of the tests
 * want to fish out their ss values, so this trampoline copies ss to ecx
 * before the int3.  (The original comment said "eax", but the code loads
 * %ss into %ecx, and the SIGUSR1 handler mirrors it via REG_CX.)
 *
 * __asm__ is used instead of the GNU "asm" keyword so that the file-scope
 * assembly also compiles in strict ISO C modes (e.g. -std=c11), where
 * "asm" is not a keyword.
 */
__asm__ (".pushsection .text\n\t"
	 ".type int3, @function\n\t"
	 ".align 4096\n\t"
	 "int3:\n\t"
	 "mov %ss,%ecx\n\t"
	 "int3\n\t"
	 ".size int3, . - int3\n\t"
	 ".align 4096, 0xcc\n\t"
	 ".popsection");
extern char int3[4096];
/*
 * At startup, we prepare:
 *
 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
 *   descriptor or out of bounds).
 * - code16_sel: A 16-bit LDT code segment pointing to int3.
 * - data16_sel: A 16-bit LDT data segment pointing to stack16.
 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to
 *   stack16.
 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
 *   stack16.
 *
 * For no particularly good reason, xyz_sel is a selector value with the
 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
 * descriptor table.  These variables will be zero if their respective
 * segments could not be allocated.
 */
static unsigned short ldt_nonexistent_sel;
static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
/*
 * setup_ldt() - sanity-check addresses and probe GDT segment allocation.
 *
 * NOTE(review): this body appears to have been corrupted during
 * extraction.  From the UC_SIGCONTEXT_SS check onward it references
 * names (ctx, was_64bit, sig, nerrs, is_valid_ss, ssptr) that are not
 * declared in this scope, and the trailing #endif has no visible
 * matching #ifdef -- that tail looks like it belongs in a signal
 * handler.  Confirm against the upstream selftest before relying on
 * this code.  Tokens below are kept byte-identical to the extract.
 */
staticvoid setup_ldt(void)
{ if ((unsignedlong)stack16 > (1ULL << 32) - sizeof(stack16))
errx(1, "stack16 is too high\n"); if ((unsignedlong)int3 > (1ULL << 32) - sizeof(int3))
errx(1, "int3 is too high\n");
/*
 * Probe whether the kernel will hand us a 16-bit data segment in the
 * GDT via set_thread_area(); success hints at CVE-2014-8133 exposure.
 */
if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) { /* * This probably indicates vulnerability to CVE-2014-8133. * Merely getting here isn't definitive, though, and we'll * diagnose the problem for real later on.
 */
printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
gdt_data16_desc.entry_number);
gdt_data16_idx = gdt_data16_desc.entry_number;
} else {
printf("[OK]\tset_thread_area refused 16-bit data\n");
}
/*
 * NOTE(review): everything below uses 'ctx', 'sig', 'was_64bit' and
 * 'nerrs', none of which exist in this scope -- presumably spliced in
 * from the SIGTRAP handler during extraction.
 */
if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
nerrs++;
/* * This happens on Linux 4.1. The rest will fail, too, so * return now to reduce the noise.
 */ return;
}
/* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */ if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
sig);
nerrs++;
}
if (is_valid_ss(*ssptr(ctx))) { /* * DOSEMU was written before 64-bit sigcontext had SS, and * it tries to figure out the signal source SS by looking at * the physical register. Make sure that keeps working.
 */ unsignedshort hw_ss; asm ("mov %%ss, %0" : "=rm" (hw_ss)); if (hw_ss != *ssptr(ctx)) {
printf("[FAIL]\tHW SS didn't match saved SS\n");
nerrs++;
}
/* NOTE(review): this #endif has no visible matching #ifdef. */
} #endif
}
/* * SIGUSR1 handler. Sets CS and SS as requested and points IP to the * int3 trampoline. Sets SP to a large known value so that we can see * whether the value round-trips back to user mode correctly.
*/ staticvoid sigusr1(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t*)ctx_void;
#ifdef __i386__ /* * Make sure the kernel doesn't inadvertently use DS or ES-relative * accesses in a region where user DS or ES is loaded. * * Skip this for 64-bit builds because long mode doesn't care about * DS and ES and skipping it increases test coverage a little bit, * since 64-bit kernels can still run the 32-bit build.
*/
ctx->uc_mcontext.gregs[REG_DS] = 0;
ctx->uc_mcontext.gregs[REG_ES] = 0; #endif
memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */
return;
}
/* * Called after a successful sigreturn (via int3) or from a failed * sigreturn (directly by kernel). Restores our state so that the * original raise(SIGUSR1) returns.
 */ staticvoid sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t*)ctx_void;
/*
 * NOTE(review): this handler body appears spliced together from several
 * upstream functions during extraction.  It references
 * sig_corrupt_final_ss, nerrs, requested_regs, resulting_regs,
 * trapname, sig_err, errdesc and sig_trapped, none of which are
 * declared in view; it contains "return 0;" / "return 1;" inside a
 * void function; and the braces below do not balance.  Verify against
 * the upstream selftest before relying on this code.  Tokens are kept
 * byte-identical to the extract.
 */
#ifdef __x86_64__ if (sig_corrupt_final_ss) { if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
nerrs++;
} else { /* * DOSEMU transitions from 32-bit to 64-bit mode by * adjusting sigcontext, and it requires that this work * even if the saved SS is bogus.
 */
printf("\tCorrupting SS on return to 64-bit mode\n");
*ssptr(ctx) = 0;
}
} #endif
/*
 * raise(2) here comes from 64-bit code, so the kernel must have
 * flagged strict SS restoration for this frame.
 */
if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
nerrs++; return; /* We can't do the rest. */
}
/* * Check that each register had an acceptable value when the * int3 trampoline was invoked.
 */ for (int i = 0; i < NGREG; i++) {
greg_t req = requested_regs[i], res = resulting_regs[i];
if (i == REG_TRAPNO || i == REG_IP) continue; /* don't care */
if (i == REG_SP) { /* * If we were using a 16-bit stack segment, then * the kernel is a bit stuck: IRET only restores * the low 16 bits of ESP/RSP if SS is 16-bit. * The kernel uses a hack to restore bits 31:16, * but that hack doesn't help with bits 63:32. * On Intel CPUs, bits 63:32 end up zeroed, and, on * AMD CPUs, they leak the high bits of the kernel * espfix64 stack pointer. There's very little that * the kernel can do about it. * * Similarly, if we are returning to a 32-bit context, * the CPU will often lose the high 32 bits of RSP.
 */
/*
 * NOTE(review): the printf below uses names (trapname, sig_err,
 * errdesc, sig_trapped) that are not in scope -- this looks like it
 * was spliced in from a trap-reporting helper.
 */
printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
trapname, (unsignedlong)sig_err,
errdesc, strsignal(sig_trapped)); return 0;
} else { /* * This also implicitly tests UC_STRICT_RESTORE_SS: * We check that these signals set UC_STRICT_RESTORE_SS and, * if UC_STRICT_RESTORE_SS doesn't cause strict behavior, * then we won't get SIGSEGV.
 */
printf("[FAIL]\tDid not get SIGSEGV\n"); return 1;
}
}
/*
 * main() - drive the sigreturn test matrix: valid sigreturns in each CS
 * bitness with 32-bit and 16-bit SS, then deliberately bad IRET frames.
 *
 * NOTE(review): this function is truncated in the extract -- its tail
 * (and closing brace) is missing.  Also, my_cs and my_ss are passed to
 * test_bad_iret() below without the inline asm that upstream uses to
 * initialize them from %cs/%ss, so as written they are read
 * uninitialized -- confirm against the upstream selftest.
 */
int main()
{ int total_nerrs = 0; unsignedshort my_cs, my_ss;
/* Easy cases: return to a 32-bit SS in each possible CS bitness. */
total_nerrs += test_valid_sigreturn(64, false, -1);
total_nerrs += test_valid_sigreturn(32, false, -1);
total_nerrs += test_valid_sigreturn(16, false, -1);
/* * Test easy espfix cases: return to a 16-bit LDT SS in each possible * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant. * * This catches the original missing-espfix-on-64-bit-kernels issue * as well as CVE-2014-8134.
 */
total_nerrs += test_valid_sigreturn(64, true, -1);
total_nerrs += test_valid_sigreturn(32, true, -1);
total_nerrs += test_valid_sigreturn(16, true, -1);
if (gdt_data16_idx) { /* * For performance reasons, Linux skips espfix if SS points * to the GDT. If we were able to allocate a 16-bit SS in * the GDT, see if it leaks parts of the kernel stack pointer. * * This tests for CVE-2014-8133.
 */
total_nerrs += test_valid_sigreturn(64, true,
GDT3(gdt_data16_idx));
total_nerrs += test_valid_sigreturn(32, true,
GDT3(gdt_data16_idx));
total_nerrs += test_valid_sigreturn(16, true,
GDT3(gdt_data16_idx));
}
#ifdef __x86_64__ /* Nasty ABI case: check SS corruption handling. */
sig_corrupt_final_ss = 1;
total_nerrs += test_valid_sigreturn(32, false, -1);
total_nerrs += test_valid_sigreturn(32, true, -1);
sig_corrupt_final_ss = 0; #endif
/* * We're done testing valid sigreturn cases. Now we test states * for which sigreturn itself will succeed but the subsequent * entry to user mode will fail. * * Depending on the failure mode and the kernel bitness, these * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
 */
/*
 * Re-register the fault handlers so the failed entries below are
 * caught by sigtrap() on the alternate signal stack.
 */
clearhandler(SIGTRAP);
sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
sethandler(SIGBUS, sigtrap, SA_ONSTACK);
sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */
/* These fail because SS isn't a data segment, resulting in #GP(SS) */
test_bad_iret(64, my_cs, -1);
test_bad_iret(32, my_cs, -1);
test_bad_iret(16, my_cs, -1);
/* Try to return to a not-present code segment, triggering #NP(SS). */
test_bad_iret(32, my_ss, npcode32_sel);
/* * Try to return to a not-present but otherwise valid data segment. * This will cause IRET to fail with #SS on the espfix stack. This * exercises CVE-2014-9322. * * Note that, if espfix is enabled, 64-bit Linux will lose track * of the actual cause of failure and report #GP(0) instead. * This would be very difficult for Linux to avoid, because * espfix64 causes IRET failures to be promoted to #DF, so the * original exception frame is never pushed onto the stack.
 */
test_bad_iret(32, npdata32_sel, -1);
/* * Try to return to a not-present but otherwise valid data * segment without invoking espfix. Newer kernels don't allow * this to happen in the first place. On older kernels, though, * this can trigger CVE-2014-9322.
 */ if (gdt_npdata32_idx)
test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
/*
 * NOTE(review): the remainder of main() and of the file is missing from
 * this extract.  The lines that followed were unrelated German web-page
 * boilerplate accidentally appended during extraction; preserved here in
 * translation: "The information on this web page was carefully compiled
 * to the best of our knowledge.  However, neither completeness, nor
 * correctness, nor quality of the provided information is guaranteed.
 * Note: the colored syntax display and the measurement are still
 * experimental."
 */