// SPDX-License-Identifier: GPL-2.0 /* * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst) * * There are examples in here of: * * how to set protection keys on memory * * how to set/clear bits in pkey registers (the rights register) * * how to handle SEGV_PKUERR signals and extract pkey-relevant * information from the siginfo * * Things to add: * make sure KSM and KSM COW breaking works * prefault pages in at malloc, or not * protect MPX bounds tables with protection keys? * make sure VMA splitting/merging is working correctly * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks * * Compile like this: * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
*/ #define _GNU_SOURCE #define __SANE_USERSPACE_TYPES__ #include <errno.h> #include <linux/elf.h> #include <linux/futex.h> #include <time.h> #include <sys/time.h> #include <sys/syscall.h> #include <string.h> #include <stdio.h> #include <stdint.h> #include <stdbool.h> #include <signal.h> #include <assert.h> #include <stdlib.h> #include <ucontext.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/wait.h> #include <sys/stat.h> #include <fcntl.h> #include <unistd.h> #include <sys/ptrace.h> #include <setjmp.h>
#include"pkey-helpers.h"
/*
 * Test progress counters. NOTE(review): neither is referenced in the visible
 * part of this file; presumably they are reported by the assert/debug helpers
 * in pkey-helpers.h -- confirm there.
 */
int iteration_nr = 1; int test_nr;
/*
 * shadow_pkey_reg: software copy of the value the pkey register is expected
 * to hold. It is updated whenever this file changes pkey rights and is written
 * back to the hardware register to restore state after a handled fault.
 * dprint_in_signal: cleared at the end of the fault handler; presumably set
 * while the handler runs so the dprintf helpers can adapt -- TODO confirm in
 * pkey-helpers.h.
 */
u64 shadow_pkey_reg; int dprint_in_signal;
/*
 * Load and return the int that 'ptr' points at.
 *
 * The function is noinline and issues a barrier() before the load so the
 * compiler cannot drop or hoist the memory access.
 */
noinline int read_ptr(int *ptr)
{
	int loaded;

	/* Keep GCC from optimizing this away somehow */
	barrier();
	loaded = *ptr;

	return loaded;
}
/*
 * Write the string 'str' into the existing file 'file'.
 *
 * @str:  NUL-terminated payload; written without the terminator.
 * @file: path opened with O_RDWR (the file is not created).
 *
 * A failed open() or a failed/short write() is reported on stderr and
 * terminates the process via exit(__LINE__).
 */
static void cat_into_file(char *str, char *file)
{
	int fd = open(file, O_RDWR);
	size_t len = strlen(str);
	ssize_t ret;

	dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
	/*
	 * these need to be raw because they are called under
	 * pkey_assert()
	 */
	if (fd < 0) {
		/* name the path that could not be opened, not the payload */
		fprintf(stderr, "error opening '%s'\n", file);
		perror("error: ");
		exit(__LINE__);
	}

	ret = write(fd, str, len);
	/* check for failure first so -1 is never compared as an unsigned value */
	if (ret < 0 || (size_t)ret != len) {
		perror("write to file failed");
		fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
		exit(__LINE__);
	}
	close(fd);
}
#if CONTROL_TRACING > 0
/* Set once the "not root" warning has been printed. */
static int warned_tracing;

/*
 * Report whether tracing control may be attempted.
 *
 * Returns 1 when the effective uid is 0. Otherwise returns 0 and prints a
 * warning to stderr, but only on the first such call.
 */
static int tracing_root_ok(void)
{
	if (geteuid() == 0)
		return 1;

	if (!warned_tracing)
		fprintf(stderr, "WARNING: not run as root, "
				"can not do tracing control\n");
	warned_tracing = 1;

	return 0;
}
#endif
/*
 * This attempts to have roughly a page of instructions followed by a few
 * instructions that do a write, and another page of instructions. That
 * way, we are pretty sure that the write is in the second page of
 * instructions and has at least a page of padding behind it.
 *
 * *That* lets us be sure to madvise() away the write instruction, which
 * will then fault, which makes sure that the fault code handles
 * execute-only memory properly.
 *
 * @write_to_me: receives the source line number of the store below, giving
 *               the write an observable effect. Callers in this file pass
 *               the address of a scratch int.
 *
 * The function is aligned to 64K on powerpc64/aarch64 and to PAGE_SIZE
 * elsewhere so that it starts on a page boundary.
 */
#ifdefined(__powerpc64__) || defined(__aarch64__) /* This way, both 4K and 64K alignment are maintained */
__attribute__((__aligned__(65536))) #else
__attribute__((__aligned__(PAGE_SIZE))) #endif staticvoid lots_o_noops_around_write(int *write_to_me)
{
dprintf3("running %s()\n", __func__);
/* first page of padding instructions */
__page_o_noops();
/* Assume this happens in the second page of instructions: */
*write_to_me = __LINE__;
/* pad out by another page: */
__page_o_noops();
dprintf3("%s() done\n", __func__);
}
staticvoid dump_mem(void *dumpme, int len_bytes)
{ char *c = (void *)dumpme; int i;
for (i = 0; i < len_bytes; i += sizeof(u64)) {
u64 *ptr = (u64 *)(c + i);
dprintf1("dump[%03d][@%p]: %016llx\n", i, ptr, *ptr);
}
}
ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
assert(!ret); /* pkey_reg and flags have the same format */
shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights);
dprintf1("%s(%d) shadow: 0x%016llx\n",
__func__, pkey, shadow_pkey_reg);
#ifdefined(__i386__) || defined(__x86_64__) /* arch */ #ifdef __i386__ /* * 32-bit has some extra padding so that userspace can tell whether * the XSTATE header is present in addition to the "legacy" FPU * state. We just assume that it is here.
*/
fpregs += 0x70; #endif/* i386 */
pkey_reg_offset = pkey_reg_xstate_offset();
pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);
/* * If we got a PKEY fault, we *HAVE* to have at least one bit set in * here.
*/
dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset()); if (DEBUG_LEVEL > 4)
dump_mem(pkey_reg_ptr - 128, 256);
pkey_assert(*pkey_reg_ptr); #endif/* arch */
/* * need __read_pkey_reg() version so we do not do shadow_pkey_reg * checking
*/
dprintf1("signal pkey_reg from pkey_reg: %016llx\n",
__read_pkey_reg());
dprintf1("pkey from siginfo: %016llx\n", siginfo_pkey); #ifdefined(__i386__) || defined(__x86_64__) /* arch */
dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr);
*(u64 *)pkey_reg_ptr = 0x00000000;
dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction to continue\n"); #elifdefined(__powerpc64__) /* arch */ /* restore access and let the faulting instruction continue */
pkey_access_allow(siginfo_pkey); #elifdefined(__aarch64__)
aarch64_write_signal_pkey(uctxt, PKEY_REG_ALLOW_ALL); #endif/* arch */
pkey_faults++;
dprintf1("<<<<==================================================\n");
dprint_in_signal = 0;
}
/*sigset_t - signals to block while in the handler */ /* get the old signal mask. */
rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
pkey_assert(rs == 0);
/* call sa_sigaction, not sa_handler*/
newact.sa_flags = SA_SIGINFO;
if (!forkret) { /* in the child */ while (1) {
dprintf1("child sleeping...\n");
sleep(30);
}
} return forkret;
}
/*
 * Allocate one protection key with PKEY_UNRESTRICTED initial rights and
 * mirror the resulting register state in shadow_pkey_reg.
 *
 * Returns the value handed back by sys_pkey_alloc(). The shadow register is
 * only updated when that value is positive.
 */
static int alloc_pkey(void)
{
	unsigned long initial_rights = PKEY_UNRESTRICTED;
	int new_key;

	dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
		 __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);

	new_key = sys_pkey_alloc(0, initial_rights);

	/*
	 * pkey_alloc() sets PKEY register, so we need to reflect it in
	 * shadow_pkey_reg:
	 */
	dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
		 " shadow: 0x%016llx\n",
		 __func__, __LINE__, new_key, __read_pkey_reg(),
		 shadow_pkey_reg);

	if (new_key > 0) {
		/* step 1: wipe both rights bits of the new key */
		shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, new_key,
						~PKEY_MASK);
		dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
			 " shadow: 0x%016llx\n",
			 __func__,
			 __LINE__, new_key, __read_pkey_reg(),
			 shadow_pkey_reg);
		/*
		 * step 2: install the initial rights (the init value uses
		 * the same layout as the pkey register)
		 */
		shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, new_key,
						initial_rights);
	}

	dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
		 " shadow: 0x%016llx\n",
		 __func__, __LINE__, new_key, __read_pkey_reg(),
		 shadow_pkey_reg);
	dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno);

	/* for shadow checking: */
	read_pkey_reg();

	dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
		 " shadow: 0x%016llx\n",
		 __func__, __LINE__, new_key, __read_pkey_reg(),
		 shadow_pkey_reg);

	return new_key;
}
/*
 * I had a bug where pkey bits could be set by mprotect() but
 * not cleared.  This ensures we get lots of random bit sets
 * and clears on the vma and pte pkey bits.
 *
 * Allocates keys until alloc_pkey() fails (at most NR_PKEYS attempts),
 * keeps one of them chosen with rand(), frees the rest and returns the
 * kept key.
 */
static int alloc_random_pkey(void)
{
	int alloced_pkeys[NR_PKEYS];
	int nr_alloced = 0;
	int picked_slot;
	int ret;
	int attempt;
	int slot;

	memset(alloced_pkeys, 0, sizeof(alloced_pkeys));

	/* grab every key we can get and remember each one */
	for (attempt = 0; attempt < NR_PKEYS; attempt++) {
		int new_pkey = alloc_pkey();

		if (new_pkey < 0)
			break;
		alloced_pkeys[nr_alloced] = new_pkey;
		nr_alloced++;
	}

	pkey_assert(nr_alloced > 0);

	/* keep one key picked at random ... */
	picked_slot = rand() % nr_alloced;
	ret = alloced_pkeys[picked_slot];
	/* ... and blank its slot so the loop below leaves it allocated */
	alloced_pkeys[picked_slot] = 0;

	/* hand every other key back to the kernel */
	for (slot = 0; slot < nr_alloced; slot++) {
		int free_ret;

		if (alloced_pkeys[slot]) {
			free_ret = sys_pkey_free(alloced_pkeys[slot]);
			pkey_assert(!free_ret);
		}
	}

	dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
		 " shadow: 0x%016llx\n", __func__,
		 __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);

	return ret;
}
int mprotect_pkey(void *ptr, size_t size, unsignedlong orig_prot, unsignedlong pkey)
{ int nr_iterations = random() % 100; int ret;
while (0) { int rpkey = alloc_random_pkey();
ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
ptr, size, orig_prot, pkey, ret); if (nr_iterations-- < 0) break;
/*
 * One tracked pkey allocation: start address, length in bytes and the
 * protection bits it was recorded with.
 */
struct pkey_malloc_record {
	void *ptr;
	long size;
	int prot;
};
/* Dynamically grown array of records. */
struct pkey_malloc_record *pkey_malloc_records;
/* Record filled in by the most recent record_pkey_malloc() call. */
struct pkey_malloc_record *pkey_last_malloc_record;
/* Number of entries scanned in pkey_malloc_records. */
static long nr_pkey_malloc_records;

/*
 * Remember an allocation so that it can be found again later.
 *
 * @ptr:  start of the allocation.
 * @size: length of the allocation in bytes.
 * @prot: protection bits associated with the allocation.
 *
 * A free slot among the existing records is reused when there is one;
 * otherwise the array is grown to 2n+1 entries with realloc() and the new
 * tail is zeroed. Allocation failure trips pkey_assert().
 *
 * A slot counts as free when its ->ptr is NULL, which is the state the
 * zeroing below leaves new slots in. NOTE(review): this assumes the release
 * path clears ->ptr as well -- confirm against free_pkey_malloc().
 */
void record_pkey_malloc(void *ptr, long size, int prot)
{
	long i;
	struct pkey_malloc_record *rec = NULL;

	/*
	 * find a free record. Testing the element address itself would always
	 * succeed and clobber record 0, so look at the stored pointer.
	 */
	for (i = 0; i < nr_pkey_malloc_records; i++) {
		if (!pkey_malloc_records[i].ptr) {
			rec = &pkey_malloc_records[i];
			break;
		}
	}
	if (!rec) {
		/* every record is full */
		size_t old_nr_records = nr_pkey_malloc_records;
		size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
		size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);

		dprintf2("new_nr_records: %zd\n", new_nr_records);
		dprintf2("new_size: %zd\n", new_size);
		pkey_malloc_records = realloc(pkey_malloc_records, new_size);
		pkey_assert(pkey_malloc_records != NULL);
		rec = &pkey_malloc_records[nr_pkey_malloc_records];
		/*
		 * realloc() does not initialize memory, so zero it from
		 * the first new record all the way to the end.
		 */
		memset(rec, 0, (new_nr_records - old_nr_records) * sizeof(*rec));
	}
	dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
		 (int)(rec - pkey_malloc_records), rec, ptr, size);
	rec->ptr = ptr;
	rec->size = size;
	rec->prot = prot;
	pkey_last_malloc_record = rec;
	nr_pkey_malloc_records++;
}
staticvoid free_pkey_malloc(void *ptr)
{ long i; int ret;
dprintf3("%s(%p)\n", __func__, ptr); for (i = 0; i < nr_pkey_malloc_records; i++) { struct pkey_malloc_record *rec = &pkey_malloc_records[i];
dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
ptr, i, rec, rec->ptr, rec->size); if ((ptr < rec->ptr) ||
(ptr >= rec->ptr + rec->size)) continue;
staticvoid *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
{ int ret; void *ptr;
dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
size, prot, pkey); /* * Guarantee we can fit at least one huge page in the resulting * allocation by allocating space for 2:
*/
size = ALIGN_UP(size, HPAGE_SIZE * 2);
ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
pkey_assert(ptr != (void *)-1);
record_pkey_malloc(ptr, size, prot);
mprotect_pkey(ptr, size, prot, pkey);
/* * Now go make sure that we got the pages and that they * are PMD-level pages. Someone might have made PUD-level * pages the default.
*/
hpagesz_kb = HPAGE_SIZE / 1024;
hpagesz_mb = hpagesz_kb / 1024;
sprintf(buf, SYSFS_FMT_NR_HUGE_PAGES, hpagesz_kb);
fd = open(buf, O_RDONLY); if (fd < 0) {
fprintf(stderr, "opening sysfs %ldM hugetlb config: %s\n",
hpagesz_mb, strerror(errno)); return;
}
/* * For exec-only memory, we do not know the pkey in * advance, so skip this check.
*/ if (pkey != UNKNOWN_PKEY)
pkey_assert(last_si_pkey == pkey);
#ifdefined(__i386__) || defined(__x86_64__) /* arch */ /* * The signal handler shold have cleared out PKEY register to let the * test program continue. We now have to restore it.
*/ if (__read_pkey_reg() != 0) #elifdefined(__aarch64__) if (__read_pkey_reg() != PKEY_REG_ALLOW_ALL) #else if (__read_pkey_reg() != shadow_pkey_reg) #endif/* arch */
pkey_assert(0);
__write_pkey_reg(shadow_pkey_reg);
dprintf1("%s() set pkey_reg=%016llx to restore state after signal " "nuked it\n", __func__, shadow_pkey_reg);
last_pkey_faults = pkey_faults;
last_si_pkey = -1;
}
/*
 * Assert that no pkey fault has been counted since the counters were last
 * synchronised, i.e. pkey_faults still equals last_pkey_faults. When the
 * two differ, 'msg' is logged first so the failing access can be identified,
 * and then pkey_assert() fires.
 */
#define do_not_expect_pkey_fault(msg) do { \ if (last_pkey_faults != pkey_faults) \
dprintf0("unexpected PKey fault: %s\n", msg); \
pkey_assert(last_pkey_faults == pkey_faults); \
} while (0)
for (i = 0; i < nr_test_fds; i++) { if (test_fds[i] < 0) continue;
close(test_fds[i]);
test_fds[i] = -1;
}
nr_test_fds = 0;
}
staticvoid test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
{ int i, err; int max_nr_pkey_allocs; int alloced_pkeys[NR_PKEYS]; int nr_alloced = 0; long size;
pkey_assert(pkey_last_malloc_record);
size = pkey_last_malloc_record->size; /* * This is a bit of a hack. But mprotect() requires * huge-page-aligned sizes when operating on hugetlbfs. * So, make sure that we use something that's a multiple * of a huge page when we can.
*/ if (size >= HPAGE_SIZE)
size = HPAGE_SIZE;
/* allocate every possible key and make sure key-0 never got allocated */
max_nr_pkey_allocs = NR_PKEYS; for (i = 0; i < max_nr_pkey_allocs; i++) { int new_pkey = alloc_pkey();
pkey_assert(new_pkey != 0);
if (new_pkey < 0) break;
alloced_pkeys[nr_alloced++] = new_pkey;
} /* free all the allocated keys */ for (i = 0; i < nr_alloced; i++) { int free_ret;
if (!alloced_pkeys[i]) continue;
free_ret = sys_pkey_free(alloced_pkeys[i]);
pkey_assert(!free_ret);
}
/*
 * Write to '*ptr' once while access is still permitted, then deny access for
 * 'pkey' and write again. The second write is the one handed to
 * expected_pkey_fault(), which presumably verifies that a fault for 'pkey'
 * was taken (its full body is not visible here -- confirm).
 *
 * @ptr:  memory covered by 'pkey'.
 * @pkey: protection key whose access rights are revoked.
 */
staticvoid test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
u16 pkey)
{
/* first touch, done before any rights are removed */
*ptr = __LINE__;
dprintf1("disabling access; after accessing the page, " " to PKEY[%02d], doing write\n", pkey);
pkey_access_deny(pkey);
/* same store again, now with access denied */
*ptr = __LINE__;
expected_pkey_fault(pkey);
}
/*
 * Deny access for 'pkey', then ask the kernel to read() one byte from a test
 * file descriptor into the protected buffer 'ptr'.
 *
 * The only check is that read() did not return 0.
 * NOTE(review): a successful 1-byte read would also satisfy this assertion;
 * confirm whether "ret < 0" is what is really meant.
 */
static void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
{
	int src_fd = get_test_read_fd();
	int ret;

	dprintf1("disabling access to PKEY[%02d], "
		 "having kernel read() to buffer\n", pkey);
	pkey_access_deny(pkey);

	/* the kernel, not user code, performs the store into 'ptr' */
	ret = read(src_fd, ptr, 1);
	dprintf1("read ret: %d\n", ret);

	pkey_assert(ret);
}
/*
 * Deny writes for 'pkey', then ask the kernel to read() 100 bytes from a test
 * file descriptor into the protected buffer 'ptr'.
 *
 * A negative result is reported with perror() when DEBUG_LEVEL > 0; the only
 * hard check is that read() did not return 0.
 */
static void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
{
	int src_fd = get_test_read_fd();
	int ret;

	pkey_write_deny(pkey);

	/* the kernel, not user code, performs the store into 'ptr' */
	ret = read(src_fd, ptr, 100);
	dprintf1("read ret: %d\n", ret);

	if (ret < 0) {
		if (DEBUG_LEVEL > 0)
			perror("verbose read result (OK for this to be bad)");
	}

	pkey_assert(ret);
}
staticvoid test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
{ int pipe_ret, vmsplice_ret; struct iovec iov; int pipe_fds[2];
/* Assumes that all pkeys other than 'pkey' are unallocated */ staticvoid test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
{ int err; int i;
/* Note: 0 is the default pkey, so don't mess with it */ for (i = 1; i < NR_PKEYS; i++) { if (pkey == i) continue;
/* Assumes that all pkeys other than 'pkey' are unallocated */ staticvoid test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
{ int err; int bad_pkey = NR_PKEYS+99;
if (!forkret) { /* in the child */ return;
} exit(0);
}
/*
 * Keep allocating protection keys for NR_PKEYS*3 rounds, recording every
 * allocation that is not an ENOSPC failure, switch to a forked child
 * part-way through, and finally free everything that was recorded.
 *
 * Assumes that all pkeys other than 'pkey' are unallocated.
 *
 * @ptr:  unused here.
 * @pkey: unused here; the key the caller already holds.
 */
static void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
{
	int allocated_pkeys[NR_PKEYS] = {0};
	int nr_allocated_pkeys = 0;
	int err;
	int i;

	for (i = 0; i < NR_PKEYS*3; i++) {
		int new_pkey;

		dprintf1("%s() alloc loop: %d\n", __func__, i);
		new_pkey = alloc_pkey();
		/*
		 * Log the allocation result. 'err' has not been assigned at
		 * this point, so printing it would read an indeterminate
		 * value.
		 */
		dprintf4("%s()::%d, err: %d pkey_reg: 0x%016llx"
			 " shadow: 0x%016llx\n",
			 __func__, __LINE__, new_pkey, __read_pkey_reg(),
			 shadow_pkey_reg);
		read_pkey_reg(); /* for shadow checking */
		dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
		if ((new_pkey == -1) && (errno == ENOSPC)) {
			dprintf2("%s() failed to allocate pkey after %d tries\n",
				 __func__, nr_allocated_pkeys);
		} else {
			/*
			 * Ensure the number of successes never
			 * exceeds the number of keys supported
			 * in the hardware.
			 */
			pkey_assert(nr_allocated_pkeys < NR_PKEYS);
			allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
		}

		/*
		 * Make sure that allocation state is properly
		 * preserved across fork().
		 */
		if (i == NR_PKEYS*2)
			become_child();
	}

	dprintf3("%s()::%d\n", __func__, __LINE__);

	/*
	 * On x86:
	 * There are 16 pkeys supported in hardware.  Three are
	 * allocated by the time we get here:
	 *   1. The default key (0)
	 *   2. One possibly consumed by an execute-only mapping.
	 *   3. One allocated by the test code and passed in via
	 *      'pkey' to this function.
	 * Ensure that we can allocate at least another 13 (16-3).
	 *
	 * On powerpc:
	 * There are either 5, 28, 29 or 32 pkeys supported in
	 * hardware depending on the page size (4K or 64K) and
	 * platform (powernv or powervm). Four are allocated by
	 * the time we get here. These include pkey-0, pkey-1,
	 * exec-only pkey and the one allocated by the test code.
	 * Ensure that we can allocate the remaining.
	 *
	 * NOTE(review): 'i' always equals NR_PKEYS*3 here, so this check
	 * cannot fail; nr_allocated_pkeys may be what was intended.
	 */
	pkey_assert(i >= (NR_PKEYS - get_arch_reserved_keys() - 1));

	for (i = 0; i < nr_allocated_pkeys; i++) {
		err = sys_pkey_free(allocated_pkeys[i]);
		pkey_assert(!err);
		read_pkey_reg(); /* for shadow checking */
	}
}
/* * All keys should be allocated and set to allow reads and * writes, so the register should be all 0. If not, just * skip the test.
*/ if (read_pkey_reg()) return;
/* * Just allocate an absurd amount of memory rather than * doing the XSAVE size enumeration dance.
*/
buf = mmap(NULL, 1*MB, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
/* These __builtins require compiling with -mxsave */
/* XSAVE to build a valid buffer: */
__builtin_ia32_xsave(buf, XSTATE_PKEY); /* Clear XSTATE_BV[PKRU]: */
buf[XSTATE_BV_OFFSET/sizeof(u64)] &= ~XSTATE_PKEY; /* XRSTOR will likely get PKRU back to the init state: */
__builtin_ia32_xrstor(buf, XSTATE_PKEY);
munmap(buf, 1*MB); #endif
}
/*
 * This is mostly useless on ppc for now.  But it will not
 * hurt anything and should give some better coverage as
 * a long-running test that continually checks the pkey
 * register.
 *
 * Tries NR_PKEYS allocations, forces the pkey register back to its init
 * state, re-reads the register a million times, and then frees every key
 * that was obtained.
 */
static void test_pkey_init_state(int *ptr, u16 pkey)
{
	int allocated_pkeys[NR_PKEYS] = {0};
	int nr_allocated_pkeys = 0;
	int err;
	int round;
	int slot;

	/* failed attempts are simply not recorded */
	for (round = 0; round < NR_PKEYS; round++) {
		int new_pkey = alloc_pkey();

		if (new_pkey >= 0)
			allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
	}

	dprintf3("%s()::%d\n", __func__, __LINE__);

	arch_force_pkey_reg_init();

	/*
	 * Loop for a bit, hoping to exercise the kernel
	 * context switch code.
	 */
	round = 0;
	while (round < 1000000) {
		read_pkey_reg();
		round++;
	}

	for (slot = 0; slot < nr_allocated_pkeys; slot++) {
		err = sys_pkey_free(allocated_pkeys[slot]);
		pkey_assert(!err);
		read_pkey_reg(); /* for shadow checking */
	}
}
/*
 * pkey 0 is special.  It is allocated by default, so you do not
 * have to call pkey_alloc() to use it first.  Make sure that it
 * is usable.
 *
 * @ptr:  start of the most recently recorded allocation.
 * @pkey: key to put back on the range after pkey 0 has been applied.
 */
static void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
{
	struct pkey_malloc_record *last;
	long size;
	int prot;

	assert(pkey_last_malloc_record);
	last = pkey_last_malloc_record;

	prot = last->prot;
	size = last->size;
	/*
	 * This is a bit of a hack.  But mprotect() requires
	 * huge-page-aligned sizes when operating on hugetlbfs.
	 * So, cap the length at exactly one huge page when the
	 * allocation is at least that large.
	 */
	if (size >= HPAGE_SIZE)
		size = HPAGE_SIZE;

	/* Use pkey 0 */
	mprotect_pkey(ptr, size, prot, 0);

	/* Make sure that we can set it back to the original pkey. */
	mprotect_pkey(ptr, size, prot, pkey);
}
staticvoid test_ptrace_of_child(int *ptr, u16 pkey)
{
__attribute__((__unused__)) int peek_result;
pid_t child_pid; void *ignored = 0; long ret; int status; /* * This is the "control" for our little expermient. Make sure * we can always access it when ptracing.
*/ int *plain_ptr_unaligned = malloc(HPAGE_SIZE); int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
/* * Fork a child which is an exact copy of this process, of course. * That means we can do all of our tests via ptrace() and then plain * memory access and ensure they work differently.
*/
child_pid = fork_lazy_child();
dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); if (ret)
perror("attach");
dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
pkey_assert(ret != -1);
ret = waitpid(child_pid, &status, WUNTRACED); if ((ret != child_pid) || !(WIFSTOPPED(status))) {
fprintf(stderr, "weird waitpid result %ld stat %x\n",
ret, status);
pkey_assert(0);
}
dprintf2("waitpid ret: %ld\n", ret);
dprintf2("waitpid status: %d\n", status);
pkey_access_deny(pkey);
pkey_write_deny(pkey);
/* Write access, untested for now: ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); pkey_assert(ret != -1); dprintf1("poke at %p: %ld\n", peek_at, ret);
*/
/* * Try to access the pkey-protected "ptr" via ptrace:
*/
ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); /* expect it to work, without an error: */
pkey_assert(ret != -1); /* Now access from the current task, and expect an exception: */
peek_result = read_ptr(ptr);
expected_pkey_fault(pkey);
/* * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
*/
ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); /* expect it to work, without an error: */
pkey_assert(ret != -1); /* Now access from the current task, and expect NO exception: */
peek_result = read_ptr(plain_ptr);
do_not_expect_pkey_fault("read plain pointer after ptrace");
ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
pkey_assert(ret != -1);
ret = kill(child_pid, SIGKILL);
pkey_assert(ret != -1);
p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); /* lots_o_noops_around_write should be page-aligned already */
assert(p1 == &lots_o_noops_around_write);
/* Point 'p1' at the *second* page of the function: */
p1 += PAGE_SIZE;
/* * Try to ensure we fault this in on next touch to ensure * we get an instruction fault as opposed to a data one
*/
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
return p1;
}
/*
 * Make one page of instructions execute-only under 'pkey', deny access for
 * that key, and check that running the code takes no pkey fault; the page is
 * then passed to expect_fault_on_read_execonly_key().
 *
 * @ptr:  not used in the visible body.
 * @pkey: protection key applied to the instruction page.
 *
 * NOTE(review): in the text visible here 'p1' is never assigned before its
 * first use and 'ptr_contents' is never used; statements that set up 'p1'
 * appear to be missing from this copy of the file -- restore them from the
 * original before building.
 */
staticvoid test_executing_on_unreadable_memory(int *ptr, u16 pkey)
{ void *p1; int scratch; int ptr_contents; int ret;
/* execute-only mapping tagged with the caller's key */
ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
pkey_assert(!ret);
pkey_access_deny(pkey);
dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
/* * Make sure this is an *instruction* fault
*/
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
/* executing must still work even though access is denied */
lots_o_noops_around_write(&scratch);
do_not_expect_pkey_fault("executing on PROT_EXEC memory");
expect_fault_on_read_execonly_key(p1, pkey);
// Reset back to PROT_EXEC | PROT_READ for architectures that support // non-PKEY execute-only permissions.
ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC | PROT_READ, (u64)pkey);
pkey_assert(!ret);
}
/*
 * Same idea as the explicit-pkey execute-only test, but the page is made
 * PROT_EXEC with a plain mprotect(), so whichever key ends up on the mapping
 * is not known to the test (UNKNOWN_PKEY). Afterwards the page is moved to
 * PROT_NONE and then PROT_READ|PROT_EXEC, and a plain read must not fault.
 *
 * @ptr, @pkey: not used in the visible body.
 *
 * NOTE(review): in the text visible here 'p1' is never assigned before its
 * first use; statements that set it up appear to be missing from this copy
 * of the file -- restore them from the original before building.
 */
staticvoid test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
{ void *p1; int scratch; int ptr_contents; int ret;
/* Use a *normal* mprotect(), not mprotect_pkey(): */
ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
pkey_assert(!ret);
/* * Reset the shadow, assuming that the above mprotect() * correctly changed PKRU, but to an unknown value since * the actual allocated pkey is unknown.
*/
shadow_pkey_reg = __read_pkey_reg();
dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
/* Make sure this is an *instruction* fault */
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
/* executing the page must not raise a pkey fault */
lots_o_noops_around_write(&scratch);
do_not_expect_pkey_fault("executing on PROT_EXEC memory");
expect_fault_on_read_execonly_key(p1, UNKNOWN_PKEY);
/* * Put the memory back to non-PROT_EXEC. Should clear the * exec-only pkey off the VMA and allow it to be readable * again. Go to PROT_NONE first to check for a kernel bug * that did not clear the pkey when doing PROT_NONE.
*/
ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
pkey_assert(!ret);
ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
pkey_assert(!ret);
/* the page is readable again, so this load must succeed quietly */
ptr_contents = read_ptr(p1);
do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
}
ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
pkey_assert(ret == 0);
/* Test that the modification is visible in ptrace before any execution */
memset(xsave, 0xCC, xsave_size);
ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
pkey_assert(ret == 0);
pkey_assert(*pkey_register == new_pkru);
/* Execute the tracee */
ret = ptrace(PTRACE_CONT, child, 0, 0);
pkey_assert(ret == 0);
/* Test that the tracee saw the PKRU value change */
pkey_assert(child == waitpid(child, &status, 0));
dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
/* Test that the modification is visible in ptrace after execution */
memset(xsave, 0xCC, xsave_size);
ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
pkey_assert(ret == 0);
pkey_assert(*pkey_register == new_pkru);
/* Clear the PKRU bit from XSTATE_BV */
xstate_bv = (u64 *)(xsave + 512);
*xstate_bv &= ~(1 << 9);
ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
pkey_assert(ret == 0);
/* Test that the modification is visible in ptrace before any execution */
memset(xsave, 0xCC, xsave_size);
ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
pkey_assert(ret == 0);
pkey_assert(*pkey_register == 0);
ret = ptrace(PTRACE_CONT, child, 0, 0);
pkey_assert(ret == 0);
/* Test that the tracee saw the PKRU value go to 0 */
pkey_assert(child == waitpid(child, &status, 0));
dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
/* Test that the modification is visible in ptrace after execution */
memset(xsave, 0xCC, xsave_size);
ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
pkey_assert(ret == 0);
pkey_assert(*pkey_register == 0);
ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_ARM_POE, &iov);
pkey_assert(ret == 0);
/* Test that the modification is visible in ptrace before any execution */
memset(&trace_pkey, 0, sizeof(trace_pkey));
ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
pkey_assert(ret == 0);
pkey_assert(trace_pkey == new_pkey);
/* Execute the tracee */
ret = ptrace(PTRACE_CONT, child, 0, 0);
pkey_assert(ret == 0);
/* Test that the tracee saw the pkey register value change */
pkey_assert(child == waitpid(child, &status, 0));
dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
/* Test that the modification is visible in ptrace after execution */
memset(&trace_pkey, 0, sizeof(trace_pkey));
ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
pkey_assert(ret == 0);
pkey_assert(trace_pkey == new_pkey);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.