// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. * * Test code for seccomp bpf.
*/
#define _GNU_SOURCE #include <sys/types.h>
/* * glibc 2.26 and later have SIGSYS in siginfo_t. Before that, * we need to use the kernel's siginfo.h file and trick glibc * into accepting it.
*/ #if !__GLIBC_PREREQ(2, 26) # include <asm/siginfo.h> # define __have_siginfo_t 1 # define __have_sigval_t 1 # define __have_sigevent_t 1 #endif
#ifndef SECCOMP_IOCTL_NOTIF_ADDFD /* On success, the return value is the remote process's added fd number */ #define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ struct seccomp_notif_addfd)
/*
 * filecmp() wraps __filecmp() in a macro so that TH_LOG reports the actual
 * location filecmp() is used. A negative result with errno == ENOSYS is
 * logged and turned into 0; every other result is passed through unchanged.
 */
#define filecmp(pid1, pid2, fd1, fd2)	({				\
	int _ret = __filecmp(pid1, pid2, fd1, fd2);			\
									\
	if (_ret < 0 && errno == ENOSYS) {				\
		TH_LOG("kcmp() syscall missing (test is less accurate)");\
		_ret = 0;						\
	}								\
	_ret; })
/*
 * Compare fd 1 of this process with itself through __filecmp() and expect
 * a result of 0. A non-zero result with errno == ENOSYS skips the test.
 */
TEST(kcmp)
{
	int ret;

	ret = __filecmp(getpid(), getpid(), 1, 1);
	/*
	 * Look at errno straight after the call and decide on skipping
	 * before any expectation runs, so a missing kcmp() is reported
	 * only as a skip and not also as a failed expectation.
	 */
	if (ret != 0 && errno == ENOSYS)
		SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");

	EXPECT_EQ(ret, 0);
}
/*
 * Switch to SECCOMP_MODE_STRICT through prctl() and, once that succeeded,
 * leave via a direct exit syscall with status 0.
 */
TEST(mode_strict_support)
{
	long ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);

	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP");
	}

	syscall(__NR_exit, 0);
}
/*
 * Enter SECCOMP_MODE_STRICT and then issue a raw prctl syscall. The test is
 * declared with TEST_SIGNAL(..., SIGKILL); the expectation placed after the
 * syscall is there only to flag the case where execution continues.
 */
TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
{
	long ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);

	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP");
	}

	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
		NULL, NULL, NULL);

	EXPECT_FALSE(true) {
		TH_LOG("Unreachable!");
	}
}
/*
 * Only checks that prctl(PR_SET_NO_NEW_PRIVS) is accepted.
 * Note! This doesn't test no new privs behavior.
 */
TEST(no_new_privs_support)
{
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	EXPECT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}
}
/*
 * Tests kernel support for filter mode by handing prctl() a NULL filter
 * program and checking for a copy_from_user() fault: the call has to fail
 * with -1 and errno set to EFAULT.
 */
TEST(mode_filter_support)
{
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);

	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EFAULT, errno) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
	}
}
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
prog.filter = filter;
prog.len = count;
/* Too many filter instructions in a single filter. */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
ASSERT_NE(0, ret) {
TH_LOG("Installing %d insn filter was allowed", prog.len);
}
/* One less is okay, though. */
prog.len -= 1;
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
}
}
/*
 * Install a one-instruction filter, then keep stacking filters of
 * BPF_MAXINSNS instructions each. Within MAX_INSNS_PER_PATH attempts one
 * of the installs has to be refused.
 */
TEST(filter_chain_limits)
{
	int i;
	int count = BPF_MAXINSNS;
	struct sock_filter allow[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	/* Backing storage for the program handed to the kernel. */
	struct sock_filter filter[BPF_MAXINSNS];
	struct sock_fprog prog = { };
	long ret;

	/*
	 * prog.filter must point at initialized instructions: make every
	 * slot the same RET_ALLOW statement.
	 */
	for (i = 0; i < count; i++)
		filter[i] = allow[0];

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	prog.filter = filter;
	prog.len = 1;

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	prog.len = count;

	/* Too many total filter instructions. */
	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
		if (ret != 0)
			break;
	}
	ASSERT_NE(0, ret) {
		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
		       i, count, i * (count + 4));
	}
}
/* * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS * flag cannot be downgraded by a new filter.
*/ if (kill_how == KILL_PROCESS)
ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
/* Start a thread that will exit immediately. */
ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
ASSERT_EQ(0, pthread_join(thread, &status));
ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsignedlong)status);
/* Start a thread that will die immediately. */
ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
ASSERT_EQ(0, pthread_join(thread, &status));
ASSERT_NE(SIBLING_EXIT_FAILURE, (unsignedlong)status);
/* * If we get here, only the spawned thread died. Let the parent know * the whole process didn't die (i.e. this thread, the spawner, * stayed running).
*/ exit(42);
}
/* If the entire process was killed, we'll see SIGSYS. */
EXPECT_TRUE(WIFSIGNALED(status)) {
TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
}
ASSERT_EQ(SIGSYS, WTERMSIG(status));
}
/* Make sure basic errno values are correctly passed through a filter. */
TEST(ERRNO_valid)
{
ERRNO_FILTER(valid, E2BIG); long ret;
pid_t parent = getppid();
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
ASSERT_EQ(0, ret);
/*
 * Make sure an errno of zero is correctly handled by the arch code: with
 * the ERRNO_FILTER(zero, 0) program installed, getppid still reports the
 * parent and read(-1, NULL, 0) is expected to return 0.
 */
TEST(ERRNO_zero)
{
	ERRNO_FILTER(zero, 0);
	pid_t parent = getppid();
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* "errno" of 0 is ok. */
	EXPECT_EQ(0, read(-1, NULL, 0));
}
/* * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller. * This tests that the errno value gets capped correctly, fixed by * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
*/
TEST(ERRNO_capped)
{
ERRNO_FILTER(capped, 4096); long ret;
pid_t parent = getppid();
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
ASSERT_EQ(0, ret);
/* * Filters are processed in reverse order: last applied is executed first. * Since only the SECCOMP_RET_ACTION mask is tested for return values, the * SECCOMP_RET_DATA mask results will follow the most recently applied * matching filter return (and not the lowest or highest value).
*/
TEST(ERRNO_order)
{
ERRNO_FILTER(first, 11);
ERRNO_FILTER(second, 13);
ERRNO_FILTER(third, 12); long ret;
pid_t parent = getppid();
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
ASSERT_EQ(0, ret); /* Should work just fine. */
res = syscall(__NR_getppid);
EXPECT_EQ(parent, res); /* getpid() should never return. */
res = syscall(__NR_getpid);
EXPECT_EQ(0, res);
}
TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
{
pid_t parent; long ret;
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
ASSERT_EQ(0, ret);
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
ASSERT_EQ(0, ret); /* Should work just fine. */
EXPECT_EQ(parent, syscall(__NR_getppid)); /* Should also work just fine */
EXPECT_EQ(mypid, syscall(__NR_getpid));
}
/*
 * Install the fixture's log filter followed by its allow filter and check
 * that getppid and getpid still report the real parent and own pid.
 */
TEST_F(precedence, log_is_fifth_in_any_order)
{
	pid_t mypid, parent;
	long ret;

	/* Record the expected answers before any filter is in place. */
	mypid = getpid();
	parent = getppid();

	/*
	 * Set no_new_privs before installing filters, as the other
	 * filter-installing tests in this file do.
	 */
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* Should also work just fine */
	EXPECT_EQ(mypid, syscall(__NR_getpid));
}
ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
EXPECT_EQ(0, ret); /* If this fails, don't try to recover. */
ASSERT_EQ(0x1001, msg) {
kill(tracee, SIGKILL);
} /* * Poke in the message. * Registers are not touched to try to keep this relatively arch * agnostic.
*/
ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
EXPECT_EQ(0, ret);
}
/*
 * Per-architecture register access. Every branch defines ARCH_REGS plus
 * SYSCALL_NUM(), and either SYSCALL_RET() or its own SYSCALL_RET_SET().
 * Some branches also supply their own SYSCALL_NUM_SET().
 */
#if defined(__x86_64__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).orig_rax
# define SYSCALL_RET(_regs)	(_regs).rax
#elif defined(__i386__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).orig_eax
# define SYSCALL_RET(_regs)	(_regs).eax
#elif defined(__arm__)
# define ARCH_REGS		struct pt_regs
# define SYSCALL_NUM(_regs)	(_regs).ARM_r7
# ifndef PTRACE_SET_SYSCALL
#  define PTRACE_SET_SYSCALL	23
# endif
# define SYSCALL_NUM_SET(_regs, _nr)	\
		EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
# define SYSCALL_RET(_regs)	(_regs).ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS		struct user_pt_regs
# define SYSCALL_NUM(_regs)	(_regs).regs[8]
# ifndef NT_ARM_SYSTEM_CALL
#  define NT_ARM_SYSTEM_CALL	0x404
# endif
# define SYSCALL_NUM_SET(_regs, _nr)				\
	do {							\
		struct iovec __v;				\
		typeof(_nr) __nr = (_nr);			\
		__v.iov_base = &__nr;				\
		__v.iov_len = sizeof(__nr);			\
		EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee,	\
				    NT_ARM_SYSTEM_CALL, &__v));	\
	} while (0)
# define SYSCALL_RET(_regs)	(_regs).regs[0]
#elif defined(__loongarch__)
# define ARCH_REGS		struct user_pt_regs
# define SYSCALL_NUM(_regs)	(_regs).regs[11]
# define SYSCALL_RET(_regs)	(_regs).regs[4]
#elif defined(__riscv) && __riscv_xlen == 64
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).a7
# define SYSCALL_RET(_regs)	(_regs).a0
#elif defined(__csky__)
# define ARCH_REGS		struct pt_regs
# ifdef __CSKYABIV2__
#  define SYSCALL_NUM(_regs)	(_regs).regs[3]
# else
#  define SYSCALL_NUM(_regs)	(_regs).regs[9]
# endif
# define SYSCALL_RET(_regs)	(_regs).a0
#elif defined(__hppa__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).gr[20]
# define SYSCALL_RET(_regs)	(_regs).gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS		struct pt_regs
# define SYSCALL_NUM(_regs)	(_regs).gpr[0]
# define SYSCALL_RET(_regs)	(_regs).gpr[3]
/*
 * Two cases for storing a return value:
 *  - (trap & 0xfff0) == 0x3000: scv 0 system call uses -ve result
 *    for error, so no need to adjust.
 *  - otherwise: a syscall error is signaled by the CR0 SO bit and the
 *    code is stored as a positive value.
 */
# define SYSCALL_RET_SET(_regs, _val)				\
	do {							\
		typeof(_val) _result = (_val);			\
		if ((_regs.trap & 0xfff0) == 0x3000) {		\
			SYSCALL_RET(_regs) = _result;		\
		} else {					\
			if (_result < 0) {			\
				SYSCALL_RET(_regs) = -_result;	\
				(_regs).ccr |= 0x10000000;	\
			} else {				\
				SYSCALL_RET(_regs) = _result;	\
				(_regs).ccr &= ~0x10000000;	\
			}					\
		}						\
	} while (0)
# define SYSCALL_RET_SET_ON_PTRACE_EXIT
#elif defined(__s390__)
# define ARCH_REGS		s390_regs
# define SYSCALL_NUM(_regs)	(_regs).gprs[2]
# define SYSCALL_RET_SET(_regs, _val)			\
		TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__mips__)
# include <asm/unistd_nr_n32.h>
# include <asm/unistd_nr_n64.h>
# include <asm/unistd_nr_o32.h>
# define ARCH_REGS		struct pt_regs
/* The number is read from regs[4] when regs[2] holds __NR_O32_Linux. */
# define SYSCALL_NUM(_regs)				\
	({						\
		typeof((_regs).regs[2]) _nr;		\
		if ((_regs).regs[2] == __NR_O32_Linux)	\
			_nr = (_regs).regs[4];		\
		else					\
			_nr = (_regs).regs[2];		\
		_nr;					\
	})
# define SYSCALL_NUM_SET(_regs, _nr)			\
	do {						\
		if ((_regs).regs[2] == __NR_O32_Linux)	\
			(_regs).regs[4] = _nr;		\
		else					\
			(_regs).regs[2] = _nr;		\
	} while (0)
# define SYSCALL_RET_SET(_regs, _val)			\
		TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__xtensa__)
# define ARCH_REGS		struct user_pt_regs
# define SYSCALL_NUM(_regs)	(_regs).syscall
/*
 * On xtensa syscall return value is in the register
 * a2 of the current window which is not fixed.
 */
# define SYSCALL_RET(_regs)	(_regs).a[(_regs).windowbase * 4 + 2]
#elif defined(__sh__)
# define ARCH_REGS		struct pt_regs
# define SYSCALL_NUM(_regs)	(_regs).regs[3]
# define SYSCALL_RET(_regs)	(_regs).regs[0]
#elif defined(__mc68000__)
# define ARCH_REGS		struct user_regs_struct
# define SYSCALL_NUM(_regs)	(_regs).orig_d0
# define SYSCALL_RET(_regs)	(_regs).d0
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
/* * Most architectures can change the syscall by just updating the * associated register. This is the default if not defined above.
*/ #ifndef SYSCALL_NUM_SET # define SYSCALL_NUM_SET(_regs, _nr) \ do { \
SYSCALL_NUM(_regs) = (_nr); \
} while (0) #endif /* * Most architectures can change the syscall return value by just * writing to the SYSCALL_RET register. This is the default if not * defined above. If an architecture cannot set the return value * (for example when the syscall and return value register is * shared), report it with TH_LOG() in an arch-specific definition * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
*/ #if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET) # error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch" #endif #ifndef SYSCALL_RET_SET # define SYSCALL_RET_SET(_regs, _val) \ do { \
SYSCALL_RET(_regs) = (_val); \
} while (0) #endif
/*
 * EXPECT_SYSCALL_RETURN(val, action): check the outcome of @action against
 * the faked return value @val.
 *
 * When the syscall return can't be changed (SYSCALL_RET is undefined), the
 * check is stubbed out to expect -1 from @action. Otherwise errno is
 * cleared first; a negative @val expects @action to return -1 with errno
 * equal to -(val), and any other @val expects @action to return @val.
 */
#ifndef SYSCALL_RET
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)		\
	do {						\
		errno = 0;				\
		/* Parenthesized so any expression is safe as @val. */ \
		if ((val) < 0) {			\
			EXPECT_EQ(-1, action);		\
			EXPECT_EQ(-(val), errno);	\
		} else {				\
			EXPECT_EQ((val), action);	\
		}					\
	} while (0)
#endif
/*
 * Which values get set on syscall entry during ptrace. The syscall number
 * is always set on entry. The return value is set on entry unless the
 * architecture defined SYSCALL_RET_SET_ON_PTRACE_EXIT: some architectures
 * (e.g. powerpc) can only set syscall return values on syscall exit.
 */
const bool ptrace_entry_set_syscall_nr = true;
#ifdef SYSCALL_RET_SET_ON_PTRACE_EXIT
const bool ptrace_entry_set_syscall_ret = false;
#else
const bool ptrace_entry_set_syscall_ret = true;
#endif
/* Flush any register changes made. */ if (memcmp(&orig, ®s, sizeof(orig)) != 0)
EXPECT_EQ(0, ARCH_SETREGS(regs));
}
/*
 * Change only syscall number: forwards the address of @syscall to
 * __change_syscall() and passes NULL as its last argument (presumably the
 * return-value slot -- confirm against __change_syscall()).
 */
void change_syscall_nr(struct __test_metadata *_metadata,
		       pid_t tracee, long syscall)
{
	long *nr = &syscall;

	__change_syscall(_metadata, tracee, nr, NULL);
}
/* Change syscall return value (and set syscall number to -1). */ void change_syscall_ret(struct __test_metadata *_metadata,
pid_t tracee, long ret)
{ long syscall = -1;
/* * The traditional way to tell PTRACE_SYSCALL entry/exit * is by counting.
*/
entry = !entry;
/* Make sure we got an appropriate message. */
ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
EXPECT_EQ(0, ret);
EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
/* * Some architectures only support setting return values during * syscall exit under ptrace, and on exit the syscall number may * no longer be available. Therefore, save the initial syscall * number here, so it can be examined during both entry and exit * phases.
*/ if (entry)
self->syscall_nr = get_syscall(_metadata, tracee);
/* * Depending on the architecture's syscall setting abilities, we * pick which things to set during this phase (entry or exit).
*/ if (entry == ptrace_entry_set_syscall_nr)
syscall_nr = &syscall_nr_val; if (entry == ptrace_entry_set_syscall_ret)
syscall_ret = &syscall_ret_val;
/* Now handle the actual rewriting cases. */ switch (self->syscall_nr) { case __NR_getpid:
syscall_nr_val = __NR_getppid; /* Never change syscall return for this case. */
syscall_ret = NULL; break; case __NR_gettid:
syscall_nr_val = -1;
syscall_ret_val = 45000; break; case __NR_openat:
syscall_nr_val = -1;
syscall_ret_val = -ESRCH; break; default: /* Unhandled, do nothing. */ return;
}
/* Per-variant parameters for the TRACE_syscall fixture. */
FIXTURE_VARIANT(TRACE_syscall) {
	/*
	 * All of the SECCOMP_RET_TRACE behaviors can be tested with either
	 * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
	 * This indicates if we should use SECCOMP_RET_TRACE (false), or
	 * ptrace (true).
	 */
	bool use_ptrace;
};
/* Prepare some testable syscall results. */
self->mytid = syscall(__NR_gettid);
ASSERT_GT(self->mytid, 0);
ASSERT_NE(self->mytid, 1) {
TH_LOG("Running this test as init is not supported. :)");
}
/*
 * Invalid negative syscall numbers (-1 and -101) must each fail with -1
 * and errno set to ENOSYS. Skipped when built for __arm__.
 */
TEST(negative_ENOSYS)
{
#ifdef __arm__
	SKIP(return, "arm32 does not support calling syscall -1");
#endif

	/*
	 * There should be no difference between an "internal" skip
	 * and userspace asking for syscall "-1".
	 */
	errno = 0;
	EXPECT_EQ(-1, syscall(-1));
	EXPECT_EQ(errno, ENOSYS);

	/* And no difference for "still not valid but not -1". */
	errno = 0;
	EXPECT_EQ(-1, syscall(-101));
	EXPECT_EQ(errno, ENOSYS);
}
/*
 * Baseline: getppid is issued twice and has to report the fixture's
 * recorded parent, never the fixture's recorded own pid.
 */
TEST_F(TRACE_syscall, syscall_allowed)
{
	/* getppid works as expected (no changes). */
	EXPECT_EQ(self->parent, syscall(__NR_getppid));
	EXPECT_NE(self->mypid, syscall(__NR_getppid));
}
/*
 * getpid is issued twice; each call is expected to yield the fixture's
 * recorded parent rather than the fixture's recorded own pid.
 */
TEST_F(TRACE_syscall, syscall_redirected)
{
	/* getpid has been redirected to getppid as expected. */
	EXPECT_EQ(self->parent, syscall(__NR_getpid));
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
/* An openat syscall issued with no arguments is expected to report -ESRCH. */
TEST_F(TRACE_syscall, syscall_errno)
{
	/* Tracer should skip the open syscall, resulting in ESRCH. */
	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}
/* A gettid syscall is expected to report the faked value 45000. */
TEST_F(TRACE_syscall, syscall_faked)
{
	/* Tracer skips the gettid syscall and stores an altered return value. */
	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
/* Reject insane operation. */
ret = seccomp(-1, 0, &prog);
ASSERT_NE(ENOSYS, errno) {
TH_LOG("Kernel does not support seccomp syscall!");
}
EXPECT_EQ(EINVAL, errno) {
TH_LOG("Did not reject crazy op value!");
}
/* Reject strict with flags or pointer. */
ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
EXPECT_EQ(EINVAL, errno) {
TH_LOG("Did not reject mode strict with flags!");
}
ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
EXPECT_EQ(EINVAL, errno) {
TH_LOG("Did not reject mode strict with uargs!");
}
/* Reject insane args for filter. */
ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
EXPECT_EQ(EINVAL, errno) {
TH_LOG("Did not reject crazy filter flags!");
}
ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
EXPECT_EQ(EFAULT, errno) {
TH_LOG("Did not reject NULL filter!");
}
ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
EXPECT_EQ(0, errno) {
TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
strerror(errno));
}
}
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
ASSERT_NE(ENOSYS, errno) {
TH_LOG("Kernel does not support seccomp syscall!");
}
EXPECT_EQ(0, ret) {
TH_LOG("Could not install filter!");
}
/* Make sure neither entry point will switch to strict. */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
EXPECT_EQ(EINVAL, errno) {
TH_LOG("Switched to mode strict!");
}
ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
EXPECT_EQ(EINVAL, errno) {
TH_LOG("Switched to mode strict!");
}
}
/*
 * Test detection of known and unknown filter flags. Userspace needs a way
 * to check whether the running kernel supports a filter flag; a good way
 * is to try entering filter mode with the flag bit in question set and a
 * NULL pointer for the _args_ parameter. EFAULT indicates that the flag is
 * valid and EINVAL indicates that the flag is invalid.
 */
TEST(detect_seccomp_filter_flags)
{
	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
				 SECCOMP_FILTER_FLAG_LOG,
				 SECCOMP_FILTER_FLAG_SPEC_ALLOW,
				 SECCOMP_FILTER_FLAG_NEW_LISTENER,
				 SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
	unsigned int exclusive[] = {
				SECCOMP_FILTER_FLAG_TSYNC,
				SECCOMP_FILTER_FLAG_NEW_LISTENER };
	unsigned int flag, all_flags, exclusive_mask;
	int i;
	long ret;

	/* Test detection of individual known-good filter flags */
	all_flags = 0;
	for (i = 0; i < ARRAY_SIZE(flags); i++) {
		int bits = 0;

		/* Make sure the flag is a single bit! */
		for (flag = flags[i]; flag; flag >>= 1)
			bits += flag & 0x1;
		ASSERT_EQ(1, bits);

		flag = flags[i];
		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
		ASSERT_NE(ENOSYS, errno) {
			TH_LOG("Kernel does not support seccomp syscall!");
		}
		EXPECT_EQ(-1, ret);
		EXPECT_EQ(EFAULT, errno) {
			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
			       flag);
		}

		all_flags |= flag;
	}

	/*
	 * Test detection of all known-good filter flags combined. But
	 * for the exclusive flags we need to mask them out and try them
	 * individually for the "all flags" testing.
	 */
	exclusive_mask = 0;
	for (i = 0; i < ARRAY_SIZE(exclusive); i++)
		exclusive_mask |= exclusive[i];

	for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
		flag = (all_flags & ~exclusive_mask) | exclusive[i];

		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
		EXPECT_EQ(-1, ret);
		EXPECT_EQ(EFAULT, errno) {
			TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
			       flag);
		}
	}

	/*
	 * Test detection of an unknown filter flags, without exclusives:
	 * every bit set except the exclusive ones.
	 */
	flag = ~exclusive_mask;
	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
		       flag);
	}

	/*
	 * Test detection of an unknown filter flag that may simply need to be
	 * added to this test
	 */
	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno) {
		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
		       flag);
	}
}
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
&prog);
ASSERT_NE(ENOSYS, errno) {
TH_LOG("Kernel does not support seccomp syscall!");
}
EXPECT_EQ(0, ret) {
TH_LOG("Could not install initial filter with TSYNC!");
}
}
/*
 * Join @tid and, only when the join succeeded, reset @tid to 0 so that a
 * later join attempt during fixture teardown is skipped (joining an
 * already joined thread is not allowed by Bionic). A failed join is logged
 * and leaves @tid untouched; any remaining threads will be directly killed
 * during teardown.
 */
#define PTHREAD_JOIN(tid, status)					\
	do {								\
		int _rc = pthread_join(tid, status);			\
		if (!_rc) {						\
			tid = 0;					\
		} else {						\
			TH_LOG("pthread_join of tid %u failed: %d\n",	\
				(unsigned int)tid, _rc);		\
		}							\
	} while (0)
if (!s->tid) continue; /* * If a thread is still running, it may be stuck, so hit * it over the head really hard.
*/
pthread_kill(s->tid, 9);
}
pthread_mutex_destroy(&self->mutex);
pthread_cond_destroy(&self->cond);
sem_destroy(&self->started);
}
/*
 * Thread body for a TSYNC sibling. @data points at the thread's
 * struct tsync_sibling.
 *
 * The thread records its tid, optionally re-applies its filter program
 * (when ->diverge is set), posts ->started, and then waits on ->cond until
 * ->num_waits has dropped to zero. Afterwards it queries no_new_privs and
 * issues read(-1, NULL, 0).
 *
 * Returns SIBLING_EXIT_FAILURE if re-applying the filter failed,
 * SIBLING_EXIT_NEWPRIVS if PR_GET_NO_NEW_PRIVS returned 0, and
 * SIBLING_EXIT_UNKILLED if the thread got past the read() call.
 */
void *tsync_sibling(void *data)
{
	struct tsync_sibling *sib = data;
	long rc = 0;

	sib->system_tid = syscall(__NR_gettid);

	pthread_mutex_lock(sib->mutex);
	/* Just re-apply the root prog to fork the tree */
	if (sib->diverge)
		rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
			   sib->prog, 0, 0);
	sem_post(sib->started);

	/* Return outside of started so parent notices failures. */
	if (rc != 0) {
		pthread_mutex_unlock(sib->mutex);
		return (void *)SIBLING_EXIT_FAILURE;
	}

	/* Always wait at least once, then once more per remaining count. */
	do {
		pthread_cond_wait(sib->cond, sib->mutex);
		sib->num_waits -= 1;
	} while (sib->num_waits != 0);
	pthread_mutex_unlock(sib->mutex);

	rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
	if (rc == 0)
		return (void *)SIBLING_EXIT_NEWPRIVS;

	read(-1, NULL, 0);
	return (void *)SIBLING_EXIT_UNKILLED;
}
/*
 * Install the root filter first, start both siblings, then install the
 * apply filter with SECCOMP_FILTER_FLAG_TSYNC. After the siblings are
 * woken, both are expected to be joined with a status of 0.
 */
TEST_F(TSYNC, two_siblings_with_ancestor)
{
	long ret;
	void *status;

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
	}
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);

	/* Wait until every sibling has posted the started semaphore. */
	while (self->sibling_count < TSYNC_SIBLINGS) {
		sem_wait(&self->started);
		self->sibling_count++;
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	/* This branch runs when the install failed, so say so. */
	ASSERT_EQ(0, ret) {
		TH_LOG("Could not install filter on all threads!");
	}
	/* Tell the siblings to test the policy */
	pthread_mutex_lock(&self->mutex);
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);
	/* Ensure they are both killed and don't exit cleanly. */
	PTHREAD_JOIN(self->sibling[0].tid, &status);
	EXPECT_EQ(0x0, (long)status);
	PTHREAD_JOIN(self->sibling[1].tid, &status);
	EXPECT_EQ(0x0, (long)status);
}
/* start siblings before any prctl() operations */
tsync_start_sibling(&self->sibling[0]);
tsync_start_sibling(&self->sibling[1]); while (self->sibling_count < TSYNC_SIBLINGS) {
sem_wait(&self->started);
self->sibling_count++;
}
/* Tell the siblings to test no policy */
pthread_mutex_lock(&self->mutex);
ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
TH_LOG("cond broadcast non-zero");
}
pthread_mutex_unlock(&self->mutex);
/* Ensure they are both upset about lacking nnp. */
PTHREAD_JOIN(self->sibling[0].tid, &status);
EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
PTHREAD_JOIN(self->sibling[1].tid, &status);
EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
}
/*
 * Start both siblings before any filter exists, then install the apply
 * filter with SECCOMP_FILTER_FLAG_TSYNC. After the siblings are woken,
 * both are expected to be joined with a status of 0.
 */
TEST_F(TSYNC, two_siblings_with_no_filter)
{
	long ret;
	void *status;

	/* start siblings before any prctl() operations */
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);
	/* Wait until every sibling has posted the started semaphore. */
	while (self->sibling_count < TSYNC_SIBLINGS) {
		sem_wait(&self->started);
		self->sibling_count++;
	}

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	/* This branch runs when the install failed, so say so. */
	ASSERT_EQ(0, ret) {
		TH_LOG("Could not install filter on all threads!");
	}

	/* Tell the siblings to test the policy */
	pthread_mutex_lock(&self->mutex);
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);

	/* Ensure they are both killed and don't exit cleanly. */
	PTHREAD_JOIN(self->sibling[0].tid, &status);
	EXPECT_EQ(0x0, (long)status);
	PTHREAD_JOIN(self->sibling[1].tid, &status);
	EXPECT_EQ(0x0, (long)status);
}
/*
 * Sibling 0 is flagged to diverge before it starts. Installing the apply
 * filter with SECCOMP_FILTER_FLAG_TSYNC is then expected to return sibling
 * 0's system tid, and both siblings are expected to exit with
 * SIBLING_EXIT_UNKILLED.
 */
TEST_F(TSYNC, two_siblings_with_one_divergence)
{
	void *status;
	long ret;

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
	}

	self->sibling[0].diverge = 1;
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);

	/* One sem_wait() per sibling that has not been counted yet. */
	for (; self->sibling_count < TSYNC_SIBLINGS; self->sibling_count++)
		sem_wait(&self->started);

	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
		      &self->apply_prog);
	ASSERT_EQ(self->sibling[0].system_tid, ret) {
		TH_LOG("Did not fail on diverged sibling.");
	}

	/* Wake the threads */
	pthread_mutex_lock(&self->mutex);
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);

	/* Ensure they are both unkilled. */
	PTHREAD_JOIN(self->sibling[0].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
	PTHREAD_JOIN(self->sibling[1].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
}
/*
 * Same setup as the one-divergence case, but the apply filter is installed
 * with SECCOMP_FILTER_FLAG_TSYNC | SECCOMP_FILTER_FLAG_TSYNC_ESRCH. The
 * install is expected to fail with -1 and errno ESRCH, and both siblings
 * are expected to exit with SIBLING_EXIT_UNKILLED.
 */
TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
{
	void *status;
	long ret, flags;

	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
	ASSERT_NE(ENOSYS, errno) {
		TH_LOG("Kernel does not support seccomp syscall!");
	}
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
	}

	self->sibling[0].diverge = 1;
	tsync_start_sibling(&self->sibling[0]);
	tsync_start_sibling(&self->sibling[1]);

	/* One sem_wait() per sibling that has not been counted yet. */
	for (; self->sibling_count < TSYNC_SIBLINGS; self->sibling_count++)
		sem_wait(&self->started);

	flags = SECCOMP_FILTER_FLAG_TSYNC | SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
	ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
	ASSERT_EQ(ESRCH, errno) {
		TH_LOG("Did not return ESRCH for diverged sibling.");
	}
	ASSERT_EQ(-1, ret) {
		TH_LOG("Did not fail on diverged sibling.");
	}

	/* Wake the threads */
	pthread_mutex_lock(&self->mutex);
	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
		TH_LOG("cond broadcast non-zero");
	}
	pthread_mutex_unlock(&self->mutex);

	/* Ensure they are both unkilled. */
	PTHREAD_JOIN(self->sibling[0].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
	PTHREAD_JOIN(self->sibling[1].tid, &status);
	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
}
ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
/* * Sibling 0 will have its own seccomp policy * and Sibling 1 will not be under seccomp at * all. Sibling 1 will enter seccomp and 0 * will cause failure.
*/
self->sibling[0].diverge = 1;
tsync_start_sibling(&self->sibling[0]);
tsync_start_sibling(&self->sibling[1]);
while (self->sibling_count < TSYNC_SIBLINGS) {
sem_wait(&self->started);
self->sibling_count++;
}
ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
ASSERT_NE(ENOSYS, errno) {
TH_LOG("Kernel does not support seccomp syscall!");
}
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
}
ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
&self->apply_prog);
ASSERT_EQ(ret, self->sibling[0].system_tid) {
TH_LOG("Did not fail on diverged sibling.");
}
sib = 1; if (ret == self->sibling[0].system_tid)
sib = 0;
pthread_mutex_lock(&self->mutex);
/* Increment the other siblings num_waits so we can clean up * the one we just saw.
*/
self->sibling[!sib].num_waits += 1;
/* Signal the thread to clean up*/
ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
TH_LOG("cond broadcast non-zero");
}
pthread_mutex_unlock(&self->mutex);
PTHREAD_JOIN(self->sibling[sib].tid, &status);
EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); /* Poll for actual task death. pthread_join doesn't guarantee it. */ while (!kill(self->sibling[sib].system_tid, 0))
nanosleep(&delay, NULL); /* Switch to the remaining sibling */
sib = !sib;
ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
&self->apply_prog);
ASSERT_EQ(0, ret) {
TH_LOG("Expected the remaining sibling to sync");
};
pthread_mutex_lock(&self->mutex);
/* If remaining sibling didn't have a chance to wake up during * the first broadcast, manually reduce the num_waits now.
*/ if (self->sibling[sib].num_waits > 1)
self->sibling[sib].num_waits = 1;
ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
TH_LOG("cond broadcast non-zero");
}
pthread_mutex_unlock(&self->mutex);
PTHREAD_JOIN(self->sibling[sib].tid, &status);
EXPECT_EQ(0, (long)status); /* Poll for actual task death. pthread_join doesn't guarantee it. */ while (!kill(self->sibling[sib].system_tid, 0))
nanosleep(&delay, NULL);
ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
&self->apply_prog);
ASSERT_EQ(0, ret); /* just us chickens */
}
/* Make sure restarted syscalls are seen directly as "restart_syscall". */
TEST(syscall_restart)
{ long ret; unsignedlong msg;
pid_t child_pid; int pipefd[2]; int status;
siginfo_t info = { }; struct sock_filter filter[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
child_pid = fork();
ASSERT_LE(0, child_pid); if (child_pid == 0) { /* Child uses EXPECT not ASSERT to deliver status correctly. */ char buf = ' '; struct timespec timeout = { };
/* Attach parent as tracer and stop. */
EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
EXPECT_EQ(0, raise(SIGSTOP));
EXPECT_EQ(0, close(pipefd[1]));
EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
EXPECT_EQ(0, ret) {
TH_LOG("Failed to install filter!");
}
EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
TH_LOG("Failed to read() sync from parent");
}
EXPECT_EQ('.', buf) {
TH_LOG("Failed to get sync data from read()");
}
/* Start nanosleep to be interrupted. */
timeout.tv_sec = 1;
errno = 0;
EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
TH_LOG("Call to nanosleep() failed (errno %d: %s)",
errno, strerror(errno));
}
/* Read final sync from parent. */
EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
TH_LOG("Failed final read() from parent");
}
EXPECT_EQ('!', buf) {
TH_LOG("Failed to get final data from read()");
}
/* Directly report the status of our test harness results. */
syscall(__NR_exit, _metadata->exit_code);
}
EXPECT_EQ(0, close(pipefd[0]));
/* Wait for nanosleep() to start. */
ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
ASSERT_EQ(true, WIFSTOPPED(status));
ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
ASSERT_EQ(0x100, msg);
ret = get_syscall(_metadata, child_pid);
EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep);
/* Might as well check siginfo for sanity while we're here. */
ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
ASSERT_EQ(SIGTRAP, info.si_signo);
ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
EXPECT_EQ(0, info.si_errno);
EXPECT_EQ(getuid(), info.si_uid); /* Verify signal delivery came from child (seccomp-triggered). */
EXPECT_EQ(child_pid, info.si_pid);
/* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
ASSERT_EQ(0, kill(child_pid, SIGSTOP));
ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
ASSERT_EQ(true, WIFSTOPPED(status));
ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); /* * There is no siginfo on SIGSTOP any more, so we can't verify * signal delivery came from parent now (getpid() == info.si_pid). * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
*/
EXPECT_EQ(SIGSTOP, info.si_signo);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
&allow_prog);
ASSERT_NE(ENOSYS, errno) {
TH_LOG("Kernel does not support seccomp syscall!");
}
EXPECT_NE(0, ret) {
TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
}
EXPECT_EQ(EINVAL, errno) {
TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
}
/* Verify that a simple, permissive filter can be added with no flags */
ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
EXPECT_EQ(0, ret);
/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
&allow_prog);
ASSERT_NE(EINVAL, errno) {
TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
}
EXPECT_EQ(0, ret);
/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
&kill_prog);
EXPECT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid)); /* getpid() should never return. */
EXPECT_EQ(0, syscall(__NR_getpid));
}
ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
ASSERT_NE(ENOSYS, errno) {
TH_LOG("Kernel does not support seccomp syscall!");
}
ASSERT_NE(EINVAL, errno) {
TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
}
EXPECT_EQ(ret, 0);
for (i = 0; i < ARRAY_SIZE(actions); i++) {
ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
EXPECT_EQ(ret, 0) {
TH_LOG("Expected action (0x%X) not available!",
actions[i]);
}
}
/* Check that an unknown action is handled properly (EOPNOTSUPP) */
ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
EXPECT_EQ(ret, -1);
EXPECT_EQ(errno, EOPNOTSUPP);
}
TEST(get_metadata)
{
pid_t pid; int pipefd[2]; char buf; struct seccomp_metadata md; long ret;
/* Only real root can get metadata. */ if (geteuid()) {
SKIP(return, "get_metadata requires real root"); return;
}
/* one with log, one without */
EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
SECCOMP_FILTER_FLAG_LOG, &prog));
EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
pid = fork();
ASSERT_GE(pid, 0);
/* Check that we get -ENOSYS with no listener attached */ if (pid == 0) { if (user_notif_syscall(__NR_getppid, 0) < 0) exit(1);
ret = syscall(__NR_getppid); exit(ret >= 0 || errno != ENOSYS);
}
/* Check that the basic notification machinery works */
listener = user_notif_syscall(__NR_getppid,
SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
/* Installing a second listener in the chain should EBUSY */
EXPECT_EQ(user_notif_syscall(__NR_getppid,
SECCOMP_FILTER_FLAG_NEW_LISTENER),
-1);
EXPECT_EQ(errno, EBUSY);
pid = fork();
ASSERT_GE(pid, 0);
if (pid == 0) {
ret = syscall(__NR_getppid); exit(ret != USER_NOTIF_MAGIC);
}
/* Test that we can't pass garbage to the kernel. */
memset(&req, 0, sizeof(req));
req.pid = -1;
errno = 0;
ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
EXPECT_EQ(-1, ret);
EXPECT_EQ(EINVAL, errno);
if (pid == 0) {
close(sk_pair[0]);
handled = sk_pair[1]; if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
perror("signal"); exit(1);
} /* * ERESTARTSYS behavior is a bit hard to test, because we need * to rely on a signal that has not yet been handled. Let's at * least check that the error code gets propagated through, and * hope that it doesn't break when there is actually a signal :)
*/
ret = syscall(__NR_gettid); exit(!(ret == -1 && errno == 512));
}
/* * Make sure the signal really is delivered, which means we're not * stuck in the user notification code any more and the notification * should be dead.
*/
EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
/* * Check that we get an ENOSYS when the listener is closed.
*/
pid = fork();
ASSERT_GE(pid, 0); if (pid == 0) {
close(listener);
ret = syscall(__NR_getppid); exit(ret != -1 && errno != ENOSYS);
}
/* * Check that a pid in a child namespace still shows up as valid in ours.
*/
TEST(user_notification_child_pid_ns)
{
pid_t pid; int status, listener; struct seccomp_notif req = {}; struct seccomp_notif_resp resp = {};
/* * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. * invalid.
*/
TEST(user_notification_sibling_pid_ns)
{
pid_t pid, pid2; int status, listener; struct seccomp_notif req = {}; struct seccomp_notif_resp resp = {};
ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
/* Create the sibling ns, and sibling in it. */
ASSERT_EQ(unshare(CLONE_NEWPID), 0) { if (errno == EPERM)
SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); elseif (errno == EINVAL)
SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)");
}
ASSERT_EQ(errno, 0);
pid2 = fork();
ASSERT_GE(pid2, 0);
if (pid2 == 0) {
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); /* * The pid should be 0, i.e. the task is in some namespace that * we can't "see".
*/
EXPECT_EQ(req.pid, 0);
if (pid == 0) exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
/* Do a bad recv() */
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
EXPECT_EQ(errno, EFAULT);
/* We should still be able to receive this notification, though. */
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
EXPECT_EQ(req.pid, pid);
/* * The seccomp filter has become unused so we should be notified once * the kernel gets around to cleaning up task struct.
*/
pollfd.fd = 200;
pollfd.events = POLLHUP;
listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); if (listener < 0)
_exit(EXIT_FAILURE);
if (dup2(listener, 200) != 200)
_exit(EXIT_FAILURE);
close(p[1]);
close(listener);
sleep(1);
_exit(EXIT_SUCCESS);
} if (read(p[0], &status, 1) != 0)
_exit(EXIT_SUCCESS);
close(p[0]); /* * The seccomp filter has become unused so we should be notified once * the kernel gets around to cleaning up task struct.
*/
EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1);
EXPECT_EQ(errno, ENOENT);
/* * The seccomp filter has become unused so we should be notified once * the kernel gets around to cleaning up task struct.
*/
pollfd.fd = 200;
pollfd.events = POLLHUP;
/* Verify bad newfd_flags cannot be set */
addfd.newfd_flags = ~O_CLOEXEC;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
EXPECT_EQ(errno, EINVAL);
addfd.newfd_flags = O_CLOEXEC;
/* Verify bad flags cannot be set */
addfd.flags = 0xff;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
EXPECT_EQ(errno, EINVAL);
addfd.flags = 0;
/* Verify that remote_fd cannot be set without setting flags */
addfd.newfd = 1;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
EXPECT_EQ(errno, EINVAL);
addfd.newfd = 0;
/* Verify small size cannot be set */
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1);
EXPECT_EQ(errno, EINVAL);
/* Verify we can't send bits filled in unknown buffer area */
memset(&big, 0xAA, sizeof(big));
big.addfd = addfd;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1);
EXPECT_EQ(errno, E2BIG);
/* Verify we can set an arbitrary remote fd */
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
EXPECT_EQ(fd, nextfd);
nextfd = get_next_fd(nextfd);
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/* Verify we can set an arbitrary remote fd with large size */
memset(&big, 0x0, sizeof(big));
big.addfd = addfd;
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
EXPECT_EQ(fd, nextfd);
nextfd = get_next_fd(nextfd);
/* Verify we can set a specific remote fd */
addfd.newfd = 42;
addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
EXPECT_EQ(fd, 42);
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/* * This sets the ID of the ADD FD to the last request plus 1. The * notification ID increments 1 per notification.
*/
addfd.id = req.id + 1;
/* This spins until the underlying notification is generated */ while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
errno != -EINPROGRESS)
nanosleep(&delay, NULL);
/* Verify we can do an atomic addfd and send */
addfd.newfd = 0;
addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); /* * Child has earlier "low" fds and now 42, so we expect the next * lowest available fd to be assigned here.
*/
EXPECT_EQ(fd, nextfd);
nextfd = get_next_fd(nextfd);
ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/* * This sets the ID of the ADD FD to the last request plus 1. The * notification ID increments 1 per notification.
*/
addfd.id = req.id + 1;
/* This spins until the underlying notification is generated */ while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
errno != -EINPROGRESS)
nanosleep(&delay, NULL);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
/* Check that the basic notification machinery works */
listener = user_notif_syscall(__NR_getppid,
SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
ASSERT_GE(pid, 0);
if (pid == 0) exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
if (self->pid == 0) { while (1)
pause();
_exit(127);
}
}
/*
 * Tear down the O_SUSPEND_SECCOMP fixture by sending SIGKILL to the helper
 * process whose PID is stored in self->pid.
 */
FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP)
{
	/*
	 * Only signal a real child. kill() treats pid 0 as "the caller's
	 * process group" and pid -1 (what a failed fork() would leave behind)
	 * as "every process the caller may signal", so neither must reach it.
	 */
	if (self->pid > 0)
		kill(self->pid, SIGKILL);
}
/*
 * Attach to the fixture's process with PTRACE_ATTACH and check that setting
 * PTRACE_O_SUSPEND_SECCOMP through PTRACE_SETOPTIONS is refused: the call must
 * return -1. EINVAL is treated as "option unknown to this kernel" and skips
 * the test; any other errno than EPERM fails it.
 * NOTE(review): the reason EPERM is expected (presumably missing privileges
 * arranged by the fixture setup) is not visible in this block - confirm.
 */
TEST_F(O_SUSPEND_SECCOMP, setoptions)
{ int wstatus;
/* Become the tracer of the fixture's process. */
ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0));
/* The next child status change must come from that same process. */
ASSERT_EQ(self->pid, wait(&wstatus));
/* The request itself has to fail; errno then decides between skip and pass. */
ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP)); if (errno == EINVAL)
SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
ASSERT_EQ(EPERM, errno);
}
/*
 * Check that PTRACE_SEIZE with PTRACE_O_SUSPEND_SECCOMP passed as the option
 * word is refused for the fixture's process: the call must return -1. EINVAL
 * skips the test (option unknown to this kernel); otherwise errno must be
 * EPERM.
 * NOTE(review): why EPERM is the expected outcome depends on the fixture
 * setup, which is not visible in this block - confirm.
 */
TEST_F(O_SUSPEND_SECCOMP, seize)
{ int ret;
/* Seize the process and request seccomp suspension in a single call. */
ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP);
/* errno is inspected right after the failing call, before anything can overwrite it. */
ASSERT_EQ(-1, ret); if (errno == EINVAL)
SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
ASSERT_EQ(EPERM, errno);
}
/* * get_nth - Get the nth, space separated entry in a file. * * Returns the length of the read field. * Throws error if field is zero-lengthed.
*/ static ssize_t get_nth(struct __test_metadata *_metadata, constchar *path, constunsignedint position, char **entry)
{ char *line = NULL; unsignedint i;
ssize_t nread;
size_t len = 0;
FILE *f;
f = fopen(path, "r");
ASSERT_NE(f, NULL) {
TH_LOG("Could not open %s: %s", path, strerror(errno));
}
for (i = 0; i < position; i++) {
nread = getdelim(&line, &len, ' ', f);
ASSERT_GE(nread, 0) {
TH_LOG("Failed to read %d entry in file %s", i, path);
}
}
fclose(f);
ASSERT_GT(nread, 0) {
TH_LOG("Entry in file %s had zero length", path);
}
*entry = line; return nread - 1;
}
/* For a given PID, get the task state (D, R, etc...) */ staticchar get_proc_stat(struct __test_metadata *_metadata, pid_t pid)
{ char proc_path[100] = {0}; char status; char *line;
/* Start children, and generate notifications */ for (i = 0; i < ARRAY_SIZE(pids); i++) {
pid = fork(); if (pid == 0) {
ret = syscall(__NR_getppid); exit(ret != USER_NOTIF_MAGIC);
}
pids[i] = pid;
}
/* This spins until all of the children are sleeping */
restart_wait: for (i = 0; i < ARRAY_SIZE(pids); i++) { if (get_proc_stat(_metadata, pids[i]) != 'S') {
nanosleep(&delay, NULL); goto restart_wait;
}
}
/* Read the notifications in order (and respond) */ for (i = 0; i < ARRAY_SIZE(pids); i++) {
memset(&req, 0, sizeof(req));
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
EXPECT_EQ(req.id, baseid + i);
resp.id = req.id;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
}
/* Make sure notifications were received */ for (i = 0; i < ARRAY_SIZE(pids); i++) {
EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]);
EXPECT_EQ(true, WIFEXITED(status));
EXPECT_EQ(0, WEXITSTATUS(status));
}
}
/* get_proc_syscall - Get the syscall in progress for a given pid * * Returns the current syscall number for a given process * Returns -1 if not in syscall (running or blocked)
*/ staticlong get_proc_syscall(struct __test_metadata *_metadata, int pid)
{ char proc_path[100] = {0}; long ret = -1;
ssize_t nread; char *line;
/* * Check that we can kill the process with SIGUSR1 prior to receiving * the notification. SIGUSR1 is wired up to a custom signal handler, * and make sure it gets called.
*/
pid = fork();
ASSERT_GE(pid, 0);
if (pid == 0) {
close(sk_pair[0]);
handled = sk_pair[1];
/* Setup the non-fatal sigaction without SA_RESTART */ if (sigaction(SIGUSR1, &new_action, NULL)) {
perror("sigaction"); exit(1);
}
ret = syscall(__NR_getppid); /* Make sure we got a return from a signal interruption */ exit(ret != -1 || errno != EINTR);
}
/* * Make sure we've gotten to the seccomp user notification wait * from getppid prior to sending any signals
*/ while (get_proc_syscall(_metadata, pid) != __NR_getppid &&
get_proc_stat(_metadata, pid) != 'S')
nanosleep(&delay, NULL);
/* Send non-fatal kill signal */
EXPECT_EQ(kill(pid, SIGUSR1), 0);
/* wait for process to exit (exit checks for EINTR) */
EXPECT_EQ(waitpid(pid, &status, 0), pid);
EXPECT_EQ(true, WIFEXITED(status));
EXPECT_EQ(0, WEXITSTATUS(status));
if (pid == 0) {
close(sk_pair[0]);
handled = sk_pair[1];
/* Setup the sigaction without SA_RESTART */ if (sigaction(SIGUSR1, &new_action, NULL)) {
perror("sigaction"); exit(1);
}
/* Make sure that the syscall is completed (no EINTR) */
ret = syscall(__NR_getppid); exit(ret != USER_NOTIF_MAGIC);
}
/* * Get the notification, to move the notifying process into a * non-preemptible (TASK_KILLABLE) state.
*/
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); /* Send non-fatal kill signal */
EXPECT_EQ(kill(pid, SIGUSR1), 0);
/* * Make sure the task moves to TASK_KILLABLE by waiting for * D (Disk Sleep) state after receiving non-fatal signal.
*/ while (get_proc_stat(_metadata, pid) != 'D')
nanosleep(&delay, NULL);
resp.id = req.id;
resp.val = USER_NOTIF_MAGIC; /* Make sure the notification is found and able to be replied to */
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
/* * Make sure that the signal handler does get called once we're back in * userspace.
*/
EXPECT_EQ(read(sk_pair[0], &c, 1), 1); /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */
EXPECT_EQ(waitpid(pid, &status, 0), pid);
EXPECT_EQ(true, WIFEXITED(status));
EXPECT_EQ(0, WEXITSTATUS(status));
}
/* Ensure fatal signals after receive are not blocked */
TEST(user_notification_wait_killable_fatal)
{ struct seccomp_notif req = {}; int listener, status;
pid_t pid; long ret; /* 100 ms */ struct timespec delay = { .tv_nsec = 100000000 };
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret)
{
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
if (pid == 0) { /* This should never complete as it should get a SIGTERM */
syscall(__NR_getppid); exit(1);
}
while (get_proc_stat(_metadata, pid) != 'S')
nanosleep(&delay, NULL);
/* * Get the notification, to make move the notifying process into a * non-preemptible (TASK_KILLABLE) state.
*/
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); /* Kill the process with a fatal signal */
EXPECT_EQ(kill(pid, SIGTERM), 0);
/* * Wait for the process to exit, and make sure the process terminated * due to the SIGTERM signal.
*/
EXPECT_EQ(waitpid(pid, &status, 0), pid);
EXPECT_EQ(true, WIFSIGNALED(status));
EXPECT_EQ(SIGTERM, WTERMSIG(status));
}
ret = pthread_join(args->leader, &retval); if (ret) exit(1); if (retval != _args) exit(2);
ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog); if (ret) exit(3);
exit(0);
}
/*
 * Ensure that a dead thread leader doesn't prevent installing new filters with
 * SECCOMP_FILTER_FLAG_TSYNC from other threads.
 *
 * The visible body sets no_new_privs, starts a sibling thread, installs a
 * filter with no flags from the current thread and then ends that thread with
 * pthread_exit().
 * NOTE(review): `sibling`, `args` and `allow_prog` are used below but not
 * declared anywhere in this block, and `status` / `pid` are declared but never
 * used - part of the test (presumably a fork() plus the child's local
 * declarations) appears to be missing here; confirm against the full file.
*/
TEST(tsync_vs_dead_thread_leader)
{ int status;
pid_t pid; long ret;
/* Needed so an unprivileged task is allowed to install seccomp filters. */
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
/* Start the sibling thread; it is handed `args` as its argument. */
ret = pthread_create(&sibling, NULL,
tsync_vs_dead_thread_leader_sibling, args);
ASSERT_EQ(0, ret);
/* Install a new filter just to the leader thread. */
ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
ASSERT_EQ(0, ret);
/* End only this thread, publishing `args` as its exit value; pthread_exit()
 * does not return, so exit(1) is reached only if that somehow fails. */
pthread_exit(args); exit(1);
}
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert. 0.86 Bemerkung:
(Wie Sie bei der Firma Beratungs- und Dienstleistungen beauftragen können 2026-04-26)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.