/* sandbox-seccomp-filter.c revision 296853 */
1193326Sed/* 2193326Sed * Copyright (c) 2012 Will Drewry <wad@dataspill.org> 3193326Sed * 4193326Sed * Permission to use, copy, modify, and distribute this software for any 5193326Sed * purpose with or without fee is hereby granted, provided that the above 6193326Sed * copyright notice and this permission notice appear in all copies. 7193326Sed * 8193326Sed * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9193326Sed * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10193326Sed * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11193326Sed * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12193326Sed * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13193326Sed * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14193326Sed * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15205219Srdivacky */ 16193326Sed 17249423Sdim/* 18218893Sdim * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose 19226633Sdim * filter breakage during development. *Do not* use this in production, 20249423Sdim * as it relies on making library calls that are unsafe in signal context. 21249423Sdim * 22193326Sed * Instead, live systems the auditctl(8) may be used to monitor failures. 23193326Sed * E.g. 24249423Sdim * auditctl -a task,always -F uid=<privsep uid> 25198092Srdivacky */ 26193326Sed/* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */ 27249423Sdim 28205219Srdivacky/* XXX it should be possible to do logging via the log socket safely */ 29218893Sdim 30205219Srdivacky#ifdef SANDBOX_SECCOMP_FILTER_DEBUG 31193326Sed/* Use the kernel headers in case of an older toolchain. 
*/ 32193326Sed# include <asm/siginfo.h> 33193326Sed# define __have_siginfo_t 1 34193326Sed# define __have_sigval_t 1 35193326Sed# define __have_sigevent_t 1 36193326Sed#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 37193326Sed 38193326Sed#include "includes.h" 39193326Sed 40212904Sdim#ifdef SANDBOX_SECCOMP_FILTER 41212904Sdim 42193326Sed#include <sys/types.h> 43193326Sed#include <sys/resource.h> 44226633Sdim#include <sys/prctl.h> 45226633Sdim 46193326Sed#include <linux/net.h> 47205408Srdivacky#include <linux/audit.h> 48193326Sed#include <linux/filter.h> 49193326Sed#include <linux/seccomp.h> 50221345Sdim#include <elf.h> 51221345Sdim 52221345Sdim#include <asm/unistd.h> 53221345Sdim 54221345Sdim#include <errno.h> 55221345Sdim#include <signal.h> 56221345Sdim#include <stdarg.h> 57221345Sdim#include <stddef.h> /* for offsetof */ 58221345Sdim#include <stdio.h> 59221345Sdim#include <stdlib.h> 60221345Sdim#include <string.h> 61221345Sdim#include <unistd.h> 62221345Sdim 63193326Sed#include "log.h" 64193326Sed#include "ssh-sandbox.h" 65193326Sed#include "xmalloc.h" 66200583Srdivacky 67193326Sed/* Linux seccomp_filter sandbox */ 68205408Srdivacky#define SECCOMP_FILTER_FAIL SECCOMP_RET_KILL 69221345Sdim 70193326Sed/* Use a signal handler to emit violations when debugging */ 71193326Sed#ifdef SANDBOX_SECCOMP_FILTER_DEBUG 72212904Sdim# undef SECCOMP_FILTER_FAIL 73212904Sdim# define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP 74239462Sdim#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 75234353Sdim 76234353Sdim/* Simple helpers to avoid manual errors (but larger BPF programs). 
*/ 77234353Sdim#define SC_DENY(_nr, _errno) \ 78234353Sdim BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \ 79200583Srdivacky BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno)) 80212904Sdim#define SC_ALLOW(_nr) \ 81212904Sdim BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \ 82205408Srdivacky BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) 83212904Sdim#define SC_ALLOW_ARG(_nr, _arg_nr, _arg_val) \ 84200583Srdivacky BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 4), \ 85200583Srdivacky /* load first syscall argument */ \ 86226633Sdim BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 87207619Srdivacky offsetof(struct seccomp_data, args[(_arg_nr)])), \ 88207619Srdivacky BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_arg_val), 0, 1), \ 89205219Srdivacky BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \ 90218893Sdim /* reload syscall number; all rules expect it in accumulator */ \ 91221345Sdim BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 92221345Sdim offsetof(struct seccomp_data, nr)) 93218893Sdim 94218893Sdim/* Syscall filtering set for preauth. */ 95205408Srdivackystatic const struct sock_filter preauth_insns[] = { 96218893Sdim /* Ensure the syscall arch convention is as expected. */ 97218893Sdim BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 98206084Srdivacky offsetof(struct seccomp_data, arch)), 99218893Sdim BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0), 100239462Sdim BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL), 101239462Sdim /* Load the syscall number for checking. 
*/ 102239462Sdim BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 103239462Sdim offsetof(struct seccomp_data, nr)), 104206084Srdivacky 105218893Sdim /* Syscalls to non-fatally deny */ 106218893Sdim#ifdef __NR_fstat 107218893Sdim SC_DENY(fstat, EACCES), 108218893Sdim#endif 109218893Sdim#ifdef __NR_fstat64 110218893Sdim SC_DENY(fstat64, EACCES), 111218893Sdim#endif 112218893Sdim#ifdef __NR_open 113218893Sdim SC_DENY(open, EACCES), 114218893Sdim#endif 115218893Sdim#ifdef __NR_openat 116226633Sdim SC_DENY(openat, EACCES), 117221345Sdim#endif 118218893Sdim#ifdef __NR_newfstatat 119218893Sdim SC_DENY(newfstatat, EACCES), 120221345Sdim#endif 121218893Sdim#ifdef __NR_stat 122207619Srdivacky SC_DENY(stat, EACCES), 123218893Sdim#endif 124218893Sdim#ifdef __NR_stat64 125221345Sdim SC_DENY(stat64, EACCES), 126218893Sdim#endif 127218893Sdim 128221345Sdim /* Syscalls to permit */ 129206084Srdivacky#ifdef __NR_brk 130218893Sdim SC_ALLOW(brk), 131207619Srdivacky#endif 132218893Sdim#ifdef __NR_clock_gettime 133218893Sdim SC_ALLOW(clock_gettime), 134198092Srdivacky#endif 135205219Srdivacky#ifdef __NR_close 136218893Sdim SC_ALLOW(close), 137218893Sdim#endif 138221345Sdim#ifdef __NR_exit 139218893Sdim SC_ALLOW(exit), 140218893Sdim#endif 141221345Sdim#ifdef __NR_exit_group 142218893Sdim SC_ALLOW(exit_group), 143218893Sdim#endif 144221345Sdim#ifdef __NR_getpgid 145218893Sdim SC_ALLOW(getpgid), 146218893Sdim#endif 147218893Sdim#ifdef __NR_getpid 148218893Sdim SC_ALLOW(getpid), 149218893Sdim#endif 150221345Sdim#ifdef __NR_getrandom 151218893Sdim SC_ALLOW(getrandom), 152221345Sdim#endif 153218893Sdim#ifdef __NR_gettimeofday 154226633Sdim SC_ALLOW(gettimeofday), 155221345Sdim#endif 156218893Sdim#ifdef __NR_madvise 157218893Sdim SC_ALLOW(madvise), 158218893Sdim#endif 159218893Sdim#ifdef __NR_mmap 160218893Sdim SC_ALLOW(mmap), 161218893Sdim#endif 162218893Sdim#ifdef __NR_mmap2 163218893Sdim SC_ALLOW(mmap2), 164218893Sdim#endif 165218893Sdim#ifdef __NR_mremap 166218893Sdim SC_ALLOW(mremap), 167218893Sdim#endif 
168221345Sdim#ifdef __NR_munmap 169218893Sdim SC_ALLOW(munmap), 170221345Sdim#endif 171218893Sdim#ifdef __NR__newselect 172218893Sdim SC_ALLOW(_newselect), 173218893Sdim#endif 174205408Srdivacky#ifdef __NR_poll 175212904Sdim SC_ALLOW(poll), 176205408Srdivacky#endif 177205408Srdivacky#ifdef __NR_pselect6 178198092Srdivacky SC_ALLOW(pselect6), 179198092Srdivacky#endif 180226633Sdim#ifdef __NR_read 181193326Sed SC_ALLOW(read), 182193326Sed#endif 183198092Srdivacky#ifdef __NR_rt_sigprocmask 184224145Sdim SC_ALLOW(rt_sigprocmask), 185193326Sed#endif 186193326Sed#ifdef __NR_select 187198092Srdivacky SC_ALLOW(select), 188193326Sed#endif 189193326Sed#ifdef __NR_shutdown 190193326Sed SC_ALLOW(shutdown), 191193326Sed#endif 192193326Sed#ifdef __NR_sigprocmask 193193326Sed SC_ALLOW(sigprocmask), 194193326Sed#endif 195239462Sdim#ifdef __NR_time 196193326Sed SC_ALLOW(time), 197239462Sdim#endif 198193326Sed#ifdef __NR_write 199193326Sed SC_ALLOW(write), 200198092Srdivacky#endif 201193326Sed#ifdef __NR_socketcall 202193326Sed SC_ALLOW_ARG(socketcall, 0, SYS_SHUTDOWN), 203198092Srdivacky#endif 204193326Sed 205193326Sed /* Default deny */ 206198092Srdivacky BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL), 207193326Sed}; 208193326Sed 209193326Sedstatic const struct sock_fprog preauth_program = { 210193326Sed .len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])), 211193326Sed .filter = (struct sock_filter *)preauth_insns, 212198092Srdivacky}; 213193326Sed 214193326Sedstruct ssh_sandbox { 215193326Sed pid_t child_pid; 216193326Sed}; 217193326Sed 218198092Srdivackystruct ssh_sandbox * 219193326Sedssh_sandbox_init(struct monitor *monitor) 220193326Sed{ 221193326Sed struct ssh_sandbox *box; 222193326Sed 223193326Sed /* 224193326Sed * Strictly, we don't need to maintain any state here but we need 225239462Sdim * to return non-NULL to satisfy the API. 
226193326Sed */ 227193326Sed debug3("%s: preparing seccomp filter sandbox", __func__); 228239462Sdim box = xcalloc(1, sizeof(*box)); 229193326Sed box->child_pid = 0; 230193326Sed 231193326Sed return box; 232193326Sed} 233198092Srdivacky 234193326Sed#ifdef SANDBOX_SECCOMP_FILTER_DEBUG 235198092Srdivackyextern struct monitor *pmonitor; 236193326Sedvoid mm_log_handler(LogLevel level, const char *msg, void *ctx); 237193326Sed 238193326Sedstatic void 239193326Sedssh_sandbox_violation(int signum, siginfo_t *info, void *void_context) 240193326Sed{ 241193326Sed char msg[256]; 242193326Sed 243193326Sed snprintf(msg, sizeof(msg), 244193326Sed "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)", 245193326Sed __func__, info->si_arch, info->si_syscall, info->si_call_addr); 246193326Sed mm_log_handler(SYSLOG_LEVEL_FATAL, msg, pmonitor); 247198092Srdivacky _exit(1); 248193326Sed} 249193326Sed 250193326Sedstatic void 251193326Sedssh_sandbox_child_debugging(void) 252193326Sed{ 253193326Sed struct sigaction act; 254198092Srdivacky sigset_t mask; 255193326Sed 256193326Sed debug3("%s: installing SIGSYS handler", __func__); 257193326Sed memset(&act, 0, sizeof(act)); 258193326Sed sigemptyset(&mask); 259193326Sed sigaddset(&mask, SIGSYS); 260193326Sed 261193326Sed act.sa_sigaction = &ssh_sandbox_violation; 262239462Sdim act.sa_flags = SA_SIGINFO; 263193326Sed if (sigaction(SIGSYS, &act, NULL) == -1) 264193326Sed fatal("%s: sigaction(SIGSYS): %s", __func__, strerror(errno)); 265193326Sed if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1) 266193326Sed fatal("%s: sigprocmask(SIGSYS): %s", 267193326Sed __func__, strerror(errno)); 268193326Sed} 269193326Sed#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 270193326Sed 271193326Sedvoid 272193326Sedssh_sandbox_child(struct ssh_sandbox *box) 273193326Sed{ 274193326Sed struct rlimit rl_zero; 275193326Sed int nnp_failed = 0; 276193326Sed 277193326Sed /* Set rlimits for completeness if possible. 
*/ 278193326Sed rl_zero.rlim_cur = rl_zero.rlim_max = 0; 279193326Sed if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1) 280193326Sed fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s", 281239462Sdim __func__, strerror(errno)); 282193326Sed if (setrlimit(RLIMIT_NOFILE, &rl_zero) == -1) 283193326Sed fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s", 284193326Sed __func__, strerror(errno)); 285193326Sed if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1) 286193326Sed fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s", 287198092Srdivacky __func__, strerror(errno)); 288226633Sdim 289193326Sed#ifdef SANDBOX_SECCOMP_FILTER_DEBUG 290193326Sed ssh_sandbox_child_debugging(); 291224145Sdim#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 292193326Sed 293193326Sed debug3("%s: setting PR_SET_NO_NEW_PRIVS", __func__); 294193326Sed if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) { 295193326Sed debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s", 296193326Sed __func__, strerror(errno)); 297193326Sed nnp_failed = 1; 298193326Sed } 299193326Sed debug3("%s: attaching seccomp filter program", __func__); 300226633Sdim if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1) 301198092Srdivacky debug("%s: prctl(PR_SET_SECCOMP): %s", 302221345Sdim __func__, strerror(errno)); 303221345Sdim else if (nnp_failed) 304221345Sdim fatal("%s: SECCOMP_MODE_FILTER activated but " 305221345Sdim "PR_SET_NO_NEW_PRIVS failed", __func__); 306221345Sdim} 307221345Sdim 308193326Sedvoid 309193326Sedssh_sandbox_parent_finish(struct ssh_sandbox *box) 310193326Sed{ 311198092Srdivacky free(box); 312193326Sed debug3("%s: finished", __func__); 313193326Sed} 314239462Sdim 315193326Sedvoid 316193326Sedssh_sandbox_parent_preauth(struct ssh_sandbox *box, pid_t child_pid) 317193326Sed{ 318193326Sed box->child_pid = child_pid; 319193326Sed} 320193326Sed 321193326Sed#endif /* SANDBOX_SECCOMP_FILTER */ 322193326Sed