1239844Sdes/* 2239844Sdes * Copyright (c) 2012 Will Drewry <wad@dataspill.org> 3239844Sdes * 4239844Sdes * Permission to use, copy, modify, and distribute this software for any 5239844Sdes * purpose with or without fee is hereby granted, provided that the above 6239844Sdes * copyright notice and this permission notice appear in all copies. 7239844Sdes * 8239844Sdes * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9239844Sdes * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10239844Sdes * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11239844Sdes * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12239844Sdes * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13239844Sdes * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14239844Sdes * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15239844Sdes */ 16239844Sdes 17239844Sdes/* 18239844Sdes * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose 19239844Sdes * filter breakage during development. *Do not* use this in production, 20239844Sdes * as it relies on making library calls that are unsafe in signal context. 21239844Sdes * 22239844Sdes * Instead, live systems the auditctl(8) may be used to monitor failures. 23239844Sdes * E.g. 24239844Sdes * auditctl -a task,always -F uid=<privsep uid> 25239844Sdes */ 26239844Sdes/* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */ 27239844Sdes 28295367Sdes/* XXX it should be possible to do logging via the log socket safely */ 29295367Sdes 30239844Sdes#ifdef SANDBOX_SECCOMP_FILTER_DEBUG 31239844Sdes/* Use the kernel headers in case of an older toolchain. */ 32239844Sdes# include <asm/siginfo.h> 33239844Sdes# define __have_siginfo_t 1 34239844Sdes# define __have_sigval_t 1 35239844Sdes# define __have_sigevent_t 1 36239844Sdes#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 37239844Sdes 38239844Sdes#include "includes.h" 39239844Sdes 40239844Sdes#ifdef SANDBOX_SECCOMP_FILTER 41239844Sdes 42239844Sdes#include <sys/types.h> 43239844Sdes#include <sys/resource.h> 44239844Sdes#include <sys/prctl.h> 45239844Sdes 46295367Sdes#include <linux/net.h> 47239844Sdes#include <linux/audit.h> 48239844Sdes#include <linux/filter.h> 49239844Sdes#include <linux/seccomp.h> 50248613Sdes#include <elf.h> 51239844Sdes 52239844Sdes#include <asm/unistd.h> 53239844Sdes 54239844Sdes#include <errno.h> 55239844Sdes#include <signal.h> 56239844Sdes#include <stdarg.h> 57239844Sdes#include <stddef.h> /* for offsetof */ 58239844Sdes#include <stdio.h> 59239844Sdes#include <stdlib.h> 60239844Sdes#include <string.h> 61239844Sdes#include <unistd.h> 62239844Sdes 63239844Sdes#include "log.h" 64239844Sdes#include "ssh-sandbox.h" 65239844Sdes#include "xmalloc.h" 66239844Sdes 67239844Sdes/* Linux seccomp_filter sandbox */ 68239844Sdes#define SECCOMP_FILTER_FAIL SECCOMP_RET_KILL 69239844Sdes 70239844Sdes/* Use a signal handler to emit violations when debugging */ 71239844Sdes#ifdef SANDBOX_SECCOMP_FILTER_DEBUG 72239844Sdes# undef SECCOMP_FILTER_FAIL 73239844Sdes# define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP 74239844Sdes#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 75239844Sdes 76239844Sdes/* Simple helpers to avoid manual errors (but larger BPF programs). */ 77239844Sdes#define SC_DENY(_nr, _errno) \ 78239844Sdes BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \ 79239844Sdes BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno)) 80239844Sdes#define SC_ALLOW(_nr) \ 81239844Sdes BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \ 82239844Sdes BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) 83295367Sdes#define SC_ALLOW_ARG(_nr, _arg_nr, _arg_val) \ 84295367Sdes BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 4), \ 85295367Sdes /* load first syscall argument */ \ 86295367Sdes BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 87295367Sdes offsetof(struct seccomp_data, args[(_arg_nr)])), \ 88295367Sdes BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_arg_val), 0, 1), \ 89295367Sdes BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \ 90295367Sdes /* reload syscall number; all rules expect it in accumulator */ \ 91295367Sdes BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 92295367Sdes offsetof(struct seccomp_data, nr)) 93239844Sdes 94239844Sdes/* Syscall filtering set for preauth. */ 95239844Sdesstatic const struct sock_filter preauth_insns[] = { 96239844Sdes /* Ensure the syscall arch convention is as expected. */ 97239844Sdes BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 98239844Sdes offsetof(struct seccomp_data, arch)), 99239844Sdes BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0), 100239844Sdes BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL), 101239844Sdes /* Load the syscall number for checking. */ 102239844Sdes BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 103239844Sdes offsetof(struct seccomp_data, nr)), 104295367Sdes 105295367Sdes /* Syscalls to non-fatally deny */ 106323124Sdes#ifdef __NR_lstat 107323124Sdes SC_DENY(lstat, EACCES), 108323124Sdes#endif 109323124Sdes#ifdef __NR_lstat64 110323124Sdes SC_DENY(lstat64, EACCES), 111323124Sdes#endif 112295367Sdes#ifdef __NR_fstat 113295367Sdes SC_DENY(fstat, EACCES), 114295367Sdes#endif 115295367Sdes#ifdef __NR_fstat64 116295367Sdes SC_DENY(fstat64, EACCES), 117295367Sdes#endif 118295367Sdes#ifdef __NR_open 119239844Sdes SC_DENY(open, EACCES), 120295367Sdes#endif 121295367Sdes#ifdef __NR_openat 122295367Sdes SC_DENY(openat, EACCES), 123295367Sdes#endif 124295367Sdes#ifdef __NR_newfstatat 125295367Sdes SC_DENY(newfstatat, EACCES), 126295367Sdes#endif 127295367Sdes#ifdef __NR_stat 128295367Sdes SC_DENY(stat, EACCES), 129295367Sdes#endif 130295367Sdes#ifdef __NR_stat64 131295367Sdes SC_DENY(stat64, EACCES), 132295367Sdes#endif 133295367Sdes 134295367Sdes /* Syscalls to permit */ 135295367Sdes#ifdef __NR_brk 136295367Sdes SC_ALLOW(brk), 137295367Sdes#endif 138295367Sdes#ifdef __NR_clock_gettime 139255767Sdes SC_ALLOW(clock_gettime), 140248613Sdes#endif 141295367Sdes#ifdef __NR_close 142239844Sdes SC_ALLOW(close), 143264377Sdes#endif 144295367Sdes#ifdef __NR_exit 145295367Sdes SC_ALLOW(exit), 146239844Sdes#endif 147295367Sdes#ifdef __NR_exit_group 148295367Sdes SC_ALLOW(exit_group), 149295367Sdes#endif 150295367Sdes#ifdef __NR_getpgid 151295367Sdes SC_ALLOW(getpgid), 152295367Sdes#endif 153295367Sdes#ifdef __NR_getpid 154295367Sdes SC_ALLOW(getpid), 155295367Sdes#endif 156296781Sdes#ifdef __NR_getrandom 157296781Sdes SC_ALLOW(getrandom), 158296781Sdes#endif 159295367Sdes#ifdef __NR_gettimeofday 160295367Sdes SC_ALLOW(gettimeofday), 161295367Sdes#endif 162295367Sdes#ifdef __NR_madvise 163239844Sdes SC_ALLOW(madvise), 164248613Sdes#endif 165248613Sdes#ifdef __NR_mmap 166239844Sdes SC_ALLOW(mmap), 167248613Sdes#endif 168295367Sdes#ifdef __NR_mmap2 169295367Sdes SC_ALLOW(mmap2), 170295367Sdes#endif 171295367Sdes#ifdef __NR_mremap 172295367Sdes SC_ALLOW(mremap), 173295367Sdes#endif 174295367Sdes#ifdef __NR_munmap 175239844Sdes SC_ALLOW(munmap), 176295367Sdes#endif 177295367Sdes#ifdef __NR__newselect 178295367Sdes SC_ALLOW(_newselect), 179295367Sdes#endif 180295367Sdes#ifdef __NR_poll 181295367Sdes SC_ALLOW(poll), 182295367Sdes#endif 183295367Sdes#ifdef __NR_pselect6 184295367Sdes SC_ALLOW(pselect6), 185295367Sdes#endif 186295367Sdes#ifdef __NR_read 187295367Sdes SC_ALLOW(read), 188295367Sdes#endif 189239844Sdes#ifdef __NR_rt_sigprocmask 190239844Sdes SC_ALLOW(rt_sigprocmask), 191295367Sdes#endif 192295367Sdes#ifdef __NR_select 193295367Sdes SC_ALLOW(select), 194295367Sdes#endif 195295367Sdes#ifdef __NR_shutdown 196295367Sdes SC_ALLOW(shutdown), 197295367Sdes#endif 198295367Sdes#ifdef __NR_sigprocmask 199239844Sdes SC_ALLOW(sigprocmask), 200239844Sdes#endif 201295367Sdes#ifdef __NR_time 202295367Sdes SC_ALLOW(time), 203295367Sdes#endif 204295367Sdes#ifdef __NR_write 205295367Sdes SC_ALLOW(write), 206295367Sdes#endif 207295367Sdes#ifdef __NR_socketcall 208295367Sdes SC_ALLOW_ARG(socketcall, 0, SYS_SHUTDOWN), 209295367Sdes#endif 210295367Sdes 211295367Sdes /* Default deny */ 212239844Sdes BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL), 213239844Sdes}; 214239844Sdes 215239844Sdesstatic const struct sock_fprog preauth_program = { 216239844Sdes .len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])), 217239844Sdes .filter = (struct sock_filter *)preauth_insns, 218239844Sdes}; 219239844Sdes 220239844Sdesstruct ssh_sandbox { 221239844Sdes pid_t child_pid; 222239844Sdes}; 223239844Sdes 224239844Sdesstruct ssh_sandbox * 225262566Sdesssh_sandbox_init(struct monitor *monitor) 226239844Sdes{ 227239844Sdes struct ssh_sandbox *box; 228239844Sdes 229239844Sdes /* 230239844Sdes * Strictly, we don't need to maintain any state here but we need 231239844Sdes * to return non-NULL to satisfy the API. 232239844Sdes */ 233239844Sdes debug3("%s: preparing seccomp filter sandbox", __func__); 234239844Sdes box = xcalloc(1, sizeof(*box)); 235239844Sdes box->child_pid = 0; 236239844Sdes 237239844Sdes return box; 238239844Sdes} 239239844Sdes 240239844Sdes#ifdef SANDBOX_SECCOMP_FILTER_DEBUG 241239844Sdesextern struct monitor *pmonitor; 242239844Sdesvoid mm_log_handler(LogLevel level, const char *msg, void *ctx); 243239844Sdes 244239844Sdesstatic void 245239844Sdesssh_sandbox_violation(int signum, siginfo_t *info, void *void_context) 246239844Sdes{ 247239844Sdes char msg[256]; 248239844Sdes 249239844Sdes snprintf(msg, sizeof(msg), 250239844Sdes "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)", 251239844Sdes __func__, info->si_arch, info->si_syscall, info->si_call_addr); 252239844Sdes mm_log_handler(SYSLOG_LEVEL_FATAL, msg, pmonitor); 253239844Sdes _exit(1); 254239844Sdes} 255239844Sdes 256239844Sdesstatic void 257239844Sdesssh_sandbox_child_debugging(void) 258239844Sdes{ 259239844Sdes struct sigaction act; 260239844Sdes sigset_t mask; 261239844Sdes 262239844Sdes debug3("%s: installing SIGSYS handler", __func__); 263239844Sdes memset(&act, 0, sizeof(act)); 264239844Sdes sigemptyset(&mask); 265239844Sdes sigaddset(&mask, SIGSYS); 266239844Sdes 267239844Sdes act.sa_sigaction = &ssh_sandbox_violation; 268239844Sdes act.sa_flags = SA_SIGINFO; 269239844Sdes if (sigaction(SIGSYS, &act, NULL) == -1) 270239844Sdes fatal("%s: sigaction(SIGSYS): %s", __func__, strerror(errno)); 271239844Sdes if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1) 272239844Sdes fatal("%s: sigprocmask(SIGSYS): %s", 273239844Sdes __func__, strerror(errno)); 274239844Sdes} 275239844Sdes#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 276239844Sdes 277239844Sdesvoid 278239844Sdesssh_sandbox_child(struct ssh_sandbox *box) 279239844Sdes{ 280239844Sdes struct rlimit rl_zero; 281239849Sdes int nnp_failed = 0; 282239844Sdes 283239844Sdes /* Set rlimits for completeness if possible. */ 284239844Sdes rl_zero.rlim_cur = rl_zero.rlim_max = 0; 285239844Sdes if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1) 286239844Sdes fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s", 287239844Sdes __func__, strerror(errno)); 288239844Sdes if (setrlimit(RLIMIT_NOFILE, &rl_zero) == -1) 289239844Sdes fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s", 290239844Sdes __func__, strerror(errno)); 291239844Sdes if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1) 292239844Sdes fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s", 293239844Sdes __func__, strerror(errno)); 294239844Sdes 295239844Sdes#ifdef SANDBOX_SECCOMP_FILTER_DEBUG 296239844Sdes ssh_sandbox_child_debugging(); 297239844Sdes#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 298239844Sdes 299239844Sdes debug3("%s: setting PR_SET_NO_NEW_PRIVS", __func__); 300239849Sdes if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) { 301239849Sdes debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s", 302239844Sdes __func__, strerror(errno)); 303239849Sdes nnp_failed = 1; 304239849Sdes } 305239844Sdes debug3("%s: attaching seccomp filter program", __func__); 306239844Sdes if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1) 307239849Sdes debug("%s: prctl(PR_SET_SECCOMP): %s", 308239844Sdes __func__, strerror(errno)); 309239849Sdes else if (nnp_failed) 310239849Sdes fatal("%s: SECCOMP_MODE_FILTER activated but " 311239849Sdes "PR_SET_NO_NEW_PRIVS failed", __func__); 312239844Sdes} 313239844Sdes 314239844Sdesvoid 315239844Sdesssh_sandbox_parent_finish(struct ssh_sandbox *box) 316239844Sdes{ 317239844Sdes free(box); 318239844Sdes debug3("%s: finished", __func__); 319239844Sdes} 320239844Sdes 321239844Sdesvoid 322239844Sdesssh_sandbox_parent_preauth(struct ssh_sandbox *box, pid_t child_pid) 323239844Sdes{ 324239844Sdes box->child_pid = child_pid; 325239844Sdes} 326239844Sdes 327239844Sdes#endif /* SANDBOX_SECCOMP_FILTER */ 328