sandbox-seccomp-filter.c revision 296853
1193326Sed/*
2193326Sed * Copyright (c) 2012 Will Drewry <wad@dataspill.org>
3193326Sed *
4193326Sed * Permission to use, copy, modify, and distribute this software for any
5193326Sed * purpose with or without fee is hereby granted, provided that the above
6193326Sed * copyright notice and this permission notice appear in all copies.
7193326Sed *
8193326Sed * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9193326Sed * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10193326Sed * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11193326Sed * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12193326Sed * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13193326Sed * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14193326Sed * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15205219Srdivacky */
16193326Sed
/*
 * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose
 * filter breakage during development. *Do not* use this in production,
 * as it relies on making library calls that are unsafe in signal context.
 *
 * Instead, on live systems auditctl(8) may be used to monitor failures.
 * E.g.
 *   auditctl -a task,always -F uid=<privsep uid>
 */
26193326Sed/* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */
27249423Sdim
28205219Srdivacky/* XXX it should be possible to do logging via the log socket safely */
29218893Sdim
30205219Srdivacky#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
31193326Sed/* Use the kernel headers in case of an older toolchain. */
32193326Sed# include <asm/siginfo.h>
33193326Sed# define __have_siginfo_t 1
34193326Sed# define __have_sigval_t 1
35193326Sed# define __have_sigevent_t 1
36193326Sed#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
37193326Sed
38193326Sed#include "includes.h"
39193326Sed
40212904Sdim#ifdef SANDBOX_SECCOMP_FILTER
41212904Sdim
42193326Sed#include <sys/types.h>
43193326Sed#include <sys/resource.h>
44226633Sdim#include <sys/prctl.h>
45226633Sdim
46193326Sed#include <linux/net.h>
47205408Srdivacky#include <linux/audit.h>
48193326Sed#include <linux/filter.h>
49193326Sed#include <linux/seccomp.h>
50221345Sdim#include <elf.h>
51221345Sdim
52221345Sdim#include <asm/unistd.h>
53221345Sdim
54221345Sdim#include <errno.h>
55221345Sdim#include <signal.h>
56221345Sdim#include <stdarg.h>
57221345Sdim#include <stddef.h>  /* for offsetof */
58221345Sdim#include <stdio.h>
59221345Sdim#include <stdlib.h>
60221345Sdim#include <string.h>
61221345Sdim#include <unistd.h>
62221345Sdim
63193326Sed#include "log.h"
64193326Sed#include "ssh-sandbox.h"
65193326Sed#include "xmalloc.h"
66200583Srdivacky
/*
 * Linux seccomp_filter sandbox.
 *
 * SECCOMP_FILTER_FAIL is the filter return value used for anything the
 * policy rejects outright. It is SECCOMP_RET_KILL normally; when
 * SANDBOX_SECCOMP_FILTER_DEBUG is defined it is SECCOMP_RET_TRAP instead so
 * that a signal handler can emit the violation.
 */
#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
# define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP
#else
# define SECCOMP_FILTER_FAIL SECCOMP_RET_KILL
#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
75234353Sdim
/*
 * Simple helpers to avoid manual errors (but larger BPF programs).
 *
 * Every helper expects the syscall number to be in the accumulator on
 * entry and leaves it there when the rule does not match, so helpers can
 * be listed one after another.
 */

/*
 * Byte offsets of the low and high 32-bit halves of a 64-bit
 * seccomp_data argument slot. BPF_W loads are 32 bits wide, so which half
 * sits at offset 0 depends on the host byte order.
 */
#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
    __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || \
    (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && \
    BYTE_ORDER == LITTLE_ENDIAN)
# define ARG_LO_OFFSET	0
# define ARG_HI_OFFSET	4
#elif (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || \
    (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && \
    BYTE_ORDER == BIG_ENDIAN)
# define ARG_LO_OFFSET	4
# define ARG_HI_OFFSET	0
#else
# error "Unknown endianness"
#endif

/* Fail syscall _nr with errno _errno instead of running it. */
#define SC_DENY(_nr, _errno) \
	BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_ ## _nr, 0, 1), \
	BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO|(_errno))

/* Allow syscall _nr unconditionally. */
#define SC_ALLOW(_nr) \
	BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_ ## _nr, 0, 1), \
	BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW)

/*
 * Allow syscall _nr only when argument number _arg_nr equals _arg_val.
 *
 * Both 32-bit halves of the 64-bit argument slot are compared, so the rule
 * matches the full value regardless of host byte order. _arg_val is widened
 * to 64 bits before being split; a negative constant therefore yields an
 * all-ones high word.
 *
 * Expands to 7 instructions: a non-matching syscall number skips all of
 * them (accumulator untouched); a non-matching argument falls through to
 * the final reload of the syscall number.
 */
#define SC_ALLOW_ARG(_nr, _arg_nr, _arg_val) \
	BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_ ## _nr, 0, 6), \
	/* load and test the low word of the selected argument */ \
	BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_LO_OFFSET), \
	BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, \
	    (unsigned int)((unsigned long long)(_arg_val) & 0xFFFFFFFFULL), \
	    0, 3), \
	/* load and test the high word of the selected argument */ \
	BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_HI_OFFSET), \
	BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, \
	    (unsigned int)((unsigned long long)(_arg_val) >> 32), 0, 1), \
	BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \
	/* reload syscall number; all rules expect it in accumulator */ \
	BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \
	    offsetof(struct seccomp_data, nr))
93218893Sdim
94218893Sdim/* Syscall filtering set for preauth. */
95205408Srdivackystatic const struct sock_filter preauth_insns[] = {
96218893Sdim	/* Ensure the syscall arch convention is as expected. */
97218893Sdim	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
98206084Srdivacky		offsetof(struct seccomp_data, arch)),
99218893Sdim	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0),
100239462Sdim	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
101239462Sdim	/* Load the syscall number for checking. */
102239462Sdim	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
103239462Sdim		offsetof(struct seccomp_data, nr)),
104206084Srdivacky
105218893Sdim	/* Syscalls to non-fatally deny */
106218893Sdim#ifdef __NR_fstat
107218893Sdim	SC_DENY(fstat, EACCES),
108218893Sdim#endif
109218893Sdim#ifdef __NR_fstat64
110218893Sdim	SC_DENY(fstat64, EACCES),
111218893Sdim#endif
112218893Sdim#ifdef __NR_open
113218893Sdim	SC_DENY(open, EACCES),
114218893Sdim#endif
115218893Sdim#ifdef __NR_openat
116226633Sdim	SC_DENY(openat, EACCES),
117221345Sdim#endif
118218893Sdim#ifdef __NR_newfstatat
119218893Sdim	SC_DENY(newfstatat, EACCES),
120221345Sdim#endif
121218893Sdim#ifdef __NR_stat
122207619Srdivacky	SC_DENY(stat, EACCES),
123218893Sdim#endif
124218893Sdim#ifdef __NR_stat64
125221345Sdim	SC_DENY(stat64, EACCES),
126218893Sdim#endif
127218893Sdim
128221345Sdim	/* Syscalls to permit */
129206084Srdivacky#ifdef __NR_brk
130218893Sdim	SC_ALLOW(brk),
131207619Srdivacky#endif
132218893Sdim#ifdef __NR_clock_gettime
133218893Sdim	SC_ALLOW(clock_gettime),
134198092Srdivacky#endif
135205219Srdivacky#ifdef __NR_close
136218893Sdim	SC_ALLOW(close),
137218893Sdim#endif
138221345Sdim#ifdef __NR_exit
139218893Sdim	SC_ALLOW(exit),
140218893Sdim#endif
141221345Sdim#ifdef __NR_exit_group
142218893Sdim	SC_ALLOW(exit_group),
143218893Sdim#endif
144221345Sdim#ifdef __NR_getpgid
145218893Sdim	SC_ALLOW(getpgid),
146218893Sdim#endif
147218893Sdim#ifdef __NR_getpid
148218893Sdim	SC_ALLOW(getpid),
149218893Sdim#endif
150221345Sdim#ifdef __NR_getrandom
151218893Sdim	SC_ALLOW(getrandom),
152221345Sdim#endif
153218893Sdim#ifdef __NR_gettimeofday
154226633Sdim	SC_ALLOW(gettimeofday),
155221345Sdim#endif
156218893Sdim#ifdef __NR_madvise
157218893Sdim	SC_ALLOW(madvise),
158218893Sdim#endif
159218893Sdim#ifdef __NR_mmap
160218893Sdim	SC_ALLOW(mmap),
161218893Sdim#endif
162218893Sdim#ifdef __NR_mmap2
163218893Sdim	SC_ALLOW(mmap2),
164218893Sdim#endif
165218893Sdim#ifdef __NR_mremap
166218893Sdim	SC_ALLOW(mremap),
167218893Sdim#endif
168221345Sdim#ifdef __NR_munmap
169218893Sdim	SC_ALLOW(munmap),
170221345Sdim#endif
171218893Sdim#ifdef __NR__newselect
172218893Sdim	SC_ALLOW(_newselect),
173218893Sdim#endif
174205408Srdivacky#ifdef __NR_poll
175212904Sdim	SC_ALLOW(poll),
176205408Srdivacky#endif
177205408Srdivacky#ifdef __NR_pselect6
178198092Srdivacky	SC_ALLOW(pselect6),
179198092Srdivacky#endif
180226633Sdim#ifdef __NR_read
181193326Sed	SC_ALLOW(read),
182193326Sed#endif
183198092Srdivacky#ifdef __NR_rt_sigprocmask
184224145Sdim	SC_ALLOW(rt_sigprocmask),
185193326Sed#endif
186193326Sed#ifdef __NR_select
187198092Srdivacky	SC_ALLOW(select),
188193326Sed#endif
189193326Sed#ifdef __NR_shutdown
190193326Sed	SC_ALLOW(shutdown),
191193326Sed#endif
192193326Sed#ifdef __NR_sigprocmask
193193326Sed	SC_ALLOW(sigprocmask),
194193326Sed#endif
195239462Sdim#ifdef __NR_time
196193326Sed	SC_ALLOW(time),
197239462Sdim#endif
198193326Sed#ifdef __NR_write
199193326Sed	SC_ALLOW(write),
200198092Srdivacky#endif
201193326Sed#ifdef __NR_socketcall
202193326Sed	SC_ALLOW_ARG(socketcall, 0, SYS_SHUTDOWN),
203198092Srdivacky#endif
204193326Sed
205193326Sed	/* Default deny */
206198092Srdivacky	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
207193326Sed};
208193326Sed
209193326Sedstatic const struct sock_fprog preauth_program = {
210193326Sed	.len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])),
211193326Sed	.filter = (struct sock_filter *)preauth_insns,
212198092Srdivacky};
213193326Sed
/* Per-sandbox state; holds only the pid of the sandboxed child. */
struct ssh_sandbox {
	pid_t child_pid;	/* 0 until ssh_sandbox_parent_preauth() sets it */
};
217193326Sed
218198092Srdivackystruct ssh_sandbox *
219193326Sedssh_sandbox_init(struct monitor *monitor)
220193326Sed{
221193326Sed	struct ssh_sandbox *box;
222193326Sed
223193326Sed	/*
224193326Sed	 * Strictly, we don't need to maintain any state here but we need
225239462Sdim	 * to return non-NULL to satisfy the API.
226193326Sed	 */
227193326Sed	debug3("%s: preparing seccomp filter sandbox", __func__);
228239462Sdim	box = xcalloc(1, sizeof(*box));
229193326Sed	box->child_pid = 0;
230193326Sed
231193326Sed	return box;
232193326Sed}
233198092Srdivacky
234193326Sed#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
235198092Srdivackyextern struct monitor *pmonitor;
236193326Sedvoid mm_log_handler(LogLevel level, const char *msg, void *ctx);
237193326Sed
238193326Sedstatic void
239193326Sedssh_sandbox_violation(int signum, siginfo_t *info, void *void_context)
240193326Sed{
241193326Sed	char msg[256];
242193326Sed
243193326Sed	snprintf(msg, sizeof(msg),
244193326Sed	    "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)",
245193326Sed	    __func__, info->si_arch, info->si_syscall, info->si_call_addr);
246193326Sed	mm_log_handler(SYSLOG_LEVEL_FATAL, msg, pmonitor);
247198092Srdivacky	_exit(1);
248193326Sed}
249193326Sed
250193326Sedstatic void
251193326Sedssh_sandbox_child_debugging(void)
252193326Sed{
253193326Sed	struct sigaction act;
254198092Srdivacky	sigset_t mask;
255193326Sed
256193326Sed	debug3("%s: installing SIGSYS handler", __func__);
257193326Sed	memset(&act, 0, sizeof(act));
258193326Sed	sigemptyset(&mask);
259193326Sed	sigaddset(&mask, SIGSYS);
260193326Sed
261193326Sed	act.sa_sigaction = &ssh_sandbox_violation;
262239462Sdim	act.sa_flags = SA_SIGINFO;
263193326Sed	if (sigaction(SIGSYS, &act, NULL) == -1)
264193326Sed		fatal("%s: sigaction(SIGSYS): %s", __func__, strerror(errno));
265193326Sed	if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
266193326Sed		fatal("%s: sigprocmask(SIGSYS): %s",
267193326Sed		      __func__, strerror(errno));
268193326Sed}
269193326Sed#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
270193326Sed
271193326Sedvoid
272193326Sedssh_sandbox_child(struct ssh_sandbox *box)
273193326Sed{
274193326Sed	struct rlimit rl_zero;
275193326Sed	int nnp_failed = 0;
276193326Sed
277193326Sed	/* Set rlimits for completeness if possible. */
278193326Sed	rl_zero.rlim_cur = rl_zero.rlim_max = 0;
279193326Sed	if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1)
280193326Sed		fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s",
281239462Sdim			__func__, strerror(errno));
282193326Sed	if (setrlimit(RLIMIT_NOFILE, &rl_zero) == -1)
283193326Sed		fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s",
284193326Sed			__func__, strerror(errno));
285193326Sed	if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1)
286193326Sed		fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s",
287198092Srdivacky			__func__, strerror(errno));
288226633Sdim
289193326Sed#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
290193326Sed	ssh_sandbox_child_debugging();
291224145Sdim#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
292193326Sed
293193326Sed	debug3("%s: setting PR_SET_NO_NEW_PRIVS", __func__);
294193326Sed	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
295193326Sed		debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s",
296193326Sed		      __func__, strerror(errno));
297193326Sed		nnp_failed = 1;
298193326Sed	}
299193326Sed	debug3("%s: attaching seccomp filter program", __func__);
300226633Sdim	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1)
301198092Srdivacky		debug("%s: prctl(PR_SET_SECCOMP): %s",
302221345Sdim		      __func__, strerror(errno));
303221345Sdim	else if (nnp_failed)
304221345Sdim		fatal("%s: SECCOMP_MODE_FILTER activated but "
305221345Sdim		    "PR_SET_NO_NEW_PRIVS failed", __func__);
306221345Sdim}
307221345Sdim
/*
 * Release the sandbox state object and log completion.
 * `box` must not be used after this call.
 */
void
ssh_sandbox_parent_finish(struct ssh_sandbox *box)
{
	free(box);

	debug3("%s: finished", __func__);
}
314239462Sdim
315193326Sedvoid
316193326Sedssh_sandbox_parent_preauth(struct ssh_sandbox *box, pid_t child_pid)
317193326Sed{
318193326Sed	box->child_pid = child_pid;
319193326Sed}
320193326Sed
321193326Sed#endif /* SANDBOX_SECCOMP_FILTER */
322193326Sed