1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2008-2011 Robert N. M. Watson
5 * Copyright (c) 2010-2011 Jonathan Anderson
6 * Copyright (c) 2012 FreeBSD Foundation
7 * All rights reserved.
8 *
9 * This software was developed at the University of Cambridge Computer
10 * Laboratory with support from a grant from Google, Inc.
11 *
12 * Portions of this software were developed by Pawel Jakub Dawidek under
13 * sponsorship from the FreeBSD Foundation.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
/*
 * FreeBSD kernel capability facility.
 *
 * Two kernel features are implemented here: capability mode, a sandboxed mode
 * of execution for processes, and capabilities, a refinement on file
 * descriptors that allows fine-grained control over operations on the file
 * descriptor.  Collectively, these allow processes to run in the style of a
 * historic "capability system" in which they can use only resources
 * explicitly delegated to them.  This model is enforced by restricting access
 * to global namespaces in capability mode.
 *
 * Capabilities wrap other file descriptor types, binding them to a constant
 * rights mask set when the capability is created.  New capabilities may be
 * derived from existing capabilities, but only if they have the same or a
 * strict subset of the rights on the original capability.
 *
 * System calls permitted in capability mode are defined in capabilities.conf;
 * calls must be carefully audited for safety to ensure that they don't allow
 * escape from a sandbox.  Some calls permit only a subset of operations in
 * capability mode -- for example, shm_open(2) is limited to creating
 * anonymous, rather than named, POSIX shared memory objects.
 */
59
60#include <sys/cdefs.h>
61#include "opt_capsicum.h"
62#include "opt_ktrace.h"
63
64#include <sys/param.h>
65#include <sys/capsicum.h>
66#include <sys/file.h>
67#include <sys/filedesc.h>
68#include <sys/kernel.h>
69#include <sys/limits.h>
70#include <sys/lock.h>
71#include <sys/mutex.h>
72#include <sys/proc.h>
73#include <sys/syscallsubr.h>
74#include <sys/sysproto.h>
75#include <sys/sysctl.h>
76#include <sys/systm.h>
77#include <sys/ucred.h>
78#include <sys/uio.h>
79#include <sys/ktrace.h>
80
81#include <security/audit/audit.h>
82
83#include <vm/uma.h>
84#include <vm/vm.h>
85
86bool __read_frequently trap_enotcap;
87SYSCTL_BOOL(_kern, OID_AUTO, trap_enotcap, CTLFLAG_RWTUN, &trap_enotcap, 0,
88    "Deliver SIGTRAP on ECAPMODE and ENOTCAPABLE");
89
90#ifdef CAPABILITY_MODE
91
92#define        IOCTLS_MAX_COUNT        256     /* XXX: Is 256 sane? */
93
94FEATURE(security_capability_mode, "Capsicum Capability Mode");
95
96/*
97 * System call to enter capability mode for the process.
98 */
99int
100sys_cap_enter(struct thread *td, struct cap_enter_args *uap)
101{
102	struct ucred *newcred, *oldcred;
103	struct proc *p;
104
105	if (IN_CAPABILITY_MODE(td))
106		return (0);
107
108	newcred = crget();
109	p = td->td_proc;
110	PROC_LOCK(p);
111	oldcred = crcopysafe(p, newcred);
112	newcred->cr_flags |= CRED_FLAG_CAPMODE;
113	proc_set_cred(p, newcred);
114	PROC_UNLOCK(p);
115	crfree(oldcred);
116	return (0);
117}
118
119/*
120 * System call to query whether the process is in capability mode.
121 */
122int
123sys_cap_getmode(struct thread *td, struct cap_getmode_args *uap)
124{
125	u_int i;
126
127	i = IN_CAPABILITY_MODE(td) ? 1 : 0;
128	return (copyout(&i, uap->modep, sizeof(i)));
129}
130
131#else /* !CAPABILITY_MODE */
132
/*
 * cap_enter(2) placeholder used when capability mode support is compiled
 * out: the call is always rejected with ENOSYS.
 */
int
sys_cap_enter(struct thread *td, struct cap_enter_args *uap)
{
	return (ENOSYS);
}
139
/*
 * cap_getmode(2) placeholder used when capability mode support is compiled
 * out: the call is always rejected with ENOSYS and nothing is written to
 * the caller's buffer.
 */
int
sys_cap_getmode(struct thread *td, struct cap_getmode_args *uap)
{
	return (ENOSYS);
}
146
147#endif /* CAPABILITY_MODE */
148
149#ifdef CAPABILITIES
150
151FEATURE(security_capabilities, "Capsicum Capabilities");
152
153MALLOC_DECLARE(M_FILECAPS);
154
155static inline int
156_cap_check(const cap_rights_t *havep, const cap_rights_t *needp,
157    enum ktr_cap_violation type)
158{
159	const cap_rights_t rights[] = { *needp, *havep };
160
161	if (!cap_rights_contains(havep, needp)) {
162		if (CAP_TRACING(curthread))
163			ktrcapfail(type, rights);
164		return (ENOTCAPABLE);
165	}
166	return (0);
167}
168
169/*
170 * Test whether a capability grants the requested rights.
171 */
172int
173cap_check(const cap_rights_t *havep, const cap_rights_t *needp)
174{
175
176	return (_cap_check(havep, needp, CAPFAIL_NOTCAPABLE));
177}
178
179int
180cap_check_failed_notcapable(const cap_rights_t *havep, const cap_rights_t *needp)
181{
182	const cap_rights_t rights[] = { *needp, *havep };
183
184	if (CAP_TRACING(curthread))
185		ktrcapfail(CAPFAIL_NOTCAPABLE, rights);
186	return (ENOTCAPABLE);
187}
188
189/*
190 * Convert capability rights into VM access flags.
191 */
192vm_prot_t
193cap_rights_to_vmprot(const cap_rights_t *havep)
194{
195	vm_prot_t maxprot;
196
197	maxprot = VM_PROT_NONE;
198	if (cap_rights_is_set(havep, CAP_MMAP_R))
199		maxprot |= VM_PROT_READ;
200	if (cap_rights_is_set(havep, CAP_MMAP_W))
201		maxprot |= VM_PROT_WRITE;
202	if (cap_rights_is_set(havep, CAP_MMAP_X))
203		maxprot |= VM_PROT_EXECUTE;
204
205	return (maxprot);
206}
207
208/*
209 * Extract rights from a capability for monitoring purposes -- not for use in
210 * any other way, as we want to keep all capability permission evaluation in
211 * this one file.
212 */
213
214const cap_rights_t *
215cap_rights_fde(const struct filedescent *fdep)
216{
217
218	return (cap_rights_fde_inline(fdep));
219}
220
221const cap_rights_t *
222cap_rights(struct filedesc *fdp, int fd)
223{
224
225	return (cap_rights_fde(&fdp->fd_ofiles[fd]));
226}
227
228int
229kern_cap_rights_limit(struct thread *td, int fd, cap_rights_t *rights)
230{
231	struct filedesc *fdp;
232	struct filedescent *fdep;
233	u_long *ioctls;
234	int error;
235
236	fdp = td->td_proc->p_fd;
237	FILEDESC_XLOCK(fdp);
238	fdep = fdeget_noref(fdp, fd);
239	if (fdep == NULL) {
240		FILEDESC_XUNLOCK(fdp);
241		return (EBADF);
242	}
243	ioctls = NULL;
244	error = _cap_check(cap_rights(fdp, fd), rights, CAPFAIL_INCREASE);
245	if (error == 0) {
246		seqc_write_begin(&fdep->fde_seqc);
247		fdep->fde_rights = *rights;
248		if (!cap_rights_is_set(rights, CAP_IOCTL)) {
249			ioctls = fdep->fde_ioctls;
250			fdep->fde_ioctls = NULL;
251			fdep->fde_nioctls = 0;
252		}
253		if (!cap_rights_is_set(rights, CAP_FCNTL))
254			fdep->fde_fcntls = 0;
255		seqc_write_end(&fdep->fde_seqc);
256	}
257	FILEDESC_XUNLOCK(fdp);
258	free(ioctls, M_FILECAPS);
259	return (error);
260}
261
262/*
263 * System call to limit rights of the given capability.
264 */
265int
266sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap)
267{
268	cap_rights_t rights;
269	int error, version;
270
271	cap_rights_init_zero(&rights);
272
273	error = copyin(uap->rightsp, &rights, sizeof(rights.cr_rights[0]));
274	if (error != 0)
275		return (error);
276	version = CAPVER(&rights);
277	if (version != CAP_RIGHTS_VERSION_00)
278		return (EINVAL);
279
280	error = copyin(uap->rightsp, &rights,
281	    sizeof(rights.cr_rights[0]) * CAPARSIZE(&rights));
282	if (error != 0)
283		return (error);
284	/* Check for race. */
285	if (CAPVER(&rights) != version)
286		return (EINVAL);
287
288	if (!cap_rights_is_valid(&rights))
289		return (EINVAL);
290
291	if (version != CAP_RIGHTS_VERSION) {
292		rights.cr_rights[0] &= ~(0x3ULL << 62);
293		rights.cr_rights[0] |= ((uint64_t)CAP_RIGHTS_VERSION << 62);
294	}
295#ifdef KTRACE
296	if (KTRPOINT(td, KTR_STRUCT))
297		ktrcaprights(&rights);
298#endif
299
300	AUDIT_ARG_FD(uap->fd);
301	AUDIT_ARG_RIGHTS(&rights);
302	return (kern_cap_rights_limit(td, uap->fd, &rights));
303}
304
305/*
306 * System call to query the rights mask associated with a capability.
307 */
308int
309sys___cap_rights_get(struct thread *td, struct __cap_rights_get_args *uap)
310{
311	struct filedesc *fdp;
312	cap_rights_t rights;
313	int error, fd, i, n;
314
315	if (uap->version != CAP_RIGHTS_VERSION_00)
316		return (EINVAL);
317
318	fd = uap->fd;
319
320	AUDIT_ARG_FD(fd);
321
322	fdp = td->td_proc->p_fd;
323	FILEDESC_SLOCK(fdp);
324	if (fget_noref(fdp, fd) == NULL) {
325		FILEDESC_SUNLOCK(fdp);
326		return (EBADF);
327	}
328	rights = *cap_rights(fdp, fd);
329	FILEDESC_SUNLOCK(fdp);
330	n = uap->version + 2;
331	if (uap->version != CAPVER(&rights)) {
332		/*
333		 * For older versions we need to check if the descriptor
334		 * doesn't contain rights not understood by the caller.
335		 * If it does, we have to return an error.
336		 */
337		for (i = n; i < CAPARSIZE(&rights); i++) {
338			if ((rights.cr_rights[i] & ~(0x7FULL << 57)) != 0)
339				return (EINVAL);
340		}
341	}
342	error = copyout(&rights, uap->rightsp, sizeof(rights.cr_rights[0]) * n);
343#ifdef KTRACE
344	if (error == 0 && KTRPOINT(td, KTR_STRUCT))
345		ktrcaprights(&rights);
346#endif
347	return (error);
348}
349
350/*
351 * Test whether a capability grants the given ioctl command.
352 * If descriptor doesn't have CAP_IOCTL, then ioctls list is empty and
353 * ENOTCAPABLE will be returned.
354 */
355int
356cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd)
357{
358	struct filedescent *fdep;
359	u_long *cmds;
360	ssize_t ncmds;
361	long i;
362
363	KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
364		("%s: invalid fd=%d", __func__, fd));
365
366	fdep = fdeget_noref(fdp, fd);
367	KASSERT(fdep != NULL,
368	    ("%s: invalid fd=%d", __func__, fd));
369
370	ncmds = fdep->fde_nioctls;
371	if (ncmds == -1)
372		return (0);
373
374	cmds = fdep->fde_ioctls;
375	for (i = 0; i < ncmds; i++) {
376		if (cmds[i] == cmd)
377			return (0);
378	}
379
380	return (ENOTCAPABLE);
381}
382
383/*
384 * Check if the current ioctls list can be replaced by the new one.
385 */
386static int
387cap_ioctl_limit_check(struct filedescent *fdep, const u_long *cmds,
388    size_t ncmds)
389{
390	u_long *ocmds;
391	ssize_t oncmds;
392	u_long i;
393	long j;
394
395	oncmds = fdep->fde_nioctls;
396	if (oncmds == -1)
397		return (0);
398	if (oncmds < (ssize_t)ncmds)
399		return (ENOTCAPABLE);
400
401	ocmds = fdep->fde_ioctls;
402	for (i = 0; i < ncmds; i++) {
403		for (j = 0; j < oncmds; j++) {
404			if (cmds[i] == ocmds[j])
405				break;
406		}
407		if (j == oncmds)
408			return (ENOTCAPABLE);
409	}
410
411	return (0);
412}
413
414int
415kern_cap_ioctls_limit(struct thread *td, int fd, u_long *cmds, size_t ncmds)
416{
417	struct filedesc *fdp;
418	struct filedescent *fdep;
419	u_long *ocmds;
420	int error;
421
422	AUDIT_ARG_FD(fd);
423
424	if (ncmds > IOCTLS_MAX_COUNT) {
425		error = EINVAL;
426		goto out_free;
427	}
428
429	fdp = td->td_proc->p_fd;
430	FILEDESC_XLOCK(fdp);
431
432	fdep = fdeget_noref(fdp, fd);
433	if (fdep == NULL) {
434		error = EBADF;
435		goto out;
436	}
437
438	error = cap_ioctl_limit_check(fdep, cmds, ncmds);
439	if (error != 0)
440		goto out;
441
442	ocmds = fdep->fde_ioctls;
443	seqc_write_begin(&fdep->fde_seqc);
444	fdep->fde_ioctls = cmds;
445	fdep->fde_nioctls = ncmds;
446	seqc_write_end(&fdep->fde_seqc);
447
448	cmds = ocmds;
449	error = 0;
450out:
451	FILEDESC_XUNLOCK(fdp);
452out_free:
453	free(cmds, M_FILECAPS);
454	return (error);
455}
456
457int
458sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap)
459{
460	u_long *cmds;
461	size_t ncmds;
462	int error;
463
464	ncmds = uap->ncmds;
465
466	if (ncmds > IOCTLS_MAX_COUNT)
467		return (EINVAL);
468
469	if (ncmds == 0) {
470		cmds = NULL;
471	} else {
472		cmds = malloc(sizeof(cmds[0]) * ncmds, M_FILECAPS, M_WAITOK);
473		error = copyin(uap->cmds, cmds, sizeof(cmds[0]) * ncmds);
474		if (error != 0) {
475			free(cmds, M_FILECAPS);
476			return (error);
477		}
478	}
479
480	return (kern_cap_ioctls_limit(td, uap->fd, cmds, ncmds));
481}
482
483int
484sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap)
485{
486	struct filedesc *fdp;
487	struct filedescent *fdep;
488	u_long *cmdsp, *dstcmds;
489	size_t maxcmds, ncmds;
490	int16_t count;
491	int error, fd;
492
493	fd = uap->fd;
494	dstcmds = uap->cmds;
495	maxcmds = uap->maxcmds;
496
497	AUDIT_ARG_FD(fd);
498
499	fdp = td->td_proc->p_fd;
500
501	cmdsp = NULL;
502	if (dstcmds != NULL) {
503		cmdsp = malloc(sizeof(cmdsp[0]) * IOCTLS_MAX_COUNT, M_FILECAPS,
504		    M_WAITOK | M_ZERO);
505	}
506
507	FILEDESC_SLOCK(fdp);
508	fdep = fdeget_noref(fdp, fd);
509	if (fdep == NULL) {
510		error = EBADF;
511		FILEDESC_SUNLOCK(fdp);
512		goto out;
513	}
514	count = fdep->fde_nioctls;
515	if (count != -1 && cmdsp != NULL) {
516		ncmds = MIN(count, maxcmds);
517		memcpy(cmdsp, fdep->fde_ioctls, sizeof(cmdsp[0]) * ncmds);
518	}
519	FILEDESC_SUNLOCK(fdp);
520
521	/*
522	 * If all ioctls are allowed (fde_nioctls == -1 && fde_ioctls == NULL)
523	 * the only sane thing we can do is to not populate the given array and
524	 * return CAP_IOCTLS_ALL.
525	 */
526	if (count != -1) {
527		if (cmdsp != NULL) {
528			error = copyout(cmdsp, dstcmds,
529			    sizeof(cmdsp[0]) * ncmds);
530			if (error != 0)
531				goto out;
532		}
533		td->td_retval[0] = count;
534	} else {
535		td->td_retval[0] = CAP_IOCTLS_ALL;
536	}
537
538	error = 0;
539out:
540	free(cmdsp, M_FILECAPS);
541	return (error);
542}
543
544/*
545 * Test whether a capability grants the given fcntl command.
546 */
547int
548cap_fcntl_check_fde(struct filedescent *fdep, int cmd)
549{
550	uint32_t fcntlcap;
551
552	fcntlcap = (1 << cmd);
553	KASSERT((CAP_FCNTL_ALL & fcntlcap) != 0,
554	    ("Unsupported fcntl=%d.", cmd));
555
556	if ((fdep->fde_fcntls & fcntlcap) != 0)
557		return (0);
558
559	return (ENOTCAPABLE);
560}
561
562int
563cap_fcntl_check(struct filedesc *fdp, int fd, int cmd)
564{
565
566	KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
567	    ("%s: invalid fd=%d", __func__, fd));
568
569	return (cap_fcntl_check_fde(&fdp->fd_ofiles[fd], cmd));
570}
571
572int
573sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap)
574{
575	struct filedesc *fdp;
576	struct filedescent *fdep;
577	uint32_t fcntlrights;
578	int fd;
579
580	fd = uap->fd;
581	fcntlrights = uap->fcntlrights;
582
583	AUDIT_ARG_FD(fd);
584	AUDIT_ARG_FCNTL_RIGHTS(fcntlrights);
585
586	if ((fcntlrights & ~CAP_FCNTL_ALL) != 0)
587		return (EINVAL);
588
589	fdp = td->td_proc->p_fd;
590	FILEDESC_XLOCK(fdp);
591
592	fdep = fdeget_noref(fdp, fd);
593	if (fdep == NULL) {
594		FILEDESC_XUNLOCK(fdp);
595		return (EBADF);
596	}
597
598	if ((fcntlrights & ~fdep->fde_fcntls) != 0) {
599		FILEDESC_XUNLOCK(fdp);
600		return (ENOTCAPABLE);
601	}
602
603	seqc_write_begin(&fdep->fde_seqc);
604	fdep->fde_fcntls = fcntlrights;
605	seqc_write_end(&fdep->fde_seqc);
606	FILEDESC_XUNLOCK(fdp);
607
608	return (0);
609}
610
611int
612sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap)
613{
614	struct filedesc *fdp;
615	struct filedescent *fdep;
616	uint32_t rights;
617	int fd;
618
619	fd = uap->fd;
620
621	AUDIT_ARG_FD(fd);
622
623	fdp = td->td_proc->p_fd;
624	FILEDESC_SLOCK(fdp);
625	fdep = fdeget_noref(fdp, fd);
626	if (fdep == NULL) {
627		FILEDESC_SUNLOCK(fdp);
628		return (EBADF);
629	}
630	rights = fdep->fde_fcntls;
631	FILEDESC_SUNLOCK(fdp);
632
633	return (copyout(&rights, uap->fcntlrightsp, sizeof(rights)));
634}
635
636#else /* !CAPABILITIES */
637
/*
 * Stub capability system calls, used when options CAPABILITIES isn't
 * compiled into the kernel.  Each of them simply fails with ENOSYS.
 */

/* cap_rights_limit(2) stub: always ENOSYS. */
int
sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap)
{
	return (ENOSYS);
}
649
/* __cap_rights_get(2) stub: always ENOSYS, nothing is copied out. */
int
sys___cap_rights_get(struct thread *td, struct __cap_rights_get_args *uap)
{
	return (ENOSYS);
}
656
/* cap_ioctls_limit(2) stub: always ENOSYS, the argument list is not read. */
int
sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap)
{
	return (ENOSYS);
}
663
/* cap_ioctls_get(2) stub: always ENOSYS, nothing is copied out. */
int
sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap)
{
	return (ENOSYS);
}
670
/* cap_fcntls_limit(2) stub: always ENOSYS, no descriptor is modified. */
int
sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap)
{
	return (ENOSYS);
}
677
/* cap_fcntls_get(2) stub: always ENOSYS, nothing is copied out. */
int
sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap)
{
	return (ENOSYS);
}
684
685#endif /* CAPABILITIES */
686