1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 1999 Poul-Henning Kamp.
5 * Copyright (c) 2009 James Gritton.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#ifndef _SYS_JAIL_H_
31#define _SYS_JAIL_H_
32
33#ifdef _KERNEL
/*
 * Version 0 layout of the jail argument structure; this definition is
 * only visible to the kernel.  Member order and types are part of the
 * binary layout and must not be rearranged.
 */
struct jail_v0 {
	u_int32_t	 version;	/* structure version, first member */
	char		*path;		/* jail path */
	char		*hostname;	/* jail hostname */
	u_int32_t	 ip_number;	/* one IP number held in 32 bits */
};
40#endif
41
/*
 * Argument structure taken by jail(); JAIL_API_VERSION is defined right
 * after it.  Member order and types are part of the binary layout.
 */
struct jail {
	uint32_t	 version;	/* structure version, first member */
	char		*path;		/* jail path */
	char		*hostname;	/* jail hostname */
	char		*jailname;	/* jail name */
	uint32_t	 ip4s;		/* presumably the entry count of ip4 */
	uint32_t	 ip6s;		/* presumably the entry count of ip6 */
	struct in_addr	*ip4;		/* IPv4 address array */
	struct in6_addr	*ip6;		/* IPv6 address array */
};
#define	JAIL_API_VERSION	2
53
54/*
55 * For all xprison structs, always keep the pr_version an int and
56 * the first variable so userspace can easily distinguish them.
57 */
58#ifndef _KERNEL
/*
 * Version 1 layout of the exported prison record; this definition is
 * only visible outside the kernel.  pr_version stays an int and stays
 * first, as required for every xprison layout.
 */
struct xprison_v1 {
	int		 pr_version;		/* layout version */
	int		 pr_id;			/* prison id */
	char		 pr_path[MAXPATHLEN];	/* prison path */
	char		 pr_host[MAXHOSTNAMELEN]; /* prison hostname */
	u_int32_t	 pr_ip;			/* one IP held in 32 bits */
};
66#endif
67
/*
 * Current layout of the exported prison record.
 * NOTE(review): XPRISON_VERSION is presumably the value carried in
 * pr_version for this layout -- confirm against the code that fills it.
 */
struct xprison {
	int		 pr_version;	/* must stay an int and stay first */
	int		 pr_id;		/* prison id */
	int		 pr_state;	/* presumably an enum prison_state value */
	cpusetid_t	 pr_cpusetid;	/* cpuset id */
	char		 pr_path[MAXPATHLEN];	/* prison path */
	char		 pr_host[MAXHOSTNAMELEN];	/* prison hostname */
	char		 pr_name[MAXHOSTNAMELEN];	/* prison name */
	uint32_t	 pr_ip4s;	/* number of IPv4 addresses */
	uint32_t	 pr_ip6s;	/* number of IPv6 addresses */
#if 0
	/*
	 * Never compiled; documents the variable-length tail only.
	 * sizeof(xprison) will be malloced + size needed for all
	 * IPv4 and IPv6 addresses.  Offsets are based on the numbers of
	 * addresses.
	 */
	struct in_addr	 pr_ip4[];
	struct in6_addr	 pr_ip6[];
#endif
};
#define	XPRISON_VERSION		3
88
/*
 * Life-cycle states of a prison.
 */
enum prison_state {
	/* New prison, not ready to be seen */
	PRISON_STATE_INVALID = 0,
	/* Current prison, visible to all */
	PRISON_STATE_ALIVE,
	/* Removed but holding resources, optionally visible */
	PRISON_STATE_DYING
};
94
/*
 * Flag bits taken by jail_set and jail_get.
 */
#define	JAIL_CREATE	0x01	/* Create jail if it doesn't exist */
#define	JAIL_UPDATE	0x02	/* Update parameters of existing jail */
#define	JAIL_ATTACH	0x04	/* Attach to jail upon creation */
#define	JAIL_DYING	0x08	/* Allow getting a dying jail */

/*
 * Bits accepted by each call.  JAIL_DYING is deprecated/ignored for
 * jail_set but is still part of its mask.
 */
#define	JAIL_SET_MASK	\
	(JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH | JAIL_DYING)
#define	JAIL_GET_MASK	(JAIL_DYING)

/*
 * NOTE(review): presumably the values of the "jailsys" integer
 * parameters declared through SYSCTL_JAIL_PARAM_SYS_NODE -- confirm.
 */
#define	JAIL_SYS_DISABLE	0
#define	JAIL_SYS_NEW		1
#define	JAIL_SYS_INHERIT	2
108
109#ifndef _KERNEL
110
/* Only pointers to struct iovec appear below, so a forward declaration is enough. */
struct iovec;

/*
 * Userland entry points; this branch is compiled only when _KERNEL is
 * not defined.  jail_set and jail_get share one signature: an iovec
 * array, an unsigned count and an int that presumably carries the
 * JAIL_* flag bits defined earlier in this header.
 */
__BEGIN_DECLS
int jail(struct jail *);
int jail_set(struct iovec *, unsigned int, int);
int jail_get(struct iovec *, unsigned int, int);
int jail_attach(int);
int jail_remove(int);
__END_DECLS
120
121#else /* _KERNEL */
122
123#include <sys/queue.h>
124#include <sys/sysctl.h>
125#include <sys/lock.h>
126#include <sys/mutex.h>
127#include <sys/_task.h>
128
/* NOTE(review): presumably the largest jail ID that may be assigned -- confirm against its users. */
#define JAIL_MAX	999999

/* Declare the M_PRISON malloc type only if the includer already has MALLOC_DECLARE defined. */
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_PRISON);
#endif
134#endif /* _KERNEL */
135
136#if defined(_KERNEL) || defined(_WANT_PRISON)
137
138#include <sys/osd.h>
139
/* Buffer sizes for the pr_hostuuid and pr_osrelease members of struct prison. */
#define	HOSTUUIDLEN		64
#define	OSRELEASELEN		32
/* All-zero UUID string; 36 characters, so it fits in HOSTUUIDLEN. */
#define	DEFAULT_HOSTUUID	"00000000-0000-0000-0000-000000000000"

/* Only pointers to these are used in this header. */
struct prison_racct;
struct racct;

/*
 * Address family selector.  PR_FAMILY_MAX is the number of families and
 * sizes the pr_addrs array in struct prison.
 */
typedef enum {
	PR_INET = 0,
	PR_INET6,
	PR_FAMILY_MAX
} pr_family_t;
152
153/*
 * This structure describes a prison.  It is pointed to by the struct
 * ucred of every inmate.  pr_ref keeps track of them and is used to
 * delete the structure when the last inmate is dead.
157 *
158 * Lock key:
159 *   (a) allprison_lock
160 *   (A) allproc_lock
161 *   (c) set only during creation before the structure is shared, no mutex
162 *       required to read
163 *   (m) locked by pr_mtx
164 *   (p) locked by pr_mtx, and also at least shared allprison_lock required
165 *       to update
166 *   (q) locked by both pr_mtx and allprison_lock
167 *   (r) atomic via refcount(9), pr_mtx and allprison_lock required to
168 *       decrement to zero
169 *   (n) read access granted with the network epoch
170 */
/*
 * Per-jail kernel state.  The letter in parentheses on each member names
 * the rule from the lock key that protects it.
 */
struct prison {
	TAILQ_ENTRY(prison) pr_list;			/* (a) all prisons */
	int		 pr_id;				/* (c) prison id */
	volatile u_int	 pr_ref;			/* (r) refcount */
	volatile u_int	 pr_uref;			/* (r) user (alive) refcount */
	unsigned	 pr_flags;			/* (p) PR_* flags */
	LIST_HEAD(, prison) pr_children;		/* (a) list of child jails */
	LIST_HEAD(, proc) pr_proclist;			/* (A) list of jailed processes */
	LIST_ENTRY(prison) pr_sibling;			/* (a) next in parent's list */
	struct prison	*pr_parent;			/* (c) containing jail */
	struct mtx	 pr_mtx;			/* the pr_mtx of the lock key; see prison_lock() */
	struct task	 pr_task;			/* (c) destroy task */
	struct osd	 pr_osd;			/* (p) additional data */
	struct cpuset	*pr_cpuset;			/* (p) cpuset */
	struct vnet	*pr_vnet;			/* (c) network stack */
	struct vnode	*pr_root;			/* (c) vnode to rdir */
	struct prison_ip  *pr_addrs[PR_FAMILY_MAX];	/* (p,n) IPs of jail, one slot per family */
	struct prison_racct *pr_prison_racct;		/* (c) racct jail proxy */
	void		*pr_sparep[3];			/* NOTE(review): presumably reserved spare pointers */
	int		 pr_childcount;			/* (a) number of child jails */
	int		 pr_childmax;			/* (p) maximum child jails */
	unsigned	 pr_allow;			/* (p) PR_ALLOW_* flags */
	int		 pr_securelevel;		/* (p) securelevel */
	int		 pr_enforce_statfs;		/* (p) statfs permission */
	int		 pr_devfs_rsnum;		/* (p) devfs ruleset */
	enum prison_state pr_state;			/* (q) state in life cycle */
	volatile int	 pr_exportcnt;			/* (r) count of mount exports */
	int		 pr_spare;			/* NOTE(review): presumably a reserved spare int */
	int		 pr_osreldate;			/* (c) kern.osreldate value */
	unsigned long	 pr_hostid;			/* (p) jail hostid */
	char		 pr_name[MAXHOSTNAMELEN];	/* (p) admin jail name */
	char		 pr_path[MAXPATHLEN];		/* (c) chroot path */
	char		 pr_hostname[MAXHOSTNAMELEN];	/* (p) jail hostname */
	char		 pr_domainname[MAXHOSTNAMELEN];	/* (p) jail domainname */
	char		 pr_hostuuid[HOSTUUIDLEN];	/* (p) jail hostuuid */
	char		 pr_osrelease[OSRELEASELEN];	/* (c) kern.osrelease value */
};
208
/*
 * The object struct prison's pr_prison_racct points to (described there
 * as the "racct jail proxy").  It is handed out by prison_racct_find()
 * and passed to prison_racct_hold()/prison_racct_free().
 */
struct prison_racct {
	LIST_ENTRY(prison_racct) prr_next;	/* list linkage */
	char		prr_name[MAXHOSTNAMELEN];	/* name; same size as a jail name */
	u_int		prr_refcount;		/* reference count */
	struct racct	*prr_racct;		/* the racct object itself */
};
215#endif /* _KERNEL || _WANT_PRISON */
216
217#ifdef _KERNEL
/*
 * Values for pr_flags.
 *
 * Flag bits set via options:
 */
/* Can exist without processes */
#define	PR_PERSIST		0x00000001
/* Virtualize hostname et al */
#define	PR_HOST			0x00000002
/* Restrict IPv4 addresses */
#define	PR_IP4_USER		0x00000004
/* Restrict IPv6 addresses */
#define	PR_IP6_USER		0x00000008
/* Virtual network stack */
#define	PR_VNET			0x00000010
/* Do IPv4 src addr sel. or use the primary jail address. */
#define	PR_IP4_SADDRSEL		0x00000080
/* Do IPv6 src addr sel. or use the primary jail address. */
#define	PR_IP6_SADDRSEL		0x00000100

/*
 * Internal flag bits:
 */
/* In process of being removed */
#define	PR_REMOVE		0x01000000
/* IPv4 restricted or disabled by this jail or an ancestor */
#define	PR_IP4			0x02000000
/* IPv6 restricted or disabled by this jail or an ancestor */
#define	PR_IP6			0x04000000
/* prison_complete called from prison_proc_free, releases uref */
#define	PR_COMPLETE_PROC	0x08000000
237
/*
 * Bits for pr_allow.
 * Any bit without a name below may be handed out dynamically for
 * allow.mount.xxxfs.
 */
#define	PR_ALLOW_SET_HOSTNAME		0x00000001
#define	PR_ALLOW_SYSVIPC		0x00000002
#define	PR_ALLOW_RAW_SOCKETS		0x00000004
#define	PR_ALLOW_CHFLAGS		0x00000008
#define	PR_ALLOW_MOUNT			0x00000010
#define	PR_ALLOW_QUOTAS			0x00000020
#define	PR_ALLOW_SOCKET_AF		0x00000040
#define	PR_ALLOW_MLOCK			0x00000080
#define	PR_ALLOW_READ_MSGBUF		0x00000100
#define	PR_ALLOW_UNPRIV_DEBUG		0x00000200
#define	PR_ALLOW_SUSER			0x00000400
#define	PR_ALLOW_RESERVED_PORTS		0x00008000
#define	PR_ALLOW_KMEM_ACCESS		0x00010000	/* reserved, not used yet */
#define	PR_ALLOW_NFSD			0x00020000
#define	PR_ALLOW_EXTATTR		0x00040000

/*
 * Union of every named bit above (0x000787ff).  A new static bit has to
 * be added here as well.
 */
#define	PR_ALLOW_ALL_STATIC						\
	(PR_ALLOW_SET_HOSTNAME | PR_ALLOW_SYSVIPC |			\
	 PR_ALLOW_RAW_SOCKETS | PR_ALLOW_CHFLAGS |			\
	 PR_ALLOW_MOUNT | PR_ALLOW_QUOTAS |				\
	 PR_ALLOW_SOCKET_AF | PR_ALLOW_MLOCK |				\
	 PR_ALLOW_READ_MSGBUF | PR_ALLOW_UNPRIV_DEBUG |			\
	 PR_ALLOW_SUSER | PR_ALLOW_RESERVED_PORTS |			\
	 PR_ALLOW_KMEM_ACCESS | PR_ALLOW_NFSD |				\
	 PR_ALLOW_EXTATTR)

/*
 * PR_ALLOW_DIFFERENCES is the set of flags that may differ between the
 * parent and the child jail when the child is created.
 */
#define	PR_ALLOW_DIFFERENCES		(PR_ALLOW_UNPRIV_DEBUG)
264
/*
 * OSD method numbers.  They run consecutively from 0, and PR_MAXMETHOD
 * is the number of methods (one past the last).
 */
#define	PR_METHOD_CREATE	0
#define	PR_METHOD_GET		1
#define	PR_METHOD_SET		2
#define	PR_METHOD_CHECK		3
#define	PR_METHOD_ATTACH	4
#define	PR_METHOD_REMOVE	5
#define	PR_MAXMETHOD		(PR_METHOD_REMOVE + 1)
275
276/*
277 * Lock/unlock a prison.
278 * XXX These exist not so much for general convenience, but to be useable in
279 *     the FOREACH_PRISON_DESCENDANT_LOCKED macro which can't handle them in
280 *     non-function form as currently defined.
281 */
282static __inline void
283prison_lock(struct prison *pr)
284{
285
286	mtx_lock(&pr->pr_mtx);
287}
288
289static __inline void
290prison_unlock(struct prison *pr)
291{
292
293	mtx_unlock(&pr->pr_mtx);
294}
295
/*
 * Traverse a prison's immediate children.
 * Walks ppr's pr_children list through the pr_sibling links, assigning
 * each child to cpr in turn.  No locking is done here.
 */
#define	FOREACH_PRISON_CHILD(ppr, cpr)					\
	LIST_FOREACH(cpr, &(ppr)->pr_children, pr_sibling)
299
/*
 * Preorder traversal of all of a prison's descendants.
 * This ugly loop allows the macro to be followed by a single block
 * as expected in a looping primitive.
 *
 * ppr is the root of the walk and is not visited itself.  cpr is an
 * lvalue that receives each descendant in turn and is NULL once the
 * walk has finished.  descend is an integer lvalue holding traversal
 * state: the body only runs while it is non-zero, and the steps that
 * climb back up to a parent clear it and fall into the empty statement.
 * The body may set descend to 0 to keep the walk out of cpr's children.
 * Every argument is evaluated many times, so pass expressions without
 * side effects.
 */
#define	FOREACH_PRISON_DESCENDANT(ppr, cpr, descend)			\
	for ((cpr) = (ppr), (descend) = 1;				\
	    ((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children))	\
	      ? LIST_FIRST(&(cpr)->pr_children)				\
	      : ((cpr) == (ppr)						\
		 ? NULL							\
		 : (((descend) = LIST_NEXT(cpr, pr_sibling) != NULL)	\
		    ? LIST_NEXT(cpr, pr_sibling)			\
		    : (cpr)->pr_parent))));)				\
		if (!(descend))						\
			;						\
		else
317
/*
 * As above, but lock descendants on the way down and unlock on the way up.
 *
 * prison_lock(cpr) is called just before the body runs, so the body
 * sees cpr locked.  A prison is unlocked only when the walk moves from
 * it to its next sibling or back up to its parent; stepping down into a
 * child does not unlock the parent, so every ancestor of cpr below ppr
 * is still locked while the body runs.  ppr itself is never locked or
 * unlocked by this macro.
 * NOTE(review): leaving the loop early (break/return/goto) therefore
 * appears to leave cpr and those ancestors locked -- callers must deal
 * with that themselves.
 */
#define	FOREACH_PRISON_DESCENDANT_LOCKED(ppr, cpr, descend)		\
	for ((cpr) = (ppr), (descend) = 1;				\
	    ((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children))	\
	      ? LIST_FIRST(&(cpr)->pr_children)				\
	      : ((cpr) == (ppr)						\
		 ? NULL							\
		 : ((prison_unlock(cpr),				\
		    (descend) = LIST_NEXT(cpr, pr_sibling) != NULL)	\
		    ? LIST_NEXT(cpr, pr_sibling)			\
		    : (cpr)->pr_parent))));)				\
		if ((descend) ? (prison_lock(cpr), 0) : 1)		\
			;						\
		else
334
/*
 * As above, but also keep track of the level descended to.
 *
 * level is an integer lvalue: it starts at 0, is incremented each time
 * the walk steps down into a first child and decremented each time it
 * climbs back to a parent, so the body sees 1 for ppr's children, 2 for
 * their children, and so on; it is back at 0 when the walk finishes.
 * The level argument is parenthesized wherever it is used so that any
 * lvalue expression (e.g. *levelp) is updated itself rather than being
 * re-associated with the ++/-- operator.
 * Locking follows the same pattern as the plain locked traversal:
 * prison_lock(cpr) before the body, prison_unlock(cpr) when moving on
 * to a sibling or back up to the parent.
 */
#define	FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(ppr, cpr, descend, level)\
	for ((cpr) = (ppr), (descend) = 1, (level) = 0;			\
	    ((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children))	\
	      ? ((level)++, LIST_FIRST(&(cpr)->pr_children))		\
	      : ((cpr) == (ppr)						\
		 ? NULL							\
		 : ((prison_unlock(cpr),				\
		    (descend) = LIST_NEXT(cpr, pr_sibling) != NULL)	\
		    ? LIST_NEXT(cpr, pr_sibling)			\
		    : ((level)--, (cpr)->pr_parent)))));)		\
		if ((descend) ? (prison_lock(cpr), 0) : 1)		\
			;						\
		else
351
/*
 * Traverse a prison's descendants, visiting both preorder and postorder.
 *
 * The body runs twice for every descendant of ppr: first with descend
 * non-zero (preorder, before any of its children) and later with
 * descend zero (postorder, after all of its children).  A childless
 * prison gets its two visits back to back.  The walk ends when it
 * climbs back to ppr, which is never passed to the body; cpr equals ppr
 * afterwards.  No locking is done here.
 * The cpr argument is parenthesized at every use so that any lvalue
 * expression (e.g. *slot) may be passed.  Every argument is evaluated
 * many times, so pass expressions without side effects.
 */
#define FOREACH_PRISON_DESCENDANT_PRE_POST(ppr, cpr, descend)		\
	for ((cpr) = (ppr), (descend) = 1;				\
	     ((cpr) = (descend)						\
	      ? ((descend) = !LIST_EMPTY(&(cpr)->pr_children))		\
		? LIST_FIRST(&(cpr)->pr_children)			\
		: (cpr)							\
	      : ((descend) = LIST_NEXT(cpr, pr_sibling) != NULL)	\
		? LIST_NEXT(cpr, pr_sibling)				\
		: (cpr)->pr_parent) != (ppr);)
364
/*
 * Attributes of the physical system, and the root of the jail tree.
 * jailed() treats a credential as jailed when its prison is not this
 * object.
 */
extern struct	prison prison0;

/*
 * List head type for prisons and the global list of that type.
 * NOTE(review): entries are presumably linked through pr_list, which
 * struct prison describes as "all prisons" -- confirm.
 */
TAILQ_HEAD(prisonlist, prison);
extern struct	prisonlist allprison;
/* The allprison_lock referred to by the lock key of struct prison. */
extern struct	sx allprison_lock;
373
/*
 * Sysctls to describe jail parameters.
 * Every parameter declared below uses sysctl_jail_param as its handler
 * and is flagged CTLFLAG_MPSAFE.
 */
SYSCTL_DECL(_security_jail_param);

/*
 * Declare parameter `param' below _security_jail_param<module>.  `module'
 * is pasted directly onto the parent name, so it has to be empty or
 * carry its own leading underscore.  `type' supplies the CTLTYPE_* and
 * access bits.
 */
#define	SYSCTL_JAIL_PARAM(module, param, type, fmt, descr)		\
    SYSCTL_PROC(_security_jail_param ## module, OID_AUTO, param,	\
	(type) | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_param, fmt, descr)
/*
 * String parameter (CTLTYPE_STRING, format "A"); `len' goes in the slot
 * where SYSCTL_JAIL_PARAM passes 0.
 */
#define	SYSCTL_JAIL_PARAM_STRING(module, param, access, len, descr)	\
    SYSCTL_PROC(_security_jail_param ## module, OID_AUTO, param,	\
	CTLTYPE_STRING | CTLFLAG_MPSAFE | (access), NULL, len,		\
	sysctl_jail_param, "A", descr)
/* Structure parameter (CTLTYPE_STRUCT) with a caller-supplied format. */
#define	SYSCTL_JAIL_PARAM_STRUCT(module, param, access, len, fmt, descr)\
    SYSCTL_PROC(_security_jail_param ## module, OID_AUTO, param,	\
	CTLTYPE_STRUCT | CTLFLAG_MPSAFE | (access), NULL, len,		\
	sysctl_jail_param, fmt, descr)
/* Declare node `module' directly below _security_jail_param. */
#define	SYSCTL_JAIL_PARAM_NODE(module, descr)				\
    SYSCTL_NODE(_security_jail_param, OID_AUTO, module, CTLFLAG_MPSAFE,	\
        0, descr)
/* Declare node `module' below _security_jail_param_<parent>. */
#define	SYSCTL_JAIL_PARAM_SUBNODE(parent, module, descr)		\
    SYSCTL_NODE(_security_jail_param_##parent, OID_AUTO, module, 	\
        CTLFLAG_MPSAFE, 0, descr)
/*
 * Declare node `module' plus an integer parameter with an empty name and
 * format "E,jailsys" beneath it.  This expands to two declarations
 * separated by a semicolon, so it is only usable where a sequence of
 * declarations is valid.
 */
#define	SYSCTL_JAIL_PARAM_SYS_NODE(module, access, descr)		\
    SYSCTL_JAIL_PARAM_NODE(module, descr);				\
    SYSCTL_JAIL_PARAM(_##module, , CTLTYPE_INT | (access), "E,jailsys",	\
	descr)
400
401/*
402 * Kernel support functions for jail().
403 */
404struct ucred;
405struct mount;
406struct sockaddr;
407struct statfs;
408struct vfsconf;
409
/*
 * Evaluate to 1 if the passed credential is in a jail, otherwise 0.
 * A credential counts as jailed when its cr_prison is anything other
 * than &prison0.  The argument is parenthesized so that any pointer
 * expression (e.g. &cred or *credp) may be passed; it is evaluated
 * exactly once.
 */
#define	jailed(cred)	((cred)->cr_prison != &prison0)
414
/*
 * The getcred*() routines take a credential plus an output buffer and
 * its size (getcredhostid takes a pointer to an unsigned long instead).
 */
bool jailed_without_vnet(struct ucred *);
void getcredhostname(struct ucred *, char *, size_t);
void getcreddomainname(struct ucred *, char *, size_t);
void getcredhostuuid(struct ucred *, char *, size_t);
void getcredhostid(struct ucred *, unsigned long *);
void getjailname(struct ucred *cred, char *name, size_t len);
void prison0_init(void);
/* The unsigned argument is presumably a PR_ALLOW_* bit -- confirm. */
bool prison_allow(struct ucred *, unsigned);
int prison_check(struct ucred *cred1, struct ucred *cred2);
bool prison_check_nfsd(struct ucred *cred);
bool prison_owns_vnet(struct ucred *);
int prison_canseemount(struct ucred *cred, struct mount *mp);
void prison_enforce_statfs(struct ucred *cred, struct mount *mp,
    struct statfs *sp);
/* Lookup by id or by name. */
struct prison *prison_find(int prid);
struct prison *prison_find_child(struct prison *, int);
struct prison *prison_find_name(struct prison *, const char *);
/* The unsigned argument is presumably a PR_* flag bit -- confirm. */
bool prison_flag(struct ucred *, unsigned);
/*
 * Reference handling.
 * NOTE(review): the *_locked variants presumably expect the caller to
 * hold pr_mtx already -- confirm against the implementation.
 */
void prison_free(struct prison *pr);
void prison_free_locked(struct prison *pr);
void prison_hold(struct prison *pr);
void prison_hold_locked(struct prison *pr);
void prison_proc_hold(struct prison *);
void prison_proc_free(struct prison *);
void prison_proc_link(struct prison *, struct proc *);
void prison_proc_unlink(struct prison *, struct proc *);
/* Takes a callback and an opaque pointer that is handed to it. */
void prison_proc_iterate(struct prison *, void (*)(struct proc *, void *), void *);
void prison_set_allow(struct ucred *cred, unsigned flag, int enable);
bool prison_ischild(struct prison *, struct prison *);
bool prison_isalive(const struct prison *);
bool prison_isvalid(struct prison *);
/* Family-independent address helpers; built when INET or INET6 is defined. */
#if defined(INET) || defined(INET6)
int prison_ip_check(const struct prison *, const pr_family_t, const void *);
const void *prison_ip_get0(const struct prison *, const pr_family_t);
u_int prison_ip_cnt(const struct prison *, const pr_family_t);
#endif
/* IPv4-only helpers; built when INET is defined. */
#ifdef INET
bool prison_equal_ip4(struct prison *, struct prison *);
int prison_get_ip4(struct ucred *cred, struct in_addr *ia);
int prison_local_ip4(struct ucred *cred, struct in_addr *ia);
int prison_remote_ip4(struct ucred *cred, struct in_addr *ia);
int prison_check_ip4(const struct ucred *, const struct in_addr *);
int prison_check_ip4_locked(const struct prison *, const struct in_addr *);
bool prison_saddrsel_ip4(struct ucred *, struct in_addr *);
int prison_qcmp_v4(const void *, const void *);
bool prison_valid_v4(const void *);
#endif
/* IPv6-only helpers; built when INET6 is defined. */
#ifdef INET6
bool prison_equal_ip6(struct prison *, struct prison *);
int prison_get_ip6(struct ucred *, struct in6_addr *);
int prison_local_ip6(struct ucred *, struct in6_addr *, int);
int prison_remote_ip6(struct ucred *, struct in6_addr *);
int prison_check_ip6(const struct ucred *, const struct in6_addr *);
int prison_check_ip6_locked(const struct prison *, const struct in6_addr *);
bool prison_saddrsel_ip6(struct ucred *, struct in6_addr *);
int prison_qcmp_v6(const void *, const void *);
bool prison_valid_v6(const void *);
#endif
int prison_check_af(struct ucred *cred, int af);
int prison_if(struct ucred *cred, const struct sockaddr *sa);
char *prison_name(struct prison *, struct prison *);
int prison_priv_check(struct ucred *cred, int priv);
/* Handler used by every SYSCTL_JAIL_PARAM* declaration. */
int sysctl_jail_param(SYSCTL_HANDLER_ARGS);
unsigned prison_add_allow(const char *prefix, const char *name,
    const char *prefix_descr, const char *descr);
void prison_add_vfs(struct vfsconf *vfsp);
/*
 * prison_racct_foreach takes a per-racct callback, pre and post hooks,
 * and two opaque pointers that match the callback's arg2/arg3.
 */
void prison_racct_foreach(void (*callback)(struct racct *racct,
    void *arg2, void *arg3), void (*pre)(void), void (*post)(void),
    void *arg2, void *arg3);
struct prison_racct *prison_racct_find(const char *name);
void prison_racct_hold(struct prison_racct *prr);
void prison_racct_free(struct prison_racct *prr);
487
488#endif /* _KERNEL */
489#endif /* !_SYS_JAIL_H_ */
490