kern_jail.c revision 227293
1139804Simp/*-
2185435Sbz * Copyright (c) 1999 Poul-Henning Kamp.
3185435Sbz * Copyright (c) 2008 Bjoern A. Zeeb.
4191673Sjamie * Copyright (c) 2009 James Gritton.
5185435Sbz * All rights reserved.
6190466Sjamie *
7185404Sbz * Redistribution and use in source and binary forms, with or without
8185404Sbz * modification, are permitted provided that the following conditions
9185404Sbz * are met:
10185404Sbz * 1. Redistributions of source code must retain the above copyright
11185404Sbz *    notice, this list of conditions and the following disclaimer.
12185404Sbz * 2. Redistributions in binary form must reproduce the above copyright
13185404Sbz *    notice, this list of conditions and the following disclaimer in the
14185404Sbz *    documentation and/or other materials provided with the distribution.
15185404Sbz *
16185404Sbz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17185404Sbz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18185404Sbz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19185404Sbz * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20185404Sbz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21185404Sbz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22185404Sbz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23185404Sbz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24185404Sbz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25185404Sbz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26185404Sbz * SUCH DAMAGE.
2746197Sphk */
2846155Sphk
29116182Sobrien#include <sys/cdefs.h>
30116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 227293 2011-11-07 06:44:47Z ed $");
31116182Sobrien
32193066Sjamie#include "opt_compat.h"
33185435Sbz#include "opt_ddb.h"
34185435Sbz#include "opt_inet.h"
35185435Sbz#include "opt_inet6.h"
36131177Spjd
3746155Sphk#include <sys/param.h>
3846155Sphk#include <sys/types.h>
3946155Sphk#include <sys/kernel.h>
4046155Sphk#include <sys/systm.h>
4146155Sphk#include <sys/errno.h>
4246155Sphk#include <sys/sysproto.h>
4346155Sphk#include <sys/malloc.h>
44192895Sjamie#include <sys/osd.h>
45164032Srwatson#include <sys/priv.h>
4646155Sphk#include <sys/proc.h>
47124882Srwatson#include <sys/taskqueue.h>
48177785Skib#include <sys/fcntl.h>
4946155Sphk#include <sys/jail.h>
5087275Srwatson#include <sys/lock.h>
5187275Srwatson#include <sys/mutex.h>
52220137Strasz#include <sys/racct.h>
53221362Strasz#include <sys/refcount.h>
54168401Spjd#include <sys/sx.h>
55193066Sjamie#include <sys/sysent.h>
56113275Smike#include <sys/namei.h>
57147185Spjd#include <sys/mount.h>
58113275Smike#include <sys/queue.h>
5946155Sphk#include <sys/socket.h>
60113275Smike#include <sys/syscallsubr.h>
6157163Srwatson#include <sys/sysctl.h>
62113275Smike#include <sys/vnode.h>
63196019Srwatson
6446155Sphk#include <net/if.h>
65196019Srwatson#include <net/vnet.h>
66196019Srwatson
6746155Sphk#include <netinet/in.h>
68196019Srwatson
69185435Sbz#ifdef DDB
70185435Sbz#include <ddb/ddb.h>
71185435Sbz#ifdef INET6
72185435Sbz#include <netinet6/in6_var.h>
73185435Sbz#endif /* INET6 */
74185435Sbz#endif /* DDB */
7546155Sphk
76163606Srwatson#include <security/mac/mac_framework.h>
77163606Srwatson
/* Host UUID string used below to initialize prison0.pr_hostuuid. */
78195944Sjamie#define	DEFAULT_HOSTUUID	"00000000-0000-0000-0000-000000000000"
79195944Sjamie
/*
 * Allocation types defined by this file.  M_PRISON has external linkage and
 * is used by kern_jail_set() for the per-jail IP address arrays;
 * M_PRISON_RACCT is static, i.e. private to this file.
 */
8046155SphkMALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
81227293Sedstatic MALLOC_DEFINE(M_PRISON_RACCT, "prison_racct", "Prison racct structures");
8246155Sphk
/*
 * Keep struct prison prison0 and some code in kern_jail_set() readable.
 *
 * _PR_IP_SADDRSEL is the set of source-address-selection flags for the
 * address families compiled into the kernel (0 when neither INET nor INET6
 * is configured).  Every expansion is parenthesized so the macro behaves as
 * a single operand next to operators such as ~, & or ==.
 */
#ifdef INET
#ifdef INET6
#define	_PR_IP_SADDRSEL	(PR_IP4_SADDRSEL|PR_IP6_SADDRSEL)
#else
#define	_PR_IP_SADDRSEL	(PR_IP4_SADDRSEL)
#endif
#else /* !INET */
#ifdef INET6
#define	_PR_IP_SADDRSEL	(PR_IP6_SADDRSEL)
#else
#define	_PR_IP_SADDRSEL	(0)
#endif
#endif
97202468Sbz
98192895Sjamie/* prison0 describes what is "real" about the system. */
/*
 * It is a statically initialized struct prison with id 0, name "0" and
 * path "/".  It starts with one reference and one user reference, allows
 * everything (PR_ALLOW_ALL) and may have up to JAIL_MAX children.
 * pr_flags always has PR_HOST plus the source address selection flags for
 * the configured address families; PR_VNET is added only under VIMAGE.
 * The embedded mutex is not set up here but by the MTX_SYSINIT that follows.
 */
99192895Sjamiestruct prison prison0 = {
100192895Sjamie	.pr_id		= 0,
101192895Sjamie	.pr_name	= "0",
102192895Sjamie	.pr_ref		= 1,
103192895Sjamie	.pr_uref	= 1,
104192895Sjamie	.pr_path	= "/",
105192895Sjamie	.pr_securelevel	= -1,
106194762Sjamie	.pr_childmax	= JAIL_MAX,
107195944Sjamie	.pr_hostuuid	= DEFAULT_HOSTUUID,
108201145Santoine	.pr_children	= LIST_HEAD_INITIALIZER(prison0.pr_children),
109196176Sbz#ifdef VIMAGE
110202468Sbz	.pr_flags	= PR_HOST|PR_VNET|_PR_IP_SADDRSEL,
111196176Sbz#else
112202468Sbz	.pr_flags	= PR_HOST|_PR_IP_SADDRSEL,
113196176Sbz#endif
114192895Sjamie	.pr_allow	= PR_ALLOW_ALL,
115192895Sjamie};
116192895SjamieMTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF);
11757163Srwatson
118221362Strasz/* allprison, allprison_racct and lastprid are protected by allprison_lock. */
/* The sx lock itself; it is initialized through the SX_SYSINIT below. */
119168401Spjdstruct	sx allprison_lock;
120191673SjamieSX_SYSINIT(allprison_lock, &allprison_lock, "allprison");
/* Tail queue of prisons, statically initialized empty. */
121191673Sjamiestruct	prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison);
/* List head for struct prison_racct entries. */
122221362StraszLIST_HEAD(, prison_racct) allprison_racct;
/* NOTE(review): presumably the most recently handed out jail id - confirm. */
123179881Sdelphijint	lastprid = 0;
124113275Smike
/*
 * Prototypes for file-local (static) functions.  The racct, IPv4 and IPv6
 * helpers are declared only when the matching kernel option (RACCT, INET,
 * INET6) is defined.
 */
125191673Sjamiestatic int do_jail_attach(struct thread *td, struct prison *pr);
126190466Sjamiestatic void prison_complete(void *context, int pending);
127191673Sjamiestatic void prison_deref(struct prison *pr, int flags);
128192895Sjamiestatic char *prison_path(struct prison *pr1, struct prison *pr2);
129192895Sjamiestatic void prison_remove_one(struct prison *pr);
130221362Strasz#ifdef RACCT
131221362Straszstatic void prison_racct_attach(struct prison *pr);
132221362Straszstatic void prison_racct_detach(struct prison *pr);
133221362Strasz#endif
134185435Sbz#ifdef INET
135190466Sjamiestatic int _prison_check_ip4(struct prison *pr, struct in_addr *ia);
136192895Sjamiestatic int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4);
137185435Sbz#endif
138185435Sbz#ifdef INET6
139190466Sjamiestatic int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6);
140192895Sjamiestatic int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6);
141185435Sbz#endif
142113275Smike
143191673Sjamie/* Flags for prison_deref */
/*
 * Distinct single-bit values, so they can be OR-ed together in the "flags"
 * argument of prison_deref().
 * NOTE(review): judging by the names only, PD_DEREF/PD_DEUREF ask for a
 * reference / user reference to be dropped, PD_LOCKED says the prison mutex
 * is already held, and PD_LIST_SLOCKED/PD_LIST_XLOCKED say allprison_lock is
 * held shared/exclusive - confirm against prison_deref().
 */
144191673Sjamie#define	PD_DEREF	0x01
145191673Sjamie#define	PD_DEUREF	0x02
146191673Sjamie#define	PD_LOCKED	0x04
147191673Sjamie#define	PD_LIST_SLOCKED	0x08
148191673Sjamie#define	PD_LIST_XLOCKED	0x10
149113275Smike
150192895Sjamie/*
151216861Sbz * Parameter names corresponding to PR_* flag values.  Size values are for kvm
152216861Sbz * as we cannot figure out the size of a sparse array, or an array without a
153216861Sbz * terminating entry.
 *
 * The array index is the bit number: kern_jail_set() hands pr_flag_names[i]
 * to vfs_flagopt() with the mask (1 << i) and skips slots that are NULL
 * (indices with no designated initializer).  The "ip4"/"ip6" entries exist
 * only when INET/INET6 is configured.
154192895Sjamie */
155192895Sjamiestatic char *pr_flag_names[] = {
156192895Sjamie	[0] = "persist",
157202468Sbz#ifdef INET
158202468Sbz	[7] = "ip4.saddrsel",
159202468Sbz#endif
160202468Sbz#ifdef INET6
161202468Sbz	[8] = "ip6.saddrsel",
162202468Sbz#endif
163192895Sjamie};
/* Size in bytes (not the element count) of the table above. */
164216861Sbzconst size_t pr_flag_names_size = sizeof(pr_flag_names);
165192895Sjamie
/*
 * Negated ("no...") spellings of the pr_flag_names entries, using the same
 * indices.  kern_jail_set() looks entry i up with the mask (1 << i) and
 * records a hit in ch_flags only, i.e. the flag is marked as changed without
 * being set in pr_flags.
 */
166192895Sjamiestatic char *pr_flag_nonames[] = {
167192895Sjamie	[0] = "nopersist",
168202468Sbz#ifdef INET
169202468Sbz	[7] = "ip4.nosaddrsel",
170202468Sbz#endif
171202468Sbz#ifdef INET6
172202468Sbz	[8] = "ip6.nosaddrsel",
173202468Sbz#endif
174195870Sjamie};
/* Size in bytes (not the element count) of the table above. */
175216861Sbzconst size_t pr_flag_nonames_size = sizeof(pr_flag_nonames);
176195870Sjamie
/*
 * Parameters that take a JAIL_SYS_* value instead of a plain boolean.
 * For each entry kern_jail_set() reads the integer parameter "name" and
 *   - on JAIL_SYS_DISABLE sets the "disable" bits in pr_flags, or fails
 *     with EINVAL when "disable" is 0;
 *   - on JAIL_SYS_NEW sets the "new" bits in pr_flags;
 *   - on JAIL_SYS_INHERIT sets nothing;
 * and in every accepted case marks both "new" and "disable" bits as changed
 * in ch_flags.  Entries are present only for the configured subsystems
 * (VIMAGE, INET, INET6).
 */
177195870Sjamiestruct jailsys_flags {
178195870Sjamie	const char	*name;
179195870Sjamie	unsigned	 disable;
180195870Sjamie	unsigned	 new;
181195870Sjamie} pr_flag_jailsys[] = {
182195870Sjamie	{ "host", 0, PR_HOST },
183195870Sjamie#ifdef VIMAGE
184195870Sjamie	{ "vnet", 0, PR_VNET },
185195870Sjamie#endif
186192895Sjamie#ifdef INET
187195870Sjamie	{ "ip4", PR_IP4_USER | PR_IP4_DISABLE, PR_IP4_USER },
188192895Sjamie#endif
189192895Sjamie#ifdef INET6
190195870Sjamie	{ "ip6", PR_IP6_USER | PR_IP6_DISABLE, PR_IP6_USER },
191192895Sjamie#endif
192192895Sjamie};
/* Size in bytes (not the element count) of the table above. */
193216861Sbzconst size_t pr_flag_jailsys_size = sizeof(pr_flag_jailsys);
194192895Sjamie
/*
 * Names of the "allow.*" permission parameters.  The array index is the bit
 * number: entry i goes with the mask (1 << i), both when kern_jail() tests
 * jail_default_allow and when kern_jail_set() fills pr_allow.  The order
 * must therefore stay in step with pr_allow_nonames.
 */
195192895Sjamiestatic char *pr_allow_names[] = {
196192895Sjamie	"allow.set_hostname",
197192895Sjamie	"allow.sysvipc",
198192895Sjamie	"allow.raw_sockets",
199192895Sjamie	"allow.chflags",
200192895Sjamie	"allow.mount",
201192895Sjamie	"allow.quotas",
202192895Sjamie	"allow.socket_af",
203192895Sjamie};
/* Size in bytes (not the element count) of the table above. */
204216861Sbzconst size_t pr_allow_names_size = sizeof(pr_allow_names);
205192895Sjamie
/*
 * Negated spellings of the pr_allow_names entries, index for index.
 * kern_jail() emits entry i when bit (1 << i) of jail_default_allow is
 * clear; kern_jail_set() records a hit in ch_allow only.
 */
206192895Sjamiestatic char *pr_allow_nonames[] = {
207192895Sjamie	"allow.noset_hostname",
208192895Sjamie	"allow.nosysvipc",
209192895Sjamie	"allow.noraw_sockets",
210192895Sjamie	"allow.nochflags",
211192895Sjamie	"allow.nomount",
212192895Sjamie	"allow.noquotas",
213192895Sjamie	"allow.nosocket_af",
214192895Sjamie};
/* Size in bytes (not the element count) of the table above. */
215216861Sbzconst size_t pr_allow_nonames_size = sizeof(pr_allow_nonames);
216192895Sjamie
/*
 * Defaults that kern_jail() turns into "allow.*" and "enforce_statfs"
 * parameters when the calling credential is not itself jailed.
 * NOTE(review): the comment in kern_jail() says these come "from sysctls";
 * the sysctl definitions are not in this part of the file - confirm.
 */
217196002Sjamie#define	JAIL_DEFAULT_ALLOW		PR_ALLOW_SET_HOSTNAME
218196002Sjamie#define	JAIL_DEFAULT_ENFORCE_STATFS	2
219192895Sjamiestatic unsigned jail_default_allow = JAIL_DEFAULT_ALLOW;
220196002Sjamiestatic int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS;
/*
 * Upper bound on the number of addresses per address family; kern_jail()
 * and kern_jail_set() reject larger counts with EINVAL.
 */
221192895Sjamie#if defined(INET) || defined(INET6)
222193865Sjamiestatic unsigned jail_max_af_ips = 255;
223192895Sjamie#endif
224192895Sjamie
225192895Sjamie#ifdef INET
/*
 * qsort() comparator for two struct in_addr.
 *
 * The addresses are stored in network byte order; both are converted to
 * host byte order first so that the resulting order is the numeric one.
 * Returns 1, -1 or 0 when *ip1 is greater than, less than or equal to *ip2.
 */
static int
qcmp_v4(const void *ip1, const void *ip2)
{
	const struct in_addr *lhs, *rhs;
	in_addr_t a, b;

	lhs = ip1;
	rhs = ip2;
	a = ntohl(lhs->s_addr);
	b = ntohl(rhs->s_addr);

	/*
	 * Subtracting the addresses would not fit in an int, so build the
	 * result from the two comparisons instead.
	 */
	return ((a > b) - (a < b));
}
250185435Sbz#endif
251185435Sbz
252185435Sbz#ifdef INET6
/*
 * qsort() comparator for two struct in6_addr.
 *
 * The addresses are compared byte by byte from s6_addr[0] onwards; the
 * first differing byte decides.  Returns 1, -1 or 0 when *ip1 is greater
 * than, less than or equal to *ip2.
 */
static int
qcmp_v6(const void *ip1, const void *ip2)
{
	const struct in6_addr *lhs, *rhs;
	size_t idx;

	lhs = ip1;
	rhs = ip2;

	for (idx = 0; idx < sizeof(struct in6_addr); idx++) {
		if (lhs->s6_addr[idx] == rhs->s6_addr[idx])
			continue;
		return (lhs->s6_addr[idx] > rhs->s6_addr[idx] ? 1 : -1);
	}
	return (0);
}
271185435Sbz#endif
272185435Sbz
273191673Sjamie/*
274191673Sjamie * struct jail_args {
275191673Sjamie *	struct jail *jail;
276191673Sjamie * };
277191673Sjamie */
278191673Sjamieint
279225617Skmacysys_jail(struct thread *td, struct jail_args *uap)
280185435Sbz{
281191673Sjamie	uint32_t version;
282191673Sjamie	int error;
283192895Sjamie	struct jail j;
284185435Sbz
285191673Sjamie	error = copyin(uap->jail, &version, sizeof(uint32_t));
286191673Sjamie	if (error)
287191673Sjamie		return (error);
288185435Sbz
289191673Sjamie	switch (version) {
290191673Sjamie	case 0:
291191673Sjamie	{
292191673Sjamie		struct jail_v0 j0;
293185435Sbz
294192895Sjamie		/* FreeBSD single IPv4 jails. */
295192895Sjamie		bzero(&j, sizeof(struct jail));
296191673Sjamie		error = copyin(uap->jail, &j0, sizeof(struct jail_v0));
297191673Sjamie		if (error)
298191673Sjamie			return (error);
299192895Sjamie		j.version = j0.version;
300192895Sjamie		j.path = j0.path;
301192895Sjamie		j.hostname = j0.hostname;
302192895Sjamie		j.ip4s = j0.ip_number;
303191673Sjamie		break;
304191673Sjamie	}
305191673Sjamie
306191673Sjamie	case 1:
307185435Sbz		/*
308191673Sjamie		 * Version 1 was used by multi-IPv4 jail implementations
309191673Sjamie		 * that never made it into the official kernel.
310185435Sbz		 */
311191673Sjamie		return (EINVAL);
312185435Sbz
313191673Sjamie	case 2:	/* JAIL_API_VERSION */
314191673Sjamie		/* FreeBSD multi-IPv4/IPv6,noIP jails. */
315191673Sjamie		error = copyin(uap->jail, &j, sizeof(struct jail));
316191673Sjamie		if (error)
317191673Sjamie			return (error);
318192895Sjamie		break;
319192895Sjamie
320192895Sjamie	default:
321192895Sjamie		/* Sci-Fi jails are not supported, sorry. */
322192895Sjamie		return (EINVAL);
323192895Sjamie	}
324192895Sjamie	return (kern_jail(td, &j));
325192895Sjamie}
326192895Sjamie
327192895Sjamieint
328192895Sjamiekern_jail(struct thread *td, struct jail *j)
329192895Sjamie{
330193865Sjamie	struct iovec optiov[2 * (4
331193865Sjamie			    + sizeof(pr_allow_names) / sizeof(pr_allow_names[0])
332193865Sjamie#ifdef INET
333193865Sjamie			    + 1
334193865Sjamie#endif
335193865Sjamie#ifdef INET6
336193865Sjamie			    + 1
337193865Sjamie#endif
338193865Sjamie			    )];
339192895Sjamie	struct uio opt;
340192895Sjamie	char *u_path, *u_hostname, *u_name;
341185435Sbz#ifdef INET
342193865Sjamie	uint32_t ip4s;
343192895Sjamie	struct in_addr *u_ip4;
344192895Sjamie#endif
345192895Sjamie#ifdef INET6
346192895Sjamie	struct in6_addr *u_ip6;
347192895Sjamie#endif
348192895Sjamie	size_t tmplen;
349192895Sjamie	int error, enforce_statfs, fi;
350192895Sjamie
351192895Sjamie	bzero(&optiov, sizeof(optiov));
352192895Sjamie	opt.uio_iov = optiov;
353192895Sjamie	opt.uio_iovcnt = 0;
354192895Sjamie	opt.uio_offset = -1;
355192895Sjamie	opt.uio_resid = -1;
356192895Sjamie	opt.uio_segflg = UIO_SYSSPACE;
357192895Sjamie	opt.uio_rw = UIO_READ;
358192895Sjamie	opt.uio_td = td;
359192895Sjamie
360192895Sjamie	/* Set permissions for top-level jails from sysctls. */
361192895Sjamie	if (!jailed(td->td_ucred)) {
362192895Sjamie		for (fi = 0; fi < sizeof(pr_allow_names) /
363192895Sjamie		     sizeof(pr_allow_names[0]); fi++) {
364192895Sjamie			optiov[opt.uio_iovcnt].iov_base =
365192895Sjamie			    (jail_default_allow & (1 << fi))
366192895Sjamie			    ? pr_allow_names[fi] : pr_allow_nonames[fi];
367192895Sjamie			optiov[opt.uio_iovcnt].iov_len =
368192895Sjamie			    strlen(optiov[opt.uio_iovcnt].iov_base) + 1;
369192895Sjamie			opt.uio_iovcnt += 2;
370192895Sjamie		}
371192895Sjamie		optiov[opt.uio_iovcnt].iov_base = "enforce_statfs";
372192895Sjamie		optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs");
373192895Sjamie		opt.uio_iovcnt++;
374192895Sjamie		enforce_statfs = jail_default_enforce_statfs;
375192895Sjamie		optiov[opt.uio_iovcnt].iov_base = &enforce_statfs;
376192895Sjamie		optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs);
377192895Sjamie		opt.uio_iovcnt++;
378192895Sjamie	}
379192895Sjamie
380192895Sjamie	tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN;
381192895Sjamie#ifdef INET
382192895Sjamie	ip4s = (j->version == 0) ? 1 : j->ip4s;
383192895Sjamie	if (ip4s > jail_max_af_ips)
384192895Sjamie		return (EINVAL);
385192895Sjamie	tmplen += ip4s * sizeof(struct in_addr);
386191673Sjamie#else
387192895Sjamie	if (j->ip4s > 0)
388192895Sjamie		return (EINVAL);
389191673Sjamie#endif
390191673Sjamie#ifdef INET6
391192895Sjamie	if (j->ip6s > jail_max_af_ips)
392192895Sjamie		return (EINVAL);
393192895Sjamie	tmplen += j->ip6s * sizeof(struct in6_addr);
394191673Sjamie#else
395192895Sjamie	if (j->ip6s > 0)
396192895Sjamie		return (EINVAL);
397191673Sjamie#endif
398192895Sjamie	u_path = malloc(tmplen, M_TEMP, M_WAITOK);
399192895Sjamie	u_hostname = u_path + MAXPATHLEN;
400192895Sjamie	u_name = u_hostname + MAXHOSTNAMELEN;
401191673Sjamie#ifdef INET
402192895Sjamie	u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN);
403191673Sjamie#endif
404191673Sjamie#ifdef INET6
405191673Sjamie#ifdef INET
406192895Sjamie	u_ip6 = (struct in6_addr *)(u_ip4 + ip4s);
407191673Sjamie#else
408192895Sjamie	u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN);
409191673Sjamie#endif
410191673Sjamie#endif
411192895Sjamie	optiov[opt.uio_iovcnt].iov_base = "path";
412192895Sjamie	optiov[opt.uio_iovcnt].iov_len = sizeof("path");
413192895Sjamie	opt.uio_iovcnt++;
414192895Sjamie	optiov[opt.uio_iovcnt].iov_base = u_path;
415192895Sjamie	error = copyinstr(j->path, u_path, MAXPATHLEN,
416192895Sjamie	    &optiov[opt.uio_iovcnt].iov_len);
417192895Sjamie	if (error) {
418192895Sjamie		free(u_path, M_TEMP);
419192895Sjamie		return (error);
420192895Sjamie	}
421192895Sjamie	opt.uio_iovcnt++;
422192895Sjamie	optiov[opt.uio_iovcnt].iov_base = "host.hostname";
423192895Sjamie	optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname");
424192895Sjamie	opt.uio_iovcnt++;
425192895Sjamie	optiov[opt.uio_iovcnt].iov_base = u_hostname;
426192895Sjamie	error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN,
427192895Sjamie	    &optiov[opt.uio_iovcnt].iov_len);
428192895Sjamie	if (error) {
429192895Sjamie		free(u_path, M_TEMP);
430192895Sjamie		return (error);
431192895Sjamie	}
432192895Sjamie	opt.uio_iovcnt++;
433192895Sjamie	if (j->jailname != NULL) {
434192895Sjamie		optiov[opt.uio_iovcnt].iov_base = "name";
435192895Sjamie		optiov[opt.uio_iovcnt].iov_len = sizeof("name");
436192895Sjamie		opt.uio_iovcnt++;
437192895Sjamie		optiov[opt.uio_iovcnt].iov_base = u_name;
438192895Sjamie		error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN,
439192895Sjamie		    &optiov[opt.uio_iovcnt].iov_len);
440191673Sjamie		if (error) {
441191673Sjamie			free(u_path, M_TEMP);
442191673Sjamie			return (error);
443191673Sjamie		}
444192895Sjamie		opt.uio_iovcnt++;
445192895Sjamie	}
446191673Sjamie#ifdef INET
447192895Sjamie	optiov[opt.uio_iovcnt].iov_base = "ip4.addr";
448192895Sjamie	optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr");
449192895Sjamie	opt.uio_iovcnt++;
450192895Sjamie	optiov[opt.uio_iovcnt].iov_base = u_ip4;
451192895Sjamie	optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr);
452192895Sjamie	if (j->version == 0)
453192895Sjamie		u_ip4->s_addr = j->ip4s;
454192895Sjamie	else {
455192895Sjamie		error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len);
456191673Sjamie		if (error) {
457191673Sjamie			free(u_path, M_TEMP);
458191673Sjamie			return (error);
459191673Sjamie		}
460192895Sjamie	}
461192895Sjamie	opt.uio_iovcnt++;
462185435Sbz#endif
463185435Sbz#ifdef INET6
464192895Sjamie	optiov[opt.uio_iovcnt].iov_base = "ip6.addr";
465192895Sjamie	optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr");
466192895Sjamie	opt.uio_iovcnt++;
467192895Sjamie	optiov[opt.uio_iovcnt].iov_base = u_ip6;
468192895Sjamie	optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr);
469192895Sjamie	error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len);
470192895Sjamie	if (error) {
471192895Sjamie		free(u_path, M_TEMP);
472192895Sjamie		return (error);
473192895Sjamie	}
474192895Sjamie	opt.uio_iovcnt++;
475185435Sbz#endif
476192895Sjamie	KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]),
477192895Sjamie	    ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt));
478191673Sjamie	error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH);
479191673Sjamie	free(u_path, M_TEMP);
480191673Sjamie	return (error);
481185435Sbz}
482185435Sbz
483192895Sjamie
484191673Sjamie/*
485191673Sjamie * struct jail_set_args {
486191673Sjamie *	struct iovec *iovp;
487191673Sjamie *	unsigned int iovcnt;
488191673Sjamie *	int flags;
489191673Sjamie * };
490191673Sjamie */
491191673Sjamieint
492225617Skmacysys_jail_set(struct thread *td, struct jail_set_args *uap)
493185435Sbz{
494191673Sjamie	struct uio *auio;
495191673Sjamie	int error;
496191673Sjamie
497191673Sjamie	/* Check that we have an even number of iovecs. */
498191673Sjamie	if (uap->iovcnt & 1)
499191673Sjamie		return (EINVAL);
500191673Sjamie
501191673Sjamie	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
502191673Sjamie	if (error)
503191673Sjamie		return (error);
504191673Sjamie	error = kern_jail_set(td, auio, uap->flags);
505191673Sjamie	free(auio, M_IOV);
506191673Sjamie	return (error);
507191673Sjamie}
508191673Sjamie
509191673Sjamieint
510191673Sjamiekern_jail_set(struct thread *td, struct uio *optuio, int flags)
511191673Sjamie{
512191673Sjamie	struct nameidata nd;
513185435Sbz#ifdef INET
514190466Sjamie	struct in_addr *ip4;
515185435Sbz#endif
516185435Sbz#ifdef INET6
517185435Sbz	struct in6_addr *ip6;
518185435Sbz#endif
519191673Sjamie	struct vfsopt *opt;
520191673Sjamie	struct vfsoptlist *opts;
521196135Sbz	struct prison *pr, *deadpr, *mypr, *ppr, *tpr;
522191673Sjamie	struct vnode *root;
523196835Sjamie	char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid;
524192895Sjamie#if defined(INET) || defined(INET6)
525196135Sbz	struct prison *tppr;
526191673Sjamie	void *op;
527192895Sjamie#endif
528193066Sjamie	unsigned long hid;
529192895Sjamie	size_t namelen, onamelen;
530192895Sjamie	int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos;
531195870Sjamie	int gotchildmax, gotenforce, gothid, gotslevel;
532195870Sjamie	int fi, jid, jsys, len, level;
533194762Sjamie	int childmax, slevel, vfslocked;
534191673Sjamie#if defined(INET) || defined(INET6)
535192895Sjamie	int ii, ij;
536191673Sjamie#endif
537191673Sjamie#ifdef INET
538195974Sjamie	int ip4s, redo_ip4;
539191673Sjamie#endif
540191673Sjamie#ifdef INET6
541195974Sjamie	int ip6s, redo_ip6;
542191673Sjamie#endif
543224290Smckusick	uint64_t pr_allow, ch_allow, pr_flags, ch_flags;
544224290Smckusick	unsigned tallow;
545191673Sjamie	char numbuf[12];
546185435Sbz
547191673Sjamie	error = priv_check(td, PRIV_JAIL_SET);
548191673Sjamie	if (!error && (flags & JAIL_ATTACH))
549191673Sjamie		error = priv_check(td, PRIV_JAIL_ATTACH);
550191673Sjamie	if (error)
551191673Sjamie		return (error);
552192895Sjamie	mypr = ppr = td->td_ucred->cr_prison;
553194762Sjamie	if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
554192895Sjamie		return (EPERM);
555191673Sjamie	if (flags & ~JAIL_SET_MASK)
556191673Sjamie		return (EINVAL);
557191673Sjamie
558185435Sbz	/*
559191673Sjamie	 * Check all the parameters before committing to anything.  Not all
560191673Sjamie	 * errors can be caught early, but we may as well try.  Also, this
561191673Sjamie	 * takes care of some expensive stuff (path lookup) before getting
562191673Sjamie	 * the allprison lock.
563185435Sbz	 *
564191673Sjamie	 * XXX Jails are not filesystems, and jail parameters are not mount
565191673Sjamie	 *     options.  But it makes more sense to re-use the vfsopt code
566191673Sjamie	 *     than duplicate it under a different name.
567185435Sbz	 */
568191673Sjamie	error = vfs_buildopts(optuio, &opts);
569191673Sjamie	if (error)
570191673Sjamie		return (error);
571185435Sbz#ifdef INET
572185435Sbz	ip4 = NULL;
573185435Sbz#endif
574185435Sbz#ifdef INET6
575185435Sbz	ip6 = NULL;
576185435Sbz#endif
577191673Sjamie
578191673Sjamie	error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
579191673Sjamie	if (error == ENOENT)
580191673Sjamie		jid = 0;
581191673Sjamie	else if (error != 0)
582191673Sjamie		goto done_free;
583191673Sjamie
584191673Sjamie	error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel));
585191673Sjamie	if (error == ENOENT)
586191673Sjamie		gotslevel = 0;
587191673Sjamie	else if (error != 0)
588191673Sjamie		goto done_free;
589191673Sjamie	else
590191673Sjamie		gotslevel = 1;
591191673Sjamie
592194762Sjamie	error =
593194762Sjamie	    vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax));
594194762Sjamie	if (error == ENOENT)
595194762Sjamie		gotchildmax = 0;
596194762Sjamie	else if (error != 0)
597194762Sjamie		goto done_free;
598194762Sjamie	else
599194762Sjamie		gotchildmax = 1;
600194762Sjamie
601192895Sjamie	error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce));
602212436Sjamie	if (error == ENOENT)
603212436Sjamie		gotenforce = 0;
604212436Sjamie	else if (error != 0)
605192895Sjamie		goto done_free;
606212436Sjamie	else if (enforce < 0 || enforce > 2) {
607212436Sjamie		error = EINVAL;
608212436Sjamie		goto done_free;
609212436Sjamie	} else
610212436Sjamie		gotenforce = 1;
611192895Sjamie
612191673Sjamie	pr_flags = ch_flags = 0;
613192895Sjamie	for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]);
614192895Sjamie	    fi++) {
615192895Sjamie		if (pr_flag_names[fi] == NULL)
616192895Sjamie			continue;
617192895Sjamie		vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi);
618192895Sjamie		vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi);
619192895Sjamie	}
620191673Sjamie	ch_flags |= pr_flags;
621195870Sjamie	for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]);
622195870Sjamie	    fi++) {
623195870Sjamie		error = vfs_copyopt(opts, pr_flag_jailsys[fi].name, &jsys,
624195870Sjamie		    sizeof(jsys));
625195870Sjamie		if (error == ENOENT)
626195870Sjamie			continue;
627195870Sjamie		if (error != 0)
628195870Sjamie			goto done_free;
629195870Sjamie		switch (jsys) {
630195870Sjamie		case JAIL_SYS_DISABLE:
631195870Sjamie			if (!pr_flag_jailsys[fi].disable) {
632195870Sjamie				error = EINVAL;
633195870Sjamie				goto done_free;
634195870Sjamie			}
635195870Sjamie			pr_flags |= pr_flag_jailsys[fi].disable;
636195870Sjamie			break;
637195870Sjamie		case JAIL_SYS_NEW:
638195870Sjamie			pr_flags |= pr_flag_jailsys[fi].new;
639195870Sjamie			break;
640195870Sjamie		case JAIL_SYS_INHERIT:
641195870Sjamie			break;
642195870Sjamie		default:
643195870Sjamie			error = EINVAL;
644195870Sjamie			goto done_free;
645195870Sjamie		}
646195870Sjamie		ch_flags |=
647195870Sjamie		    pr_flag_jailsys[fi].new | pr_flag_jailsys[fi].disable;
648195870Sjamie	}
649211085Sjamie	if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE
650211085Sjamie	    && !(pr_flags & PR_PERSIST)) {
651211085Sjamie		error = EINVAL;
652211085Sjamie		vfs_opterror(opts, "new jail must persist or attach");
653211085Sjamie		goto done_errmsg;
654211085Sjamie	}
655194251Sjamie#ifdef VIMAGE
656194251Sjamie	if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) {
657194251Sjamie		error = EINVAL;
658194251Sjamie		vfs_opterror(opts, "vnet cannot be changed after creation");
659194251Sjamie		goto done_errmsg;
660194251Sjamie	}
661194251Sjamie#endif
662195974Sjamie#ifdef INET
663195974Sjamie	if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) {
664195974Sjamie		error = EINVAL;
665195974Sjamie		vfs_opterror(opts, "ip4 cannot be changed after creation");
666195974Sjamie		goto done_errmsg;
667195974Sjamie	}
668195974Sjamie#endif
669195974Sjamie#ifdef INET6
670195974Sjamie	if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) {
671195974Sjamie		error = EINVAL;
672195974Sjamie		vfs_opterror(opts, "ip6 cannot be changed after creation");
673195974Sjamie		goto done_errmsg;
674195974Sjamie	}
675195974Sjamie#endif
676191673Sjamie
677192895Sjamie	pr_allow = ch_allow = 0;
678192895Sjamie	for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]);
679192895Sjamie	    fi++) {
680192895Sjamie		vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi);
681192895Sjamie		vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi);
682192895Sjamie	}
683192895Sjamie	ch_allow |= pr_allow;
684192895Sjamie
685191673Sjamie	error = vfs_getopt(opts, "name", (void **)&name, &len);
686191673Sjamie	if (error == ENOENT)
687191673Sjamie		name = NULL;
688191673Sjamie	else if (error != 0)
689191673Sjamie		goto done_free;
690191673Sjamie	else {
691191673Sjamie		if (len == 0 || name[len - 1] != '\0') {
692191673Sjamie			error = EINVAL;
693191673Sjamie			goto done_free;
694191673Sjamie		}
695191673Sjamie		if (len > MAXHOSTNAMELEN) {
696191673Sjamie			error = ENAMETOOLONG;
697191673Sjamie			goto done_free;
698191673Sjamie		}
699191673Sjamie	}
700191673Sjamie
701191673Sjamie	error = vfs_getopt(opts, "host.hostname", (void **)&host, &len);
702191673Sjamie	if (error == ENOENT)
703191673Sjamie		host = NULL;
704191673Sjamie	else if (error != 0)
705191673Sjamie		goto done_free;
706191673Sjamie	else {
707193066Sjamie		ch_flags |= PR_HOST;
708193066Sjamie		pr_flags |= PR_HOST;
709191673Sjamie		if (len == 0 || host[len - 1] != '\0') {
710191673Sjamie			error = EINVAL;
711191673Sjamie			goto done_free;
712191673Sjamie		}
713191673Sjamie		if (len > MAXHOSTNAMELEN) {
714191673Sjamie			error = ENAMETOOLONG;
715191673Sjamie			goto done_free;
716191673Sjamie		}
717191673Sjamie	}
718191673Sjamie
719193066Sjamie	error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len);
720193066Sjamie	if (error == ENOENT)
721193066Sjamie		domain = NULL;
722193066Sjamie	else if (error != 0)
723193066Sjamie		goto done_free;
724193066Sjamie	else {
725193066Sjamie		ch_flags |= PR_HOST;
726193066Sjamie		pr_flags |= PR_HOST;
727193066Sjamie		if (len == 0 || domain[len - 1] != '\0') {
728193066Sjamie			error = EINVAL;
729193066Sjamie			goto done_free;
730193066Sjamie		}
731193066Sjamie		if (len > MAXHOSTNAMELEN) {
732193066Sjamie			error = ENAMETOOLONG;
733193066Sjamie			goto done_free;
734193066Sjamie		}
735193066Sjamie	}
736193066Sjamie
737193066Sjamie	error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len);
738193066Sjamie	if (error == ENOENT)
739193066Sjamie		uuid = NULL;
740193066Sjamie	else if (error != 0)
741193066Sjamie		goto done_free;
742193066Sjamie	else {
743193066Sjamie		ch_flags |= PR_HOST;
744193066Sjamie		pr_flags |= PR_HOST;
745193066Sjamie		if (len == 0 || uuid[len - 1] != '\0') {
746193066Sjamie			error = EINVAL;
747193066Sjamie			goto done_free;
748193066Sjamie		}
749193066Sjamie		if (len > HOSTUUIDLEN) {
750193066Sjamie			error = ENAMETOOLONG;
751193066Sjamie			goto done_free;
752193066Sjamie		}
753193066Sjamie	}
754193066Sjamie
755205014Snwhitehorn#ifdef COMPAT_FREEBSD32
756217896Sdchagin	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
757193066Sjamie		uint32_t hid32;
758193066Sjamie
759193066Sjamie		error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32));
760193066Sjamie		hid = hid32;
761193066Sjamie	} else
762193066Sjamie#endif
763193066Sjamie		error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid));
764193066Sjamie	if (error == ENOENT)
765193066Sjamie		gothid = 0;
766193066Sjamie	else if (error != 0)
767193066Sjamie		goto done_free;
768193066Sjamie	else {
769193066Sjamie		gothid = 1;
770193066Sjamie		ch_flags |= PR_HOST;
771193066Sjamie		pr_flags |= PR_HOST;
772193066Sjamie	}
773193066Sjamie
774185435Sbz#ifdef INET
775191673Sjamie	error = vfs_getopt(opts, "ip4.addr", &op, &ip4s);
776191673Sjamie	if (error == ENOENT)
777195870Sjamie		ip4s = (pr_flags & PR_IP4_DISABLE) ? 0 : -1;
778191673Sjamie	else if (error != 0)
779191673Sjamie		goto done_free;
780191673Sjamie	else if (ip4s & (sizeof(*ip4) - 1)) {
781191673Sjamie		error = EINVAL;
782191673Sjamie		goto done_free;
783192895Sjamie	} else {
784195870Sjamie		ch_flags |= PR_IP4_USER | PR_IP4_DISABLE;
785195870Sjamie		if (ip4s == 0)
786195870Sjamie			pr_flags |= PR_IP4_USER | PR_IP4_DISABLE;
787195870Sjamie		else {
788195870Sjamie			pr_flags = (pr_flags & ~PR_IP4_DISABLE) | PR_IP4_USER;
789192895Sjamie			ip4s /= sizeof(*ip4);
790192895Sjamie			if (ip4s > jail_max_af_ips) {
791185435Sbz				error = EINVAL;
792192895Sjamie				vfs_opterror(opts, "too many IPv4 addresses");
793192895Sjamie				goto done_errmsg;
794185435Sbz			}
795195974Sjamie			ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
796192895Sjamie			bcopy(op, ip4, ip4s * sizeof(*ip4));
797192895Sjamie			/*
798192895Sjamie			 * IP addresses are all sorted but ip[0] to preserve
799192895Sjamie			 * the primary IP address as given from userland.
800192895Sjamie			 * This special IP is used for unbound outgoing
801202116Sbz			 * connections as well for "loopback" traffic in case
802202116Sbz			 * source address selection cannot find any more fitting
803202116Sbz			 * address to connect from.
804192895Sjamie			 */
805192895Sjamie			if (ip4s > 1)
806192895Sjamie				qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4);
807192895Sjamie			/*
808192895Sjamie			 * Check for duplicate addresses and do some simple
809192895Sjamie			 * zero and broadcast checks. If users give other bogus
810192895Sjamie			 * addresses it is their problem.
811192895Sjamie			 *
812192895Sjamie			 * We do not have to care about byte order for these
813192895Sjamie			 * checks so we will do them in NBO.
814192895Sjamie			 */
815192895Sjamie			for (ii = 0; ii < ip4s; ii++) {
816192895Sjamie				if (ip4[ii].s_addr == INADDR_ANY ||
817192895Sjamie				    ip4[ii].s_addr == INADDR_BROADCAST) {
818192895Sjamie					error = EINVAL;
819192895Sjamie					goto done_free;
820192895Sjamie				}
821192895Sjamie				if ((ii+1) < ip4s &&
822192895Sjamie				    (ip4[0].s_addr == ip4[ii+1].s_addr ||
823192895Sjamie				     ip4[ii].s_addr == ip4[ii+1].s_addr)) {
824192895Sjamie					error = EINVAL;
825192895Sjamie					goto done_free;
826192895Sjamie				}
827192895Sjamie			}
828185435Sbz		}
829191673Sjamie	}
830191673Sjamie#endif
831185435Sbz
832185435Sbz#ifdef INET6
833191673Sjamie	error = vfs_getopt(opts, "ip6.addr", &op, &ip6s);
834191673Sjamie	if (error == ENOENT)
835195870Sjamie		ip6s = (pr_flags & PR_IP6_DISABLE) ? 0 : -1;
836191673Sjamie	else if (error != 0)
837191673Sjamie		goto done_free;
838191673Sjamie	else if (ip6s & (sizeof(*ip6) - 1)) {
839191673Sjamie		error = EINVAL;
840191673Sjamie		goto done_free;
841192895Sjamie	} else {
842195870Sjamie		ch_flags |= PR_IP6_USER | PR_IP6_DISABLE;
843195870Sjamie		if (ip6s == 0)
844195870Sjamie			pr_flags |= PR_IP6_USER | PR_IP6_DISABLE;
845195870Sjamie		else {
846195870Sjamie			pr_flags = (pr_flags & ~PR_IP6_DISABLE) | PR_IP6_USER;
847192895Sjamie			ip6s /= sizeof(*ip6);
848192895Sjamie			if (ip6s > jail_max_af_ips) {
849185435Sbz				error = EINVAL;
850192895Sjamie				vfs_opterror(opts, "too many IPv6 addresses");
851192895Sjamie				goto done_errmsg;
852185435Sbz			}
853195974Sjamie			ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
854192895Sjamie			bcopy(op, ip6, ip6s * sizeof(*ip6));
855192895Sjamie			if (ip6s > 1)
856192895Sjamie				qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6);
857192895Sjamie			for (ii = 0; ii < ip6s; ii++) {
858192895Sjamie				if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) {
859192895Sjamie					error = EINVAL;
860192895Sjamie					goto done_free;
861192895Sjamie				}
862192895Sjamie				if ((ii+1) < ip6s &&
863192895Sjamie				    (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) ||
864192895Sjamie				     IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1])))
865192895Sjamie				{
866192895Sjamie					error = EINVAL;
867192895Sjamie					goto done_free;
868192895Sjamie				}
869192895Sjamie			}
870185435Sbz		}
871191673Sjamie	}
872185435Sbz#endif
873185435Sbz
874195945Sjamie#if defined(VIMAGE) && (defined(INET) || defined(INET6))
875195945Sjamie	if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
876195945Sjamie		error = EINVAL;
877195945Sjamie		vfs_opterror(opts,
878195945Sjamie		    "vnet jails cannot have IP address restrictions");
879195945Sjamie		goto done_errmsg;
880195945Sjamie	}
881195945Sjamie#endif
882195945Sjamie
883191673Sjamie	root = NULL;
884191673Sjamie	error = vfs_getopt(opts, "path", (void **)&path, &len);
885191673Sjamie	if (error == ENOENT)
886191673Sjamie		path = NULL;
887191673Sjamie	else if (error != 0)
888191673Sjamie		goto done_free;
889191673Sjamie	else {
890191673Sjamie		if (flags & JAIL_UPDATE) {
891191673Sjamie			error = EINVAL;
892191673Sjamie			vfs_opterror(opts,
893191673Sjamie			    "path cannot be changed after creation");
894191673Sjamie			goto done_errmsg;
895191673Sjamie		}
896191673Sjamie		if (len == 0 || path[len - 1] != '\0') {
897191673Sjamie			error = EINVAL;
898191673Sjamie			goto done_free;
899191673Sjamie		}
900191673Sjamie		if (len < 2 || (len == 2 && path[0] == '/'))
901191673Sjamie			path = NULL;
902191673Sjamie		else {
903192895Sjamie			/* Leave room for a real-root full pathname. */
904192895Sjamie			if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/")
905192895Sjamie			    ? strlen(mypr->pr_path) : 0) > MAXPATHLEN) {
906192895Sjamie				error = ENAMETOOLONG;
907192895Sjamie				goto done_free;
908192895Sjamie			}
909191673Sjamie			NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_SYSSPACE,
910191673Sjamie			    path, td);
911191673Sjamie			error = namei(&nd);
912191673Sjamie			if (error)
913191673Sjamie				goto done_free;
914191673Sjamie			vfslocked = NDHASGIANT(&nd);
915191673Sjamie			root = nd.ni_vp;
916191673Sjamie			NDFREE(&nd, NDF_ONLY_PNBUF);
917191673Sjamie			if (root->v_type != VDIR) {
918191673Sjamie				error = ENOTDIR;
919191673Sjamie				vrele(root);
920191673Sjamie				VFS_UNLOCK_GIANT(vfslocked);
921191673Sjamie				goto done_free;
922191673Sjamie			}
923191673Sjamie			VFS_UNLOCK_GIANT(vfslocked);
924191673Sjamie		}
925191673Sjamie	}
926185435Sbz
927191673Sjamie	/*
928191673Sjamie	 * Grab the allprison lock before letting modules check their
929191673Sjamie	 * parameters.  Once we have it, do not let go so we'll have a
930191673Sjamie	 * consistent view of the OSD list.
931191673Sjamie	 */
932191673Sjamie	sx_xlock(&allprison_lock);
933191673Sjamie	error = osd_jail_call(NULL, PR_METHOD_CHECK, opts);
934191673Sjamie	if (error)
935191673Sjamie		goto done_unlock_list;
936185435Sbz
937191673Sjamie	/* By now, all parameters should have been noted. */
938191673Sjamie	TAILQ_FOREACH(opt, opts, link) {
939191673Sjamie		if (!opt->seen && strcmp(opt->name, "errmsg")) {
940191673Sjamie			error = EINVAL;
941191673Sjamie			vfs_opterror(opts, "unknown parameter: %s", opt->name);
942191673Sjamie			goto done_unlock_list;
943191673Sjamie		}
944191673Sjamie	}
945191673Sjamie
946185435Sbz	/*
947191673Sjamie	 * See if we are creating a new record or updating an existing one.
948191673Sjamie	 * This abuses the file error codes ENOENT and EEXIST.
949185435Sbz	 */
950191673Sjamie	cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
951191673Sjamie	if (!cuflags) {
952191673Sjamie		error = EINVAL;
953191673Sjamie		vfs_opterror(opts, "no valid operation (create or update)");
954191673Sjamie		goto done_unlock_list;
955191673Sjamie	}
956191673Sjamie	pr = NULL;
957196835Sjamie	namelc = NULL;
958196835Sjamie	if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
959196835Sjamie		namelc = strrchr(name, '.');
960196835Sjamie		jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
961196835Sjamie		if (*p != '\0')
962196835Sjamie			jid = 0;
963196835Sjamie	}
964191673Sjamie	if (jid != 0) {
965192895Sjamie		/*
966192895Sjamie		 * See if a requested jid already exists.  There is an
967192895Sjamie		 * information leak here if the jid exists but is not within
968192895Sjamie		 * the caller's jail hierarchy.  Jail creators will get EEXIST
969192895Sjamie		 * even though they cannot see the jail, and CREATE | UPDATE
970192895Sjamie		 * will return ENOENT which is not normally a valid error.
971192895Sjamie		 */
972191673Sjamie		if (jid < 0) {
973191673Sjamie			error = EINVAL;
974191673Sjamie			vfs_opterror(opts, "negative jid");
975191673Sjamie			goto done_unlock_list;
976191673Sjamie		}
977191673Sjamie		pr = prison_find(jid);
978191673Sjamie		if (pr != NULL) {
979192895Sjamie			ppr = pr->pr_parent;
980191673Sjamie			/* Create: jid must not exist. */
981191673Sjamie			if (cuflags == JAIL_CREATE) {
982191673Sjamie				mtx_unlock(&pr->pr_mtx);
983191673Sjamie				error = EEXIST;
984191673Sjamie				vfs_opterror(opts, "jail %d already exists",
985191673Sjamie				    jid);
986191673Sjamie				goto done_unlock_list;
987191673Sjamie			}
988192895Sjamie			if (!prison_ischild(mypr, pr)) {
989192895Sjamie				mtx_unlock(&pr->pr_mtx);
990192895Sjamie				pr = NULL;
991192895Sjamie			} else if (pr->pr_uref == 0) {
992191673Sjamie				if (!(flags & JAIL_DYING)) {
993191673Sjamie					mtx_unlock(&pr->pr_mtx);
994191673Sjamie					error = ENOENT;
995191673Sjamie					vfs_opterror(opts, "jail %d is dying",
996191673Sjamie					    jid);
997191673Sjamie					goto done_unlock_list;
998191673Sjamie				} else if ((flags & JAIL_ATTACH) ||
999191673Sjamie				    (pr_flags & PR_PERSIST)) {
1000191673Sjamie					/*
1001191673Sjamie					 * A dying jail might be resurrected
1002191673Sjamie					 * (via attach or persist), but first
1003191673Sjamie					 * it must determine if another jail
1004191673Sjamie					 * has claimed its name.  Accomplish
1005191673Sjamie					 * this by implicitly re-setting the
1006191673Sjamie					 * name.
1007191673Sjamie					 */
1008191673Sjamie					if (name == NULL)
1009192895Sjamie						name = prison_name(mypr, pr);
1010191673Sjamie				}
1011191673Sjamie			}
1012191673Sjamie		}
1013191673Sjamie		if (pr == NULL) {
1014191673Sjamie			/* Update: jid must exist. */
1015191673Sjamie			if (cuflags == JAIL_UPDATE) {
1016191673Sjamie				error = ENOENT;
1017191673Sjamie				vfs_opterror(opts, "jail %d not found", jid);
1018191673Sjamie				goto done_unlock_list;
1019191673Sjamie			}
1020191673Sjamie		}
1021191673Sjamie	}
1022191673Sjamie	/*
1023191673Sjamie	 * If the caller provided a name, look for a jail by that name.
1024191673Sjamie	 * This has different semantics for creates and updates keyed by jid
1025191673Sjamie	 * (where the name must not already exist in a different jail),
1026191673Sjamie	 * and updates keyed by the name itself (where the name must exist
1027191673Sjamie	 * because that is the jail being updated).
1028191673Sjamie	 */
1029191673Sjamie	if (name != NULL) {
1030196835Sjamie		namelc = strrchr(name, '.');
1031196835Sjamie		if (namelc == NULL)
1032196835Sjamie			namelc = name;
1033196835Sjamie		else {
1034192895Sjamie			/*
1035192895Sjamie			 * This is a hierarchical name.  Split it into the
1036192895Sjamie			 * parent and child names, and make sure the parent
1037192895Sjamie			 * exists or matches an already found jail.
1038192895Sjamie			 */
1039196835Sjamie			*namelc = '\0';
1040192895Sjamie			if (pr != NULL) {
1041196835Sjamie				if (strncmp(name, ppr->pr_name, namelc - name)
1042196835Sjamie				    || ppr->pr_name[namelc - name] != '\0') {
1043192895Sjamie					mtx_unlock(&pr->pr_mtx);
1044192895Sjamie					error = EINVAL;
1045192895Sjamie					vfs_opterror(opts,
1046192895Sjamie					    "cannot change jail's parent");
1047192895Sjamie					goto done_unlock_list;
1048192895Sjamie				}
1049192895Sjamie			} else {
1050192895Sjamie				ppr = prison_find_name(mypr, name);
1051192895Sjamie				if (ppr == NULL) {
1052192895Sjamie					error = ENOENT;
1053192895Sjamie					vfs_opterror(opts,
1054192895Sjamie					    "jail \"%s\" not found", name);
1055192895Sjamie					goto done_unlock_list;
1056192895Sjamie				}
1057192895Sjamie				mtx_unlock(&ppr->pr_mtx);
1058192895Sjamie			}
1059196835Sjamie			name = ++namelc;
1060192895Sjamie		}
1061191673Sjamie		if (name[0] != '\0') {
1062192895Sjamie			namelen =
1063192895Sjamie			    (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
1064192895Sjamie name_again:
1065191673Sjamie			deadpr = NULL;
1066192895Sjamie			FOREACH_PRISON_CHILD(ppr, tpr) {
1067191673Sjamie				if (tpr != pr && tpr->pr_ref > 0 &&
1068192895Sjamie				    !strcmp(tpr->pr_name + namelen, name)) {
1069191673Sjamie					if (pr == NULL &&
1070191673Sjamie					    cuflags != JAIL_CREATE) {
1071191673Sjamie						mtx_lock(&tpr->pr_mtx);
1072191673Sjamie						if (tpr->pr_ref > 0) {
1073191673Sjamie							/*
1074191673Sjamie							 * Use this jail
1075191673Sjamie							 * for updates.
1076191673Sjamie							 */
1077191673Sjamie							if (tpr->pr_uref > 0) {
1078191673Sjamie								pr = tpr;
1079191673Sjamie								break;
1080191673Sjamie							}
1081191673Sjamie							deadpr = tpr;
1082191673Sjamie						}
1083191673Sjamie						mtx_unlock(&tpr->pr_mtx);
1084191673Sjamie					} else if (tpr->pr_uref > 0) {
1085191673Sjamie						/*
1086191673Sjamie						 * Create, or update(jid):
1087191673Sjamie						 * name must not exist in an
1088192895Sjamie						 * active sibling jail.
1089191673Sjamie						 */
1090191673Sjamie						error = EEXIST;
1091191673Sjamie						if (pr != NULL)
1092191673Sjamie							mtx_unlock(&pr->pr_mtx);
1093191673Sjamie						vfs_opterror(opts,
1094191673Sjamie						   "jail \"%s\" already exists",
1095191673Sjamie						   name);
1096191673Sjamie						goto done_unlock_list;
1097191673Sjamie					}
1098191673Sjamie				}
1099191673Sjamie			}
1100191673Sjamie			/* If no active jail is found, use a dying one. */
1101191673Sjamie			if (deadpr != NULL && pr == NULL) {
1102191673Sjamie				if (flags & JAIL_DYING) {
1103191673Sjamie					mtx_lock(&deadpr->pr_mtx);
1104191673Sjamie					if (deadpr->pr_ref == 0) {
1105191673Sjamie						mtx_unlock(&deadpr->pr_mtx);
1106191673Sjamie						goto name_again;
1107191673Sjamie					}
1108191673Sjamie					pr = deadpr;
1109191673Sjamie				} else if (cuflags == JAIL_UPDATE) {
1110191673Sjamie					error = ENOENT;
1111191673Sjamie					vfs_opterror(opts,
1112191673Sjamie					    "jail \"%s\" is dying", name);
1113191673Sjamie					goto done_unlock_list;
1114191673Sjamie				}
1115191673Sjamie			}
1116191673Sjamie			/* Update: name must exist if no jid. */
1117191673Sjamie			else if (cuflags == JAIL_UPDATE && pr == NULL) {
1118191673Sjamie				error = ENOENT;
1119191673Sjamie				vfs_opterror(opts, "jail \"%s\" not found",
1120191673Sjamie				    name);
1121191673Sjamie				goto done_unlock_list;
1122191673Sjamie			}
1123191673Sjamie		}
1124191673Sjamie	}
1125191673Sjamie	/* Update: must provide a jid or name. */
1126191673Sjamie	else if (cuflags == JAIL_UPDATE && pr == NULL) {
1127191673Sjamie		error = ENOENT;
1128191673Sjamie		vfs_opterror(opts, "update specified no jail");
1129191673Sjamie		goto done_unlock_list;
1130191673Sjamie	}
1131185435Sbz
1132191673Sjamie	/* If there's no prison to update, create a new one and link it in. */
1133191673Sjamie	if (pr == NULL) {
1134194762Sjamie		for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent)
1135194762Sjamie			if (tpr->pr_childcount >= tpr->pr_childmax) {
1136194762Sjamie				error = EPERM;
1137194762Sjamie				vfs_opterror(opts, "prison limit exceeded");
1138194762Sjamie				goto done_unlock_list;
1139194762Sjamie			}
1140191673Sjamie		created = 1;
1141192895Sjamie		mtx_lock(&ppr->pr_mtx);
1142192895Sjamie		if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) {
1143192895Sjamie			mtx_unlock(&ppr->pr_mtx);
1144192895Sjamie			error = ENOENT;
1145192895Sjamie			vfs_opterror(opts, "parent jail went away!");
1146192895Sjamie			goto done_unlock_list;
1147192895Sjamie		}
1148192895Sjamie		ppr->pr_ref++;
1149192895Sjamie		ppr->pr_uref++;
1150192895Sjamie		mtx_unlock(&ppr->pr_mtx);
1151191673Sjamie		pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
1152191673Sjamie		if (jid == 0) {
1153191673Sjamie			/* Find the next free jid. */
1154191673Sjamie			jid = lastprid + 1;
1155191673Sjamie findnext:
1156191673Sjamie			if (jid == JAIL_MAX)
1157191673Sjamie				jid = 1;
1158191673Sjamie			TAILQ_FOREACH(tpr, &allprison, pr_list) {
1159191673Sjamie				if (tpr->pr_id < jid)
1160191673Sjamie					continue;
1161191673Sjamie				if (tpr->pr_id > jid || tpr->pr_ref == 0) {
1162191673Sjamie					TAILQ_INSERT_BEFORE(tpr, pr, pr_list);
1163191673Sjamie					break;
1164191673Sjamie				}
1165191673Sjamie				if (jid == lastprid) {
1166191673Sjamie					error = EAGAIN;
1167191673Sjamie					vfs_opterror(opts,
1168191673Sjamie					    "no available jail IDs");
1169191673Sjamie					free(pr, M_PRISON);
1170192895Sjamie					prison_deref(ppr, PD_DEREF |
1171192895Sjamie					    PD_DEUREF | PD_LIST_XLOCKED);
1172192895Sjamie					goto done_releroot;
1173191673Sjamie				}
1174191673Sjamie				jid++;
1175191673Sjamie				goto findnext;
1176191673Sjamie			}
1177191673Sjamie			lastprid = jid;
1178191673Sjamie		} else {
1179191673Sjamie			/*
1180191673Sjamie			 * The jail already has a jid (that did not yet exist),
1181191673Sjamie			 * so just find where to insert it.
1182191673Sjamie			 */
1183191673Sjamie			TAILQ_FOREACH(tpr, &allprison, pr_list)
1184191673Sjamie				if (tpr->pr_id >= jid) {
1185191673Sjamie					TAILQ_INSERT_BEFORE(tpr, pr, pr_list);
1186191673Sjamie					break;
1187191673Sjamie				}
1188191673Sjamie		}
1189191673Sjamie		if (tpr == NULL)
1190191673Sjamie			TAILQ_INSERT_TAIL(&allprison, pr, pr_list);
1191192895Sjamie		LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling);
1192192895Sjamie		for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
1193194762Sjamie			tpr->pr_childcount++;
1194185435Sbz
1195192895Sjamie		pr->pr_parent = ppr;
1196191673Sjamie		pr->pr_id = jid;
1197192895Sjamie
1198192895Sjamie		/* Set some default values, and inherit some from the parent. */
1199191673Sjamie		if (name == NULL)
1200191673Sjamie			name = "";
1201191673Sjamie		if (path == NULL) {
1202191673Sjamie			path = "/";
1203192895Sjamie			root = mypr->pr_root;
1204191673Sjamie			vref(root);
1205191673Sjamie		}
1206195944Sjamie		strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN);
1207195944Sjamie		pr->pr_flags |= PR_HOST;
1208195945Sjamie#if defined(INET) || defined(INET6)
1209195945Sjamie#ifdef VIMAGE
1210195945Sjamie		if (!(pr_flags & PR_VNET))
1211195945Sjamie#endif
1212195945Sjamie		{
1213192895Sjamie#ifdef INET
1214195974Sjamie			if (!(ch_flags & PR_IP4_USER))
1215195974Sjamie				pr->pr_flags |=
1216195974Sjamie				    PR_IP4 | PR_IP4_USER | PR_IP4_DISABLE;
1217195974Sjamie			else if (!(pr_flags & PR_IP4_USER)) {
1218195974Sjamie				pr->pr_flags |= ppr->pr_flags & PR_IP4;
1219195974Sjamie				if (ppr->pr_ip4 != NULL) {
1220195974Sjamie					pr->pr_ip4s = ppr->pr_ip4s;
1221195974Sjamie					pr->pr_ip4 = malloc(pr->pr_ip4s *
1222195974Sjamie					    sizeof(struct in_addr), M_PRISON,
1223195974Sjamie					    M_WAITOK);
1224195974Sjamie					bcopy(ppr->pr_ip4, pr->pr_ip4,
1225195974Sjamie					    pr->pr_ip4s * sizeof(*pr->pr_ip4));
1226195974Sjamie				}
1227195974Sjamie			}
1228192895Sjamie#endif
1229192895Sjamie#ifdef INET6
1230195974Sjamie			if (!(ch_flags & PR_IP6_USER))
1231195974Sjamie				pr->pr_flags |=
1232195974Sjamie				    PR_IP6 | PR_IP6_USER | PR_IP6_DISABLE;
1233195974Sjamie			else if (!(pr_flags & PR_IP6_USER)) {
1234195974Sjamie				pr->pr_flags |= ppr->pr_flags & PR_IP6;
1235195974Sjamie				if (ppr->pr_ip6 != NULL) {
1236195974Sjamie					pr->pr_ip6s = ppr->pr_ip6s;
1237195974Sjamie					pr->pr_ip6 = malloc(pr->pr_ip6s *
1238195974Sjamie					    sizeof(struct in6_addr), M_PRISON,
1239195974Sjamie					    M_WAITOK);
1240195974Sjamie					bcopy(ppr->pr_ip6, pr->pr_ip6,
1241195974Sjamie					    pr->pr_ip6s * sizeof(*pr->pr_ip6));
1242195974Sjamie				}
1243195974Sjamie			}
1244192895Sjamie#endif
1245195945Sjamie		}
1246195945Sjamie#endif
1247202468Sbz		/* Source address selection is always on by default. */
1248202468Sbz		pr->pr_flags |= _PR_IP_SADDRSEL;
1249202468Sbz
1250192895Sjamie		pr->pr_securelevel = ppr->pr_securelevel;
1251192895Sjamie		pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow;
1252196002Sjamie		pr->pr_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS;
1253191673Sjamie
1254192895Sjamie		LIST_INIT(&pr->pr_children);
1255192895Sjamie		mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
1256191673Sjamie
1257194251Sjamie#ifdef VIMAGE
1258194251Sjamie		/* Allocate a new vnet if specified. */
1259194251Sjamie		pr->pr_vnet = (pr_flags & PR_VNET)
1260194251Sjamie		    ? vnet_alloc() : ppr->pr_vnet;
1261194251Sjamie#endif
1262185435Sbz		/*
1263191673Sjamie		 * Allocate a dedicated cpuset for each jail.
1264191673Sjamie		 * Unlike other initial settings, this may return an error.
1265185435Sbz		 */
1266192895Sjamie		error = cpuset_create_root(ppr, &pr->pr_cpuset);
1267191673Sjamie		if (error) {
1268191673Sjamie			prison_deref(pr, PD_LIST_XLOCKED);
1269191673Sjamie			goto done_releroot;
1270191673Sjamie		}
1271185435Sbz
1272191673Sjamie		mtx_lock(&pr->pr_mtx);
1273185435Sbz		/*
1274191673Sjamie		 * New prisons do not yet have a reference, because we do not
1275191673Sjamie		 * want others to see the incomplete prison once the
1276191673Sjamie		 * allprison_lock is downgraded.
1277185435Sbz		 */
1278191673Sjamie	} else {
1279191673Sjamie		created = 0;
1280195974Sjamie		/*
1281195974Sjamie		 * Grab a reference for existing prisons, to ensure they
1282195974Sjamie		 * continue to exist for the duration of the call.
1283195974Sjamie		 */
1284195974Sjamie		pr->pr_ref++;
1285195945Sjamie#if defined(VIMAGE) && (defined(INET) || defined(INET6))
1286195945Sjamie		if ((pr->pr_flags & PR_VNET) &&
1287195945Sjamie		    (ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
1288195945Sjamie			error = EINVAL;
1289195945Sjamie			vfs_opterror(opts,
1290195945Sjamie			    "vnet jails cannot have IP address restrictions");
1291195945Sjamie			goto done_deref_locked;
1292195945Sjamie		}
1293195945Sjamie#endif
1294195974Sjamie#ifdef INET
1295195974Sjamie		if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) {
1296195974Sjamie			error = EINVAL;
1297195974Sjamie			vfs_opterror(opts,
1298195974Sjamie			    "ip4 cannot be changed after creation");
1299195974Sjamie			goto done_deref_locked;
1300195974Sjamie		}
1301195974Sjamie#endif
1302195974Sjamie#ifdef INET6
1303195974Sjamie		if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) {
1304195974Sjamie			error = EINVAL;
1305195974Sjamie			vfs_opterror(opts,
1306195974Sjamie			    "ip6 cannot be changed after creation");
1307195974Sjamie			goto done_deref_locked;
1308195974Sjamie		}
1309195974Sjamie#endif
1310191673Sjamie	}
1311185435Sbz
1312191673Sjamie	/* Do final error checking before setting anything. */
1313192895Sjamie	if (gotslevel) {
1314192895Sjamie		if (slevel < ppr->pr_securelevel) {
1315192895Sjamie			error = EPERM;
1316192895Sjamie			goto done_deref_locked;
1317192895Sjamie		}
1318192895Sjamie	}
1319194762Sjamie	if (gotchildmax) {
1320194762Sjamie		if (childmax >= ppr->pr_childmax) {
1321194762Sjamie			error = EPERM;
1322194762Sjamie			goto done_deref_locked;
1323194762Sjamie		}
1324194762Sjamie	}
1325192895Sjamie	if (gotenforce) {
1326192895Sjamie		if (enforce < ppr->pr_enforce_statfs) {
1327192895Sjamie			error = EPERM;
1328192895Sjamie			goto done_deref_locked;
1329192895Sjamie		}
1330192895Sjamie	}
1331185435Sbz#ifdef INET
1332195974Sjamie	if (ip4s > 0) {
1333192895Sjamie		if (ppr->pr_flags & PR_IP4) {
1334195974Sjamie			/*
1335195974Sjamie			 * Make sure the new set of IP addresses is a
1336195974Sjamie			 * subset of the parent's list.  Don't worry
1337195974Sjamie			 * about the parent being unlocked, as any
1338195974Sjamie			 * setting is done with allprison_lock held.
1339195974Sjamie			 */
1340195974Sjamie			for (ij = 0; ij < ppr->pr_ip4s; ij++)
1341195974Sjamie				if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
1342195974Sjamie					break;
1343195974Sjamie			if (ij == ppr->pr_ip4s) {
1344195974Sjamie				error = EPERM;
1345195974Sjamie				goto done_deref_locked;
1346195974Sjamie			}
1347195974Sjamie			if (ip4s > 1) {
1348195974Sjamie				for (ii = ij = 1; ii < ip4s; ii++) {
1349195974Sjamie					if (ip4[ii].s_addr ==
1350195974Sjamie					    ppr->pr_ip4[0].s_addr)
1351195974Sjamie						continue;
1352195974Sjamie					for (; ij < ppr->pr_ip4s; ij++)
1353195974Sjamie						if (ip4[ii].s_addr ==
1354195974Sjamie						    ppr->pr_ip4[ij].s_addr)
1355195974Sjamie							break;
1356195974Sjamie					if (ij == ppr->pr_ip4s)
1357195974Sjamie						break;
1358192895Sjamie				}
1359192895Sjamie				if (ij == ppr->pr_ip4s) {
1360192895Sjamie					error = EPERM;
1361192895Sjamie					goto done_deref_locked;
1362192895Sjamie				}
1363192895Sjamie			}
1364192895Sjamie		}
1365195974Sjamie		/*
1366195974Sjamie		 * Check for conflicting IP addresses.  We permit them
1367195974Sjamie		 * if there is no more than one IP on each jail.  If
1368195974Sjamie		 * there is a duplicate on a jail with more than one
1369195974Sjamie		 * IP stop checking and return error.
1370195974Sjamie		 */
1371195974Sjamie		tppr = ppr;
1372195945Sjamie#ifdef VIMAGE
1373195974Sjamie		for (; tppr != &prison0; tppr = tppr->pr_parent)
1374195974Sjamie			if (tppr->pr_flags & PR_VNET)
1375195974Sjamie				break;
1376195945Sjamie#endif
1377195974Sjamie		FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) {
1378195974Sjamie			if (tpr == pr ||
1379195945Sjamie#ifdef VIMAGE
1380195974Sjamie			    (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
1381195945Sjamie#endif
1382195974Sjamie			    tpr->pr_uref == 0) {
1383192895Sjamie				descend = 0;
1384195974Sjamie				continue;
1385195974Sjamie			}
1386195974Sjamie			if (!(tpr->pr_flags & PR_IP4_USER))
1387195974Sjamie				continue;
1388195974Sjamie			descend = 0;
1389195974Sjamie			if (tpr->pr_ip4 == NULL ||
1390195974Sjamie			    (ip4s == 1 && tpr->pr_ip4s == 1))
1391195974Sjamie				continue;
1392195974Sjamie			for (ii = 0; ii < ip4s; ii++) {
1393195974Sjamie				if (_prison_check_ip4(tpr, &ip4[ii]) == 0) {
1394195974Sjamie					error = EADDRINUSE;
1395195974Sjamie					vfs_opterror(opts,
1396195974Sjamie					    "IPv4 addresses clash");
1397195974Sjamie					goto done_deref_locked;
1398192895Sjamie				}
1399192895Sjamie			}
1400192895Sjamie		}
1401192895Sjamie	}
1402185435Sbz#endif
1403191673Sjamie#ifdef INET6
1404195974Sjamie	if (ip6s > 0) {
1405192895Sjamie		if (ppr->pr_flags & PR_IP6) {
1406195974Sjamie			/*
1407195974Sjamie			 * Make sure the new set of IP addresses is a
1408195974Sjamie			 * subset of the parent's list.
1409195974Sjamie			 */
1410195974Sjamie			for (ij = 0; ij < ppr->pr_ip6s; ij++)
1411195974Sjamie				if (IN6_ARE_ADDR_EQUAL(&ip6[0],
1412195974Sjamie				    &ppr->pr_ip6[ij]))
1413195974Sjamie					break;
1414195974Sjamie			if (ij == ppr->pr_ip6s) {
1415195974Sjamie				error = EPERM;
1416195974Sjamie				goto done_deref_locked;
1417195974Sjamie			}
1418195974Sjamie			if (ip6s > 1) {
1419195974Sjamie				for (ii = ij = 1; ii < ip6s; ii++) {
1420195974Sjamie					if (IN6_ARE_ADDR_EQUAL(&ip6[ii],
1421195974Sjamie					     &ppr->pr_ip6[0]))
1422195974Sjamie						continue;
1423195974Sjamie					for (; ij < ppr->pr_ip6s; ij++)
1424195974Sjamie						if (IN6_ARE_ADDR_EQUAL(
1425195974Sjamie						    &ip6[ii], &ppr->pr_ip6[ij]))
1426195974Sjamie							break;
1427195974Sjamie					if (ij == ppr->pr_ip6s)
1428195974Sjamie						break;
1429192895Sjamie				}
1430192895Sjamie				if (ij == ppr->pr_ip6s) {
1431192895Sjamie					error = EPERM;
1432192895Sjamie					goto done_deref_locked;
1433192895Sjamie				}
1434192895Sjamie			}
1435192895Sjamie		}
1436195974Sjamie		/* Check for conflicting IP addresses. */
1437195974Sjamie		tppr = ppr;
1438195945Sjamie#ifdef VIMAGE
1439195974Sjamie		for (; tppr != &prison0; tppr = tppr->pr_parent)
1440195974Sjamie			if (tppr->pr_flags & PR_VNET)
1441195974Sjamie				break;
1442195945Sjamie#endif
1443195974Sjamie		FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) {
1444195974Sjamie			if (tpr == pr ||
1445195945Sjamie#ifdef VIMAGE
1446195974Sjamie			    (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
1447195945Sjamie#endif
1448195974Sjamie			    tpr->pr_uref == 0) {
1449192895Sjamie				descend = 0;
1450195974Sjamie				continue;
1451195974Sjamie			}
1452195974Sjamie			if (!(tpr->pr_flags & PR_IP6_USER))
1453195974Sjamie				continue;
1454195974Sjamie			descend = 0;
1455195974Sjamie			if (tpr->pr_ip6 == NULL ||
1456195974Sjamie			    (ip6s == 1 && tpr->pr_ip6s == 1))
1457195974Sjamie				continue;
1458195974Sjamie			for (ii = 0; ii < ip6s; ii++) {
1459195974Sjamie				if (_prison_check_ip6(tpr, &ip6[ii]) == 0) {
1460195974Sjamie					error = EADDRINUSE;
1461195974Sjamie					vfs_opterror(opts,
1462195974Sjamie					    "IPv6 addresses clash");
1463195974Sjamie					goto done_deref_locked;
1464192895Sjamie				}
1465192895Sjamie			}
1466191673Sjamie		}
1467192895Sjamie	}
1468191673Sjamie#endif
1469192895Sjamie	onamelen = namelen = 0;
1470192895Sjamie	if (name != NULL) {
1471191673Sjamie		/* Give a default name of the jid. */
1472191673Sjamie		if (name[0] == '\0')
1473191673Sjamie			snprintf(name = numbuf, sizeof(numbuf), "%d", jid);
1474196835Sjamie		else if (*namelc == '0' || (strtoul(namelc, &p, 10) != jid &&
1475196835Sjamie		    *p == '\0')) {
1476191673Sjamie			error = EINVAL;
1477196835Sjamie			vfs_opterror(opts,
1478196835Sjamie			    "name cannot be numeric (unless it is the jid)");
1479192895Sjamie			goto done_deref_locked;
1480191673Sjamie		}
1481191673Sjamie		/*
1482192895Sjamie		 * Make sure the name isn't too long for the prison or its
1483192895Sjamie		 * children.
1484191673Sjamie		 */
1485192895Sjamie		onamelen = strlen(pr->pr_name);
1486192895Sjamie		namelen = strlen(name);
1487192895Sjamie		if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) {
1488192895Sjamie			error = ENAMETOOLONG;
1489192895Sjamie			goto done_deref_locked;
1490192895Sjamie		}
1491192895Sjamie		FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
1492192895Sjamie			if (strlen(tpr->pr_name) + (namelen - onamelen) >=
1493192895Sjamie			    sizeof(pr->pr_name)) {
1494192895Sjamie				error = ENAMETOOLONG;
1495192895Sjamie				goto done_deref_locked;
1496192895Sjamie			}
1497192895Sjamie		}
1498191673Sjamie	}
1499192895Sjamie	if (pr_allow & ~ppr->pr_allow) {
1500192895Sjamie		error = EPERM;
1501192895Sjamie		goto done_deref_locked;
1502192895Sjamie	}
1503185435Sbz
1504191673Sjamie	/* Set the parameters of the prison. */
1505191673Sjamie#ifdef INET
1506192895Sjamie	redo_ip4 = 0;
1507195974Sjamie	if (pr_flags & PR_IP4_USER) {
1508195974Sjamie		pr->pr_flags |= PR_IP4;
1509195974Sjamie		free(pr->pr_ip4, M_PRISON);
1510195974Sjamie		pr->pr_ip4s = ip4s;
1511195974Sjamie		pr->pr_ip4 = ip4;
1512195974Sjamie		ip4 = NULL;
1513192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
1514195945Sjamie#ifdef VIMAGE
1515195945Sjamie			if (tpr->pr_flags & PR_VNET) {
1516195945Sjamie				descend = 0;
1517195945Sjamie				continue;
1518195945Sjamie			}
1519195945Sjamie#endif
1520192895Sjamie			if (prison_restrict_ip4(tpr, NULL)) {
1521192895Sjamie				redo_ip4 = 1;
1522192895Sjamie				descend = 0;
1523192895Sjamie			}
1524192895Sjamie		}
1525185435Sbz	}
1526191673Sjamie#endif
1527191673Sjamie#ifdef INET6
1528192895Sjamie	redo_ip6 = 0;
1529195974Sjamie	if (pr_flags & PR_IP6_USER) {
1530195974Sjamie		pr->pr_flags |= PR_IP6;
1531195974Sjamie		free(pr->pr_ip6, M_PRISON);
1532195974Sjamie		pr->pr_ip6s = ip6s;
1533195974Sjamie		pr->pr_ip6 = ip6;
1534195974Sjamie		ip6 = NULL;
1535192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
1536195945Sjamie#ifdef VIMAGE
1537195945Sjamie			if (tpr->pr_flags & PR_VNET) {
1538195945Sjamie				descend = 0;
1539195945Sjamie				continue;
1540195945Sjamie			}
1541195945Sjamie#endif
1542192895Sjamie			if (prison_restrict_ip6(tpr, NULL)) {
1543192895Sjamie				redo_ip6 = 1;
1544192895Sjamie				descend = 0;
1545192895Sjamie			}
1546192895Sjamie		}
1547191673Sjamie	}
1548191673Sjamie#endif
1549192895Sjamie	if (gotslevel) {
1550191673Sjamie		pr->pr_securelevel = slevel;
1551192895Sjamie		/* Set all child jails to be at least this level. */
1552192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
1553192895Sjamie			if (tpr->pr_securelevel < slevel)
1554192895Sjamie				tpr->pr_securelevel = slevel;
1555192895Sjamie	}
1556194762Sjamie	if (gotchildmax) {
1557194762Sjamie		pr->pr_childmax = childmax;
1558194762Sjamie		/* Set all child jails to under this limit. */
1559194762Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level)
1560194762Sjamie			if (tpr->pr_childmax > childmax - level)
1561194762Sjamie				tpr->pr_childmax = childmax > level
1562194762Sjamie				    ? childmax - level : 0;
1563194762Sjamie	}
1564192895Sjamie	if (gotenforce) {
1565192895Sjamie		pr->pr_enforce_statfs = enforce;
1566192895Sjamie		/* Pass this restriction on to the children. */
1567192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
1568192895Sjamie			if (tpr->pr_enforce_statfs < enforce)
1569192895Sjamie				tpr->pr_enforce_statfs = enforce;
1570192895Sjamie	}
1571192895Sjamie	if (name != NULL) {
1572192895Sjamie		if (ppr == &prison0)
1573192895Sjamie			strlcpy(pr->pr_name, name, sizeof(pr->pr_name));
1574192895Sjamie		else
1575192895Sjamie			snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s",
1576192895Sjamie			    ppr->pr_name, name);
1577192895Sjamie		/* Change this component of child names. */
1578192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
1579192895Sjamie			bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen,
1580192895Sjamie			    strlen(tpr->pr_name + onamelen) + 1);
1581192895Sjamie			bcopy(pr->pr_name, tpr->pr_name, namelen);
1582192895Sjamie		}
1583192895Sjamie	}
1584191673Sjamie	if (path != NULL) {
1585192895Sjamie		/* Try to keep a real-rooted full pathname. */
1586192895Sjamie		if (path[0] == '/' && strcmp(mypr->pr_path, "/"))
1587192895Sjamie			snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s",
1588192895Sjamie			    mypr->pr_path, path);
1589192895Sjamie		else
1590192895Sjamie			strlcpy(pr->pr_path, path, sizeof(pr->pr_path));
1591191673Sjamie		pr->pr_root = root;
1592191673Sjamie	}
1593193066Sjamie	if (PR_HOST & ch_flags & ~pr_flags) {
1594193066Sjamie		if (pr->pr_flags & PR_HOST) {
1595193066Sjamie			/*
1596193066Sjamie			 * Copy the parent's host info.  As with pr_ip4 above,
1597193066Sjamie			 * the lack of a lock on the parent is not a problem;
1598193066Sjamie			 * it is always set with allprison_lock at least
1599193066Sjamie			 * shared, and is held exclusively here.
1600193066Sjamie			 */
1601194118Sjamie			strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname,
1602194118Sjamie			    sizeof(pr->pr_hostname));
1603194118Sjamie			strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname,
1604194118Sjamie			    sizeof(pr->pr_domainname));
1605194118Sjamie			strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid,
1606194118Sjamie			    sizeof(pr->pr_hostuuid));
1607193066Sjamie			pr->pr_hostid = pr->pr_parent->pr_hostid;
1608193066Sjamie		}
1609193066Sjamie	} else if (host != NULL || domain != NULL || uuid != NULL || gothid) {
1610193066Sjamie		/* Set this prison, and any descendants without PR_HOST. */
1611193066Sjamie		if (host != NULL)
1612194118Sjamie			strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname));
1613193066Sjamie		if (domain != NULL)
1614194118Sjamie			strlcpy(pr->pr_domainname, domain,
1615194118Sjamie			    sizeof(pr->pr_domainname));
1616193066Sjamie		if (uuid != NULL)
1617194118Sjamie			strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid));
1618193066Sjamie		if (gothid)
1619193066Sjamie			pr->pr_hostid = hid;
1620193066Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
1621193066Sjamie			if (tpr->pr_flags & PR_HOST)
1622193066Sjamie				descend = 0;
1623193066Sjamie			else {
1624193066Sjamie				if (host != NULL)
1625194118Sjamie					strlcpy(tpr->pr_hostname,
1626194118Sjamie					    pr->pr_hostname,
1627194118Sjamie					    sizeof(tpr->pr_hostname));
1628193066Sjamie				if (domain != NULL)
1629194118Sjamie					strlcpy(tpr->pr_domainname,
1630194118Sjamie					    pr->pr_domainname,
1631194118Sjamie					    sizeof(tpr->pr_domainname));
1632193066Sjamie				if (uuid != NULL)
1633194118Sjamie					strlcpy(tpr->pr_hostuuid,
1634194118Sjamie					    pr->pr_hostuuid,
1635194118Sjamie					    sizeof(tpr->pr_hostuuid));
1636193066Sjamie				if (gothid)
1637193066Sjamie					tpr->pr_hostid = hid;
1638193066Sjamie			}
1639193066Sjamie		}
1640193066Sjamie	}
1641192895Sjamie	if ((tallow = ch_allow & ~pr_allow)) {
1642192895Sjamie		/* Clear allow bits in all children. */
1643192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
1644192895Sjamie			tpr->pr_allow &= ~tallow;
1645192895Sjamie	}
1646192895Sjamie	pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow;
1647191673Sjamie	/*
1648191673Sjamie	 * Persistent prisons get an extra reference, and prisons losing their
1649191673Sjamie	 * persist flag lose that reference.  Only do this for existing prisons
1650191673Sjamie	 * for now, so new ones will remain unseen until after the module
1651191673Sjamie	 * handlers have completed.
1652191673Sjamie	 */
1653191673Sjamie	if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
1654191673Sjamie		if (pr_flags & PR_PERSIST) {
1655191673Sjamie			pr->pr_ref++;
1656191673Sjamie			pr->pr_uref++;
1657191673Sjamie		} else {
1658191673Sjamie			pr->pr_ref--;
1659191673Sjamie			pr->pr_uref--;
1660191673Sjamie		}
1661191673Sjamie	}
1662191673Sjamie	pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
1663191673Sjamie	mtx_unlock(&pr->pr_mtx);
1664185435Sbz
1665221362Strasz#ifdef RACCT
1666221362Strasz	if (created)
1667221362Strasz		prison_racct_attach(pr);
1668221362Strasz#endif
1669221362Strasz
1670192895Sjamie	/* Locks may have prevented a complete restriction of child IP
1671192895Sjamie	 * addresses.  If so, allocate some more memory and try again.
1672192895Sjamie	 */
1673192895Sjamie#ifdef INET
1674192895Sjamie	while (redo_ip4) {
1675192895Sjamie		ip4s = pr->pr_ip4s;
1676192895Sjamie		ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
1677192895Sjamie		mtx_lock(&pr->pr_mtx);
1678192895Sjamie		redo_ip4 = 0;
1679192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
1680195945Sjamie#ifdef VIMAGE
1681195945Sjamie			if (tpr->pr_flags & PR_VNET) {
1682195945Sjamie				descend = 0;
1683195945Sjamie				continue;
1684195945Sjamie			}
1685195945Sjamie#endif
1686192895Sjamie			if (prison_restrict_ip4(tpr, ip4)) {
1687192895Sjamie				if (ip4 != NULL)
1688192895Sjamie					ip4 = NULL;
1689192895Sjamie				else
1690192895Sjamie					redo_ip4 = 1;
1691192895Sjamie			}
1692192895Sjamie		}
1693192895Sjamie		mtx_unlock(&pr->pr_mtx);
1694192895Sjamie	}
1695192895Sjamie#endif
1696192895Sjamie#ifdef INET6
1697192895Sjamie	while (redo_ip6) {
1698192895Sjamie		ip6s = pr->pr_ip6s;
1699192895Sjamie		ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
1700192895Sjamie		mtx_lock(&pr->pr_mtx);
1701192895Sjamie		redo_ip6 = 0;
1702192895Sjamie		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
1703195945Sjamie#ifdef VIMAGE
1704195945Sjamie			if (tpr->pr_flags & PR_VNET) {
1705195945Sjamie				descend = 0;
1706195945Sjamie				continue;
1707195945Sjamie			}
1708195945Sjamie#endif
1709192895Sjamie			if (prison_restrict_ip6(tpr, ip6)) {
1710192895Sjamie				if (ip6 != NULL)
1711192895Sjamie					ip6 = NULL;
1712192895Sjamie				else
1713192895Sjamie					redo_ip6 = 1;
1714192895Sjamie			}
1715192895Sjamie		}
1716192895Sjamie		mtx_unlock(&pr->pr_mtx);
1717192895Sjamie	}
1718192895Sjamie#endif
1719192895Sjamie
1720191673Sjamie	/* Let the modules do their work. */
1721191673Sjamie	sx_downgrade(&allprison_lock);
1722191673Sjamie	if (created) {
1723191673Sjamie		error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
1724191673Sjamie		if (error) {
1725191673Sjamie			prison_deref(pr, PD_LIST_SLOCKED);
1726191673Sjamie			goto done_errmsg;
1727191673Sjamie		}
1728191673Sjamie	}
1729191673Sjamie	error = osd_jail_call(pr, PR_METHOD_SET, opts);
1730191673Sjamie	if (error) {
1731191673Sjamie		prison_deref(pr, created
1732191673Sjamie		    ? PD_LIST_SLOCKED
1733191673Sjamie		    : PD_DEREF | PD_LIST_SLOCKED);
1734191673Sjamie		goto done_errmsg;
1735191673Sjamie	}
1736191673Sjamie
1737191673Sjamie	/* Attach this process to the prison if requested. */
1738191673Sjamie	if (flags & JAIL_ATTACH) {
1739191673Sjamie		mtx_lock(&pr->pr_mtx);
1740191673Sjamie		error = do_jail_attach(td, pr);
1741191673Sjamie		if (error) {
1742191673Sjamie			vfs_opterror(opts, "attach failed");
1743191673Sjamie			if (!created)
1744191673Sjamie				prison_deref(pr, PD_DEREF);
1745191673Sjamie			goto done_errmsg;
1746191673Sjamie		}
1747191673Sjamie	}
1748191673Sjamie
1749191673Sjamie	/*
1750191673Sjamie	 * Now that it is all there, drop the temporary reference from existing
1751191673Sjamie	 * prisons.  Or add a reference to newly created persistent prisons
1752191673Sjamie	 * (which was not done earlier so that the prison would not be publicly
1753191673Sjamie	 * visible).
1754191673Sjamie	 */
1755191673Sjamie	if (!created) {
1756191673Sjamie		prison_deref(pr, (flags & JAIL_ATTACH)
1757191673Sjamie		    ? PD_DEREF
1758191673Sjamie		    : PD_DEREF | PD_LIST_SLOCKED);
1759191673Sjamie	} else {
1760191673Sjamie		if (pr_flags & PR_PERSIST) {
1761191673Sjamie			mtx_lock(&pr->pr_mtx);
1762191673Sjamie			pr->pr_ref++;
1763191673Sjamie			pr->pr_uref++;
1764191673Sjamie			mtx_unlock(&pr->pr_mtx);
1765191673Sjamie		}
1766191673Sjamie		if (!(flags & JAIL_ATTACH))
1767191673Sjamie			sx_sunlock(&allprison_lock);
1768191673Sjamie	}
1769191673Sjamie	td->td_retval[0] = pr->pr_id;
1770191673Sjamie	goto done_errmsg;
1771191673Sjamie
1772192895Sjamie done_deref_locked:
1773192895Sjamie	prison_deref(pr, created
1774192895Sjamie	    ? PD_LOCKED | PD_LIST_XLOCKED
1775192895Sjamie	    : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
1776192895Sjamie	goto done_releroot;
1777191673Sjamie done_unlock_list:
1778191673Sjamie	sx_xunlock(&allprison_lock);
1779191673Sjamie done_releroot:
1780191673Sjamie	if (root != NULL) {
1781191673Sjamie		vfslocked = VFS_LOCK_GIANT(root->v_mount);
1782191673Sjamie		vrele(root);
1783191673Sjamie		VFS_UNLOCK_GIANT(vfslocked);
1784191673Sjamie	}
1785191673Sjamie done_errmsg:
1786191673Sjamie	if (error) {
1787191673Sjamie		vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
1788191673Sjamie		if (errmsg_len > 0) {
1789191673Sjamie			errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1;
1790191673Sjamie			if (errmsg_pos > 0) {
1791191673Sjamie				if (optuio->uio_segflg == UIO_SYSSPACE)
1792191673Sjamie					bcopy(errmsg,
1793191673Sjamie					   optuio->uio_iov[errmsg_pos].iov_base,
1794191673Sjamie					   errmsg_len);
1795191673Sjamie				else
1796191673Sjamie					copyout(errmsg,
1797191673Sjamie					   optuio->uio_iov[errmsg_pos].iov_base,
1798191673Sjamie					   errmsg_len);
1799191673Sjamie			}
1800191673Sjamie		}
1801191673Sjamie	}
1802191673Sjamie done_free:
1803191673Sjamie#ifdef INET
1804191673Sjamie	free(ip4, M_PRISON);
1805191673Sjamie#endif
1806191673Sjamie#ifdef INET6
1807191673Sjamie	free(ip6, M_PRISON);
1808191673Sjamie#endif
1809191673Sjamie	vfs_freeopts(opts);
1810191673Sjamie	return (error);
1811191673Sjamie}
1812191673Sjamie
1813191673Sjamie
181482710Sdillon/*
1815191673Sjamie * struct jail_get_args {
1816191673Sjamie *	struct iovec *iovp;
1817191673Sjamie *	unsigned int iovcnt;
1818191673Sjamie *	int flags;
1819114168Smike * };
182082710Sdillon */
182146155Sphkint
1822225617Skmacysys_jail_get(struct thread *td, struct jail_get_args *uap)
182346155Sphk{
1824191673Sjamie	struct uio *auio;
1825185435Sbz	int error;
1826185435Sbz
1827191673Sjamie	/* Check that we have an even number of iovecs. */
1828191673Sjamie	if (uap->iovcnt & 1)
1829191673Sjamie		return (EINVAL);
1830191673Sjamie
1831191673Sjamie	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
1832185435Sbz	if (error)
1833185435Sbz		return (error);
1834191673Sjamie	error = kern_jail_get(td, auio, uap->flags);
1835191673Sjamie	if (error == 0)
1836191673Sjamie		error = copyout(auio->uio_iov, uap->iovp,
1837191673Sjamie		    uap->iovcnt * sizeof (struct iovec));
1838191673Sjamie	free(auio, M_IOV);
1839191673Sjamie	return (error);
1840191673Sjamie}
1841185435Sbz
1842191673Sjamieint
1843191673Sjamiekern_jail_get(struct thread *td, struct uio *optuio, int flags)
1844191673Sjamie{
1845192895Sjamie	struct prison *pr, *mypr;
1846191673Sjamie	struct vfsopt *opt;
1847191673Sjamie	struct vfsoptlist *opts;
1848191673Sjamie	char *errmsg, *name;
1849192895Sjamie	int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos;
1850185435Sbz
1851191673Sjamie	if (flags & ~JAIL_GET_MASK)
1852191673Sjamie		return (EINVAL);
1853185435Sbz
1854191673Sjamie	/* Get the parameter list. */
1855191673Sjamie	error = vfs_buildopts(optuio, &opts);
1856191673Sjamie	if (error)
1857191673Sjamie		return (error);
1858191673Sjamie	errmsg_pos = vfs_getopt_pos(opts, "errmsg");
1859192895Sjamie	mypr = td->td_ucred->cr_prison;
1860185435Sbz
1861191673Sjamie	/*
1862191673Sjamie	 * Find the prison specified by one of: lastjid, jid, name.
1863191673Sjamie	 */
1864191673Sjamie	sx_slock(&allprison_lock);
1865191673Sjamie	error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
1866191673Sjamie	if (error == 0) {
1867191673Sjamie		TAILQ_FOREACH(pr, &allprison, pr_list) {
1868192895Sjamie			if (pr->pr_id > jid && prison_ischild(mypr, pr)) {
1869191673Sjamie				mtx_lock(&pr->pr_mtx);
1870191673Sjamie				if (pr->pr_ref > 0 &&
1871191673Sjamie				    (pr->pr_uref > 0 || (flags & JAIL_DYING)))
1872191673Sjamie					break;
1873191673Sjamie				mtx_unlock(&pr->pr_mtx);
1874191673Sjamie			}
1875191673Sjamie		}
1876191673Sjamie		if (pr != NULL)
1877191673Sjamie			goto found_prison;
1878191673Sjamie		error = ENOENT;
1879191673Sjamie		vfs_opterror(opts, "no jail after %d", jid);
1880191673Sjamie		goto done_unlock_list;
1881191673Sjamie	} else if (error != ENOENT)
1882191673Sjamie		goto done_unlock_list;
1883185435Sbz
1884191673Sjamie	error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
1885191673Sjamie	if (error == 0) {
1886191673Sjamie		if (jid != 0) {
1887192895Sjamie			pr = prison_find_child(mypr, jid);
1888191673Sjamie			if (pr != NULL) {
1889191673Sjamie				if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) {
1890191673Sjamie					mtx_unlock(&pr->pr_mtx);
1891191673Sjamie					error = ENOENT;
1892191673Sjamie					vfs_opterror(opts, "jail %d is dying",
1893191673Sjamie					    jid);
1894191673Sjamie					goto done_unlock_list;
1895191673Sjamie				}
1896191673Sjamie				goto found_prison;
1897191673Sjamie			}
1898191673Sjamie			error = ENOENT;
1899191673Sjamie			vfs_opterror(opts, "jail %d not found", jid);
1900191673Sjamie			goto done_unlock_list;
1901191673Sjamie		}
1902191673Sjamie	} else if (error != ENOENT)
1903191673Sjamie		goto done_unlock_list;
190446155Sphk
1905191673Sjamie	error = vfs_getopt(opts, "name", (void **)&name, &len);
1906191673Sjamie	if (error == 0) {
1907191673Sjamie		if (len == 0 || name[len - 1] != '\0') {
1908191673Sjamie			error = EINVAL;
1909191673Sjamie			goto done_unlock_list;
1910191673Sjamie		}
1911192895Sjamie		pr = prison_find_name(mypr, name);
1912191673Sjamie		if (pr != NULL) {
1913191673Sjamie			if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) {
1914191673Sjamie				mtx_unlock(&pr->pr_mtx);
1915191673Sjamie				error = ENOENT;
1916191673Sjamie				vfs_opterror(opts, "jail \"%s\" is dying",
1917191673Sjamie				    name);
1918191673Sjamie				goto done_unlock_list;
1919191673Sjamie			}
1920191673Sjamie			goto found_prison;
1921191673Sjamie		}
1922191673Sjamie		error = ENOENT;
1923191673Sjamie		vfs_opterror(opts, "jail \"%s\" not found", name);
1924191673Sjamie		goto done_unlock_list;
1925191673Sjamie	} else if (error != ENOENT)
1926191673Sjamie		goto done_unlock_list;
1927185435Sbz
1928191673Sjamie	vfs_opterror(opts, "no jail specified");
1929191673Sjamie	error = ENOENT;
1930191673Sjamie	goto done_unlock_list;
1931191673Sjamie
1932191673Sjamie found_prison:
1933191673Sjamie	/* Get the parameters of the prison. */
1934191673Sjamie	pr->pr_ref++;
1935191673Sjamie	locked = PD_LOCKED;
1936191673Sjamie	td->td_retval[0] = pr->pr_id;
1937191673Sjamie	error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
1938191673Sjamie	if (error != 0 && error != ENOENT)
1939191673Sjamie		goto done_deref;
1940192895Sjamie	i = (pr->pr_parent == mypr) ? 0 : pr->pr_parent->pr_id;
1941192895Sjamie	error = vfs_setopt(opts, "parent", &i, sizeof(i));
1942191673Sjamie	if (error != 0 && error != ENOENT)
1943191673Sjamie		goto done_deref;
1944192895Sjamie	error = vfs_setopts(opts, "name", prison_name(mypr, pr));
1945192895Sjamie	if (error != 0 && error != ENOENT)
1946192895Sjamie		goto done_deref;
1947192895Sjamie	error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id,
1948191673Sjamie	    sizeof(pr->pr_cpuset->cs_id));
1949191673Sjamie	if (error != 0 && error != ENOENT)
1950191673Sjamie		goto done_deref;
1951192895Sjamie	error = vfs_setopts(opts, "path", prison_path(mypr, pr));
1952191673Sjamie	if (error != 0 && error != ENOENT)
1953191673Sjamie		goto done_deref;
1954191673Sjamie#ifdef INET
1955191673Sjamie	error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4,
1956191673Sjamie	    pr->pr_ip4s * sizeof(*pr->pr_ip4));
1957191673Sjamie	if (error != 0 && error != ENOENT)
1958191673Sjamie		goto done_deref;
1959191673Sjamie#endif
1960191673Sjamie#ifdef INET6
1961191673Sjamie	error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6,
1962191673Sjamie	    pr->pr_ip6s * sizeof(*pr->pr_ip6));
1963191673Sjamie	if (error != 0 && error != ENOENT)
1964191673Sjamie		goto done_deref;
1965191673Sjamie#endif
1966191673Sjamie	error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel,
1967191673Sjamie	    sizeof(pr->pr_securelevel));
1968191673Sjamie	if (error != 0 && error != ENOENT)
1969191673Sjamie		goto done_deref;
1970194762Sjamie	error = vfs_setopt(opts, "children.cur", &pr->pr_childcount,
1971194762Sjamie	    sizeof(pr->pr_childcount));
1972194762Sjamie	if (error != 0 && error != ENOENT)
1973194762Sjamie		goto done_deref;
1974194762Sjamie	error = vfs_setopt(opts, "children.max", &pr->pr_childmax,
1975194762Sjamie	    sizeof(pr->pr_childmax));
1976194762Sjamie	if (error != 0 && error != ENOENT)
1977194762Sjamie		goto done_deref;
1978194118Sjamie	error = vfs_setopts(opts, "host.hostname", pr->pr_hostname);
1979191673Sjamie	if (error != 0 && error != ENOENT)
1980191673Sjamie		goto done_deref;
1981194118Sjamie	error = vfs_setopts(opts, "host.domainname", pr->pr_domainname);
1982193066Sjamie	if (error != 0 && error != ENOENT)
1983193066Sjamie		goto done_deref;
1984194118Sjamie	error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid);
1985193066Sjamie	if (error != 0 && error != ENOENT)
1986193066Sjamie		goto done_deref;
1987205014Snwhitehorn#ifdef COMPAT_FREEBSD32
1988217896Sdchagin	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
1989193066Sjamie		uint32_t hid32 = pr->pr_hostid;
1990193066Sjamie
1991193066Sjamie		error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32));
1992193066Sjamie	} else
1993193066Sjamie#endif
1994193066Sjamie	error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid,
1995193066Sjamie	    sizeof(pr->pr_hostid));
1996193066Sjamie	if (error != 0 && error != ENOENT)
1997193066Sjamie		goto done_deref;
1998192895Sjamie	error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs,
1999192895Sjamie	    sizeof(pr->pr_enforce_statfs));
2000191673Sjamie	if (error != 0 && error != ENOENT)
2001191673Sjamie		goto done_deref;
2002192895Sjamie	for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]);
2003192895Sjamie	    fi++) {
2004192895Sjamie		if (pr_flag_names[fi] == NULL)
2005192895Sjamie			continue;
2006192895Sjamie		i = (pr->pr_flags & (1 << fi)) ? 1 : 0;
2007192895Sjamie		error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i));
2008192895Sjamie		if (error != 0 && error != ENOENT)
2009192895Sjamie			goto done_deref;
2010192895Sjamie		i = !i;
2011192895Sjamie		error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i));
2012192895Sjamie		if (error != 0 && error != ENOENT)
2013192895Sjamie			goto done_deref;
2014192895Sjamie	}
2015195870Sjamie	for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]);
2016195870Sjamie	    fi++) {
2017195870Sjamie		i = pr->pr_flags &
2018195870Sjamie		    (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new);
2019195870Sjamie		i = pr_flag_jailsys[fi].disable &&
2020195870Sjamie		      (i == pr_flag_jailsys[fi].disable) ? JAIL_SYS_DISABLE
2021195870Sjamie		    : (i == pr_flag_jailsys[fi].new) ? JAIL_SYS_NEW
2022195870Sjamie		    : JAIL_SYS_INHERIT;
2023195870Sjamie		error =
2024195870Sjamie		    vfs_setopt(opts, pr_flag_jailsys[fi].name, &i, sizeof(i));
2025195870Sjamie		if (error != 0 && error != ENOENT)
2026195870Sjamie			goto done_deref;
2027195870Sjamie	}
2028192895Sjamie	for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]);
2029192895Sjamie	    fi++) {
2030192895Sjamie		if (pr_allow_names[fi] == NULL)
2031192895Sjamie			continue;
2032192895Sjamie		i = (pr->pr_allow & (1 << fi)) ? 1 : 0;
2033192895Sjamie		error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i));
2034192895Sjamie		if (error != 0 && error != ENOENT)
2035192895Sjamie			goto done_deref;
2036192895Sjamie		i = !i;
2037192895Sjamie		error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i));
2038192895Sjamie		if (error != 0 && error != ENOENT)
2039192895Sjamie			goto done_deref;
2040192895Sjamie	}
2041191673Sjamie	i = (pr->pr_uref == 0);
2042191673Sjamie	error = vfs_setopt(opts, "dying", &i, sizeof(i));
2043191673Sjamie	if (error != 0 && error != ENOENT)
2044191673Sjamie		goto done_deref;
2045191673Sjamie	i = !i;
2046191673Sjamie	error = vfs_setopt(opts, "nodying", &i, sizeof(i));
2047191673Sjamie	if (error != 0 && error != ENOENT)
2048191673Sjamie		goto done_deref;
2049191673Sjamie
2050191673Sjamie	/* Get the module parameters. */
2051191673Sjamie	mtx_unlock(&pr->pr_mtx);
2052191673Sjamie	locked = 0;
2053191673Sjamie	error = osd_jail_call(pr, PR_METHOD_GET, opts);
205446155Sphk	if (error)
2055191673Sjamie		goto done_deref;
2056191673Sjamie	prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED);
205784828Sjhb
2058191673Sjamie	/* By now, all parameters should have been noted. */
2059191673Sjamie	TAILQ_FOREACH(opt, opts, link) {
2060191673Sjamie		if (!opt->seen && strcmp(opt->name, "errmsg")) {
2061191673Sjamie			error = EINVAL;
2062191673Sjamie			vfs_opterror(opts, "unknown parameter: %s", opt->name);
2063191673Sjamie			goto done_errmsg;
2064191673Sjamie		}
2065185435Sbz	}
2066191673Sjamie
2067191673Sjamie	/* Write the fetched parameters back to userspace. */
2068191673Sjamie	error = 0;
2069191673Sjamie	TAILQ_FOREACH(opt, opts, link) {
2070191673Sjamie		if (opt->pos >= 0 && opt->pos != errmsg_pos) {
2071191673Sjamie			pos = 2 * opt->pos + 1;
2072191673Sjamie			optuio->uio_iov[pos].iov_len = opt->len;
2073191673Sjamie			if (opt->value != NULL) {
2074191673Sjamie				if (optuio->uio_segflg == UIO_SYSSPACE) {
2075191673Sjamie					bcopy(opt->value,
2076191673Sjamie					    optuio->uio_iov[pos].iov_base,
2077191673Sjamie					    opt->len);
2078191673Sjamie				} else {
2079191673Sjamie					error = copyout(opt->value,
2080191673Sjamie					    optuio->uio_iov[pos].iov_base,
2081191673Sjamie					    opt->len);
2082191673Sjamie					if (error)
2083191673Sjamie						break;
2084191673Sjamie				}
2085191673Sjamie			}
2086191673Sjamie		}
2087185435Sbz	}
2088191673Sjamie	goto done_errmsg;
2089191673Sjamie
2090191673Sjamie done_deref:
2091191673Sjamie	prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED);
2092191673Sjamie	goto done_errmsg;
2093191673Sjamie
2094191673Sjamie done_unlock_list:
2095191673Sjamie	sx_sunlock(&allprison_lock);
2096191673Sjamie done_errmsg:
2097191673Sjamie	if (error && errmsg_pos >= 0) {
2098191673Sjamie		vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
2099191673Sjamie		errmsg_pos = 2 * errmsg_pos + 1;
2100191673Sjamie		if (errmsg_len > 0) {
2101191673Sjamie			if (optuio->uio_segflg == UIO_SYSSPACE)
2102191673Sjamie				bcopy(errmsg,
2103191673Sjamie				    optuio->uio_iov[errmsg_pos].iov_base,
2104191673Sjamie				    errmsg_len);
2105191673Sjamie			else
2106191673Sjamie				copyout(errmsg,
2107191673Sjamie				    optuio->uio_iov[errmsg_pos].iov_base,
2108191673Sjamie				    errmsg_len);
2109191673Sjamie		}
2110185435Sbz	}
2111191673Sjamie	vfs_freeopts(opts);
2112191673Sjamie	return (error);
2113191673Sjamie}
2114113275Smike
2115192895Sjamie
2116191673Sjamie/*
2117191673Sjamie * struct jail_remove_args {
2118191673Sjamie *	int jid;
2119191673Sjamie * };
2120191673Sjamie */
2121191673Sjamieint
2122225617Skmacysys_jail_remove(struct thread *td, struct jail_remove_args *uap)
2123191673Sjamie{
2124192895Sjamie	struct prison *pr, *cpr, *lpr, *tpr;
2125192895Sjamie	int descend, error;
2126185435Sbz
2127191673Sjamie	error = priv_check(td, PRIV_JAIL_REMOVE);
2128185435Sbz	if (error)
2129191673Sjamie		return (error);
2130185435Sbz
2131185435Sbz	sx_xlock(&allprison_lock);
2132192895Sjamie	pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
2133191673Sjamie	if (pr == NULL) {
2134185435Sbz		sx_xunlock(&allprison_lock);
2135191673Sjamie		return (EINVAL);
2136185435Sbz	}
2137185435Sbz
2138192895Sjamie	/* Remove all descendants of this prison, then remove this prison. */
2139192895Sjamie	pr->pr_ref++;
2140192895Sjamie	pr->pr_flags |= PR_REMOVE;
2141192895Sjamie	if (!LIST_EMPTY(&pr->pr_children)) {
2142192895Sjamie		mtx_unlock(&pr->pr_mtx);
2143192895Sjamie		lpr = NULL;
2144192895Sjamie		FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
2145192895Sjamie			mtx_lock(&cpr->pr_mtx);
2146192895Sjamie			if (cpr->pr_ref > 0) {
2147192895Sjamie				tpr = cpr;
2148192895Sjamie				cpr->pr_ref++;
2149192895Sjamie				cpr->pr_flags |= PR_REMOVE;
2150192895Sjamie			} else {
2151192895Sjamie				/* Already removed - do not do it again. */
2152192895Sjamie				tpr = NULL;
2153192895Sjamie			}
2154192895Sjamie			mtx_unlock(&cpr->pr_mtx);
2155192895Sjamie			if (lpr != NULL) {
2156192895Sjamie				mtx_lock(&lpr->pr_mtx);
2157192895Sjamie				prison_remove_one(lpr);
2158192895Sjamie				sx_xlock(&allprison_lock);
2159192895Sjamie			}
2160192895Sjamie			lpr = tpr;
2161192895Sjamie		}
2162192895Sjamie		if (lpr != NULL) {
2163192895Sjamie			mtx_lock(&lpr->pr_mtx);
2164192895Sjamie			prison_remove_one(lpr);
2165192895Sjamie			sx_xlock(&allprison_lock);
2166192895Sjamie		}
2167192895Sjamie		mtx_lock(&pr->pr_mtx);
2168192895Sjamie	}
2169192895Sjamie	prison_remove_one(pr);
2170192895Sjamie	return (0);
2171192895Sjamie}
2172192895Sjamie
2173192895Sjamiestatic void
2174192895Sjamieprison_remove_one(struct prison *pr)
2175192895Sjamie{
2176192895Sjamie	struct proc *p;
2177192895Sjamie	int deuref;
2178192895Sjamie
2179191673Sjamie	/* If the prison was persistent, it is not anymore. */
2180191673Sjamie	deuref = 0;
2181191673Sjamie	if (pr->pr_flags & PR_PERSIST) {
2182191673Sjamie		pr->pr_ref--;
2183191673Sjamie		deuref = PD_DEUREF;
2184191673Sjamie		pr->pr_flags &= ~PR_PERSIST;
2185179881Sdelphij	}
2186113275Smike
2187192895Sjamie	/*
2188192895Sjamie	 * jail_remove added a reference.  If that's the only one, remove
2189192895Sjamie	 * the prison now.
2190192895Sjamie	 */
2191192895Sjamie	KASSERT(pr->pr_ref > 0,
2192192895Sjamie	    ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id));
2193192895Sjamie	if (pr->pr_ref == 1) {
2194191673Sjamie		prison_deref(pr,
2195191673Sjamie		    deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
2196192895Sjamie		return;
2197191673Sjamie	}
2198191673Sjamie
2199113275Smike	mtx_unlock(&pr->pr_mtx);
2200191673Sjamie	sx_xunlock(&allprison_lock);
2201191673Sjamie	/*
2202191673Sjamie	 * Kill all processes unfortunate enough to be attached to this prison.
2203191673Sjamie	 */
2204191673Sjamie	sx_slock(&allproc_lock);
2205191673Sjamie	LIST_FOREACH(p, &allproc, p_list) {
2206191673Sjamie		PROC_LOCK(p);
2207191673Sjamie		if (p->p_state != PRS_NEW && p->p_ucred &&
2208191673Sjamie		    p->p_ucred->cr_prison == pr)
2209225617Skmacy			kern_psignal(p, SIGKILL);
2210191673Sjamie		PROC_UNLOCK(p);
2211191673Sjamie	}
2212191673Sjamie	sx_sunlock(&allproc_lock);
2213192895Sjamie	/* Remove the temporary reference added by jail_remove. */
2214191673Sjamie	prison_deref(pr, deuref | PD_DEREF);
2215113275Smike}
2216113275Smike
2217190466Sjamie
2218113275Smike/*
2219114168Smike * struct jail_attach_args {
2220114168Smike *	int jid;
2221114168Smike * };
2222113275Smike */
2223113275Smikeint
2224225617Skmacysys_jail_attach(struct thread *td, struct jail_attach_args *uap)
2225113275Smike{
2226113275Smike	struct prison *pr;
2227191673Sjamie	int error;
2228167309Spjd
2229164032Srwatson	error = priv_check(td, PRIV_JAIL_ATTACH);
2230126023Snectar	if (error)
2231126023Snectar		return (error);
2232126023Snectar
2233168401Spjd	sx_slock(&allprison_lock);
2234192895Sjamie	pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
2235113275Smike	if (pr == NULL) {
2236168401Spjd		sx_sunlock(&allprison_lock);
2237113275Smike		return (EINVAL);
2238113275Smike	}
2239185435Sbz
2240185435Sbz	/*
2241185435Sbz	 * Do not allow a process to attach to a prison that is not
2242191673Sjamie	 * considered to be "alive".
2243185435Sbz	 */
2244191673Sjamie	if (pr->pr_uref == 0) {
2245185435Sbz		mtx_unlock(&pr->pr_mtx);
2246185435Sbz		sx_sunlock(&allprison_lock);
2247185435Sbz		return (EINVAL);
2248185435Sbz	}
2249191673Sjamie
2250191673Sjamie	return (do_jail_attach(td, pr));
2251191673Sjamie}
2252191673Sjamie
2253191673Sjamiestatic int
2254191673Sjamiedo_jail_attach(struct thread *td, struct prison *pr)
2255191673Sjamie{
2256192895Sjamie	struct prison *ppr;
2257191673Sjamie	struct proc *p;
2258191673Sjamie	struct ucred *newcred, *oldcred;
2259191673Sjamie	int vfslocked, error;
2260191673Sjamie
2261191673Sjamie	/*
2262191673Sjamie	 * XXX: Note that there is a slight race here if two threads
2263191673Sjamie	 * in the same privileged process attempt to attach to two
2264191673Sjamie	 * different jails at the same time.  It is important for
2265191673Sjamie	 * user processes not to do this, or they might end up with
2266191673Sjamie	 * a process root from one prison, but attached to the jail
2267191673Sjamie	 * of another.
2268191673Sjamie	 */
2269113275Smike	pr->pr_ref++;
2270191673Sjamie	pr->pr_uref++;
2271113275Smike	mtx_unlock(&pr->pr_mtx);
2272191673Sjamie
2273191673Sjamie	/* Let modules do whatever they need to prepare for attaching. */
2274191673Sjamie	error = osd_jail_call(pr, PR_METHOD_ATTACH, td);
2275191673Sjamie	if (error) {
2276191673Sjamie		prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
2277191673Sjamie		return (error);
2278191673Sjamie	}
2279168401Spjd	sx_sunlock(&allprison_lock);
2280113275Smike
2281185435Sbz	/*
2282185435Sbz	 * Reparent the newly attached process to this jail.
2283185435Sbz	 */
2284192895Sjamie	ppr = td->td_ucred->cr_prison;
2285191673Sjamie	p = td->td_proc;
2286185435Sbz	error = cpuset_setproc_update_set(p, pr->pr_cpuset);
2287185435Sbz	if (error)
2288191673Sjamie		goto e_revert_osd;
2289185435Sbz
2290150652Scsjp	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
2291175202Sattilio	vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
2292113275Smike	if ((error = change_dir(pr->pr_root, td)) != 0)
2293113275Smike		goto e_unlock;
2294113275Smike#ifdef MAC
2295172930Srwatson	if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root)))
2296113275Smike		goto e_unlock;
2297113275Smike#endif
2298175294Sattilio	VOP_UNLOCK(pr->pr_root, 0);
2299191673Sjamie	if ((error = change_root(pr->pr_root, td)))
2300191673Sjamie		goto e_unlock_giant;
2301150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
2302113275Smike
230384828Sjhb	newcred = crget();
230484828Sjhb	PROC_LOCK(p);
230584828Sjhb	oldcred = p->p_ucred;
2306113275Smike	setsugid(p);
230784828Sjhb	crcopy(newcred, oldcred);
2308113630Sjhb	newcred->cr_prison = pr;
230984828Sjhb	p->p_ucred = newcred;
231084828Sjhb	PROC_UNLOCK(p);
2311220137Strasz#ifdef RACCT
2312220137Strasz	racct_proc_ucred_changed(p, oldcred, newcred);
2313220137Strasz#endif
231484828Sjhb	crfree(oldcred);
2315192895Sjamie	prison_deref(ppr, PD_DEREF | PD_DEUREF);
231646155Sphk	return (0);
2317191673Sjamie e_unlock:
2318175294Sattilio	VOP_UNLOCK(pr->pr_root, 0);
2319191673Sjamie e_unlock_giant:
2320150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
2321191673Sjamie e_revert_osd:
2322191673Sjamie	/* Tell modules this thread is still in its old jail after all. */
2323192895Sjamie	(void)osd_jail_call(ppr, PR_METHOD_ATTACH, td);
2324191673Sjamie	prison_deref(pr, PD_DEREF | PD_DEUREF);
232546155Sphk	return (error);
232646155Sphk}
232746155Sphk
2328192895Sjamie
2329113275Smike/*
2330113275Smike * Returns a locked prison instance, or NULL on failure.
2331113275Smike */
2332168399Spjdstruct prison *
2333113275Smikeprison_find(int prid)
2334113275Smike{
2335113275Smike	struct prison *pr;
2336113275Smike
2337168401Spjd	sx_assert(&allprison_lock, SX_LOCKED);
2338191673Sjamie	TAILQ_FOREACH(pr, &allprison, pr_list) {
2339113275Smike		if (pr->pr_id == prid) {
2340113275Smike			mtx_lock(&pr->pr_mtx);
2341191673Sjamie			if (pr->pr_ref > 0)
2342191673Sjamie				return (pr);
2343191673Sjamie			mtx_unlock(&pr->pr_mtx);
2344113275Smike		}
2345113275Smike	}
2346113275Smike	return (NULL);
2347113275Smike}
2348113275Smike
2349191673Sjamie/*
2350192895Sjamie * Find a prison that is a descendant of mypr.  Returns a locked prison or NULL.
2351191673Sjamie */
2352191673Sjamiestruct prison *
2353192895Sjamieprison_find_child(struct prison *mypr, int prid)
2354191673Sjamie{
2355192895Sjamie	struct prison *pr;
2356192895Sjamie	int descend;
2357192895Sjamie
2358192895Sjamie	sx_assert(&allprison_lock, SX_LOCKED);
2359192895Sjamie	FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
2360192895Sjamie		if (pr->pr_id == prid) {
2361192895Sjamie			mtx_lock(&pr->pr_mtx);
2362192895Sjamie			if (pr->pr_ref > 0)
2363192895Sjamie				return (pr);
2364192895Sjamie			mtx_unlock(&pr->pr_mtx);
2365192895Sjamie		}
2366192895Sjamie	}
2367192895Sjamie	return (NULL);
2368192895Sjamie}
2369192895Sjamie
2370192895Sjamie/*
2371192895Sjamie * Look for the name relative to mypr.  Returns a locked prison or NULL.
2372192895Sjamie */
2373192895Sjamiestruct prison *
2374192895Sjamieprison_find_name(struct prison *mypr, const char *name)
2375192895Sjamie{
2376191673Sjamie	struct prison *pr, *deadpr;
2377192895Sjamie	size_t mylen;
2378192895Sjamie	int descend;
2379191673Sjamie
2380191673Sjamie	sx_assert(&allprison_lock, SX_LOCKED);
2381192895Sjamie	mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1;
2382191673Sjamie again:
2383191673Sjamie	deadpr = NULL;
2384192895Sjamie	FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
2385192895Sjamie		if (!strcmp(pr->pr_name + mylen, name)) {
2386191673Sjamie			mtx_lock(&pr->pr_mtx);
2387191673Sjamie			if (pr->pr_ref > 0) {
2388191673Sjamie				if (pr->pr_uref > 0)
2389191673Sjamie					return (pr);
2390191673Sjamie				deadpr = pr;
2391191673Sjamie			}
2392191673Sjamie			mtx_unlock(&pr->pr_mtx);
2393191673Sjamie		}
2394191673Sjamie	}
2395192895Sjamie	/* There was no valid prison - perhaps there was a dying one. */
2396191673Sjamie	if (deadpr != NULL) {
2397191673Sjamie		mtx_lock(&deadpr->pr_mtx);
2398191673Sjamie		if (deadpr->pr_ref == 0) {
2399191673Sjamie			mtx_unlock(&deadpr->pr_mtx);
2400191673Sjamie			goto again;
2401191673Sjamie		}
2402191673Sjamie	}
2403191673Sjamie	return (deadpr);
2404191673Sjamie}
2405191673Sjamie
2406191673Sjamie/*
2407192895Sjamie * See if a prison has the specific flag set.
2408192895Sjamie */
2409192895Sjamieint
2410192895Sjamieprison_flag(struct ucred *cred, unsigned flag)
2411192895Sjamie{
2412192895Sjamie
2413192895Sjamie	/* This is an atomic read, so no locking is necessary. */
2414192895Sjamie	return (cred->cr_prison->pr_flags & flag);
2415192895Sjamie}
2416192895Sjamie
2417192895Sjamieint
2418192895Sjamieprison_allow(struct ucred *cred, unsigned flag)
2419192895Sjamie{
2420192895Sjamie
2421192895Sjamie	/* This is an atomic read, so no locking is necessary. */
2422192895Sjamie	return (cred->cr_prison->pr_allow & flag);
2423192895Sjamie}
2424192895Sjamie
2425192895Sjamie/*
2426191673Sjamie * Remove a prison reference.  If that was the last reference, remove the
2427191673Sjamie * prison itself - but not in this context in case there are locks held.
2428191673Sjamie */
242972786Srwatsonvoid
2430185029Spjdprison_free_locked(struct prison *pr)
243172786Srwatson{
243272786Srwatson
2433185029Spjd	mtx_assert(&pr->pr_mtx, MA_OWNED);
243472786Srwatson	pr->pr_ref--;
243572786Srwatson	if (pr->pr_ref == 0) {
2436168483Spjd		mtx_unlock(&pr->pr_mtx);
2437124882Srwatson		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
2438144660Sjeff		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
243987275Srwatson		return;
244072786Srwatson	}
244187275Srwatson	mtx_unlock(&pr->pr_mtx);
244272786Srwatson}
244372786Srwatson
2444185029Spjdvoid
2445185029Spjdprison_free(struct prison *pr)
2446185029Spjd{
2447185029Spjd
2448185029Spjd	mtx_lock(&pr->pr_mtx);
2449185029Spjd	prison_free_locked(pr);
2450185029Spjd}
2451185029Spjd
/*
 * Task handler queued by prison_free_locked(): "context" is the prison whose
 * last reference was dropped.  No reference is removed here (flags are 0);
 * prison_deref() just finishes tearing the prison down.
 */
static void
prison_complete(void *context, int pending)
{
	struct prison *pr;

	pr = context;
	prison_deref(pr, 0);
}
2458191673Sjamie
2459191673Sjamie/*
2460191673Sjamie * Remove a prison reference (usually).  This internal version assumes no
2461191673Sjamie * mutexes are held, except perhaps the prison itself.  If there are no more
2462191673Sjamie * references, release and delist the prison.  On completion, the prison lock
2463191673Sjamie * and the allprison lock are both unlocked.
2464191673Sjamie */
2465191673Sjamiestatic void
2466191673Sjamieprison_deref(struct prison *pr, int flags)
2467191673Sjamie{
2468192895Sjamie	struct prison *ppr, *tpr;
2469150652Scsjp	int vfslocked;
2470124882Srwatson
2471191673Sjamie	if (!(flags & PD_LOCKED))
2472191673Sjamie		mtx_lock(&pr->pr_mtx);
2473225191Sjamie	for (;;) {
2474225191Sjamie		if (flags & PD_DEUREF) {
2475225191Sjamie			pr->pr_uref--;
2476225191Sjamie			KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0"));
2477192895Sjamie		}
2478192895Sjamie		if (flags & PD_DEREF)
2479192895Sjamie			pr->pr_ref--;
2480192895Sjamie		/* If the prison still has references, nothing else to do. */
2481192895Sjamie		if (pr->pr_ref > 0) {
2482192895Sjamie			mtx_unlock(&pr->pr_mtx);
2483192895Sjamie			if (flags & PD_LIST_SLOCKED)
2484192895Sjamie				sx_sunlock(&allprison_lock);
2485192895Sjamie			else if (flags & PD_LIST_XLOCKED)
2486192895Sjamie				sx_xunlock(&allprison_lock);
2487192895Sjamie			return;
2488191673Sjamie		}
2489191673Sjamie
2490192895Sjamie		mtx_unlock(&pr->pr_mtx);
2491192895Sjamie		if (flags & PD_LIST_SLOCKED) {
2492192895Sjamie			if (!sx_try_upgrade(&allprison_lock)) {
2493192895Sjamie				sx_sunlock(&allprison_lock);
2494192895Sjamie				sx_xlock(&allprison_lock);
2495192895Sjamie			}
2496192895Sjamie		} else if (!(flags & PD_LIST_XLOCKED))
2497192895Sjamie			sx_xlock(&allprison_lock);
2498168489Spjd
2499192895Sjamie		TAILQ_REMOVE(&allprison, pr, pr_list);
2500192895Sjamie		LIST_REMOVE(pr, pr_sibling);
2501192895Sjamie		ppr = pr->pr_parent;
2502192895Sjamie		for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
2503194762Sjamie			tpr->pr_childcount--;
2504196592Sjamie		sx_xunlock(&allprison_lock);
2505192895Sjamie
2506194251Sjamie#ifdef VIMAGE
2507196505Szec		if (pr->pr_vnet != ppr->pr_vnet)
2508194251Sjamie			vnet_destroy(pr->pr_vnet);
2509194251Sjamie#endif
2510192895Sjamie		if (pr->pr_root != NULL) {
2511192895Sjamie			vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
2512192895Sjamie			vrele(pr->pr_root);
2513192895Sjamie			VFS_UNLOCK_GIANT(vfslocked);
2514192895Sjamie		}
2515192895Sjamie		mtx_destroy(&pr->pr_mtx);
2516191673Sjamie#ifdef INET
2517192895Sjamie		free(pr->pr_ip4, M_PRISON);
2518191673Sjamie#endif
2519185435Sbz#ifdef INET6
2520192895Sjamie		free(pr->pr_ip6, M_PRISON);
2521185435Sbz#endif
2522192895Sjamie		if (pr->pr_cpuset != NULL)
2523192895Sjamie			cpuset_rel(pr->pr_cpuset);
2524192895Sjamie		osd_jail_exit(pr);
2525221362Strasz#ifdef RACCT
2526221362Strasz		prison_racct_detach(pr);
2527220163Strasz#endif
2528192895Sjamie		free(pr, M_PRISON);
2529192895Sjamie
2530192895Sjamie		/* Removing a prison frees a reference on its parent. */
2531192895Sjamie		pr = ppr;
2532192895Sjamie		mtx_lock(&pr->pr_mtx);
2533225191Sjamie		flags = PD_DEREF | PD_DEUREF;
2534192895Sjamie	}
2535124882Srwatson}
2536124882Srwatson
253772786Srwatsonvoid
2538185029Spjdprison_hold_locked(struct prison *pr)
253972786Srwatson{
254072786Srwatson
2541185029Spjd	mtx_assert(&pr->pr_mtx, MA_OWNED);
2542168489Spjd	KASSERT(pr->pr_ref > 0,
2543191671Sjamie	    ("Trying to hold dead prison (jid=%d).", pr->pr_id));
254472786Srwatson	pr->pr_ref++;
2545185029Spjd}
2546185029Spjd
2547185029Spjdvoid
2548185029Spjdprison_hold(struct prison *pr)
2549185029Spjd{
2550185029Spjd
2551185029Spjd	mtx_lock(&pr->pr_mtx);
2552185029Spjd	prison_hold_locked(pr);
255387275Srwatson	mtx_unlock(&pr->pr_mtx);
255472786Srwatson}
255572786Srwatson
2556185435Sbzvoid
2557185435Sbzprison_proc_hold(struct prison *pr)
255887275Srwatson{
255987275Srwatson
2560185435Sbz	mtx_lock(&pr->pr_mtx);
2561191673Sjamie	KASSERT(pr->pr_uref > 0,
2562191673Sjamie	    ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id));
2563191673Sjamie	pr->pr_uref++;
2564185435Sbz	mtx_unlock(&pr->pr_mtx);
256587275Srwatson}
256687275Srwatson
2567185435Sbzvoid
2568185435Sbzprison_proc_free(struct prison *pr)
2569185435Sbz{
2570185435Sbz
2571185435Sbz	mtx_lock(&pr->pr_mtx);
2572191673Sjamie	KASSERT(pr->pr_uref > 0,
2573191673Sjamie	    ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
2574191673Sjamie	prison_deref(pr, PD_DEUREF | PD_LOCKED);
2575185435Sbz}
2576185435Sbz
2577185435Sbz
2578185435Sbz#ifdef INET
2579185435Sbz/*
2580192895Sjamie * Restrict a prison's IP address list with its parent's, possibly replacing
2581192895Sjamie * it.  Return true if the replacement buffer was used (or would have been).
2582192895Sjamie */
2583192895Sjamiestatic int
2584192895Sjamieprison_restrict_ip4(struct prison *pr, struct in_addr *newip4)
2585192895Sjamie{
2586192895Sjamie	int ii, ij, used;
2587192895Sjamie	struct prison *ppr;
2588192895Sjamie
2589192895Sjamie	ppr = pr->pr_parent;
2590192895Sjamie	if (!(pr->pr_flags & PR_IP4_USER)) {
2591192895Sjamie		/* This has no user settings, so just copy the parent's list. */
2592192895Sjamie		if (pr->pr_ip4s < ppr->pr_ip4s) {
2593192895Sjamie			/*
2594192895Sjamie			 * There's no room for the parent's list.  Use the
2595192895Sjamie			 * new list buffer, which is assumed to be big enough
2596192895Sjamie			 * (if it was passed).  If there's no buffer, try to
2597192895Sjamie			 * allocate one.
2598192895Sjamie			 */
2599192895Sjamie			used = 1;
2600192895Sjamie			if (newip4 == NULL) {
2601192895Sjamie				newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4),
2602192895Sjamie				    M_PRISON, M_NOWAIT);
2603192895Sjamie				if (newip4 != NULL)
2604192895Sjamie					used = 0;
2605192895Sjamie			}
2606192895Sjamie			if (newip4 != NULL) {
2607192895Sjamie				bcopy(ppr->pr_ip4, newip4,
2608192895Sjamie				    ppr->pr_ip4s * sizeof(*newip4));
2609192895Sjamie				free(pr->pr_ip4, M_PRISON);
2610192895Sjamie				pr->pr_ip4 = newip4;
2611192895Sjamie				pr->pr_ip4s = ppr->pr_ip4s;
2612192895Sjamie			}
2613192895Sjamie			return (used);
2614192895Sjamie		}
2615192895Sjamie		pr->pr_ip4s = ppr->pr_ip4s;
2616192895Sjamie		if (pr->pr_ip4s > 0)
2617192895Sjamie			bcopy(ppr->pr_ip4, pr->pr_ip4,
2618192895Sjamie			    pr->pr_ip4s * sizeof(*newip4));
2619192895Sjamie		else if (pr->pr_ip4 != NULL) {
2620192895Sjamie			free(pr->pr_ip4, M_PRISON);
2621192895Sjamie			pr->pr_ip4 = NULL;
2622192895Sjamie		}
2623195974Sjamie	} else if (pr->pr_ip4s > 0) {
2624192895Sjamie		/* Remove addresses that aren't in the parent. */
2625192895Sjamie		for (ij = 0; ij < ppr->pr_ip4s; ij++)
2626192895Sjamie			if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
2627192895Sjamie				break;
2628192895Sjamie		if (ij < ppr->pr_ip4s)
2629192895Sjamie			ii = 1;
2630192895Sjamie		else {
2631192895Sjamie			bcopy(pr->pr_ip4 + 1, pr->pr_ip4,
2632192895Sjamie			    --pr->pr_ip4s * sizeof(*pr->pr_ip4));
2633192895Sjamie			ii = 0;
2634192895Sjamie		}
2635192895Sjamie		for (ij = 1; ii < pr->pr_ip4s; ) {
2636192895Sjamie			if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) {
2637192895Sjamie				ii++;
2638192895Sjamie				continue;
2639192895Sjamie			}
2640192895Sjamie			switch (ij >= ppr->pr_ip4s ? -1 :
2641192895Sjamie				qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) {
2642192895Sjamie			case -1:
2643192895Sjamie				bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii,
2644192895Sjamie				    (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4));
2645192895Sjamie				break;
2646192895Sjamie			case 0:
2647192895Sjamie				ii++;
2648192895Sjamie				ij++;
2649192895Sjamie				break;
2650192895Sjamie			case 1:
2651192895Sjamie				ij++;
2652192895Sjamie				break;
2653192895Sjamie			}
2654192895Sjamie		}
2655192895Sjamie		if (pr->pr_ip4s == 0) {
2656195870Sjamie			pr->pr_flags |= PR_IP4_DISABLE;
2657192895Sjamie			free(pr->pr_ip4, M_PRISON);
2658192895Sjamie			pr->pr_ip4 = NULL;
2659192895Sjamie		}
2660192895Sjamie	}
2661192895Sjamie	return (0);
2662192895Sjamie}
2663192895Sjamie
2664192895Sjamie/*
2665185435Sbz * Pass back primary IPv4 address of this jail.
2666185435Sbz *
2667192895Sjamie * If not restricted return success but do not alter the address.  Caller has
2668192895Sjamie * to make sure to initialize it correctly (e.g. INADDR_ANY).
2669185435Sbz *
2670188144Sjamie * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
2671188144Sjamie * Address returned in NBO.
2672185435Sbz */
267346155Sphkint
2674187684Sbzprison_get_ip4(struct ucred *cred, struct in_addr *ia)
267546155Sphk{
2676191673Sjamie	struct prison *pr;
267746155Sphk
2678185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
2679185435Sbz	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
2680185435Sbz
2681192895Sjamie	pr = cred->cr_prison;
2682192895Sjamie	if (!(pr->pr_flags & PR_IP4))
268346155Sphk		return (0);
2684191673Sjamie	mtx_lock(&pr->pr_mtx);
2685192895Sjamie	if (!(pr->pr_flags & PR_IP4)) {
2686192895Sjamie		mtx_unlock(&pr->pr_mtx);
2687192895Sjamie		return (0);
2688192895Sjamie	}
2689191673Sjamie	if (pr->pr_ip4 == NULL) {
2690191673Sjamie		mtx_unlock(&pr->pr_mtx);
2691188144Sjamie		return (EAFNOSUPPORT);
2692191673Sjamie	}
2693185435Sbz
2694191673Sjamie	ia->s_addr = pr->pr_ip4[0].s_addr;
2695191673Sjamie	mtx_unlock(&pr->pr_mtx);
2696185435Sbz	return (0);
2697185435Sbz}
2698185435Sbz
2699185435Sbz/*
2700202468Sbz * Return 1 if we should do proper source address selection or are not jailed.
2701202468Sbz * We will return 0 if we should bypass source address selection in favour
2702202468Sbz * of the primary jail IPv4 address. Only in this case *ia will be updated and
2703202468Sbz * returned in NBO.
2704202468Sbz * Return EAFNOSUPPORT, in case this jail does not allow IPv4.
2705202468Sbz */
2706202468Sbzint
2707202468Sbzprison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia)
2708202468Sbz{
2709202468Sbz	struct prison *pr;
2710202468Sbz	struct in_addr lia;
2711202468Sbz	int error;
2712202468Sbz
2713202468Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
2714202468Sbz	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
2715202468Sbz
2716202468Sbz	if (!jailed(cred))
2717202468Sbz		return (1);
2718202468Sbz
2719202468Sbz	pr = cred->cr_prison;
2720202468Sbz	if (pr->pr_flags & PR_IP4_SADDRSEL)
2721202468Sbz		return (1);
2722202468Sbz
2723202468Sbz	lia.s_addr = INADDR_ANY;
2724202468Sbz	error = prison_get_ip4(cred, &lia);
2725202468Sbz	if (error)
2726202468Sbz		return (error);
2727202468Sbz	if (lia.s_addr == INADDR_ANY)
2728202468Sbz		return (1);
2729202468Sbz
2730202468Sbz	ia->s_addr = lia.s_addr;
2731202468Sbz	return (0);
2732202468Sbz}
2733202468Sbz
2734202468Sbz/*
2735192895Sjamie * Return true if pr1 and pr2 have the same IPv4 address restrictions.
2736192895Sjamie */
2737192895Sjamieint
2738192895Sjamieprison_equal_ip4(struct prison *pr1, struct prison *pr2)
2739192895Sjamie{
2740192895Sjamie
2741192895Sjamie	if (pr1 == pr2)
2742192895Sjamie		return (1);
2743192895Sjamie
2744192895Sjamie	/*
2745195974Sjamie	 * No need to lock since the PR_IP4_USER flag can't be altered for
2746195974Sjamie	 * existing prisons.
2747192895Sjamie	 */
2748195945Sjamie	while (pr1 != &prison0 &&
2749195945Sjamie#ifdef VIMAGE
2750195945Sjamie	       !(pr1->pr_flags & PR_VNET) &&
2751195945Sjamie#endif
2752195945Sjamie	       !(pr1->pr_flags & PR_IP4_USER))
2753192895Sjamie		pr1 = pr1->pr_parent;
2754195945Sjamie	while (pr2 != &prison0 &&
2755195945Sjamie#ifdef VIMAGE
2756195945Sjamie	       !(pr2->pr_flags & PR_VNET) &&
2757195945Sjamie#endif
2758195945Sjamie	       !(pr2->pr_flags & PR_IP4_USER))
2759192895Sjamie		pr2 = pr2->pr_parent;
2760192895Sjamie	return (pr1 == pr2);
2761192895Sjamie}
2762192895Sjamie
2763192895Sjamie/*
2764185435Sbz * Make sure our (source) address is set to something meaningful to this
2765185435Sbz * jail.
2766185435Sbz *
2767192895Sjamie * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail,
2768192895Sjamie * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
2769192895Sjamie * doesn't allow IPv4.  Address passed in in NBO and returned in NBO.
2770185435Sbz */
2771185435Sbzint
2772185435Sbzprison_local_ip4(struct ucred *cred, struct in_addr *ia)
2773185435Sbz{
2774191673Sjamie	struct prison *pr;
2775185435Sbz	struct in_addr ia0;
2776191673Sjamie	int error;
2777185435Sbz
2778185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
2779185435Sbz	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
2780185435Sbz
2781192895Sjamie	pr = cred->cr_prison;
2782192895Sjamie	if (!(pr->pr_flags & PR_IP4))
278346155Sphk		return (0);
2784191673Sjamie	mtx_lock(&pr->pr_mtx);
2785192895Sjamie	if (!(pr->pr_flags & PR_IP4)) {
2786192895Sjamie		mtx_unlock(&pr->pr_mtx);
2787192895Sjamie		return (0);
2788192895Sjamie	}
2789191673Sjamie	if (pr->pr_ip4 == NULL) {
2790191673Sjamie		mtx_unlock(&pr->pr_mtx);
2791188144Sjamie		return (EAFNOSUPPORT);
2792191673Sjamie	}
2793185435Sbz
2794185435Sbz	ia0.s_addr = ntohl(ia->s_addr);
2795185435Sbz	if (ia0.s_addr == INADDR_LOOPBACK) {
2796191673Sjamie		ia->s_addr = pr->pr_ip4[0].s_addr;
2797191673Sjamie		mtx_unlock(&pr->pr_mtx);
2798185435Sbz		return (0);
279946155Sphk	}
2800185435Sbz
2801188144Sjamie	if (ia0.s_addr == INADDR_ANY) {
2802188144Sjamie		/*
2803188144Sjamie		 * In case there is only 1 IPv4 address, bind directly.
2804188144Sjamie		 */
2805191673Sjamie		if (pr->pr_ip4s == 1)
2806191673Sjamie			ia->s_addr = pr->pr_ip4[0].s_addr;
2807191673Sjamie		mtx_unlock(&pr->pr_mtx);
2808185435Sbz		return (0);
2809185435Sbz	}
2810185435Sbz
2811191673Sjamie	error = _prison_check_ip4(pr, ia);
2812191673Sjamie	mtx_unlock(&pr->pr_mtx);
2813191673Sjamie	return (error);
2814185435Sbz}
2815185435Sbz
2816185435Sbz/*
2817185435Sbz * Rewrite destination address in case we will connect to loopback address.
2818185435Sbz *
2819188144Sjamie * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
2820188144Sjamie * Address passed in in NBO and returned in NBO.
2821185435Sbz */
2822185435Sbzint
2823185435Sbzprison_remote_ip4(struct ucred *cred, struct in_addr *ia)
2824185435Sbz{
2825191673Sjamie	struct prison *pr;
2826185435Sbz
2827185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
2828185435Sbz	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
2829185435Sbz
2830192895Sjamie	pr = cred->cr_prison;
2831192895Sjamie	if (!(pr->pr_flags & PR_IP4))
2832185435Sbz		return (0);
2833191673Sjamie	mtx_lock(&pr->pr_mtx);
2834192895Sjamie	if (!(pr->pr_flags & PR_IP4)) {
2835192895Sjamie		mtx_unlock(&pr->pr_mtx);
2836192895Sjamie		return (0);
2837192895Sjamie	}
2838191673Sjamie	if (pr->pr_ip4 == NULL) {
2839191673Sjamie		mtx_unlock(&pr->pr_mtx);
2840188144Sjamie		return (EAFNOSUPPORT);
2841191673Sjamie	}
2842188144Sjamie
2843185435Sbz	if (ntohl(ia->s_addr) == INADDR_LOOPBACK) {
2844191673Sjamie		ia->s_addr = pr->pr_ip4[0].s_addr;
2845191673Sjamie		mtx_unlock(&pr->pr_mtx);
2846185435Sbz		return (0);
2847185435Sbz	}
2848185435Sbz
2849185435Sbz	/*
2850185435Sbz	 * Return success because nothing had to be changed.
2851185435Sbz	 */
2852191673Sjamie	mtx_unlock(&pr->pr_mtx);
2853185435Sbz	return (0);
2854185435Sbz}
2855185435Sbz
2856185435Sbz/*
2857188144Sjamie * Check if given address belongs to the jail referenced by cred/prison.
2858185435Sbz *
2859192895Sjamie * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail,
2860192895Sjamie * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
2861192895Sjamie * doesn't allow IPv4.  Address passed in in NBO.
2862185435Sbz */
2863185435Sbzstatic int
2864185435Sbz_prison_check_ip4(struct prison *pr, struct in_addr *ia)
2865185435Sbz{
2866185435Sbz	int i, a, z, d;
2867185435Sbz
2868185435Sbz	/*
2869185435Sbz	 * Check the primary IP.
2870185435Sbz	 */
2871185435Sbz	if (pr->pr_ip4[0].s_addr == ia->s_addr)
2872188144Sjamie		return (0);
2873185435Sbz
2874185435Sbz	/*
2875185435Sbz	 * All the other IPs are sorted so we can do a binary search.
2876185435Sbz	 */
2877185435Sbz	a = 0;
2878185435Sbz	z = pr->pr_ip4s - 2;
2879185435Sbz	while (a <= z) {
2880185435Sbz		i = (a + z) / 2;
2881185435Sbz		d = qcmp_v4(&pr->pr_ip4[i+1], ia);
2882185435Sbz		if (d > 0)
2883185435Sbz			z = i - 1;
2884185435Sbz		else if (d < 0)
2885185435Sbz			a = i + 1;
288681114Srwatson		else
2887188144Sjamie			return (0);
2888185435Sbz	}
2889188144Sjamie
2890188144Sjamie	return (EADDRNOTAVAIL);
2891185435Sbz}
2892185435Sbz
2893185435Sbzint
2894185435Sbzprison_check_ip4(struct ucred *cred, struct in_addr *ia)
2895185435Sbz{
2896191673Sjamie	struct prison *pr;
2897191673Sjamie	int error;
2898185435Sbz
2899185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
2900185435Sbz	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
2901185435Sbz
2902192895Sjamie	pr = cred->cr_prison;
2903192895Sjamie	if (!(pr->pr_flags & PR_IP4))
2904188144Sjamie		return (0);
2905191673Sjamie	mtx_lock(&pr->pr_mtx);
2906192895Sjamie	if (!(pr->pr_flags & PR_IP4)) {
2907192895Sjamie		mtx_unlock(&pr->pr_mtx);
2908192895Sjamie		return (0);
2909192895Sjamie	}
2910191673Sjamie	if (pr->pr_ip4 == NULL) {
2911191673Sjamie		mtx_unlock(&pr->pr_mtx);
2912188144Sjamie		return (EAFNOSUPPORT);
2913191673Sjamie	}
2914185435Sbz
2915191673Sjamie	error = _prison_check_ip4(pr, ia);
2916191673Sjamie	mtx_unlock(&pr->pr_mtx);
2917191673Sjamie	return (error);
2918185435Sbz}
2919185435Sbz#endif
2920185435Sbz
2921185435Sbz#ifdef INET6
2922192895Sjamiestatic int
2923192895Sjamieprison_restrict_ip6(struct prison *pr, struct in6_addr *newip6)
2924192895Sjamie{
2925192895Sjamie	int ii, ij, used;
2926192895Sjamie	struct prison *ppr;
2927192895Sjamie
2928192895Sjamie	ppr = pr->pr_parent;
2929192895Sjamie	if (!(pr->pr_flags & PR_IP6_USER)) {
2930192895Sjamie		/* This has no user settings, so just copy the parent's list. */
2931192895Sjamie		if (pr->pr_ip6s < ppr->pr_ip6s) {
2932192895Sjamie			/*
2933192895Sjamie			 * There's no room for the parent's list.  Use the
2934192895Sjamie			 * new list buffer, which is assumed to be big enough
2935192895Sjamie			 * (if it was passed).  If there's no buffer, try to
2936192895Sjamie			 * allocate one.
2937192895Sjamie			 */
2938192895Sjamie			used = 1;
2939192895Sjamie			if (newip6 == NULL) {
2940192895Sjamie				newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6),
2941192895Sjamie				    M_PRISON, M_NOWAIT);
2942192895Sjamie				if (newip6 != NULL)
2943192895Sjamie					used = 0;
2944192895Sjamie			}
2945192895Sjamie			if (newip6 != NULL) {
2946192895Sjamie				bcopy(ppr->pr_ip6, newip6,
2947192895Sjamie				    ppr->pr_ip6s * sizeof(*newip6));
2948192895Sjamie				free(pr->pr_ip6, M_PRISON);
2949192895Sjamie				pr->pr_ip6 = newip6;
2950192895Sjamie				pr->pr_ip6s = ppr->pr_ip6s;
2951192895Sjamie			}
2952192895Sjamie			return (used);
2953192895Sjamie		}
2954192895Sjamie		pr->pr_ip6s = ppr->pr_ip6s;
2955192895Sjamie		if (pr->pr_ip6s > 0)
2956192895Sjamie			bcopy(ppr->pr_ip6, pr->pr_ip6,
2957192895Sjamie			    pr->pr_ip6s * sizeof(*newip6));
2958192895Sjamie		else if (pr->pr_ip6 != NULL) {
2959192895Sjamie			free(pr->pr_ip6, M_PRISON);
2960192895Sjamie			pr->pr_ip6 = NULL;
2961192895Sjamie		}
2962195974Sjamie	} else if (pr->pr_ip6s > 0) {
2963192895Sjamie		/* Remove addresses that aren't in the parent. */
2964192895Sjamie		for (ij = 0; ij < ppr->pr_ip6s; ij++)
2965192895Sjamie			if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0],
2966192895Sjamie			    &ppr->pr_ip6[ij]))
2967192895Sjamie				break;
2968192895Sjamie		if (ij < ppr->pr_ip6s)
2969192895Sjamie			ii = 1;
2970192895Sjamie		else {
2971192895Sjamie			bcopy(pr->pr_ip6 + 1, pr->pr_ip6,
2972192895Sjamie			    --pr->pr_ip6s * sizeof(*pr->pr_ip6));
2973192895Sjamie			ii = 0;
2974192895Sjamie		}
2975192895Sjamie		for (ij = 1; ii < pr->pr_ip6s; ) {
2976192895Sjamie			if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii],
2977192895Sjamie			    &ppr->pr_ip6[0])) {
2978192895Sjamie				ii++;
2979192895Sjamie				continue;
2980192895Sjamie			}
2981192895Sjamie			switch (ij >= ppr->pr_ip4s ? -1 :
2982192895Sjamie				qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) {
2983192895Sjamie			case -1:
2984192895Sjamie				bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii,
2985192895Sjamie				    (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6));
2986192895Sjamie				break;
2987192895Sjamie			case 0:
2988192895Sjamie				ii++;
2989192895Sjamie				ij++;
2990192895Sjamie				break;
2991192895Sjamie			case 1:
2992192895Sjamie				ij++;
2993192895Sjamie				break;
2994192895Sjamie			}
2995192895Sjamie		}
2996192895Sjamie		if (pr->pr_ip6s == 0) {
2997195870Sjamie			pr->pr_flags |= PR_IP6_DISABLE;
2998192895Sjamie			free(pr->pr_ip6, M_PRISON);
2999192895Sjamie			pr->pr_ip6 = NULL;
3000192895Sjamie		}
3001192895Sjamie	}
3002192895Sjamie	return 0;
3003192895Sjamie}
3004192895Sjamie
3005185435Sbz/*
3006185435Sbz * Pass back primary IPv6 address for this jail.
3007185435Sbz *
3008192895Sjamie * If not restricted return success but do not alter the address.  Caller has
3009192895Sjamie * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT).
3010185435Sbz *
3011188144Sjamie * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
3012185435Sbz */
3013185435Sbzint
3014187684Sbzprison_get_ip6(struct ucred *cred, struct in6_addr *ia6)
3015185435Sbz{
3016191673Sjamie	struct prison *pr;
3017185435Sbz
3018185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
3019185435Sbz	KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
3020185435Sbz
3021192895Sjamie	pr = cred->cr_prison;
3022192895Sjamie	if (!(pr->pr_flags & PR_IP6))
302381114Srwatson		return (0);
3024191673Sjamie	mtx_lock(&pr->pr_mtx);
3025192895Sjamie	if (!(pr->pr_flags & PR_IP6)) {
3026192895Sjamie		mtx_unlock(&pr->pr_mtx);
3027192895Sjamie		return (0);
3028192895Sjamie	}
3029191673Sjamie	if (pr->pr_ip6 == NULL) {
3030191673Sjamie		mtx_unlock(&pr->pr_mtx);
3031188144Sjamie		return (EAFNOSUPPORT);
3032191673Sjamie	}
3033188144Sjamie
3034191673Sjamie	bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
3035191673Sjamie	mtx_unlock(&pr->pr_mtx);
3036185435Sbz	return (0);
3037185435Sbz}
3038185435Sbz
3039185435Sbz/*
3040202468Sbz * Return 1 if we should do proper source address selection or are not jailed.
3041202468Sbz * We will return 0 if we should bypass source address selection in favour
3042202468Sbz * of the primary jail IPv6 address. Only in this case *ia will be updated and
3043202468Sbz * returned in NBO.
3044202468Sbz * Return EAFNOSUPPORT, in case this jail does not allow IPv6.
3045202468Sbz */
3046202468Sbzint
3047202468Sbzprison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia6)
3048202468Sbz{
3049202468Sbz	struct prison *pr;
3050202468Sbz	struct in6_addr lia6;
3051202468Sbz	int error;
3052202468Sbz
3053202468Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
3054202468Sbz	KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
3055202468Sbz
3056202468Sbz	if (!jailed(cred))
3057202468Sbz		return (1);
3058202468Sbz
3059202468Sbz	pr = cred->cr_prison;
3060202468Sbz	if (pr->pr_flags & PR_IP6_SADDRSEL)
3061202468Sbz		return (1);
3062202468Sbz
3063202468Sbz	lia6 = in6addr_any;
3064202468Sbz	error = prison_get_ip6(cred, &lia6);
3065202468Sbz	if (error)
3066202468Sbz		return (error);
3067202468Sbz	if (IN6_IS_ADDR_UNSPECIFIED(&lia6))
3068202468Sbz		return (1);
3069202468Sbz
3070202468Sbz	bcopy(&lia6, ia6, sizeof(struct in6_addr));
3071202468Sbz	return (0);
3072202468Sbz}
3073202468Sbz
3074202468Sbz/*
3075192895Sjamie * Return true if pr1 and pr2 have the same IPv6 address restrictions.
3076192895Sjamie */
3077192895Sjamieint
3078192895Sjamieprison_equal_ip6(struct prison *pr1, struct prison *pr2)
3079192895Sjamie{
3080192895Sjamie
3081192895Sjamie	if (pr1 == pr2)
3082192895Sjamie		return (1);
3083192895Sjamie
3084195945Sjamie	while (pr1 != &prison0 &&
3085195945Sjamie#ifdef VIMAGE
3086195945Sjamie	       !(pr1->pr_flags & PR_VNET) &&
3087195945Sjamie#endif
3088195945Sjamie	       !(pr1->pr_flags & PR_IP6_USER))
3089192895Sjamie		pr1 = pr1->pr_parent;
3090195945Sjamie	while (pr2 != &prison0 &&
3091195945Sjamie#ifdef VIMAGE
3092195945Sjamie	       !(pr2->pr_flags & PR_VNET) &&
3093195945Sjamie#endif
3094195945Sjamie	       !(pr2->pr_flags & PR_IP6_USER))
3095192895Sjamie		pr2 = pr2->pr_parent;
3096192895Sjamie	return (pr1 == pr2);
3097192895Sjamie}
3098192895Sjamie
3099192895Sjamie/*
3100185435Sbz * Make sure our (source) address is set to something meaningful to this jail.
3101185435Sbz *
3102185435Sbz * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0)
3103185435Sbz * when needed while binding.
3104185435Sbz *
3105192895Sjamie * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail,
3106192895Sjamie * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
3107192895Sjamie * doesn't allow IPv6.
3108185435Sbz */
3109185435Sbzint
3110185435Sbzprison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only)
3111185435Sbz{
3112191673Sjamie	struct prison *pr;
3113191673Sjamie	int error;
3114185435Sbz
3115185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
3116185435Sbz	KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
3117185435Sbz
3118192895Sjamie	pr = cred->cr_prison;
3119192895Sjamie	if (!(pr->pr_flags & PR_IP6))
3120185435Sbz		return (0);
3121191673Sjamie	mtx_lock(&pr->pr_mtx);
3122192895Sjamie	if (!(pr->pr_flags & PR_IP6)) {
3123192895Sjamie		mtx_unlock(&pr->pr_mtx);
3124192895Sjamie		return (0);
3125192895Sjamie	}
3126191673Sjamie	if (pr->pr_ip6 == NULL) {
3127191673Sjamie		mtx_unlock(&pr->pr_mtx);
3128188144Sjamie		return (EAFNOSUPPORT);
3129191673Sjamie	}
3130188144Sjamie
3131185435Sbz	if (IN6_IS_ADDR_LOOPBACK(ia6)) {
3132191673Sjamie		bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
3133191673Sjamie		mtx_unlock(&pr->pr_mtx);
3134185435Sbz		return (0);
313581114Srwatson	}
3136185435Sbz
3137188144Sjamie	if (IN6_IS_ADDR_UNSPECIFIED(ia6)) {
3138188144Sjamie		/*
3139188144Sjamie		 * In case there is only 1 IPv6 address, and v6only is true,
3140188144Sjamie		 * then bind directly.
3141188144Sjamie		 */
3142191673Sjamie		if (v6only != 0 && pr->pr_ip6s == 1)
3143191673Sjamie			bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
3144191673Sjamie		mtx_unlock(&pr->pr_mtx);
3145185435Sbz		return (0);
3146185435Sbz	}
3147188144Sjamie
3148191673Sjamie	error = _prison_check_ip6(pr, ia6);
3149191673Sjamie	mtx_unlock(&pr->pr_mtx);
3150191673Sjamie	return (error);
3151185435Sbz}
3152185435Sbz
3153185435Sbz/*
3154185435Sbz * Rewrite destination address in case we will connect to loopback address.
3155185435Sbz *
3156188144Sjamie * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
3157185435Sbz */
3158185435Sbzint
3159185435Sbzprison_remote_ip6(struct ucred *cred, struct in6_addr *ia6)
3160185435Sbz{
3161191673Sjamie	struct prison *pr;
3162185435Sbz
3163185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
3164185435Sbz	KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
3165185435Sbz
3166192895Sjamie	pr = cred->cr_prison;
3167192895Sjamie	if (!(pr->pr_flags & PR_IP6))
3168185435Sbz		return (0);
3169191673Sjamie	mtx_lock(&pr->pr_mtx);
3170192895Sjamie	if (!(pr->pr_flags & PR_IP6)) {
3171192895Sjamie		mtx_unlock(&pr->pr_mtx);
3172192895Sjamie		return (0);
3173192895Sjamie	}
3174191673Sjamie	if (pr->pr_ip6 == NULL) {
3175191673Sjamie		mtx_unlock(&pr->pr_mtx);
3176188144Sjamie		return (EAFNOSUPPORT);
3177191673Sjamie	}
3178188144Sjamie
3179185435Sbz	if (IN6_IS_ADDR_LOOPBACK(ia6)) {
3180191673Sjamie		bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr));
3181191673Sjamie		mtx_unlock(&pr->pr_mtx);
3182185435Sbz		return (0);
3183185435Sbz	}
3184185435Sbz
3185185435Sbz	/*
3186185435Sbz	 * Return success because nothing had to be changed.
3187185435Sbz	 */
3188191673Sjamie	mtx_unlock(&pr->pr_mtx);
318946155Sphk	return (0);
319046155Sphk}
319146155Sphk
3192185435Sbz/*
3193188144Sjamie * Check if given address belongs to the jail referenced by cred/prison.
3194185435Sbz *
3195192895Sjamie * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail,
3196192895Sjamie * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
3197192895Sjamie * doesn't allow IPv6.
3198185435Sbz */
3199185435Sbzstatic int
3200185435Sbz_prison_check_ip6(struct prison *pr, struct in6_addr *ia6)
320146155Sphk{
3202185435Sbz	int i, a, z, d;
320346155Sphk
3204185435Sbz	/*
3205185435Sbz	 * Check the primary IP.
3206185435Sbz	 */
3207185435Sbz	if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6))
3208188144Sjamie		return (0);
3209185435Sbz
3210185435Sbz	/*
3211185435Sbz	 * All the other IPs are sorted so we can do a binary search.
3212185435Sbz	 */
3213185435Sbz	a = 0;
3214185435Sbz	z = pr->pr_ip6s - 2;
3215185435Sbz	while (a <= z) {
3216185435Sbz		i = (a + z) / 2;
3217185435Sbz		d = qcmp_v6(&pr->pr_ip6[i+1], ia6);
3218185435Sbz		if (d > 0)
3219185435Sbz			z = i - 1;
3220185435Sbz		else if (d < 0)
3221185435Sbz			a = i + 1;
322246155Sphk		else
3223188144Sjamie			return (0);
322446155Sphk	}
3225188144Sjamie
3226188144Sjamie	return (EADDRNOTAVAIL);
322746155Sphk}
322846155Sphk
322946155Sphkint
3230185435Sbzprison_check_ip6(struct ucred *cred, struct in6_addr *ia6)
3231185435Sbz{
3232191673Sjamie	struct prison *pr;
3233191673Sjamie	int error;
3234185435Sbz
3235185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
3236185435Sbz	KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__));
3237185435Sbz
3238192895Sjamie	pr = cred->cr_prison;
3239192895Sjamie	if (!(pr->pr_flags & PR_IP6))
3240188144Sjamie		return (0);
3241191673Sjamie	mtx_lock(&pr->pr_mtx);
3242192895Sjamie	if (!(pr->pr_flags & PR_IP6)) {
3243192895Sjamie		mtx_unlock(&pr->pr_mtx);
3244192895Sjamie		return (0);
3245192895Sjamie	}
3246191673Sjamie	if (pr->pr_ip6 == NULL) {
3247191673Sjamie		mtx_unlock(&pr->pr_mtx);
3248188144Sjamie		return (EAFNOSUPPORT);
3249191673Sjamie	}
3250185435Sbz
3251191673Sjamie	error = _prison_check_ip6(pr, ia6);
3252191673Sjamie	mtx_unlock(&pr->pr_mtx);
3253191673Sjamie	return (error);
3254185435Sbz}
3255185435Sbz#endif
3256185435Sbz
3257185435Sbz/*
3258188146Sjamie * Check if a jail supports the given address family.
3259188146Sjamie *
3260188146Sjamie * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT
3261188146Sjamie * if not.
3262188146Sjamie */
3263188146Sjamieint
3264188146Sjamieprison_check_af(struct ucred *cred, int af)
3265188146Sjamie{
3266192895Sjamie	struct prison *pr;
3267188146Sjamie	int error;
3268188146Sjamie
3269188146Sjamie	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
3270188146Sjamie
3271192895Sjamie	pr = cred->cr_prison;
3272194923Sjamie#ifdef VIMAGE
3273194915Sjamie	/* Prisons with their own network stack are not limited. */
3274200473Sbz	if (prison_owns_vnet(cred))
3275194915Sjamie		return (0);
3276194923Sjamie#endif
3277194915Sjamie
3278188146Sjamie	error = 0;
3279188146Sjamie	switch (af)
3280188146Sjamie	{
3281188146Sjamie#ifdef INET
3282188146Sjamie	case AF_INET:
3283192895Sjamie		if (pr->pr_flags & PR_IP4)
3284192895Sjamie		{
3285192895Sjamie			mtx_lock(&pr->pr_mtx);
3286192895Sjamie			if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL)
3287192895Sjamie				error = EAFNOSUPPORT;
3288192895Sjamie			mtx_unlock(&pr->pr_mtx);
3289192895Sjamie		}
3290188146Sjamie		break;
3291188146Sjamie#endif
3292188146Sjamie#ifdef INET6
3293188146Sjamie	case AF_INET6:
3294192895Sjamie		if (pr->pr_flags & PR_IP6)
3295192895Sjamie		{
3296192895Sjamie			mtx_lock(&pr->pr_mtx);
3297192895Sjamie			if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL)
3298192895Sjamie				error = EAFNOSUPPORT;
3299192895Sjamie			mtx_unlock(&pr->pr_mtx);
3300192895Sjamie		}
3301188146Sjamie		break;
3302188146Sjamie#endif
3303188146Sjamie	case AF_LOCAL:
3304188146Sjamie	case AF_ROUTE:
3305188146Sjamie		break;
3306188146Sjamie	default:
3307192895Sjamie		if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF))
3308188146Sjamie			error = EAFNOSUPPORT;
3309188146Sjamie	}
3310188146Sjamie	return (error);
3311188146Sjamie}
3312188146Sjamie
3313188146Sjamie/*
3314185435Sbz * Check if given address belongs to the jail referenced by cred (wrapper to
3315185435Sbz * prison_check_ip[46]).
3316185435Sbz *
3317192895Sjamie * Returns 0 if jail doesn't restrict the address family or if address belongs
3318192895Sjamie * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if
3319192895Sjamie * the jail doesn't allow the address family.  IPv4 Address passed in in NBO.
3320185435Sbz */
3321185435Sbzint
332272786Srwatsonprison_if(struct ucred *cred, struct sockaddr *sa)
332346155Sphk{
3324185435Sbz#ifdef INET
3325114168Smike	struct sockaddr_in *sai;
3326185435Sbz#endif
3327185435Sbz#ifdef INET6
3328185435Sbz	struct sockaddr_in6 *sai6;
3329185435Sbz#endif
3330188144Sjamie	int error;
333146155Sphk
3332185435Sbz	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
3333185435Sbz	KASSERT(sa != NULL, ("%s: sa is NULL", __func__));
3334185435Sbz
3335200473Sbz#ifdef VIMAGE
3336200473Sbz	if (prison_owns_vnet(cred))
3337200473Sbz		return (0);
3338200473Sbz#endif
3339200473Sbz
3340188144Sjamie	error = 0;
3341188144Sjamie	switch (sa->sa_family)
3342185435Sbz	{
3343185435Sbz#ifdef INET
3344185435Sbz	case AF_INET:
3345185435Sbz		sai = (struct sockaddr_in *)sa;
3346188144Sjamie		error = prison_check_ip4(cred, &sai->sin_addr);
3347185435Sbz		break;
3348185435Sbz#endif
3349185435Sbz#ifdef INET6
3350185435Sbz	case AF_INET6:
3351185435Sbz		sai6 = (struct sockaddr_in6 *)sa;
3352188144Sjamie		error = prison_check_ip6(cred, &sai6->sin6_addr);
3353185435Sbz		break;
3354185435Sbz#endif
3355185435Sbz	default:
3356192895Sjamie		if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF))
3357188144Sjamie			error = EAFNOSUPPORT;
3358185435Sbz	}
3359188144Sjamie	return (error);
336046155Sphk}
336172786Srwatson
336272786Srwatson/*
336372786Srwatson * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
336472786Srwatson */
336572786Srwatsonint
3366114168Smikeprison_check(struct ucred *cred1, struct ucred *cred2)
336772786Srwatson{
336872786Srwatson
3369192895Sjamie	return ((cred1->cr_prison == cred2->cr_prison ||
3370192895Sjamie	    prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH);
3371192895Sjamie}
337272786Srwatson
3373192895Sjamie/*
3374192895Sjamie * Return 1 if p2 is a child of p1, otherwise 0.
3375192895Sjamie */
3376192895Sjamieint
3377192895Sjamieprison_ischild(struct prison *pr1, struct prison *pr2)
3378192895Sjamie{
3379192895Sjamie
3380192895Sjamie	for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent)
3381192895Sjamie		if (pr1 == pr2)
3382192895Sjamie			return (1);
338372786Srwatson	return (0);
338472786Srwatson}
338572786Srwatson
338672786Srwatson/*
338772786Srwatson * Return 1 if the passed credential is in a jail, otherwise 0.
338872786Srwatson */
338972786Srwatsonint
3390114168Smikejailed(struct ucred *cred)
339172786Srwatson{
339272786Srwatson
3393192895Sjamie	return (cred->cr_prison != &prison0);
339472786Srwatson}
339591384Srobert
/*
 * Return 1 if the passed credential is in a jail and that jail does not
 * have its own virtual network stack, otherwise 0.  Without VIMAGE this is
 * the same answer as jailed().
 */
int
jailed_without_vnet(struct ucred *cred)
{

#ifdef VIMAGE
	return (jailed(cred) && !prison_owns_vnet(cred));
#else
	return (jailed(cred) ? 1 : 0);
#endif
}
3413200473Sbz
3414200473Sbz/*
3415194090Sjamie * Return the correct hostname (domainname, et al) for the passed credential.
341691384Srobert */
341791391Srobertvoid
3418114168Smikegetcredhostname(struct ucred *cred, char *buf, size_t size)
341991384Srobert{
3420193066Sjamie	struct prison *pr;
342191384Srobert
3422194090Sjamie	/*
3423194090Sjamie	 * A NULL credential can be used to shortcut to the physical
3424194090Sjamie	 * system's hostname.
3425194090Sjamie	 */
3426193066Sjamie	pr = (cred != NULL) ? cred->cr_prison : &prison0;
3427193066Sjamie	mtx_lock(&pr->pr_mtx);
3428194118Sjamie	strlcpy(buf, pr->pr_hostname, size);
3429193066Sjamie	mtx_unlock(&pr->pr_mtx);
343091384Srobert}
3431113275Smike
3432194090Sjamievoid
3433194090Sjamiegetcreddomainname(struct ucred *cred, char *buf, size_t size)
3434194090Sjamie{
3435194090Sjamie
3436194090Sjamie	mtx_lock(&cred->cr_prison->pr_mtx);
3437194118Sjamie	strlcpy(buf, cred->cr_prison->pr_domainname, size);
3438194090Sjamie	mtx_unlock(&cred->cr_prison->pr_mtx);
3439194090Sjamie}
3440194090Sjamie
3441194090Sjamievoid
3442194090Sjamiegetcredhostuuid(struct ucred *cred, char *buf, size_t size)
3443194090Sjamie{
3444194090Sjamie
3445194090Sjamie	mtx_lock(&cred->cr_prison->pr_mtx);
3446194118Sjamie	strlcpy(buf, cred->cr_prison->pr_hostuuid, size);
3447194090Sjamie	mtx_unlock(&cred->cr_prison->pr_mtx);
3448194090Sjamie}
3449194090Sjamie
3450194090Sjamievoid
3451194090Sjamiegetcredhostid(struct ucred *cred, unsigned long *hostid)
3452194090Sjamie{
3453194090Sjamie
3454194090Sjamie	mtx_lock(&cred->cr_prison->pr_mtx);
3455194090Sjamie	*hostid = cred->cr_prison->pr_hostid;
3456194090Sjamie	mtx_unlock(&cred->cr_prison->pr_mtx);
3457194090Sjamie}
3458194090Sjamie
#ifdef VIMAGE
/*
 * Report whether the prison of cred owns its vnet (PR_VNET is set in its
 * flags) rather than having inherited it.
 *
 * Returns 1 in case the prison owns the vnet, 0 otherwise.
 */
int
prison_owns_vnet(struct ucred *cred)
{

	/*
	 * vnets cannot be added/removed after jail creation,
	 * so no need to lock here.
	 */
	if ((cred->cr_prison->pr_flags & PR_VNET) != 0)
		return (1);
	return (0);
}
#endif
3477196176Sbz
3478196176Sbz/*
3479147185Spjd * Determine whether the subject represented by cred can "see"
3480147185Spjd * status of a mount point.
3481147185Spjd * Returns: 0 for permitted, ENOENT otherwise.
3482147185Spjd * XXX: This function should be called cr_canseemount() and should be
3483147185Spjd *      placed in kern_prot.c.
3484125804Srwatson */
3485125804Srwatsonint
3486147185Spjdprison_canseemount(struct ucred *cred, struct mount *mp)
3487125804Srwatson{
3488147185Spjd	struct prison *pr;
3489147185Spjd	struct statfs *sp;
3490147185Spjd	size_t len;
3491125804Srwatson
3492192895Sjamie	pr = cred->cr_prison;
3493192895Sjamie	if (pr->pr_enforce_statfs == 0)
3494147185Spjd		return (0);
3495147185Spjd	if (pr->pr_root->v_mount == mp)
3496147185Spjd		return (0);
3497192895Sjamie	if (pr->pr_enforce_statfs == 2)
3498147185Spjd		return (ENOENT);
3499147185Spjd	/*
3500147185Spjd	 * If jail's chroot directory is set to "/" we should be able to see
3501147185Spjd	 * all mount-points from inside a jail.
3502147185Spjd	 * This is ugly check, but this is the only situation when jail's
3503147185Spjd	 * directory ends with '/'.
3504147185Spjd	 */
3505147185Spjd	if (strcmp(pr->pr_path, "/") == 0)
3506147185Spjd		return (0);
3507147185Spjd	len = strlen(pr->pr_path);
3508147185Spjd	sp = &mp->mnt_stat;
3509147185Spjd	if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
3510147185Spjd		return (ENOENT);
3511147185Spjd	/*
3512147185Spjd	 * Be sure that we don't have situation where jail's root directory
3513147185Spjd	 * is "/some/path" and mount point is "/some/pathpath".
3514147185Spjd	 */
3515147185Spjd	if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
3516147185Spjd		return (ENOENT);
3517147185Spjd	return (0);
3518147185Spjd}
3519147185Spjd
3520147185Spjdvoid
3521147185Spjdprison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
3522147185Spjd{
3523147185Spjd	char jpath[MAXPATHLEN];
3524147185Spjd	struct prison *pr;
3525147185Spjd	size_t len;
3526147185Spjd
3527192895Sjamie	pr = cred->cr_prison;
3528192895Sjamie	if (pr->pr_enforce_statfs == 0)
3529147185Spjd		return;
3530147185Spjd	if (prison_canseemount(cred, mp) != 0) {
3531147185Spjd		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
3532147185Spjd		strlcpy(sp->f_mntonname, "[restricted]",
3533147185Spjd		    sizeof(sp->f_mntonname));
3534147185Spjd		return;
3535125804Srwatson	}
3536147185Spjd	if (pr->pr_root->v_mount == mp) {
3537147185Spjd		/*
3538147185Spjd		 * Clear current buffer data, so we are sure nothing from
3539147185Spjd		 * the valid path left there.
3540147185Spjd		 */
3541147185Spjd		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
3542147185Spjd		*sp->f_mntonname = '/';
3543147185Spjd		return;
3544147185Spjd	}
3545147185Spjd	/*
3546147185Spjd	 * If jail's chroot directory is set to "/" we should be able to see
3547147185Spjd	 * all mount-points from inside a jail.
3548147185Spjd	 */
3549147185Spjd	if (strcmp(pr->pr_path, "/") == 0)
3550147185Spjd		return;
3551147185Spjd	len = strlen(pr->pr_path);
3552147185Spjd	strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
3553147185Spjd	/*
3554147185Spjd	 * Clear current buffer data, so we are sure nothing from
3555147185Spjd	 * the valid path left there.
3556147185Spjd	 */
3557147185Spjd	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
3558147185Spjd	if (*jpath == '\0') {
3559147185Spjd		/* Should never happen. */
3560147185Spjd		*sp->f_mntonname = '/';
3561147185Spjd	} else {
3562147185Spjd		strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
3563147185Spjd	}
3564125804Srwatson}
3565125804Srwatson
3566164032Srwatson/*
3567164032Srwatson * Check with permission for a specific privilege is granted within jail.  We
3568164032Srwatson * have a specific list of accepted privileges; the rest are denied.
3569164032Srwatson */
3570164032Srwatsonint
3571164032Srwatsonprison_priv_check(struct ucred *cred, int priv)
3572164032Srwatson{
3573164032Srwatson
3574164032Srwatson	if (!jailed(cred))
3575164032Srwatson		return (0);
3576164032Srwatson
3577194915Sjamie#ifdef VIMAGE
3578194915Sjamie	/*
3579194915Sjamie	 * Privileges specific to prisons with a virtual network stack.
3580194915Sjamie	 * There might be a duplicate entry here in case the privilege
3581194915Sjamie	 * is only granted conditionally in the legacy jail case.
3582194915Sjamie	 */
3583164032Srwatson	switch (priv) {
3584194915Sjamie#ifdef notyet
3585194915Sjamie		/*
3586194915Sjamie		 * NFS-specific privileges.
3587194915Sjamie		 */
3588194915Sjamie	case PRIV_NFS_DAEMON:
3589194915Sjamie	case PRIV_NFS_LOCKD:
3590194915Sjamie#endif
3591194915Sjamie		/*
3592194915Sjamie		 * Network stack privileges.
3593194915Sjamie		 */
3594194915Sjamie	case PRIV_NET_BRIDGE:
3595194915Sjamie	case PRIV_NET_GRE:
3596194915Sjamie	case PRIV_NET_BPF:
3597194915Sjamie	case PRIV_NET_RAW:		/* Dup, cond. in legacy jail case. */
3598194915Sjamie	case PRIV_NET_ROUTE:
3599194915Sjamie	case PRIV_NET_TAP:
3600194915Sjamie	case PRIV_NET_SETIFMTU:
3601194915Sjamie	case PRIV_NET_SETIFFLAGS:
3602194915Sjamie	case PRIV_NET_SETIFCAP:
3603203052Sdelphij	case PRIV_NET_SETIFDESCR:
3604194915Sjamie	case PRIV_NET_SETIFNAME	:
3605194915Sjamie	case PRIV_NET_SETIFMETRIC:
3606194915Sjamie	case PRIV_NET_SETIFPHYS:
3607194915Sjamie	case PRIV_NET_SETIFMAC:
3608194915Sjamie	case PRIV_NET_ADDMULTI:
3609194915Sjamie	case PRIV_NET_DELMULTI:
3610194915Sjamie	case PRIV_NET_HWIOCTL:
3611194915Sjamie	case PRIV_NET_SETLLADDR:
3612194915Sjamie	case PRIV_NET_ADDIFGROUP:
3613194915Sjamie	case PRIV_NET_DELIFGROUP:
3614194915Sjamie	case PRIV_NET_IFCREATE:
3615194915Sjamie	case PRIV_NET_IFDESTROY:
3616194915Sjamie	case PRIV_NET_ADDIFADDR:
3617194915Sjamie	case PRIV_NET_DELIFADDR:
3618194915Sjamie	case PRIV_NET_LAGG:
3619194915Sjamie	case PRIV_NET_GIF:
3620194915Sjamie	case PRIV_NET_SETIFVNET:
3621223735Sbz	case PRIV_NET_SETIFFIB:
3622164032Srwatson
3623164032Srwatson		/*
3624194915Sjamie		 * 802.11-related privileges.
3625194915Sjamie		 */
3626194915Sjamie	case PRIV_NET80211_GETKEY:
3627194915Sjamie#ifdef notyet
3628194915Sjamie	case PRIV_NET80211_MANAGE:		/* XXX-BZ discuss with sam@ */
3629194915Sjamie#endif
3630194915Sjamie
3631194915Sjamie#ifdef notyet
3632194915Sjamie		/*
3633194915Sjamie		 * AppleTalk privileges.
3634194915Sjamie		 */
3635194915Sjamie	case PRIV_NETATALK_RESERVEDPORT:
3636194915Sjamie
3637194915Sjamie		/*
3638194915Sjamie		 * ATM privileges.
3639194915Sjamie		 */
3640194915Sjamie	case PRIV_NETATM_CFG:
3641194915Sjamie	case PRIV_NETATM_ADD:
3642194915Sjamie	case PRIV_NETATM_DEL:
3643194915Sjamie	case PRIV_NETATM_SET:
3644194915Sjamie
3645194915Sjamie		/*
3646194915Sjamie		 * Bluetooth privileges.
3647194915Sjamie		 */
3648194915Sjamie	case PRIV_NETBLUETOOTH_RAW:
3649194915Sjamie#endif
3650194915Sjamie
3651194915Sjamie		/*
3652194915Sjamie		 * Netgraph and netgraph module privileges.
3653194915Sjamie		 */
3654194915Sjamie	case PRIV_NETGRAPH_CONTROL:
3655194915Sjamie#ifdef notyet
3656194915Sjamie	case PRIV_NETGRAPH_TTY:
3657194915Sjamie#endif
3658194915Sjamie
3659194915Sjamie		/*
3660194915Sjamie		 * IPv4 and IPv6 privileges.
3661194915Sjamie		 */
3662194915Sjamie	case PRIV_NETINET_IPFW:
3663194915Sjamie	case PRIV_NETINET_DIVERT:
3664194915Sjamie	case PRIV_NETINET_PF:
3665194915Sjamie	case PRIV_NETINET_DUMMYNET:
3666194915Sjamie	case PRIV_NETINET_CARP:
3667194915Sjamie	case PRIV_NETINET_MROUTE:
3668194915Sjamie	case PRIV_NETINET_RAW:
3669194915Sjamie	case PRIV_NETINET_ADDRCTRL6:
3670194915Sjamie	case PRIV_NETINET_ND6:
3671194915Sjamie	case PRIV_NETINET_SCOPE6:
3672194915Sjamie	case PRIV_NETINET_ALIFETIME6:
3673194915Sjamie	case PRIV_NETINET_IPSEC:
3674194915Sjamie	case PRIV_NETINET_BINDANY:
3675194915Sjamie
3676194915Sjamie#ifdef notyet
3677194915Sjamie		/*
3678194915Sjamie		 * IPX/SPX privileges.
3679194915Sjamie		 */
3680194915Sjamie	case PRIV_NETIPX_RESERVEDPORT:
3681194915Sjamie	case PRIV_NETIPX_RAW:
3682194915Sjamie
3683194915Sjamie		/*
3684194915Sjamie		 * NCP privileges.
3685194915Sjamie		 */
3686194915Sjamie	case PRIV_NETNCP:
3687194915Sjamie
3688194915Sjamie		/*
3689194915Sjamie		 * SMB privileges.
3690194915Sjamie		 */
3691194915Sjamie	case PRIV_NETSMB:
3692194915Sjamie#endif
3693194915Sjamie
3694194915Sjamie	/*
3695194915Sjamie	 * No default: or deny here.
3696194915Sjamie	 * In case of no permit fall through to next switch().
3697194915Sjamie	 */
3698194915Sjamie		if (cred->cr_prison->pr_flags & PR_VNET)
3699194915Sjamie			return (0);
3700194915Sjamie	}
3701194915Sjamie#endif /* VIMAGE */
3702194915Sjamie
3703194915Sjamie	switch (priv) {
3704194915Sjamie
3705194915Sjamie		/*
3706164032Srwatson		 * Allow ktrace privileges for root in jail.
3707164032Srwatson		 */
3708164032Srwatson	case PRIV_KTRACE:
3709164032Srwatson
3710166827Srwatson#if 0
3711164032Srwatson		/*
3712164032Srwatson		 * Allow jailed processes to configure audit identity and
3713164032Srwatson		 * submit audit records (login, etc).  In the future we may
3714164032Srwatson		 * want to further refine the relationship between audit and
3715164032Srwatson		 * jail.
3716164032Srwatson		 */
3717164032Srwatson	case PRIV_AUDIT_GETAUDIT:
3718164032Srwatson	case PRIV_AUDIT_SETAUDIT:
3719164032Srwatson	case PRIV_AUDIT_SUBMIT:
3720166827Srwatson#endif
3721164032Srwatson
3722164032Srwatson		/*
3723164032Srwatson		 * Allow jailed processes to manipulate process UNIX
3724164032Srwatson		 * credentials in any way they see fit.
3725164032Srwatson		 */
3726164032Srwatson	case PRIV_CRED_SETUID:
3727164032Srwatson	case PRIV_CRED_SETEUID:
3728164032Srwatson	case PRIV_CRED_SETGID:
3729164032Srwatson	case PRIV_CRED_SETEGID:
3730164032Srwatson	case PRIV_CRED_SETGROUPS:
3731164032Srwatson	case PRIV_CRED_SETREUID:
3732164032Srwatson	case PRIV_CRED_SETREGID:
3733164032Srwatson	case PRIV_CRED_SETRESUID:
3734164032Srwatson	case PRIV_CRED_SETRESGID:
3735164032Srwatson
3736164032Srwatson		/*
3737164032Srwatson		 * Jail implements visibility constraints already, so allow
3738164032Srwatson		 * jailed root to override uid/gid-based constraints.
3739164032Srwatson		 */
3740164032Srwatson	case PRIV_SEEOTHERGIDS:
3741164032Srwatson	case PRIV_SEEOTHERUIDS:
3742164032Srwatson
3743164032Srwatson		/*
3744164032Srwatson		 * Jail implements inter-process debugging limits already, so
3745164032Srwatson		 * allow jailed root various debugging privileges.
3746164032Srwatson		 */
3747164032Srwatson	case PRIV_DEBUG_DIFFCRED:
3748164032Srwatson	case PRIV_DEBUG_SUGID:
3749164032Srwatson	case PRIV_DEBUG_UNPRIV:
3750164032Srwatson
3751164032Srwatson		/*
3752164032Srwatson		 * Allow jail to set various resource limits and login
3753164032Srwatson		 * properties, and for now, exceed process resource limits.
3754164032Srwatson		 */
3755164032Srwatson	case PRIV_PROC_LIMIT:
3756164032Srwatson	case PRIV_PROC_SETLOGIN:
3757164032Srwatson	case PRIV_PROC_SETRLIMIT:
3758164032Srwatson
3759164032Srwatson		/*
3760164032Srwatson		 * System V and POSIX IPC privileges are granted in jail.
3761164032Srwatson		 */
3762164032Srwatson	case PRIV_IPC_READ:
3763164032Srwatson	case PRIV_IPC_WRITE:
3764164032Srwatson	case PRIV_IPC_ADMIN:
3765164032Srwatson	case PRIV_IPC_MSGSIZE:
3766164032Srwatson	case PRIV_MQ_ADMIN:
3767164032Srwatson
3768164032Srwatson		/*
3769192895Sjamie		 * Jail operations within a jail work on child jails.
3770192895Sjamie		 */
3771192895Sjamie	case PRIV_JAIL_ATTACH:
3772192895Sjamie	case PRIV_JAIL_SET:
3773192895Sjamie	case PRIV_JAIL_REMOVE:
3774192895Sjamie
3775192895Sjamie		/*
3776164032Srwatson		 * Jail implements its own inter-process limits, so allow
3777164032Srwatson		 * root processes in jail to change scheduling on other
3778164032Srwatson		 * processes in the same jail.  Likewise for signalling.
3779164032Srwatson		 */
3780164032Srwatson	case PRIV_SCHED_DIFFCRED:
3781185435Sbz	case PRIV_SCHED_CPUSET:
3782164032Srwatson	case PRIV_SIGNAL_DIFFCRED:
3783164032Srwatson	case PRIV_SIGNAL_SUGID:
3784164032Srwatson
3785164032Srwatson		/*
3786164032Srwatson		 * Allow jailed processes to write to sysctls marked as jail
3787164032Srwatson		 * writable.
3788164032Srwatson		 */
3789164032Srwatson	case PRIV_SYSCTL_WRITEJAIL:
3790164032Srwatson
3791164032Srwatson		/*
3792164032Srwatson		 * Allow root in jail to manage a variety of quota
3793166831Srwatson		 * properties.  These should likely be conditional on a
3794166831Srwatson		 * configuration option.
3795164032Srwatson		 */
3796166832Srwatson	case PRIV_VFS_GETQUOTA:
3797166832Srwatson	case PRIV_VFS_SETQUOTA:
3798164032Srwatson
3799164032Srwatson		/*
3800164032Srwatson		 * Since Jail relies on chroot() to implement file system
3801164032Srwatson		 * protections, grant many VFS privileges to root in jail.
3802164032Srwatson		 * Be careful to exclude mount-related and NFS-related
3803164032Srwatson		 * privileges.
3804164032Srwatson		 */
3805164032Srwatson	case PRIV_VFS_READ:
3806164032Srwatson	case PRIV_VFS_WRITE:
3807164032Srwatson	case PRIV_VFS_ADMIN:
3808164032Srwatson	case PRIV_VFS_EXEC:
3809164032Srwatson	case PRIV_VFS_LOOKUP:
3810164032Srwatson	case PRIV_VFS_BLOCKRESERVE:	/* XXXRW: Slightly surprising. */
3811164032Srwatson	case PRIV_VFS_CHFLAGS_DEV:
3812164032Srwatson	case PRIV_VFS_CHOWN:
3813164032Srwatson	case PRIV_VFS_CHROOT:
3814167152Spjd	case PRIV_VFS_RETAINSUGID:
3815164032Srwatson	case PRIV_VFS_FCHROOT:
3816164032Srwatson	case PRIV_VFS_LINK:
3817164032Srwatson	case PRIV_VFS_SETGID:
3818172860Srwatson	case PRIV_VFS_STAT:
3819164032Srwatson	case PRIV_VFS_STICKYFILE:
3820164032Srwatson		return (0);
3821164032Srwatson
3822164032Srwatson		/*
3823164032Srwatson		 * Depending on the global setting, allow privilege of
3824164032Srwatson		 * setting system flags.
3825164032Srwatson		 */
3826164032Srwatson	case PRIV_VFS_SYSFLAGS:
3827192895Sjamie		if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS)
3828164032Srwatson			return (0);
3829164032Srwatson		else
3830164032Srwatson			return (EPERM);
3831164032Srwatson
3832164032Srwatson		/*
3833168396Spjd		 * Depending on the global setting, allow privilege of
3834168396Spjd		 * mounting/unmounting file systems.
3835168396Spjd		 */
3836168396Spjd	case PRIV_VFS_MOUNT:
3837168396Spjd	case PRIV_VFS_UNMOUNT:
3838168396Spjd	case PRIV_VFS_MOUNT_NONUSER:
3839168699Spjd	case PRIV_VFS_MOUNT_OWNER:
3840224615Smm		if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT &&
3841224615Smm		    cred->cr_prison->pr_enforce_statfs < 2)
3842168396Spjd			return (0);
3843168396Spjd		else
3844168396Spjd			return (EPERM);
3845168396Spjd
3846168396Spjd		/*
3847168591Srwatson		 * Allow jailed root to bind reserved ports and reuse in-use
3848168591Srwatson		 * ports.
3849164032Srwatson		 */
3850164032Srwatson	case PRIV_NETINET_RESERVEDPORT:
3851168591Srwatson	case PRIV_NETINET_REUSEPORT:
3852164032Srwatson		return (0);
3853164032Srwatson
3854164032Srwatson		/*
3855175630Sbz		 * Allow jailed root to set certian IPv4/6 (option) headers.
3856175630Sbz		 */
3857175630Sbz	case PRIV_NETINET_SETHDROPTS:
3858175630Sbz		return (0);
3859175630Sbz
3860175630Sbz		/*
3861164032Srwatson		 * Conditionally allow creating raw sockets in jail.
3862164032Srwatson		 */
3863164032Srwatson	case PRIV_NETINET_RAW:
3864192895Sjamie		if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS)
3865164032Srwatson			return (0);
3866164032Srwatson		else
3867164032Srwatson			return (EPERM);
3868164032Srwatson
3869164032Srwatson		/*
3870164032Srwatson		 * Since jail implements its own visibility limits on netstat
3871164032Srwatson		 * sysctls, allow getcred.  This allows identd to work in
3872164032Srwatson		 * jail.
3873164032Srwatson		 */
3874164032Srwatson	case PRIV_NETINET_GETCRED:
3875164032Srwatson		return (0);
3876164032Srwatson
3877219304Strasz		/*
3878219304Strasz		 * Allow jailed root to set loginclass.
3879219304Strasz		 */
3880219304Strasz	case PRIV_PROC_SETLOGINCLASS:
3881219304Strasz		return (0);
3882219304Strasz
3883164032Srwatson	default:
3884164032Srwatson		/*
3885164032Srwatson		 * In all remaining cases, deny the privilege request.  This
3886164032Srwatson		 * includes almost all network privileges, many system
3887164032Srwatson		 * configuration privileges.
3888164032Srwatson		 */
3889164032Srwatson		return (EPERM);
3890164032Srwatson	}
3891164032Srwatson}
3892164032Srwatson
3893192895Sjamie/*
3894192895Sjamie * Return the part of pr2's name that is relative to pr1, or the whole name
3895192895Sjamie * if it does not directly follow.
3896192895Sjamie */
3897192895Sjamie
3898192895Sjamiechar *
3899192895Sjamieprison_name(struct prison *pr1, struct prison *pr2)
3900192895Sjamie{
3901192895Sjamie	char *name;
3902192895Sjamie
3903192895Sjamie	/* Jails see themselves as "0" (if they see themselves at all). */
3904192895Sjamie	if (pr1 == pr2)
3905192895Sjamie		return "0";
3906192895Sjamie	name = pr2->pr_name;
3907192895Sjamie	if (prison_ischild(pr1, pr2)) {
3908192895Sjamie		/*
3909192895Sjamie		 * pr1 isn't locked (and allprison_lock may not be either)
3910192895Sjamie		 * so its length can't be counted on.  But the number of dots
3911192895Sjamie		 * can be counted on - and counted.
3912192895Sjamie		 */
3913192895Sjamie		for (; pr1 != &prison0; pr1 = pr1->pr_parent)
3914192895Sjamie			name = strchr(name, '.') + 1;
3915192895Sjamie	}
3916192895Sjamie	return (name);
3917192895Sjamie}
3918192895Sjamie
3919192895Sjamie/*
3920192895Sjamie * Return the part of pr2's path that is relative to pr1, or the whole path
3921192895Sjamie * if it does not directly follow.
3922192895Sjamie */
3923192895Sjamiestatic char *
3924192895Sjamieprison_path(struct prison *pr1, struct prison *pr2)
3925192895Sjamie{
3926192895Sjamie	char *path1, *path2;
3927192895Sjamie	int len1;
3928192895Sjamie
3929192895Sjamie	path1 = pr1->pr_path;
3930192895Sjamie	path2 = pr2->pr_path;
3931192895Sjamie	if (!strcmp(path1, "/"))
3932192895Sjamie		return (path2);
3933192895Sjamie	len1 = strlen(path1);
3934192895Sjamie	if (strncmp(path1, path2, len1))
3935192895Sjamie		return (path2);
3936192895Sjamie	if (path2[len1] == '\0')
3937192895Sjamie		return "/";
3938192895Sjamie	if (path2[len1] == '/')
3939192895Sjamie		return (path2 + len1);
3940192895Sjamie	return (path2);
3941192895Sjamie}
3942192895Sjamie
3943192895Sjamie
3944192895Sjamie/*
3945192895Sjamie * Jail-related sysctls.
3946192895Sjamie */
3947192895SjamieSYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
3948192895Sjamie    "Jails");
3949192895Sjamie
3950113275Smikestatic int
3951113275Smikesysctl_jail_list(SYSCTL_HANDLER_ARGS)
3952113275Smike{
3953191673Sjamie	struct xprison *xp;
3954192895Sjamie	struct prison *pr, *cpr;
3955191673Sjamie#ifdef INET
3956191673Sjamie	struct in_addr *ip4 = NULL;
3957191673Sjamie	int ip4s = 0;
3958191673Sjamie#endif
3959191673Sjamie#ifdef INET6
3960208803Scperciva	struct in6_addr *ip6 = NULL;
3961191673Sjamie	int ip6s = 0;
3962191673Sjamie#endif
3963192895Sjamie	int descend, error;
3964113275Smike
3965191673Sjamie	xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK);
3966192895Sjamie	pr = req->td->td_ucred->cr_prison;
3967191673Sjamie	error = 0;
3968168401Spjd	sx_slock(&allprison_lock);
3969192895Sjamie	FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
3970192895Sjamie#if defined(INET) || defined(INET6)
3971191673Sjamie again:
3972192895Sjamie#endif
3973192895Sjamie		mtx_lock(&cpr->pr_mtx);
3974185435Sbz#ifdef INET
3975192895Sjamie		if (cpr->pr_ip4s > 0) {
3976192895Sjamie			if (ip4s < cpr->pr_ip4s) {
3977192895Sjamie				ip4s = cpr->pr_ip4s;
3978192895Sjamie				mtx_unlock(&cpr->pr_mtx);
3979191673Sjamie				ip4 = realloc(ip4, ip4s *
3980191673Sjamie				    sizeof(struct in_addr), M_TEMP, M_WAITOK);
3981191673Sjamie				goto again;
3982191673Sjamie			}
3983192895Sjamie			bcopy(cpr->pr_ip4, ip4,
3984192895Sjamie			    cpr->pr_ip4s * sizeof(struct in_addr));
3985191673Sjamie		}
3986185435Sbz#endif
3987185435Sbz#ifdef INET6
3988192895Sjamie		if (cpr->pr_ip6s > 0) {
3989192895Sjamie			if (ip6s < cpr->pr_ip6s) {
3990192895Sjamie				ip6s = cpr->pr_ip6s;
3991192895Sjamie				mtx_unlock(&cpr->pr_mtx);
3992191673Sjamie				ip6 = realloc(ip6, ip6s *
3993191673Sjamie				    sizeof(struct in6_addr), M_TEMP, M_WAITOK);
3994191673Sjamie				goto again;
3995191673Sjamie			}
3996192895Sjamie			bcopy(cpr->pr_ip6, ip6,
3997192895Sjamie			    cpr->pr_ip6s * sizeof(struct in6_addr));
3998191673Sjamie		}
3999185435Sbz#endif
4000192895Sjamie		if (cpr->pr_ref == 0) {
4001192895Sjamie			mtx_unlock(&cpr->pr_mtx);
4002191673Sjamie			continue;
4003191673Sjamie		}
4004191673Sjamie		bzero(xp, sizeof(*xp));
4005113275Smike		xp->pr_version = XPRISON_VERSION;
4006192895Sjamie		xp->pr_id = cpr->pr_id;
4007192895Sjamie		xp->pr_state = cpr->pr_uref > 0
4008191673Sjamie		    ? PRISON_STATE_ALIVE : PRISON_STATE_DYING;
4009192895Sjamie		strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path));
4010194118Sjamie		strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host));
4011192895Sjamie		strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name));
4012185435Sbz#ifdef INET
4013192895Sjamie		xp->pr_ip4s = cpr->pr_ip4s;
4014185435Sbz#endif
4015185435Sbz#ifdef INET6
4016192895Sjamie		xp->pr_ip6s = cpr->pr_ip6s;
4017185435Sbz#endif
4018192895Sjamie		mtx_unlock(&cpr->pr_mtx);
4019191673Sjamie		error = SYSCTL_OUT(req, xp, sizeof(*xp));
4020191673Sjamie		if (error)
4021191673Sjamie			break;
4022185435Sbz#ifdef INET
4023191673Sjamie		if (xp->pr_ip4s > 0) {
4024191673Sjamie			error = SYSCTL_OUT(req, ip4,
4025191673Sjamie			    xp->pr_ip4s * sizeof(struct in_addr));
4026191673Sjamie			if (error)
4027191673Sjamie				break;
4028185435Sbz		}
4029185435Sbz#endif
4030185435Sbz#ifdef INET6
4031191673Sjamie		if (xp->pr_ip6s > 0) {
4032191673Sjamie			error = SYSCTL_OUT(req, ip6,
4033191673Sjamie			    xp->pr_ip6s * sizeof(struct in6_addr));
4034191673Sjamie			if (error)
4035191673Sjamie				break;
4036185435Sbz		}
4037185435Sbz#endif
4038113275Smike	}
4039168401Spjd	sx_sunlock(&allprison_lock);
4040191673Sjamie	free(xp, M_TEMP);
4041191673Sjamie#ifdef INET
4042191673Sjamie	free(ip4, M_TEMP);
4043191673Sjamie#endif
4044191673Sjamie#ifdef INET6
4045191673Sjamie	free(ip6, M_TEMP);
4046191673Sjamie#endif
4047167354Spjd	return (error);
4048113275Smike}
4049113275Smike
4050187864SedSYSCTL_OID(_security_jail, OID_AUTO, list,
4051187864Sed    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
4052187864Sed    sysctl_jail_list, "S", "List of active jails");
4053126004Spjd
4054126004Spjdstatic int
4055126004Spjdsysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
4056126004Spjd{
4057126004Spjd	int error, injail;
4058126004Spjd
4059126004Spjd	injail = jailed(req->td->td_ucred);
4060126004Spjd	error = SYSCTL_OUT(req, &injail, sizeof(injail));
4061126004Spjd
4062126004Spjd	return (error);
4063126004Spjd}
4064192895Sjamie
4065187864SedSYSCTL_PROC(_security_jail, OID_AUTO, jailed,
4066187864Sed    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
4067187864Sed    sysctl_jail_jailed, "I", "Process in jail?");
4068185435Sbz
#if defined(INET) || defined(INET6)
/* Read/write unsigned knob exposing the jail_max_af_ips variable. */
SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips,
    CTLFLAG_RW, &jail_max_af_ips, 0,
    "Number of IP addresses a jail may have at most per address family");
#endif
4074192895Sjamie
/*
 * Default parameters for jail(2) compatibility.  For historical reasons,
 * the sysctl names have varying similarity to the parameter names.  Prisons
 * just see their own parameters, and can't change them.
 */
4080192895Sjamiestatic int
4081192895Sjamiesysctl_jail_default_allow(SYSCTL_HANDLER_ARGS)
4082192895Sjamie{
4083192895Sjamie	struct prison *pr;
4084192895Sjamie	int allow, error, i;
4085192895Sjamie
4086192895Sjamie	pr = req->td->td_ucred->cr_prison;
4087192895Sjamie	allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow;
4088192895Sjamie
4089192895Sjamie	/* Get the current flag value, and convert it to a boolean. */
4090192895Sjamie	i = (allow & arg2) ? 1 : 0;
4091192895Sjamie	if (arg1 != NULL)
4092192895Sjamie		i = !i;
4093192895Sjamie	error = sysctl_handle_int(oidp, &i, 0, req);
4094192895Sjamie	if (error || !req->newptr)
4095192895Sjamie		return (error);
4096192895Sjamie	i = i ? arg2 : 0;
4097192895Sjamie	if (arg1 != NULL)
4098192895Sjamie		i ^= arg2;
4099192895Sjamie	/*
4100192895Sjamie	 * The sysctls don't have CTLFLAGS_PRISON, so assume prison0
4101192895Sjamie	 * for writing.
4102192895Sjamie	 */
4103192895Sjamie	mtx_lock(&prison0.pr_mtx);
4104192895Sjamie	jail_default_allow = (jail_default_allow & ~arg2) | i;
4105192895Sjamie	mtx_unlock(&prison0.pr_mtx);
4106192895Sjamie	return (0);
4107192895Sjamie}
4108192895Sjamie
4109192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed,
4110192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
4111192895Sjamie    NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I",
4112192895Sjamie    "Processes in jail can set their hostnames");
4113192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only,
4114192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
4115192895Sjamie    (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I",
4116192895Sjamie    "Processes in jail are limited to creating UNIX/IP/route sockets only");
4117192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed,
4118192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
4119192895Sjamie    NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I",
4120192895Sjamie    "Processes in jail can use System V IPC primitives");
4121192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets,
4122192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
4123192895Sjamie    NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I",
4124192895Sjamie    "Prison root can create raw sockets");
4125192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed,
4126192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
4127192895Sjamie    NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I",
4128192895Sjamie    "Processes in jail can alter system file flags");
4129192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed,
4130192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
4131192895Sjamie    NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I",
4132192895Sjamie    "Processes in jail can mount/unmount jail-friendly file systems");
4133192895Sjamie
4134192895Sjamiestatic int
4135192895Sjamiesysctl_jail_default_level(SYSCTL_HANDLER_ARGS)
4136192895Sjamie{
4137192895Sjamie	struct prison *pr;
4138192895Sjamie	int level, error;
4139192895Sjamie
4140192895Sjamie	pr = req->td->td_ucred->cr_prison;
4141192895Sjamie	level = (pr == &prison0) ? *(int *)arg1 : *(int *)((char *)pr + arg2);
4142192895Sjamie	error = sysctl_handle_int(oidp, &level, 0, req);
4143192895Sjamie	if (error || !req->newptr)
4144192895Sjamie		return (error);
4145192895Sjamie	*(int *)arg1 = level;
4146192895Sjamie	return (0);
4147192895Sjamie}
4148192895Sjamie
4149192895SjamieSYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs,
4150192895Sjamie    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
4151192895Sjamie    &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs),
4152192895Sjamie    sysctl_jail_default_level, "I",
4153192895Sjamie    "Processes in jail cannot see all mounted file systems");
4154192895Sjamie
4155192895Sjamie/*
4156192895Sjamie * Nodes to describe jail parameters.  Maximum length of string parameters
4157192895Sjamie * is returned in the string itself, and the other parameters exist merely
4158192895Sjamie * to make themselves and their types known.
4159192895Sjamie */
4160192895SjamieSYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0,
4161192895Sjamie    "Jail parameters");
4162192895Sjamie
4163192895Sjamieint
4164192895Sjamiesysctl_jail_param(SYSCTL_HANDLER_ARGS)
4165192895Sjamie{
4166192895Sjamie	int i;
4167192895Sjamie	long l;
4168192895Sjamie	size_t s;
4169192895Sjamie	char numbuf[12];
4170192895Sjamie
4171192895Sjamie	switch (oidp->oid_kind & CTLTYPE)
4172192895Sjamie	{
4173192895Sjamie	case CTLTYPE_LONG:
4174192895Sjamie	case CTLTYPE_ULONG:
4175192895Sjamie		l = 0;
4176192895Sjamie#ifdef SCTL_MASK32
4177192895Sjamie		if (!(req->flags & SCTL_MASK32))
4178192895Sjamie#endif
4179192895Sjamie			return (SYSCTL_OUT(req, &l, sizeof(l)));
4180192895Sjamie	case CTLTYPE_INT:
4181192895Sjamie	case CTLTYPE_UINT:
4182192895Sjamie		i = 0;
4183192895Sjamie		return (SYSCTL_OUT(req, &i, sizeof(i)));
4184192895Sjamie	case CTLTYPE_STRING:
4185219819Sjeff		snprintf(numbuf, sizeof(numbuf), "%jd", (intmax_t)arg2);
4186192895Sjamie		return
4187192895Sjamie		    (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req));
4188192895Sjamie	case CTLTYPE_STRUCT:
4189192895Sjamie		s = (size_t)arg2;
4190192895Sjamie		return (SYSCTL_OUT(req, &s, sizeof(s)));
4191192895Sjamie	}
4192192895Sjamie	return (0);
4193192895Sjamie}
4194192895Sjamie
4195192895SjamieSYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID");
4196192895SjamieSYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID");
4197192895SjamieSYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name");
4198192895SjamieSYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path");
4199192895SjamieSYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW,
4200192895Sjamie    "I", "Jail secure level");
4201192895SjamieSYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW,
4202192895Sjamie    "I", "Jail cannot see all mounted file systems");
4203192895SjamieSYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW,
4204192895Sjamie    "B", "Jail persistence");
4205194251Sjamie#ifdef VIMAGE
4206194251SjamieSYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN,
4207195870Sjamie    "E,jailsys", "Virtual network stack");
4208194251Sjamie#endif
4209192895SjamieSYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD,
4210192895Sjamie    "B", "Jail is in the process of shutting down");
4211192895Sjamie
4212194762SjamieSYSCTL_JAIL_PARAM_NODE(children, "Number of child jails");
4213194762SjamieSYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD,
4214194762Sjamie    "I", "Current number of child jails");
4215194762SjamieSYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW,
4216194762Sjamie    "I", "Maximum number of child jails");
4217194762Sjamie
4218195870SjamieSYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info");
4219192895SjamieSYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN,
4220192895Sjamie    "Jail hostname");
4221193066SjamieSYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN,
4222193066Sjamie    "Jail NIS domainname");
4223193066SjamieSYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN,
4224193066Sjamie    "Jail host UUID");
4225193066SjamieSYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW,
4226193066Sjamie    "LU", "Jail host ID");
4227192895Sjamie
4228192895SjamieSYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset");
4229192895SjamieSYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID");
4230192895Sjamie
#ifdef INET
/* "ip4" subtree: address list and source address selection switch. */
SYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN,
    "Jail IPv4 address virtualization");
SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW,
    sizeof(struct in_addr), "S,in_addr,a", "Jail IPv4 addresses");
SYSCTL_JAIL_PARAM(_ip4, saddrsel, CTLTYPE_INT | CTLFLAG_RW, "B",
    "Do (not) use IPv4 source address selection rather than the "
    "primary jail IPv4 address.");
#endif
#ifdef INET6
/* "ip6" subtree: address list and source address selection switch. */
SYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN,
    "Jail IPv6 address virtualization");
SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW,
    sizeof(struct in6_addr), "S,in6_addr,a", "Jail IPv6 addresses");
SYSCTL_JAIL_PARAM(_ip6, saddrsel, CTLTYPE_INT | CTLFLAG_RW, "B",
    "Do (not) use IPv6 source address selection rather than the "
    "primary jail IPv6 address.");
#endif
4249192895Sjamie
4250192895SjamieSYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags");
4251192895SjamieSYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW,
4252192895Sjamie    "B", "Jail may set hostname");
4253192895SjamieSYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW,
4254192895Sjamie    "B", "Jail may use SYSV IPC");
4255192895SjamieSYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW,
4256192895Sjamie    "B", "Jail may create raw sockets");
4257192895SjamieSYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW,
4258192895Sjamie    "B", "Jail may alter system file flags");
4259192895SjamieSYSCTL_JAIL_PARAM(_allow, mount, CTLTYPE_INT | CTLFLAG_RW,
4260192895Sjamie    "B", "Jail may mount/unmount jail-friendly file systems");
4261192895SjamieSYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW,
4262192895Sjamie    "B", "Jail may set file quotas");
4263192895SjamieSYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW,
4264192895Sjamie    "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route");
4265192895Sjamie
4266220137Straszvoid
4267220137Straszprison_racct_foreach(void (*callback)(struct racct *racct,
4268220137Strasz    void *arg2, void *arg3), void *arg2, void *arg3)
4269220137Strasz{
4270221362Strasz	struct prison_racct *prr;
4271192895Sjamie
4272220137Strasz	sx_slock(&allprison_lock);
4273221362Strasz	LIST_FOREACH(prr, &allprison_racct, prr_next)
4274221362Strasz		(callback)(prr->prr_racct, arg2, arg3);
4275220137Strasz	sx_sunlock(&allprison_lock);
4276220137Strasz}
4277220137Strasz
4278221362Straszstatic struct prison_racct *
4279221362Straszprison_racct_find_locked(const char *name)
4280221362Strasz{
4281221362Strasz	struct prison_racct *prr;
4282221362Strasz
4283221362Strasz	sx_assert(&allprison_lock, SA_XLOCKED);
4284221362Strasz
4285221362Strasz	if (name[0] == '\0' || strlen(name) >= MAXHOSTNAMELEN)
4286221362Strasz		return (NULL);
4287221362Strasz
4288221362Strasz	LIST_FOREACH(prr, &allprison_racct, prr_next) {
4289221362Strasz		if (strcmp(name, prr->prr_name) != 0)
4290221362Strasz			continue;
4291221362Strasz
4292221362Strasz		/* Found prison_racct with a matching name? */
4293221362Strasz		prison_racct_hold(prr);
4294221362Strasz		return (prr);
4295221362Strasz	}
4296221362Strasz
4297221362Strasz	/* Add new prison_racct. */
4298221362Strasz	prr = malloc(sizeof(*prr), M_PRISON_RACCT, M_ZERO | M_WAITOK);
4299221362Strasz	racct_create(&prr->prr_racct);
4300221362Strasz
4301221362Strasz	strcpy(prr->prr_name, name);
4302221362Strasz	refcount_init(&prr->prr_refcount, 1);
4303221362Strasz	LIST_INSERT_HEAD(&allprison_racct, prr, prr_next);
4304221362Strasz
4305221362Strasz	return (prr);
4306221362Strasz}
4307221362Strasz
4308221362Straszstruct prison_racct *
4309221362Straszprison_racct_find(const char *name)
4310221362Strasz{
4311221362Strasz	struct prison_racct *prr;
4312221362Strasz
4313221362Strasz	sx_xlock(&allprison_lock);
4314221362Strasz	prr = prison_racct_find_locked(name);
4315221362Strasz	sx_xunlock(&allprison_lock);
4316221362Strasz	return (prr);
4317221362Strasz}
4318221362Strasz
4319221362Straszvoid
4320221362Straszprison_racct_hold(struct prison_racct *prr)
4321221362Strasz{
4322221362Strasz
4323221362Strasz	refcount_acquire(&prr->prr_refcount);
4324221362Strasz}
4325221362Strasz
4326221362Straszvoid
4327221362Straszprison_racct_free(struct prison_racct *prr)
4328221362Strasz{
4329221362Strasz	int old;
4330221362Strasz
4331221362Strasz	old = prr->prr_refcount;
4332221362Strasz	if (old > 1 && atomic_cmpset_int(&prr->prr_refcount, old, old - 1))
4333221362Strasz		return;
4334221362Strasz
4335221362Strasz	sx_xlock(&allprison_lock);
4336221362Strasz	if (refcount_release(&prr->prr_refcount)) {
4337221362Strasz		racct_destroy(&prr->prr_racct);
4338221362Strasz		LIST_REMOVE(prr, prr_next);
4339221362Strasz		sx_xunlock(&allprison_lock);
4340221362Strasz		free(prr, M_PRISON_RACCT);
4341221362Strasz
4342221362Strasz		return;
4343221362Strasz	}
4344221362Strasz	sx_xunlock(&allprison_lock);
4345221362Strasz}
4346221362Strasz
4347221362Strasz#ifdef RACCT
4348221362Straszstatic void
4349221362Straszprison_racct_attach(struct prison *pr)
4350221362Strasz{
4351221362Strasz	struct prison_racct *prr;
4352221362Strasz
4353221362Strasz	prr = prison_racct_find_locked(pr->pr_name);
4354221362Strasz	KASSERT(prr != NULL, ("cannot find prison_racct"));
4355221362Strasz
4356221362Strasz	pr->pr_prison_racct = prr;
4357221362Strasz}
4358221362Strasz
4359221362Straszstatic void
4360221362Straszprison_racct_detach(struct prison *pr)
4361221362Strasz{
4362221362Strasz	prison_racct_free(pr->pr_prison_racct);
4363221362Strasz	pr->pr_prison_racct = NULL;
4364221362Strasz}
4365221362Strasz#endif /* RACCT */
4366221362Strasz
4367185435Sbz#ifdef DDB
4368191673Sjamie
4369191673Sjamiestatic void
4370191673Sjamiedb_show_prison(struct prison *pr)
4371185435Sbz{
4372192895Sjamie	int fi;
4373191673Sjamie#if defined(INET) || defined(INET6)
4374191673Sjamie	int ii;
4375185435Sbz#endif
4376195870Sjamie	unsigned jsf;
4377185435Sbz#ifdef INET6
4378185435Sbz	char ip6buf[INET6_ADDRSTRLEN];
4379185435Sbz#endif
4380185435Sbz
4381191673Sjamie	db_printf("prison %p:\n", pr);
4382191673Sjamie	db_printf(" jid             = %d\n", pr->pr_id);
4383191673Sjamie	db_printf(" name            = %s\n", pr->pr_name);
4384192895Sjamie	db_printf(" parent          = %p\n", pr->pr_parent);
4385191673Sjamie	db_printf(" ref             = %d\n", pr->pr_ref);
4386191673Sjamie	db_printf(" uref            = %d\n", pr->pr_uref);
4387191673Sjamie	db_printf(" path            = %s\n", pr->pr_path);
4388191673Sjamie	db_printf(" cpuset          = %d\n", pr->pr_cpuset
4389191673Sjamie	    ? pr->pr_cpuset->cs_id : -1);
4390194251Sjamie#ifdef VIMAGE
4391194251Sjamie	db_printf(" vnet            = %p\n", pr->pr_vnet);
4392194251Sjamie#endif
4393191673Sjamie	db_printf(" root            = %p\n", pr->pr_root);
4394191673Sjamie	db_printf(" securelevel     = %d\n", pr->pr_securelevel);
4395202123Sbz	db_printf(" children.max    = %d\n", pr->pr_childmax);
4396202123Sbz	db_printf(" children.cur    = %d\n", pr->pr_childcount);
4397192895Sjamie	db_printf(" child           = %p\n", LIST_FIRST(&pr->pr_children));
4398192895Sjamie	db_printf(" sibling         = %p\n", LIST_NEXT(pr, pr_sibling));
4399202123Sbz	db_printf(" flags           = 0x%x", pr->pr_flags);
4400192895Sjamie	for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]);
4401192895Sjamie	    fi++)
4402192895Sjamie		if (pr_flag_names[fi] != NULL && (pr->pr_flags & (1 << fi)))
4403192895Sjamie			db_printf(" %s", pr_flag_names[fi]);
4404195870Sjamie	for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]);
4405195870Sjamie	    fi++) {
4406195870Sjamie		jsf = pr->pr_flags &
4407195870Sjamie		    (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new);
4408195870Sjamie		db_printf(" %-16s= %s\n", pr_flag_jailsys[fi].name,
4409195870Sjamie		    pr_flag_jailsys[fi].disable &&
4410195870Sjamie		      (jsf == pr_flag_jailsys[fi].disable) ? "disable"
4411195870Sjamie		    : (jsf == pr_flag_jailsys[fi].new) ? "new"
4412195870Sjamie		    : "inherit");
4413195870Sjamie	}
4414202123Sbz	db_printf(" allow           = 0x%x", pr->pr_allow);
4415192895Sjamie	for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]);
4416192895Sjamie	    fi++)
4417192895Sjamie		if (pr_allow_names[fi] != NULL && (pr->pr_allow & (1 << fi)))
4418192895Sjamie			db_printf(" %s", pr_allow_names[fi]);
4419191673Sjamie	db_printf("\n");
4420192895Sjamie	db_printf(" enforce_statfs  = %d\n", pr->pr_enforce_statfs);
4421194118Sjamie	db_printf(" host.hostname   = %s\n", pr->pr_hostname);
4422194118Sjamie	db_printf(" host.domainname = %s\n", pr->pr_domainname);
4423194118Sjamie	db_printf(" host.hostuuid   = %s\n", pr->pr_hostuuid);
4424193066Sjamie	db_printf(" host.hostid     = %lu\n", pr->pr_hostid);
4425185435Sbz#ifdef INET
4426191673Sjamie	db_printf(" ip4s            = %d\n", pr->pr_ip4s);
4427191673Sjamie	for (ii = 0; ii < pr->pr_ip4s; ii++)
4428191673Sjamie		db_printf(" %s %s\n",
4429202123Sbz		    ii == 0 ? "ip4.addr        =" : "                 ",
4430191673Sjamie		    inet_ntoa(pr->pr_ip4[ii]));
4431185435Sbz#endif
4432185435Sbz#ifdef INET6
4433191673Sjamie	db_printf(" ip6s            = %d\n", pr->pr_ip6s);
4434191673Sjamie	for (ii = 0; ii < pr->pr_ip6s; ii++)
4435191673Sjamie		db_printf(" %s %s\n",
4436202123Sbz		    ii == 0 ? "ip6.addr        =" : "                 ",
4437191673Sjamie		    ip6_sprintf(ip6buf, &pr->pr_ip6[ii]));
4438191673Sjamie#endif
4439191673Sjamie}
4440191673Sjamie
4441191673SjamieDB_SHOW_COMMAND(prison, db_show_prison_command)
4442191673Sjamie{
4443191673Sjamie	struct prison *pr;
4444191673Sjamie
4445191673Sjamie	if (!have_addr) {
4446192895Sjamie		/*
4447192895Sjamie		 * Show all prisons in the list, and prison0 which is not
4448192895Sjamie		 * listed.
4449192895Sjamie		 */
4450192895Sjamie		db_show_prison(&prison0);
4451192895Sjamie		if (!db_pager_quit) {
4452192895Sjamie			TAILQ_FOREACH(pr, &allprison, pr_list) {
4453192895Sjamie				db_show_prison(pr);
4454192895Sjamie				if (db_pager_quit)
4455192895Sjamie					break;
4456192895Sjamie			}
4457191673Sjamie		}
4458191673Sjamie		return;
4459191673Sjamie	}
4460191673Sjamie
4461192895Sjamie	if (addr == 0)
4462192895Sjamie		pr = &prison0;
4463192895Sjamie	else {
4464192895Sjamie		/* Look for a prison with the ID and with references. */
4465191673Sjamie		TAILQ_FOREACH(pr, &allprison, pr_list)
4466192895Sjamie			if (pr->pr_id == addr && pr->pr_ref > 0)
4467191673Sjamie				break;
4468192895Sjamie		if (pr == NULL)
4469192895Sjamie			/* Look again, without requiring a reference. */
4470192895Sjamie			TAILQ_FOREACH(pr, &allprison, pr_list)
4471192895Sjamie				if (pr->pr_id == addr)
4472192895Sjamie					break;
4473192895Sjamie		if (pr == NULL)
4474192895Sjamie			/* Assume address points to a valid prison. */
4475192895Sjamie			pr = (struct prison *)addr;
4476192895Sjamie	}
4477191673Sjamie	db_show_prison(pr);
4478185435Sbz}
4479191673Sjamie
4480185435Sbz#endif /* DDB */
4481