kern_jail.c revision 166827
1139804Simp/*-
246197Sphk * ----------------------------------------------------------------------------
346197Sphk * "THE BEER-WARE LICENSE" (Revision 42):
446197Sphk * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
546197Sphk * can do whatever you want with this stuff. If we meet some day, and you think
646197Sphk * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
746197Sphk * ----------------------------------------------------------------------------
846197Sphk */
946155Sphk
10116182Sobrien#include <sys/cdefs.h>
11116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_jail.c 166827 2007-02-19 13:10:29Z rwatson $");
12116182Sobrien
13131177Spjd#include "opt_mac.h"
14131177Spjd
1546155Sphk#include <sys/param.h>
1646155Sphk#include <sys/types.h>
1746155Sphk#include <sys/kernel.h>
1846155Sphk#include <sys/systm.h>
1946155Sphk#include <sys/errno.h>
2046155Sphk#include <sys/sysproto.h>
2146155Sphk#include <sys/malloc.h>
22164032Srwatson#include <sys/priv.h>
2346155Sphk#include <sys/proc.h>
24124882Srwatson#include <sys/taskqueue.h>
2546155Sphk#include <sys/jail.h>
2687275Srwatson#include <sys/lock.h>
2787275Srwatson#include <sys/mutex.h>
28113275Smike#include <sys/namei.h>
29147185Spjd#include <sys/mount.h>
30113275Smike#include <sys/queue.h>
3146155Sphk#include <sys/socket.h>
32113275Smike#include <sys/syscallsubr.h>
3357163Srwatson#include <sys/sysctl.h>
34113275Smike#include <sys/vnode.h>
3546155Sphk#include <net/if.h>
3646155Sphk#include <netinet/in.h>
3746155Sphk
38163606Srwatson#include <security/mac/mac_framework.h>
39163606Srwatson
4046155SphkMALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
4146155Sphk
4289414SarrSYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
4357163Srwatson    "Jail rules");
4457163Srwatson
4557163Srwatsonint	jail_set_hostname_allowed = 1;
4689414SarrSYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
4757163Srwatson    &jail_set_hostname_allowed, 0,
4857163Srwatson    "Processes in jail can set their hostnames");
4957163Srwatson
5061235Srwatsonint	jail_socket_unixiproute_only = 1;
5189414SarrSYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
5261235Srwatson    &jail_socket_unixiproute_only, 0,
5361235Srwatson    "Processes in jail are limited to creating UNIX/IPv4/route sockets only");
5461235Srwatson
5568024Srwatsonint	jail_sysvipc_allowed = 0;
5689414SarrSYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
5768024Srwatson    &jail_sysvipc_allowed, 0,
5868024Srwatson    "Processes in jail can use System V IPC primitives");
5968024Srwatson
60147185Spjdstatic int jail_enforce_statfs = 2;
61147185SpjdSYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
62147185Spjd    &jail_enforce_statfs, 0,
63147185Spjd    "Processes in jail cannot see all mounted file systems");
64125804Srwatson
65128664Sbmilekicint	jail_allow_raw_sockets = 0;
66128664SbmilekicSYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
67128664Sbmilekic    &jail_allow_raw_sockets, 0,
68128664Sbmilekic    "Prison root can create raw sockets");
69128664Sbmilekic
70141543Scpercivaint	jail_chflags_allowed = 0;
71141543ScpercivaSYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
72141543Scperciva    &jail_chflags_allowed, 0,
73141543Scperciva    "Processes in jail can alter system file flags");
74141543Scperciva
75113275Smike/* allprison, lastprid, and prisoncount are protected by allprison_mtx. */
76113275Smikestruct	prisonlist allprison;
77113275Smikestruct	mtx allprison_mtx;
78113275Smikeint	lastprid = 0;
79113275Smikeint	prisoncount = 0;
80113275Smike
81113275Smikestatic void		 init_prison(void *);
82124882Srwatsonstatic void		 prison_complete(void *context, int pending);
83113275Smikestatic struct prison	*prison_find(int);
84113275Smikestatic int		 sysctl_jail_list(SYSCTL_HANDLER_ARGS);
85113275Smike
86113275Smikestatic void
87113275Smikeinit_prison(void *data __unused)
88113275Smike{
89113275Smike
90113275Smike	mtx_init(&allprison_mtx, "allprison", NULL, MTX_DEF);
91113275Smike	LIST_INIT(&allprison);
92113275Smike}
93113275Smike
94113275SmikeSYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
95113275Smike
9682710Sdillon/*
9782710Sdillon * MPSAFE
98114168Smike *
99114168Smike * struct jail_args {
100114168Smike *	struct jail *jail;
101114168Smike * };
10282710Sdillon */
10346155Sphkint
104114168Smikejail(struct thread *td, struct jail_args *uap)
10546155Sphk{
106113275Smike	struct nameidata nd;
107113275Smike	struct prison *pr, *tpr;
10846155Sphk	struct jail j;
109113275Smike	struct jail_attach_args jaa;
110150652Scsjp	int vfslocked, error, tryprid;
11146155Sphk
112114168Smike	error = copyin(uap->jail, &j, sizeof(j));
11346155Sphk	if (error)
11484828Sjhb		return (error);
11584828Sjhb	if (j.version != 0)
11684828Sjhb		return (EINVAL);
11784828Sjhb
118114168Smike	MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
11993818Sjhb	mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
120113275Smike	pr->pr_ref = 1;
121114168Smike	error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0);
122113275Smike	if (error)
123113275Smike		goto e_killmtx;
124150652Scsjp	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
125150652Scsjp	    pr->pr_path, td);
126113275Smike	error = namei(&nd);
127150652Scsjp	if (error)
128113275Smike		goto e_killmtx;
129150652Scsjp	vfslocked = NDHASGIANT(&nd);
130113275Smike	pr->pr_root = nd.ni_vp;
131113275Smike	VOP_UNLOCK(nd.ni_vp, 0, td);
132113275Smike	NDFREE(&nd, NDF_ONLY_PNBUF);
133150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
134114168Smike	error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
13584828Sjhb	if (error)
136113275Smike		goto e_dropvnref;
137113275Smike	pr->pr_ip = j.ip_number;
138113275Smike	pr->pr_linux = NULL;
139113275Smike	pr->pr_securelevel = securelevel;
140113275Smike
141113275Smike	/* Determine next pr_id and add prison to allprison list. */
142113275Smike	mtx_lock(&allprison_mtx);
143113275Smike	tryprid = lastprid + 1;
144113275Smike	if (tryprid == JAIL_MAX)
145113275Smike		tryprid = 1;
146113275Smikenext:
147113275Smike	LIST_FOREACH(tpr, &allprison, pr_list) {
148113275Smike		if (tpr->pr_id == tryprid) {
149113275Smike			tryprid++;
150113275Smike			if (tryprid == JAIL_MAX) {
151113275Smike				mtx_unlock(&allprison_mtx);
152113275Smike				error = EAGAIN;
153113275Smike				goto e_dropvnref;
154113275Smike			}
155113275Smike			goto next;
156113275Smike		}
157113275Smike	}
158113275Smike	pr->pr_id = jaa.jid = lastprid = tryprid;
159113275Smike	LIST_INSERT_HEAD(&allprison, pr, pr_list);
160113275Smike	prisoncount++;
161113275Smike	mtx_unlock(&allprison_mtx);
162113275Smike
163113275Smike	error = jail_attach(td, &jaa);
164113275Smike	if (error)
165113275Smike		goto e_dropprref;
166113275Smike	mtx_lock(&pr->pr_mtx);
167113275Smike	pr->pr_ref--;
168113275Smike	mtx_unlock(&pr->pr_mtx);
169113275Smike	td->td_retval[0] = jaa.jid;
170113275Smike	return (0);
171113275Smikee_dropprref:
172113275Smike	mtx_lock(&allprison_mtx);
173113275Smike	LIST_REMOVE(pr, pr_list);
174113275Smike	prisoncount--;
175113275Smike	mtx_unlock(&allprison_mtx);
176113275Smikee_dropvnref:
177150652Scsjp	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
178113275Smike	vrele(pr->pr_root);
179150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
180113275Smikee_killmtx:
181113275Smike	mtx_destroy(&pr->pr_mtx);
182113275Smike	FREE(pr, M_PRISON);
183113275Smike	return (error);
184113275Smike}
185113275Smike
186113275Smike/*
187113275Smike * MPSAFE
188114168Smike *
189114168Smike * struct jail_attach_args {
190114168Smike *	int jid;
191114168Smike * };
192113275Smike */
193113275Smikeint
194114168Smikejail_attach(struct thread *td, struct jail_attach_args *uap)
195113275Smike{
196113275Smike	struct proc *p;
197113275Smike	struct ucred *newcred, *oldcred;
198113275Smike	struct prison *pr;
199150652Scsjp	int vfslocked, error;
200113275Smike
201126023Snectar	/*
202126023Snectar	 * XXX: Note that there is a slight race here if two threads
203126023Snectar	 * in the same privileged process attempt to attach to two
204126023Snectar	 * different jails at the same time.  It is important for
205126023Snectar	 * user processes not to do this, or they might end up with
206126023Snectar	 * a process root from one prison, but attached to the jail
207126023Snectar	 * of another.
208126023Snectar	 */
209164032Srwatson	error = priv_check(td, PRIV_JAIL_ATTACH);
210126023Snectar	if (error)
211126023Snectar		return (error);
212126023Snectar
213113275Smike	p = td->td_proc;
214113275Smike	mtx_lock(&allprison_mtx);
215113275Smike	pr = prison_find(uap->jid);
216113275Smike	if (pr == NULL) {
217113275Smike		mtx_unlock(&allprison_mtx);
218113275Smike		return (EINVAL);
219113275Smike	}
220113275Smike	pr->pr_ref++;
221113275Smike	mtx_unlock(&pr->pr_mtx);
222113275Smike	mtx_unlock(&allprison_mtx);
223113275Smike
224150652Scsjp	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
225113275Smike	vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td);
226113275Smike	if ((error = change_dir(pr->pr_root, td)) != 0)
227113275Smike		goto e_unlock;
228113275Smike#ifdef MAC
229113275Smike	if ((error = mac_check_vnode_chroot(td->td_ucred, pr->pr_root)))
230113275Smike		goto e_unlock;
231113275Smike#endif
232113275Smike	VOP_UNLOCK(pr->pr_root, 0, td);
233113275Smike	change_root(pr->pr_root, td);
234150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
235113275Smike
23684828Sjhb	newcred = crget();
23784828Sjhb	PROC_LOCK(p);
23884828Sjhb	oldcred = p->p_ucred;
239113275Smike	setsugid(p);
24084828Sjhb	crcopy(newcred, oldcred);
241113630Sjhb	newcred->cr_prison = pr;
24284828Sjhb	p->p_ucred = newcred;
24384828Sjhb	PROC_UNLOCK(p);
24484828Sjhb	crfree(oldcred);
24546155Sphk	return (0);
246113275Smikee_unlock:
247113275Smike	VOP_UNLOCK(pr->pr_root, 0, td);
248150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
249113275Smike	mtx_lock(&pr->pr_mtx);
250113275Smike	pr->pr_ref--;
251113275Smike	mtx_unlock(&pr->pr_mtx);
25246155Sphk	return (error);
25346155Sphk}
25446155Sphk
255113275Smike/*
256113275Smike * Returns a locked prison instance, or NULL on failure.
257113275Smike */
258113275Smikestatic struct prison *
259113275Smikeprison_find(int prid)
260113275Smike{
261113275Smike	struct prison *pr;
262113275Smike
263113275Smike	mtx_assert(&allprison_mtx, MA_OWNED);
264113275Smike	LIST_FOREACH(pr, &allprison, pr_list) {
265113275Smike		if (pr->pr_id == prid) {
266113275Smike			mtx_lock(&pr->pr_mtx);
267113275Smike			return (pr);
268113275Smike		}
269113275Smike	}
270113275Smike	return (NULL);
271113275Smike}
272113275Smike
27372786Srwatsonvoid
27472786Srwatsonprison_free(struct prison *pr)
27572786Srwatson{
27672786Srwatson
277113275Smike	mtx_lock(&allprison_mtx);
27887275Srwatson	mtx_lock(&pr->pr_mtx);
27972786Srwatson	pr->pr_ref--;
28072786Srwatson	if (pr->pr_ref == 0) {
281113275Smike		LIST_REMOVE(pr, pr_list);
28287275Srwatson		mtx_unlock(&pr->pr_mtx);
283113275Smike		prisoncount--;
284113275Smike		mtx_unlock(&allprison_mtx);
285124882Srwatson
286124882Srwatson		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
287144660Sjeff		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
28887275Srwatson		return;
28972786Srwatson	}
29087275Srwatson	mtx_unlock(&pr->pr_mtx);
291113275Smike	mtx_unlock(&allprison_mtx);
29272786Srwatson}
29372786Srwatson
294124882Srwatsonstatic void
295124882Srwatsonprison_complete(void *context, int pending)
296124882Srwatson{
297124882Srwatson	struct prison *pr;
298150652Scsjp	int vfslocked;
299124882Srwatson
300124882Srwatson	pr = (struct prison *)context;
301124882Srwatson
302150652Scsjp	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
303124882Srwatson	vrele(pr->pr_root);
304150652Scsjp	VFS_UNLOCK_GIANT(vfslocked);
305124882Srwatson
306124882Srwatson	mtx_destroy(&pr->pr_mtx);
307124882Srwatson	if (pr->pr_linux != NULL)
308124882Srwatson		FREE(pr->pr_linux, M_PRISON);
309124882Srwatson	FREE(pr, M_PRISON);
310124882Srwatson}
311124882Srwatson
31272786Srwatsonvoid
31372786Srwatsonprison_hold(struct prison *pr)
31472786Srwatson{
31572786Srwatson
31687275Srwatson	mtx_lock(&pr->pr_mtx);
31772786Srwatson	pr->pr_ref++;
31887275Srwatson	mtx_unlock(&pr->pr_mtx);
31972786Srwatson}
32072786Srwatson
32187275Srwatsonu_int32_t
32287275Srwatsonprison_getip(struct ucred *cred)
32387275Srwatson{
32487275Srwatson
32587275Srwatson	return (cred->cr_prison->pr_ip);
32687275Srwatson}
32787275Srwatson
32846155Sphkint
32972786Srwatsonprison_ip(struct ucred *cred, int flag, u_int32_t *ip)
33046155Sphk{
33146155Sphk	u_int32_t tmp;
33246155Sphk
33372786Srwatson	if (!jailed(cred))
33446155Sphk		return (0);
33546155Sphk	if (flag)
33646155Sphk		tmp = *ip;
33746155Sphk	else
33846155Sphk		tmp = ntohl(*ip);
33946155Sphk	if (tmp == INADDR_ANY) {
34046155Sphk		if (flag)
34172786Srwatson			*ip = cred->cr_prison->pr_ip;
34246155Sphk		else
34372786Srwatson			*ip = htonl(cred->cr_prison->pr_ip);
34446155Sphk		return (0);
34546155Sphk	}
34681114Srwatson	if (tmp == INADDR_LOOPBACK) {
34781114Srwatson		if (flag)
34881114Srwatson			*ip = cred->cr_prison->pr_ip;
34981114Srwatson		else
35081114Srwatson			*ip = htonl(cred->cr_prison->pr_ip);
35181114Srwatson		return (0);
35281114Srwatson	}
35372786Srwatson	if (cred->cr_prison->pr_ip != tmp)
35446155Sphk		return (1);
35546155Sphk	return (0);
35646155Sphk}
35746155Sphk
35846155Sphkvoid
35972786Srwatsonprison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip)
36046155Sphk{
36146155Sphk	u_int32_t tmp;
36246155Sphk
36372786Srwatson	if (!jailed(cred))
36446155Sphk		return;
36546155Sphk	if (flag)
36646155Sphk		tmp = *ip;
36746155Sphk	else
36846155Sphk		tmp = ntohl(*ip);
36981114Srwatson	if (tmp == INADDR_LOOPBACK) {
37046155Sphk		if (flag)
37172786Srwatson			*ip = cred->cr_prison->pr_ip;
37246155Sphk		else
37372786Srwatson			*ip = htonl(cred->cr_prison->pr_ip);
37446155Sphk		return;
37546155Sphk	}
37646155Sphk	return;
37746155Sphk}
37846155Sphk
37946155Sphkint
38072786Srwatsonprison_if(struct ucred *cred, struct sockaddr *sa)
38146155Sphk{
382114168Smike	struct sockaddr_in *sai;
38346155Sphk	int ok;
38446155Sphk
385114168Smike	sai = (struct sockaddr_in *)sa;
38661235Srwatson	if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only)
38761235Srwatson		ok = 1;
38861235Srwatson	else if (sai->sin_family != AF_INET)
38946155Sphk		ok = 0;
39072786Srwatson	else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr))
39146155Sphk		ok = 1;
39246155Sphk	else
39346155Sphk		ok = 0;
39446155Sphk	return (ok);
39546155Sphk}
39672786Srwatson
39772786Srwatson/*
39872786Srwatson * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
39972786Srwatson */
40072786Srwatsonint
401114168Smikeprison_check(struct ucred *cred1, struct ucred *cred2)
40272786Srwatson{
40372786Srwatson
40472786Srwatson	if (jailed(cred1)) {
40572786Srwatson		if (!jailed(cred2))
40672786Srwatson			return (ESRCH);
40772786Srwatson		if (cred2->cr_prison != cred1->cr_prison)
40872786Srwatson			return (ESRCH);
40972786Srwatson	}
41072786Srwatson
41172786Srwatson	return (0);
41272786Srwatson}
41372786Srwatson
41472786Srwatson/*
41572786Srwatson * Return 1 if the passed credential is in a jail, otherwise 0.
41672786Srwatson */
41772786Srwatsonint
418114168Smikejailed(struct ucred *cred)
41972786Srwatson{
42072786Srwatson
42172786Srwatson	return (cred->cr_prison != NULL);
42272786Srwatson}
42391384Srobert
42491384Srobert/*
42591384Srobert * Return the correct hostname for the passed credential.
42691384Srobert */
42791391Srobertvoid
428114168Smikegetcredhostname(struct ucred *cred, char *buf, size_t size)
42991384Srobert{
43091384Srobert
43191391Srobert	if (jailed(cred)) {
43291391Srobert		mtx_lock(&cred->cr_prison->pr_mtx);
433105354Srobert		strlcpy(buf, cred->cr_prison->pr_host, size);
43491391Srobert		mtx_unlock(&cred->cr_prison->pr_mtx);
435114168Smike	} else
436105354Srobert		strlcpy(buf, hostname, size);
43791384Srobert}
438113275Smike
439125804Srwatson/*
440147185Spjd * Determine whether the subject represented by cred can "see"
441147185Spjd * status of a mount point.
442147185Spjd * Returns: 0 for permitted, ENOENT otherwise.
443147185Spjd * XXX: This function should be called cr_canseemount() and should be
444147185Spjd *      placed in kern_prot.c.
445125804Srwatson */
446125804Srwatsonint
447147185Spjdprison_canseemount(struct ucred *cred, struct mount *mp)
448125804Srwatson{
449147185Spjd	struct prison *pr;
450147185Spjd	struct statfs *sp;
451147185Spjd	size_t len;
452125804Srwatson
453147185Spjd	if (!jailed(cred) || jail_enforce_statfs == 0)
454147185Spjd		return (0);
455147185Spjd	pr = cred->cr_prison;
456147185Spjd	if (pr->pr_root->v_mount == mp)
457147185Spjd		return (0);
458147185Spjd	if (jail_enforce_statfs == 2)
459147185Spjd		return (ENOENT);
460147185Spjd	/*
461147185Spjd	 * If jail's chroot directory is set to "/" we should be able to see
462147185Spjd	 * all mount-points from inside a jail.
463147185Spjd	 * This is ugly check, but this is the only situation when jail's
464147185Spjd	 * directory ends with '/'.
465147185Spjd	 */
466147185Spjd	if (strcmp(pr->pr_path, "/") == 0)
467147185Spjd		return (0);
468147185Spjd	len = strlen(pr->pr_path);
469147185Spjd	sp = &mp->mnt_stat;
470147185Spjd	if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
471147185Spjd		return (ENOENT);
472147185Spjd	/*
473147185Spjd	 * Be sure that we don't have situation where jail's root directory
474147185Spjd	 * is "/some/path" and mount point is "/some/pathpath".
475147185Spjd	 */
476147185Spjd	if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
477147185Spjd		return (ENOENT);
478147185Spjd	return (0);
479147185Spjd}
480147185Spjd
481147185Spjdvoid
482147185Spjdprison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
483147185Spjd{
484147185Spjd	char jpath[MAXPATHLEN];
485147185Spjd	struct prison *pr;
486147185Spjd	size_t len;
487147185Spjd
488147185Spjd	if (!jailed(cred) || jail_enforce_statfs == 0)
489147185Spjd		return;
490147185Spjd	pr = cred->cr_prison;
491147185Spjd	if (prison_canseemount(cred, mp) != 0) {
492147185Spjd		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
493147185Spjd		strlcpy(sp->f_mntonname, "[restricted]",
494147185Spjd		    sizeof(sp->f_mntonname));
495147185Spjd		return;
496125804Srwatson	}
497147185Spjd	if (pr->pr_root->v_mount == mp) {
498147185Spjd		/*
499147185Spjd		 * Clear current buffer data, so we are sure nothing from
500147185Spjd		 * the valid path left there.
501147185Spjd		 */
502147185Spjd		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
503147185Spjd		*sp->f_mntonname = '/';
504147185Spjd		return;
505147185Spjd	}
506147185Spjd	/*
507147185Spjd	 * If jail's chroot directory is set to "/" we should be able to see
508147185Spjd	 * all mount-points from inside a jail.
509147185Spjd	 */
510147185Spjd	if (strcmp(pr->pr_path, "/") == 0)
511147185Spjd		return;
512147185Spjd	len = strlen(pr->pr_path);
513147185Spjd	strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
514147185Spjd	/*
515147185Spjd	 * Clear current buffer data, so we are sure nothing from
516147185Spjd	 * the valid path left there.
517147185Spjd	 */
518147185Spjd	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
519147185Spjd	if (*jpath == '\0') {
520147185Spjd		/* Should never happen. */
521147185Spjd		*sp->f_mntonname = '/';
522147185Spjd	} else {
523147185Spjd		strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
524147185Spjd	}
525125804Srwatson}
526125804Srwatson
527164032Srwatson/*
528164032Srwatson * Check with permission for a specific privilege is granted within jail.  We
529164032Srwatson * have a specific list of accepted privileges; the rest are denied.
530164032Srwatson */
531164032Srwatsonint
532164032Srwatsonprison_priv_check(struct ucred *cred, int priv)
533164032Srwatson{
534164032Srwatson
535164032Srwatson	if (!jailed(cred))
536164032Srwatson		return (0);
537164032Srwatson
538164032Srwatson	switch (priv) {
539164032Srwatson
540164032Srwatson		/*
541164032Srwatson		 * Allow ktrace privileges for root in jail.
542164032Srwatson		 */
543164032Srwatson	case PRIV_KTRACE:
544164032Srwatson
545166827Srwatson#if 0
546164032Srwatson		/*
547164032Srwatson		 * Allow jailed processes to configure audit identity and
548164032Srwatson		 * submit audit records (login, etc).  In the future we may
549164032Srwatson		 * want to further refine the relationship between audit and
550164032Srwatson		 * jail.
551164032Srwatson		 */
552164032Srwatson	case PRIV_AUDIT_GETAUDIT:
553164032Srwatson	case PRIV_AUDIT_SETAUDIT:
554164032Srwatson	case PRIV_AUDIT_SUBMIT:
555166827Srwatson#endif
556164032Srwatson
557164032Srwatson		/*
558164032Srwatson		 * Allow jailed processes to manipulate process UNIX
559164032Srwatson		 * credentials in any way they see fit.
560164032Srwatson		 */
561164032Srwatson	case PRIV_CRED_SETUID:
562164032Srwatson	case PRIV_CRED_SETEUID:
563164032Srwatson	case PRIV_CRED_SETGID:
564164032Srwatson	case PRIV_CRED_SETEGID:
565164032Srwatson	case PRIV_CRED_SETGROUPS:
566164032Srwatson	case PRIV_CRED_SETREUID:
567164032Srwatson	case PRIV_CRED_SETREGID:
568164032Srwatson	case PRIV_CRED_SETRESUID:
569164032Srwatson	case PRIV_CRED_SETRESGID:
570164032Srwatson
571164032Srwatson		/*
572164032Srwatson		 * Jail implements visibility constraints already, so allow
573164032Srwatson		 * jailed root to override uid/gid-based constraints.
574164032Srwatson		 */
575164032Srwatson	case PRIV_SEEOTHERGIDS:
576164032Srwatson	case PRIV_SEEOTHERUIDS:
577164032Srwatson
578164032Srwatson		/*
579164032Srwatson		 * Jail implements inter-process debugging limits already, so
580164032Srwatson		 * allow jailed root various debugging privileges.
581164032Srwatson		 */
582164032Srwatson	case PRIV_DEBUG_DIFFCRED:
583164032Srwatson	case PRIV_DEBUG_SUGID:
584164032Srwatson	case PRIV_DEBUG_UNPRIV:
585164032Srwatson
586164032Srwatson		/*
587164032Srwatson		 * Allow jail to set various resource limits and login
588164032Srwatson		 * properties, and for now, exceed process resource limits.
589164032Srwatson		 */
590164032Srwatson	case PRIV_PROC_LIMIT:
591164032Srwatson	case PRIV_PROC_SETLOGIN:
592164032Srwatson	case PRIV_PROC_SETRLIMIT:
593164032Srwatson
594164032Srwatson		/*
595164032Srwatson		 * System V and POSIX IPC privileges are granted in jail.
596164032Srwatson		 */
597164032Srwatson	case PRIV_IPC_READ:
598164032Srwatson	case PRIV_IPC_WRITE:
599164032Srwatson	case PRIV_IPC_EXEC:
600164032Srwatson	case PRIV_IPC_ADMIN:
601164032Srwatson	case PRIV_IPC_MSGSIZE:
602164032Srwatson	case PRIV_MQ_ADMIN:
603164032Srwatson
604164032Srwatson		/*
605164032Srwatson		 * Jail implements its own inter-process limits, so allow
606164032Srwatson		 * root processes in jail to change scheduling on other
607164032Srwatson		 * processes in the same jail.  Likewise for signalling.
608164032Srwatson		 */
609164032Srwatson	case PRIV_SCHED_DIFFCRED:
610164032Srwatson	case PRIV_SIGNAL_DIFFCRED:
611164032Srwatson	case PRIV_SIGNAL_SUGID:
612164032Srwatson
613164032Srwatson		/*
614164032Srwatson		 * Allow jailed processes to write to sysctls marked as jail
615164032Srwatson		 * writable.
616164032Srwatson		 */
617164032Srwatson	case PRIV_SYSCTL_WRITEJAIL:
618164032Srwatson
619164032Srwatson		/*
620164032Srwatson		 * Allow root in jail to manage a variety of quota
621164032Srwatson		 * properties.  Some are a bit surprising and should be
622164032Srwatson		 * reconsidered.
623164032Srwatson		 */
624164032Srwatson	case PRIV_UFS_GETQUOTA:
625164032Srwatson	case PRIV_UFS_QUOTAOFF:		/* XXXRW: Slightly surprising. */
626164032Srwatson	case PRIV_UFS_QUOTAON:		/* XXXRW: Slightly surprising. */
627164032Srwatson	case PRIV_UFS_SETQUOTA:
628164032Srwatson	case PRIV_UFS_SETUSE:		/* XXXRW: Slightly surprising. */
629164032Srwatson
630164032Srwatson		/*
631164032Srwatson		 * Since Jail relies on chroot() to implement file system
632164032Srwatson		 * protections, grant many VFS privileges to root in jail.
633164032Srwatson		 * Be careful to exclude mount-related and NFS-related
634164032Srwatson		 * privileges.
635164032Srwatson		 */
636164032Srwatson	case PRIV_VFS_READ:
637164032Srwatson	case PRIV_VFS_WRITE:
638164032Srwatson	case PRIV_VFS_ADMIN:
639164032Srwatson	case PRIV_VFS_EXEC:
640164032Srwatson	case PRIV_VFS_LOOKUP:
641164032Srwatson	case PRIV_VFS_BLOCKRESERVE:	/* XXXRW: Slightly surprising. */
642164032Srwatson	case PRIV_VFS_CHFLAGS_DEV:
643164032Srwatson	case PRIV_VFS_CHOWN:
644164032Srwatson	case PRIV_VFS_CHROOT:
645164032Srwatson	case PRIV_VFS_CLEARSUGID:
646164032Srwatson	case PRIV_VFS_FCHROOT:
647164032Srwatson	case PRIV_VFS_LINK:
648164032Srwatson	case PRIV_VFS_SETGID:
649164032Srwatson	case PRIV_VFS_STICKYFILE:
650164032Srwatson		return (0);
651164032Srwatson
652164032Srwatson		/*
653164032Srwatson		 * Depending on the global setting, allow privilege of
654164032Srwatson		 * setting system flags.
655164032Srwatson		 */
656164032Srwatson	case PRIV_VFS_SYSFLAGS:
657164032Srwatson		if (jail_chflags_allowed)
658164032Srwatson			return (0);
659164032Srwatson		else
660164032Srwatson			return (EPERM);
661164032Srwatson
662164032Srwatson		/*
663164032Srwatson		 * Allow jailed root to bind reserved ports.
664164032Srwatson		 */
665164032Srwatson	case PRIV_NETINET_RESERVEDPORT:
666164032Srwatson		return (0);
667164032Srwatson
668164032Srwatson		/*
669164032Srwatson		 * Conditionally allow creating raw sockets in jail.
670164032Srwatson		 */
671164032Srwatson	case PRIV_NETINET_RAW:
672164032Srwatson		if (jail_allow_raw_sockets)
673164032Srwatson			return (0);
674164032Srwatson		else
675164032Srwatson			return (EPERM);
676164032Srwatson
677164032Srwatson		/*
678164032Srwatson		 * Since jail implements its own visibility limits on netstat
679164032Srwatson		 * sysctls, allow getcred.  This allows identd to work in
680164032Srwatson		 * jail.
681164032Srwatson		 */
682164032Srwatson	case PRIV_NETINET_GETCRED:
683164032Srwatson		return (0);
684164032Srwatson
685164032Srwatson	default:
686164032Srwatson		/*
687164032Srwatson		 * In all remaining cases, deny the privilege request.  This
688164032Srwatson		 * includes almost all network privileges, many system
689164032Srwatson		 * configuration privileges.
690164032Srwatson		 */
691164032Srwatson		return (EPERM);
692164032Srwatson	}
693164032Srwatson}
694164032Srwatson
695113275Smikestatic int
696113275Smikesysctl_jail_list(SYSCTL_HANDLER_ARGS)
697113275Smike{
698113275Smike	struct xprison *xp, *sxp;
699113275Smike	struct prison *pr;
700113275Smike	int count, error;
701113275Smike
702127020Spjd	if (jailed(req->td->td_ucred))
703125806Srwatson		return (0);
704113275Smikeretry:
705113275Smike	mtx_lock(&allprison_mtx);
706113275Smike	count = prisoncount;
707113275Smike	mtx_unlock(&allprison_mtx);
708113275Smike
709113275Smike	if (count == 0)
710113275Smike		return (0);
711113275Smike
712113275Smike	sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO);
713113275Smike	mtx_lock(&allprison_mtx);
714113275Smike	if (count != prisoncount) {
715113275Smike		mtx_unlock(&allprison_mtx);
716113275Smike		free(sxp, M_TEMP);
717113275Smike		goto retry;
718113275Smike	}
719113275Smike
720113275Smike	LIST_FOREACH(pr, &allprison, pr_list) {
721113275Smike		mtx_lock(&pr->pr_mtx);
722113275Smike		xp->pr_version = XPRISON_VERSION;
723113275Smike		xp->pr_id = pr->pr_id;
724113275Smike		strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
725113275Smike		strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
726113275Smike		xp->pr_ip = pr->pr_ip;
727113275Smike		mtx_unlock(&pr->pr_mtx);
728113275Smike		xp++;
729113275Smike	}
730113275Smike	mtx_unlock(&allprison_mtx);
731113275Smike
732113275Smike	error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count);
733113275Smike	free(sxp, M_TEMP);
734113275Smike	if (error)
735113275Smike		return (error);
736113275Smike	return (0);
737113275Smike}
738113275Smike
739113275SmikeSYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD,
740113275Smike    NULL, 0, sysctl_jail_list, "S", "List of active jails");
741126004Spjd
742126004Spjdstatic int
743126004Spjdsysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
744126004Spjd{
745126004Spjd	int error, injail;
746126004Spjd
747126004Spjd	injail = jailed(req->td->td_ucred);
748126004Spjd	error = SYSCTL_OUT(req, &injail, sizeof(injail));
749126004Spjd
750126004Spjd	return (error);
751126004Spjd}
752126004SpjdSYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD,
753126004Spjd    NULL, 0, sysctl_jail_jailed, "I", "Process in jail?");
754