1/*-
2 * Copyright (c) 2011 James Gritton
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/types.h>
31#include <sys/event.h>
32#include <sys/mount.h>
33#include <sys/stat.h>
34#include <sys/sysctl.h>
35#include <sys/user.h>
36#include <sys/wait.h>
37
38#include <err.h>
39#include <errno.h>
40#include <fcntl.h>
41#include <kvm.h>
42#include <login_cap.h>
43#include <paths.h>
44#include <pwd.h>
45#include <signal.h>
46#include <stdio.h>
47#include <stdlib.h>
48#include <string.h>
49#include <unistd.h>
50#include <vis.h>
51
52#include "jailp.h"
53
/* Seconds term_procs() waits after SIGTERM when stop.timeout is not set. */
#define DEFAULT_STOP_TIMEOUT	10
/* Number of buckets in the pid hash; buckets are indexed by pid % PHASH_SIZE. */
#define PHASH_SIZE		256

LIST_HEAD(phhead, phash);

/* One tracked child process and the jail it is running on behalf of. */
struct phash {
	LIST_ENTRY(phash)	le;	/* hash bucket linkage */
	struct cfjail		*j;	/* jail that owns the process */
	pid_t			pid;	/* process being watched for exit */
};

/*
 * Remaining number of commands that may be started and waited on:
 * decremented when run_command() starts a tracked child, incremented in
 * finish_command(), and next_command() defers jails while it is zero.
 * NOTE(review): the initial -1 presumably means "no limit" unless it is
 * overridden elsewhere -- confirm against the caller that sets it.
 */
int paralimit = -1;

extern char **environ;

static int run_command(struct cfjail *j);
static int add_proc(struct cfjail *j, pid_t pid);
static void clear_procs(struct cfjail *j);
static struct cfjail *find_proc(pid_t pid);
static int term_procs(struct cfjail *j);
static int get_user_info(struct cfjail *j, const char *username,
    const struct passwd **pwdp, login_cap_t **lcapp);
static int check_path(struct cfjail *j, const char *pname, const char *path,
    int isfile, const char *umount_type);

/* Jails waiting on child processes; add_proc() keeps timed entries sorted. */
static struct cfjails sleeping = TAILQ_HEAD_INITIALIZER(sleeping);
/* Jails deferred by next_command() because paralimit reached zero. */
static struct cfjails runnable = TAILQ_HEAD_INITIALIZER(runnable);
/*
 * Placeholder command string for internal operations that have no string
 * value; its non-zero length keeps next_command() from skipping it as empty.
 */
static struct cfstring dummystring = { .len = 1 };
/* Hash of tracked child processes, keyed by pid. */
static struct phhead phash[PHASH_SIZE];
/* kqueue descriptor, created on first use by add_proc(); 0 means not open. */
static int kq;
84
85/*
86 * Run the next command associated with a jail.
87 */
88int
89next_command(struct cfjail *j)
90{
91	enum intparam comparam;
92	int create_failed, stopping;
93
94	if (paralimit == 0) {
95		if (j->flags & JF_FROM_RUNQ)
96			requeue_head(j, &runnable);
97		else
98			requeue(j, &runnable);
99		return 1;
100	}
101	j->flags &= ~JF_FROM_RUNQ;
102	create_failed = (j->flags & (JF_STOP | JF_FAILED)) == JF_FAILED;
103	stopping = (j->flags & JF_STOP) != 0;
104	comparam = *j->comparam;
105	for (;;) {
106		if (j->comstring == NULL) {
107			j->comparam += create_failed ? -1 : 1;
108			switch ((comparam = *j->comparam)) {
109			case IP__NULL:
110				return 0;
111			case IP_MOUNT_DEVFS:
112				if (!bool_param(j->intparams[IP_MOUNT_DEVFS]))
113					continue;
114				j->comstring = &dummystring;
115				break;
116			case IP_MOUNT_FDESCFS:
117				if (!bool_param(j->intparams[IP_MOUNT_FDESCFS]))
118					continue;
119				j->comstring = &dummystring;
120				break;
121			case IP_MOUNT_PROCFS:
122				if (!bool_param(j->intparams[IP_MOUNT_PROCFS]))
123					continue;
124				j->comstring = &dummystring;
125				break;
126			case IP__OP:
127			case IP_STOP_TIMEOUT:
128				j->comstring = &dummystring;
129				break;
130			default:
131				if (j->intparams[comparam] == NULL)
132					continue;
133				j->comstring = create_failed || (stopping &&
134				    (j->intparams[comparam]->flags & PF_REV))
135				    ? TAILQ_LAST(&j->intparams[comparam]->val,
136					cfstrings)
137				    : TAILQ_FIRST(&j->intparams[comparam]->val);
138			}
139		} else {
140			j->comstring = j->comstring == &dummystring ? NULL :
141			    create_failed || (stopping &&
142			    (j->intparams[comparam]->flags & PF_REV))
143			    ? TAILQ_PREV(j->comstring, cfstrings, tq)
144			    : TAILQ_NEXT(j->comstring, tq);
145		}
146		if (j->comstring == NULL || j->comstring->len == 0 ||
147		    (create_failed && (comparam == IP_EXEC_PRESTART ||
148		    comparam == IP_EXEC_START || comparam == IP_COMMAND ||
149		    comparam == IP_EXEC_POSTSTART)))
150			continue;
151		switch (run_command(j)) {
152		case -1:
153			failed(j);
154			/* FALLTHROUGH */
155		case 1:
156			return 1;
157		}
158	}
159}
160
161/*
162 * Check command exit status
163 */
164int
165finish_command(struct cfjail *j)
166{
167	struct cfjail *rj;
168	int error;
169
170	if (!(j->flags & JF_SLEEPQ))
171		return 0;
172	j->flags &= ~JF_SLEEPQ;
173	if (*j->comparam == IP_STOP_TIMEOUT) {
174		j->flags &= ~JF_TIMEOUT;
175		j->pstatus = 0;
176		return 0;
177	}
178	paralimit++;
179	if (!TAILQ_EMPTY(&runnable)) {
180		rj = TAILQ_FIRST(&runnable);
181		rj->flags |= JF_FROM_RUNQ;
182		requeue(rj, &ready);
183	}
184	error = 0;
185	if (j->flags & JF_TIMEOUT) {
186		j->flags &= ~JF_TIMEOUT;
187		if (*j->comparam != IP_STOP_TIMEOUT) {
188			jail_warnx(j, "%s: timed out", j->comline);
189			failed(j);
190			error = -1;
191		} else if (verbose > 0)
192			jail_note(j, "timed out\n");
193	} else if (j->pstatus != 0) {
194		if (WIFSIGNALED(j->pstatus))
195			jail_warnx(j, "%s: exited on signal %d",
196			    j->comline, WTERMSIG(j->pstatus));
197		else
198			jail_warnx(j, "%s: failed", j->comline);
199		j->pstatus = 0;
200		failed(j);
201		error = -1;
202	}
203	free(j->comline);
204	j->comline = NULL;
205	return error;
206}
207
208/*
209 * Check for finished processes or timeouts.
210 */
211struct cfjail *
212next_proc(int nonblock)
213{
214	struct kevent ke;
215	struct timespec ts;
216	struct timespec *tsp;
217	struct cfjail *j;
218
219	if (!TAILQ_EMPTY(&sleeping)) {
220	again:
221		tsp = NULL;
222		if ((j = TAILQ_FIRST(&sleeping)) && j->timeout.tv_sec) {
223			clock_gettime(CLOCK_REALTIME, &ts);
224			ts.tv_sec = j->timeout.tv_sec - ts.tv_sec;
225			ts.tv_nsec = j->timeout.tv_nsec - ts.tv_nsec;
226			if (ts.tv_nsec < 0) {
227				ts.tv_sec--;
228				ts.tv_nsec += 1000000000;
229			}
230			if (ts.tv_sec < 0 ||
231			    (ts.tv_sec == 0 && ts.tv_nsec == 0)) {
232				j->flags |= JF_TIMEOUT;
233				clear_procs(j);
234				return j;
235			}
236			tsp = &ts;
237		}
238		if (nonblock) {
239			ts.tv_sec = 0;
240			ts.tv_nsec = 0;
241			tsp = &ts;
242		}
243		switch (kevent(kq, NULL, 0, &ke, 1, tsp)) {
244		case -1:
245			if (errno != EINTR)
246				err(1, "kevent");
247			goto again;
248		case 0:
249			if (!nonblock) {
250				j = TAILQ_FIRST(&sleeping);
251				j->flags |= JF_TIMEOUT;
252				clear_procs(j);
253				return j;
254			}
255			break;
256		case 1:
257			(void)waitpid(ke.ident, NULL, WNOHANG);
258			if ((j = find_proc(ke.ident))) {
259				j->pstatus = ke.data;
260				return j;
261			}
262			goto again;
263		}
264	}
265	return NULL;
266}
267
268/*
269 * Run a single command for a jail, possibly inside the jail.
270 */
271static int
272run_command(struct cfjail *j)
273{
274	const struct passwd *pwd;
275	const struct cfstring *comstring, *s;
276	login_cap_t *lcap;
277	char **argv;
278	char *cs, *comcs, *devpath;
279	const char *jidstr, *conslog, *path, *ruleset, *term, *username;
280	enum intparam comparam;
281	size_t comlen;
282	pid_t pid;
283	int argc, bg, clean, consfd, down, fib, i, injail, sjuser, timeout;
284#if defined(INET) || defined(INET6)
285	char *addr, *extrap, *p, *val;
286#endif
287
288	static char *cleanenv;
289
290	/* Perform some operations that aren't actually commands */
291	comparam = *j->comparam;
292	down = j->flags & (JF_STOP | JF_FAILED);
293	switch (comparam) {
294	case IP_STOP_TIMEOUT:
295		return term_procs(j);
296
297	case IP__OP:
298		if (down) {
299			if (jail_remove(j->jid) < 0 && errno == EPERM) {
300				jail_warnx(j, "jail_remove: %s",
301					   strerror(errno));
302				return -1;
303			}
304			if (verbose > 0 || (verbose == 0 && (j->flags & JF_STOP
305			    ? note_remove : j->name != NULL)))
306			    jail_note(j, "removed\n");
307			j->jid = -1;
308			if (j->flags & JF_STOP)
309				dep_done(j, DF_LIGHT);
310			else
311				j->flags &= ~JF_PERSIST;
312		} else {
313			if (create_jail(j) < 0)
314				return -1;
315			if (iflag)
316				printf("%d\n", j->jid);
317			if (verbose >= 0 && (j->name || verbose > 0))
318				jail_note(j, "created\n");
319			dep_done(j, DF_LIGHT);
320		}
321		return 0;
322
323	default: ;
324	}
325	/*
326	 * Collect exec arguments.  Internal commands for network and
327	 * mounting build their own argument lists.
328	 */
329	comstring = j->comstring;
330	bg = 0;
331	switch (comparam) {
332#ifdef INET
333	case IP__IP4_IFADDR:
334		argc = 0;
335		val = alloca(strlen(comstring->s) + 1);
336		strcpy(val, comstring->s);
337		cs = val;
338		extrap = NULL;
339		while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
340			if (extrap == NULL) {
341				*p = '\0';
342				extrap = p + 1;
343			}
344			cs = p + 1;
345			argc++;
346		}
347
348		argv = alloca((8 + argc) * sizeof(char *));
349		*(const char **)&argv[0] = _PATH_IFCONFIG;
350		if ((cs = strchr(val, '|'))) {
351			argv[1] = alloca(cs - val + 1);
352			strlcpy(argv[1], val, cs - val + 1);
353			addr = cs + 1;
354		} else {
355			*(const char **)&argv[1] =
356			    string_param(j->intparams[IP_INTERFACE]);
357			addr = val;
358		}
359		*(const char **)&argv[2] = "inet";
360		if (!(cs = strchr(addr, '/'))) {
361			argv[3] = addr;
362			*(const char **)&argv[4] = "netmask";
363			*(const char **)&argv[5] = "255.255.255.255";
364			argc = 6;
365		} else if (strchr(cs + 1, '.')) {
366			argv[3] = alloca(cs - addr + 1);
367			strlcpy(argv[3], addr, cs - addr + 1);
368			*(const char **)&argv[4] = "netmask";
369			*(const char **)&argv[5] = cs + 1;
370			argc = 6;
371		} else {
372			argv[3] = addr;
373			argc = 4;
374		}
375
376		if (!down) {
377			for (cs = strtok(extrap, " "); cs; cs = strtok(NULL, " ")) {
378				size_t len = strlen(cs) + 1;
379				argv[argc] = alloca(len);
380				strlcpy(argv[argc++], cs, len);
381			}
382		}
383
384		*(const char **)&argv[argc] = down ? "-alias" : "alias";
385		argv[argc + 1] = NULL;
386		break;
387#endif
388
389#ifdef INET6
390	case IP__IP6_IFADDR:
391		argc = 0;
392		val = alloca(strlen(comstring->s) + 1);
393		strcpy(val, comstring->s);
394		cs = val;
395		extrap = NULL;
396		while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
397			if (extrap == NULL) {
398				*p = '\0';
399				extrap = p + 1;
400			}
401			cs = p + 1;
402			argc++;
403		}
404
405		argv = alloca((8 + argc) * sizeof(char *));
406		*(const char **)&argv[0] = _PATH_IFCONFIG;
407		if ((cs = strchr(val, '|'))) {
408			argv[1] = alloca(cs - val + 1);
409			strlcpy(argv[1], val, cs - val + 1);
410			addr = cs + 1;
411		} else {
412			*(const char **)&argv[1] =
413			    string_param(j->intparams[IP_INTERFACE]);
414			addr = val;
415		}
416		*(const char **)&argv[2] = "inet6";
417		argv[3] = addr;
418		if (!(cs = strchr(addr, '/'))) {
419			*(const char **)&argv[4] = "prefixlen";
420			*(const char **)&argv[5] = "128";
421			argc = 6;
422		} else
423			argc = 4;
424
425		if (!down) {
426			for (cs = strtok(extrap, " "); cs; cs = strtok(NULL, " ")) {
427				size_t len = strlen(cs) + 1;
428				argv[argc] = alloca(len);
429				strlcpy(argv[argc++], cs, len);
430			}
431		}
432
433		*(const char **)&argv[argc] = down ? "-alias" : "alias";
434		argv[argc + 1] = NULL;
435		break;
436#endif
437
438	case IP_VNET_INTERFACE:
439		argv = alloca(5 * sizeof(char *));
440		*(const char **)&argv[0] = _PATH_IFCONFIG;
441		argv[1] = comstring->s;
442		*(const char **)&argv[2] = down ? "-vnet" : "vnet";
443		jidstr = string_param(j->intparams[KP_JID]);
444		*(const char **)&argv[3] =
445			jidstr ? jidstr : string_param(j->intparams[KP_NAME]);
446		argv[4] = NULL;
447		break;
448
449	case IP_MOUNT:
450	case IP__MOUNT_FROM_FSTAB:
451		argv = alloca(8 * sizeof(char *));
452		comcs = alloca(comstring->len + 1);
453		strcpy(comcs, comstring->s);
454		argc = 0;
455		for (cs = strtok(comcs, " \t\f\v\r\n"); cs && argc < 4;
456		     cs = strtok(NULL, " \t\f\v\r\n")) {
457			if (argc <= 1 && strunvis(cs, cs) < 0) {
458				jail_warnx(j, "%s: %s: fstab parse error",
459				    j->intparams[comparam]->name, comstring->s);
460				return -1;
461			}
462			argv[argc++] = cs;
463		}
464		if (argc == 0)
465			return 0;
466		if (argc < 3) {
467			jail_warnx(j, "%s: %s: missing information",
468			    j->intparams[comparam]->name, comstring->s);
469			return -1;
470		}
471		if (check_path(j, j->intparams[comparam]->name, argv[1], 0,
472		    down ? argv[2] : NULL) < 0)
473			return -1;
474		if (down) {
475			argv[4] = NULL;
476			argv[3] = argv[1];
477			*(const char **)&argv[0] = "/sbin/umount";
478		} else {
479			if (argc == 4) {
480				argv[7] = NULL;
481				argv[6] = argv[1];
482				argv[5] = argv[0];
483				argv[4] = argv[3];
484				*(const char **)&argv[3] = "-o";
485			} else {
486				argv[5] = NULL;
487				argv[4] = argv[1];
488				argv[3] = argv[0];
489			}
490			*(const char **)&argv[0] = _PATH_MOUNT;
491		}
492		*(const char **)&argv[1] = "-t";
493		break;
494
495	case IP_MOUNT_DEVFS:
496		argv = alloca(7 * sizeof(char *));
497		path = string_param(j->intparams[KP_PATH]);
498		if (path == NULL) {
499			jail_warnx(j, "mount.devfs: no path");
500			return -1;
501		}
502		devpath = alloca(strlen(path) + 5);
503		sprintf(devpath, "%s/dev", path);
504		if (check_path(j, "mount.devfs", devpath, 0,
505		    down ? "devfs" : NULL) < 0)
506			return -1;
507		if (down) {
508			*(const char **)&argv[0] = "/sbin/umount";
509			argv[1] = devpath;
510			argv[2] = NULL;
511		} else {
512			*(const char **)&argv[0] = _PATH_MOUNT;
513			*(const char **)&argv[1] = "-t";
514			*(const char **)&argv[2] = "devfs";
515			ruleset = string_param(j->intparams[KP_DEVFS_RULESET]);
516			if (!ruleset)
517			    ruleset = "4";	/* devfsrules_jail */
518			argv[3] = alloca(11 + strlen(ruleset));
519			sprintf(argv[3], "-oruleset=%s", ruleset);
520			*(const char **)&argv[4] = ".";
521			argv[5] = devpath;
522			argv[6] = NULL;
523		}
524		break;
525
526	case IP_MOUNT_FDESCFS:
527		argv = alloca(7 * sizeof(char *));
528		path = string_param(j->intparams[KP_PATH]);
529		if (path == NULL) {
530			jail_warnx(j, "mount.fdescfs: no path");
531			return -1;
532		}
533		devpath = alloca(strlen(path) + 8);
534		sprintf(devpath, "%s/dev/fd", path);
535		if (check_path(j, "mount.fdescfs", devpath, 0,
536		    down ? "fdescfs" : NULL) < 0)
537			return -1;
538		if (down) {
539			*(const char **)&argv[0] = "/sbin/umount";
540			argv[1] = devpath;
541			argv[2] = NULL;
542		} else {
543			*(const char **)&argv[0] = _PATH_MOUNT;
544			*(const char **)&argv[1] = "-t";
545			*(const char **)&argv[2] = "fdescfs";
546			*(const char **)&argv[3] = ".";
547			argv[4] = devpath;
548			argv[5] = NULL;
549		}
550		break;
551
552	case IP_MOUNT_PROCFS:
553		argv = alloca(7 * sizeof(char *));
554		path = string_param(j->intparams[KP_PATH]);
555		if (path == NULL) {
556			jail_warnx(j, "mount.procfs: no path");
557			return -1;
558		}
559		devpath = alloca(strlen(path) + 6);
560		sprintf(devpath, "%s/proc", path);
561		if (check_path(j, "mount.procfs", devpath, 0,
562		    down ? "procfs" : NULL) < 0)
563			return -1;
564		if (down) {
565			*(const char **)&argv[0] = "/sbin/umount";
566			argv[1] = devpath;
567			argv[2] = NULL;
568		} else {
569			*(const char **)&argv[0] = _PATH_MOUNT;
570			*(const char **)&argv[1] = "-t";
571			*(const char **)&argv[2] = "procfs";
572			*(const char **)&argv[3] = ".";
573			argv[4] = devpath;
574			argv[5] = NULL;
575		}
576		break;
577
578	case IP_COMMAND:
579		if (j->name != NULL)
580			goto default_command;
581		argc = 0;
582		TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
583			argc++;
584		argv = alloca((argc + 1) * sizeof(char *));
585		argc = 0;
586		TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
587			argv[argc++] = s->s;
588		argv[argc] = NULL;
589		j->comstring = &dummystring;
590		break;
591
592	default:
593	default_command:
594		if ((cs = strpbrk(comstring->s, "!\"$&'()*;<>?[\\]`{|}~")) &&
595		    !(cs[0] == '&' && cs[1] == '\0')) {
596			argv = alloca(4 * sizeof(char *));
597			*(const char **)&argv[0] = _PATH_BSHELL;
598			*(const char **)&argv[1] = "-c";
599			argv[2] = comstring->s;
600			argv[3] = NULL;
601		} else {
602			if (cs) {
603				*cs = 0;
604				bg = 1;
605			}
606			comcs = alloca(comstring->len + 1);
607			strcpy(comcs, comstring->s);
608			argc = 0;
609			for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
610			     cs = strtok(NULL, " \t\f\v\r\n"))
611				argc++;
612			argv = alloca((argc + 1) * sizeof(char *));
613			strcpy(comcs, comstring->s);
614			argc = 0;
615			for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
616			     cs = strtok(NULL, " \t\f\v\r\n"))
617				argv[argc++] = cs;
618			argv[argc] = NULL;
619		}
620	}
621	if (argv[0] == NULL)
622		return 0;
623
624	if (int_param(j->intparams[IP_EXEC_TIMEOUT], &timeout) &&
625	    timeout != 0) {
626		clock_gettime(CLOCK_REALTIME, &j->timeout);
627		j->timeout.tv_sec += timeout;
628	} else
629		j->timeout.tv_sec = 0;
630
631	injail = comparam == IP_EXEC_START || comparam == IP_COMMAND ||
632	    comparam == IP_EXEC_STOP;
633	clean = bool_param(j->intparams[IP_EXEC_CLEAN]);
634	username = string_param(j->intparams[injail
635	    ? IP_EXEC_JAIL_USER : IP_EXEC_SYSTEM_USER]);
636	sjuser = bool_param(j->intparams[IP_EXEC_SYSTEM_JAIL_USER]);
637
638	consfd = 0;
639	if (injail &&
640	    (conslog = string_param(j->intparams[IP_EXEC_CONSOLELOG]))) {
641		if (check_path(j, "exec.consolelog", conslog, 1, NULL) < 0)
642			return -1;
643		consfd =
644		    open(conslog, O_WRONLY | O_CREAT | O_APPEND, DEFFILEMODE);
645		if (consfd < 0) {
646			jail_warnx(j, "open %s: %s", conslog, strerror(errno));
647			return -1;
648		}
649	}
650
651	comlen = 0;
652	for (i = 0; argv[i]; i++)
653		comlen += strlen(argv[i]) + 1;
654	j->comline = cs = emalloc(comlen);
655	for (i = 0; argv[i]; i++) {
656		strcpy(cs, argv[i]);
657		if (argv[i + 1]) {
658			cs += strlen(argv[i]) + 1;
659			cs[-1] = ' ';
660		}
661	}
662	if (verbose > 0)
663		jail_note(j, "run command%s%s%s: %s\n",
664		    injail ? " in jail" : "", username ? " as " : "",
665		    username ? username : "", j->comline);
666
667	pid = fork();
668	if (pid < 0)
669		err(1, "fork");
670	if (pid > 0) {
671		if (bg || !add_proc(j, pid)) {
672			free(j->comline);
673			j->comline = NULL;
674			return 0;
675		} else {
676			paralimit--;
677			return 1;
678		}
679	}
680	if (bg)
681		setsid();
682
683	/* Set up the environment and run the command */
684	pwd = NULL;
685	lcap = NULL;
686	if ((clean || username) && injail && sjuser &&
687	    get_user_info(j, username, &pwd, &lcap) < 0)
688		exit(1);
689	if (injail) {
690		/* jail_attach won't chdir along with its chroot. */
691		path = string_param(j->intparams[KP_PATH]);
692		if (path && chdir(path) < 0) {
693			jail_warnx(j, "chdir %s: %s", path, strerror(errno));
694			exit(1);
695		}
696		if (int_param(j->intparams[IP_EXEC_FIB], &fib) &&
697		    setfib(fib) < 0) {
698			jail_warnx(j, "setfib: %s", strerror(errno));
699			exit(1);
700		}
701		if (jail_attach(j->jid) < 0) {
702			jail_warnx(j, "jail_attach: %s", strerror(errno));
703			exit(1);
704		}
705	}
706	if (clean || username) {
707		if (!(injail && sjuser) &&
708		    get_user_info(j, username, &pwd, &lcap) < 0)
709			exit(1);
710		if (clean) {
711			term = getenv("TERM");
712			environ = &cleanenv;
713			setenv("PATH", "/bin:/usr/bin", 0);
714			if (term != NULL)
715				setenv("TERM", term, 1);
716		}
717		if (setgid(pwd->pw_gid) < 0) {
718			jail_warnx(j, "setgid %d: %s", pwd->pw_gid,
719			    strerror(errno));
720			exit(1);
721		}
722		if (setusercontext(lcap, pwd, pwd->pw_uid, username
723		    ? LOGIN_SETALL & ~LOGIN_SETGROUP & ~LOGIN_SETLOGIN
724		    : LOGIN_SETPATH | LOGIN_SETENV) < 0) {
725			jail_warnx(j, "setusercontext %s: %s", pwd->pw_name,
726			    strerror(errno));
727			exit(1);
728		}
729		login_close(lcap);
730		setenv("USER", pwd->pw_name, 1);
731		setenv("HOME", pwd->pw_dir, 1);
732		setenv("SHELL",
733		    *pwd->pw_shell ? pwd->pw_shell : _PATH_BSHELL, 1);
734		if (clean && chdir(pwd->pw_dir) < 0) {
735			jail_warnx(j, "chdir %s: %s",
736			    pwd->pw_dir, strerror(errno));
737			exit(1);
738		}
739		endpwent();
740	}
741
742	if (consfd != 0 && (dup2(consfd, 1) < 0 || dup2(consfd, 2) < 0)) {
743		jail_warnx(j, "exec.consolelog: %s", strerror(errno));
744		exit(1);
745	}
746	closefrom(3);
747	execvp(argv[0], argv);
748	jail_warnx(j, "exec %s: %s", argv[0], strerror(errno));
749	exit(1);
750}
751
752/*
753 * Add a process to the hash, tied to a jail.
754 */
755static int
756add_proc(struct cfjail *j, pid_t pid)
757{
758	struct kevent ke;
759	struct cfjail *tj;
760	struct phash *ph;
761
762	if (!kq && (kq = kqueue()) < 0)
763		err(1, "kqueue");
764	EV_SET(&ke, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
765	if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
766		if (errno == ESRCH)
767			return 0;
768		err(1, "kevent");
769	}
770	ph = emalloc(sizeof(struct phash));
771	ph->j = j;
772	ph->pid = pid;
773	LIST_INSERT_HEAD(&phash[pid % PHASH_SIZE], ph, le);
774	j->nprocs++;
775	j->flags |= JF_SLEEPQ;
776	if (j->timeout.tv_sec == 0)
777		requeue(j, &sleeping);
778	else {
779		/* File the jail in the sleep queue according to its timeout. */
780		TAILQ_REMOVE(j->queue, j, tq);
781		TAILQ_FOREACH(tj, &sleeping, tq) {
782			if (!tj->timeout.tv_sec ||
783			    j->timeout.tv_sec < tj->timeout.tv_sec ||
784			    (j->timeout.tv_sec == tj->timeout.tv_sec &&
785			    j->timeout.tv_nsec <= tj->timeout.tv_nsec)) {
786				TAILQ_INSERT_BEFORE(tj, j, tq);
787				break;
788			}
789		}
790		if (tj == NULL)
791			TAILQ_INSERT_TAIL(&sleeping, j, tq);
792		j->queue = &sleeping;
793	}
794	return 1;
795}
796
797/*
798 * Remove any processes from the hash that correspond to a jail.
799 */
800static void
801clear_procs(struct cfjail *j)
802{
803	struct kevent ke;
804	struct phash *ph, *tph;
805	int i;
806
807	j->nprocs = 0;
808	for (i = 0; i < PHASH_SIZE; i++)
809		LIST_FOREACH_SAFE(ph, &phash[i], le, tph)
810			if (ph->j == j) {
811				EV_SET(&ke, ph->pid, EVFILT_PROC, EV_DELETE,
812				    NOTE_EXIT, 0, NULL);
813				(void)kevent(kq, &ke, 1, NULL, 0, NULL);
814				LIST_REMOVE(ph, le);
815				free(ph);
816			}
817}
818
819/*
820 * Find the jail that corresponds to an exited process.
821 */
822static struct cfjail *
823find_proc(pid_t pid)
824{
825	struct cfjail *j;
826	struct phash *ph;
827
828	LIST_FOREACH(ph, &phash[pid % PHASH_SIZE], le)
829		if (ph->pid == pid) {
830			j = ph->j;
831			LIST_REMOVE(ph, le);
832			free(ph);
833			return --j->nprocs ? NULL : j;
834		}
835	return NULL;
836}
837
838/*
839 * Send SIGTERM to all processes in a jail and wait for them to die.
840 */
841static int
842term_procs(struct cfjail *j)
843{
844	struct kinfo_proc *ki;
845	int i, noted, pcnt, timeout;
846
847	static kvm_t *kd;
848
849	if (!int_param(j->intparams[IP_STOP_TIMEOUT], &timeout))
850		timeout = DEFAULT_STOP_TIMEOUT;
851	else if (timeout == 0)
852		return 0;
853
854	if (kd == NULL) {
855		kd = kvm_open(NULL, NULL, NULL, O_RDONLY, NULL);
856		if (kd == NULL)
857			return 0;
858	}
859
860	ki = kvm_getprocs(kd, KERN_PROC_PROC, 0, &pcnt);
861	if (ki == NULL)
862		return 0;
863	noted = 0;
864	for (i = 0; i < pcnt; i++)
865		if (ki[i].ki_jid == j->jid &&
866		    kill(ki[i].ki_pid, SIGTERM) == 0) {
867			(void)add_proc(j, ki[i].ki_pid);
868			if (verbose > 0) {
869				if (!noted) {
870					noted = 1;
871					jail_note(j, "sent SIGTERM to:");
872				}
873				printf(" %d", ki[i].ki_pid);
874			}
875		}
876	if (noted)
877		printf("\n");
878	if (j->nprocs > 0) {
879		clock_gettime(CLOCK_REALTIME, &j->timeout);
880		j->timeout.tv_sec += timeout;
881		return 1;
882	}
883	return 0;
884}
885
886/*
887 * Look up a user in the passwd and login.conf files.
888 */
889static int
890get_user_info(struct cfjail *j, const char *username,
891    const struct passwd **pwdp, login_cap_t **lcapp)
892{
893	const struct passwd *pwd;
894
895	errno = 0;
896	*pwdp = pwd = username ? getpwnam(username) : getpwuid(getuid());
897	if (pwd == NULL) {
898		if (errno)
899			jail_warnx(j, "getpwnam%s%s: %s", username ? " " : "",
900			    username ? username : "", strerror(errno));
901		else if (username)
902			jail_warnx(j, "%s: no such user", username);
903		else
904			jail_warnx(j, "unknown uid %d", getuid());
905		return -1;
906	}
907	*lcapp = login_getpwclass(pwd);
908	if (*lcapp == NULL) {
909		jail_warnx(j, "getpwclass %s: %s", pwd->pw_name,
910		    strerror(errno));
911		return -1;
912	}
913	/* Set the groups while the group file is still available */
914	if (initgroups(pwd->pw_name, pwd->pw_gid) < 0) {
915		jail_warnx(j, "initgroups %s: %s", pwd->pw_name,
916		    strerror(errno));
917		return -1;
918	}
919	return 0;
920}
921
922/*
923 * Make sure a mount or consolelog path is a valid absolute pathname
924 * with no symlinks.
925 */
926static int
927check_path(struct cfjail *j, const char *pname, const char *path, int isfile,
928    const char *umount_type)
929{
930	struct stat st, mpst;
931	struct statfs stfs;
932	char *tpath, *p;
933	const char *jailpath;
934	size_t jplen;
935
936	if (path[0] != '/') {
937		jail_warnx(j, "%s: %s: not an absolute pathname",
938		    pname, path);
939		return -1;
940	}
941	/*
942	 * Only check for symlinks in components below the jail's path,
943	 * since that's where the security risk lies.
944	 */
945	jailpath = string_param(j->intparams[KP_PATH]);
946	if (jailpath == NULL)
947		jailpath = "";
948	jplen = strlen(jailpath);
949	if (!strncmp(path, jailpath, jplen) && path[jplen] == '/') {
950		tpath = alloca(strlen(path) + 1);
951		strcpy(tpath, path);
952		for (p = tpath + jplen; p != NULL; ) {
953			p = strchr(p + 1, '/');
954			if (p)
955				*p = '\0';
956			if (lstat(tpath, &st) < 0) {
957				if (errno == ENOENT && isfile && !p)
958					break;
959				jail_warnx(j, "%s: %s: %s", pname, tpath,
960				    strerror(errno));
961				return -1;
962			}
963			if (S_ISLNK(st.st_mode)) {
964				jail_warnx(j, "%s: %s is a symbolic link",
965				    pname, tpath);
966				return -1;
967			}
968			if (p)
969				*p = '/';
970		}
971	}
972	if (umount_type != NULL) {
973		if (stat(path, &st) < 0 || statfs(path, &stfs) < 0) {
974			jail_warnx(j, "%s: %s: %s", pname, path,
975			    strerror(errno));
976			return -1;
977		}
978		if (stat(stfs.f_mntonname, &mpst) < 0) {
979			jail_warnx(j, "%s: %s: %s", pname, stfs.f_mntonname,
980			    strerror(errno));
981			return -1;
982		}
983		if (st.st_ino != mpst.st_ino) {
984			jail_warnx(j, "%s: %s: not a mount point",
985			    pname, path);
986			return -1;
987		}
988		if (strcmp(stfs.f_fstypename, umount_type)) {
989			jail_warnx(j, "%s: %s: not a %s mount",
990			    pname, path, umount_type);
991			return -1;
992		}
993	}
994	return 0;
995}
996