1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2011 James Gritton
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/types.h>
33#include <sys/cpuset.h>
34#include <sys/event.h>
35#include <sys/mount.h>
36#include <sys/stat.h>
37#include <sys/sysctl.h>
38#include <sys/user.h>
39#include <sys/wait.h>
40
41#include <err.h>
42#include <errno.h>
43#include <fcntl.h>
44#include <kvm.h>
45#include <login_cap.h>
46#include <paths.h>
47#include <pwd.h>
48#include <signal.h>
49#include <stdio.h>
50#include <stdlib.h>
51#include <string.h>
52#include <unistd.h>
53#include <vis.h>
54
55#include "jailp.h"
56
/* Timeout used by term_procs() when no stop timeout parameter is set. */
#define DEFAULT_STOP_TIMEOUT	10
/* Number of buckets in the pid -> jail hash table. */
#define PHASH_SIZE		256

/* Head type for one bucket of the process hash. */
LIST_HEAD(phhead, phash);

/*
 * One tracked process: links a pid to the jail that is waiting on it.
 * Entries are created by add_proc() and released by find_proc() or
 * clear_procs().
 */
struct phash {
	LIST_ENTRY(phash)	le;	/* bucket linkage */
	struct cfjail		*j;	/* jail waiting on this process */
	pid_t			pid;	/* the process being watched */
};
67
/*
 * Count of additional commands that may be started.  run_command()
 * decrements it for each tracked child and finish_command() increments
 * it again; next_command() defers a jail to the runnable queue while it
 * is exactly zero.
 */
int paralimit = -1;

extern char **environ;

static int run_command(struct cfjail *j);
static int add_proc(struct cfjail *j, pid_t pid);
static void clear_procs(struct cfjail *j);
static struct cfjail *find_proc(pid_t pid);
static int term_procs(struct cfjail *j);
static int get_user_info(struct cfjail *j, const char *username,
    const struct passwd **pwdp, login_cap_t **lcapp);
static int check_path(struct cfjail *j, const char *pname, const char *path,
    int isfile, const char *umount_type);

/* Jails waiting on tracked processes; add_proc() keeps it timeout-ordered. */
static struct cfjails sleeping = TAILQ_HEAD_INITIALIZER(sleeping);
/* Jails deferred by next_command() because paralimit reached zero. */
static struct cfjails runnable = TAILQ_HEAD_INITIALIZER(runnable);
/*
 * Placeholder command string for parameters that have no string list of
 * their own; the non-zero len keeps next_command() from skipping it.
 */
static struct cfstring dummystring = { .len = 1 };
/* Hash of tracked processes, indexed by pid % PHASH_SIZE. */
static struct phhead phash[PHASH_SIZE];
/* kqueue descriptor, created on first use by add_proc(). */
static int kq;
87
/*
 * Return the cpuset id obtained from cpuset_getid() with CPU_LEVEL_ROOT,
 * CPU_WHICH_PID and an id of -1, or CPUSET_INVALID if that lookup failed.
 * A failed lookup is remembered and never retried; a successful one is
 * cached.
 */
static cpusetid_t
root_cpuset_id(void)
{
	static cpusetid_t cached_id = CPUSET_INVALID;
	static int lookup_error;

	/* A previous attempt already failed: don't ask again. */
	if (lookup_error != 0)
		return (CPUSET_INVALID);

	if (cached_id == CPUSET_INVALID) {
		lookup_error = cpuset_getid(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
		    &cached_id);
		if (lookup_error != 0)
			return (CPUSET_INVALID);
	}
	return (cached_id);
}
101
/*
 * Run the next command associated with a jail.
 *
 * Walks the jail's command parameter list (j->comparam) and, within each
 * parameter, its list of command strings (j->comstring), handing each
 * one to run_command() until a command has to be waited for.
 *
 * Returns 1 when the jail was queued (deferred on the runnable queue, or
 * run_command() returned 1 or failed), and 0 when the end of the
 * parameter list (IP__NULL) was reached.
 */
int
next_command(struct cfjail *j)
{
	enum intparam comparam;
	int create_failed, stopping;

	/*
	 * No more commands may be started right now: park the jail on the
	 * runnable queue.  A jail that just came off that queue goes back
	 * to its head.
	 */
	if (paralimit == 0) {
		if (j->flags & JF_FROM_RUNQ)
			requeue_head(j, &runnable);
		else
			requeue(j, &runnable);
		return 1;
	}
	j->flags &= ~JF_FROM_RUNQ;
	/* JF_FAILED without JF_STOP: walk the parameter list backwards. */
	create_failed = (j->flags & (JF_STOP | JF_FAILED)) == JF_FAILED;
	stopping = (j->flags & JF_STOP) != 0;
	comparam = *j->comparam;
	for (;;) {
		if (j->comstring == NULL) {
			/* Current parameter is exhausted; step to the next. */
			j->comparam += create_failed ? -1 : 1;
			switch ((comparam = *j->comparam)) {
			case IP__NULL:
				return 0;
			case IP_MOUNT_DEVFS:
				if (!bool_param(j->intparams[IP_MOUNT_DEVFS]))
					continue;
				j->comstring = &dummystring;
				break;
			case IP_MOUNT_FDESCFS:
				if (!bool_param(j->intparams[IP_MOUNT_FDESCFS]))
					continue;
				j->comstring = &dummystring;
				break;
			case IP_MOUNT_PROCFS:
				if (!bool_param(j->intparams[IP_MOUNT_PROCFS]))
					continue;
				j->comstring = &dummystring;
				break;
			case IP__OP:
			case IP_STOP_TIMEOUT:
				/* Always run once; no string list to walk. */
				j->comstring = &dummystring;
				break;
			default:
				if (j->intparams[comparam] == NULL)
					continue;
				/*
				 * Start from the last string when unwinding a
				 * failed create, or when stopping a parameter
				 * flagged PF_REV; otherwise from the first.
				 */
				j->comstring = create_failed || (stopping &&
				    (j->intparams[comparam]->flags & PF_REV))
				    ? TAILQ_LAST(&j->intparams[comparam]->val,
					cfstrings)
				    : TAILQ_FIRST(&j->intparams[comparam]->val);
			}
		} else {
			/*
			 * Advance within the current parameter's strings, in
			 * the same direction as above.  The dummy string has
			 * no successor.
			 */
			j->comstring = j->comstring == &dummystring ? NULL :
			    create_failed || (stopping &&
			    (j->intparams[comparam]->flags & PF_REV))
			    ? TAILQ_PREV(j->comstring, cfstrings, tq)
			    : TAILQ_NEXT(j->comstring, tq);
		}
		/*
		 * Skip empty strings, and skip the listed exec/command
		 * parameters entirely while unwinding a failed create.
		 */
		if (j->comstring == NULL || j->comstring->len == 0 ||
		    (create_failed && (comparam == IP_EXEC_PRESTART ||
		    comparam == IP_EXEC_CREATED || comparam == IP_EXEC_START ||
		    comparam == IP_COMMAND || comparam == IP_EXEC_POSTSTART ||
		    comparam == IP_EXEC_PREPARE)))
			continue;
		/* A return of 0 means nothing to wait for: keep going. */
		switch (run_command(j)) {
		case -1:
			failed(j);
			/* FALLTHROUGH */
		case 1:
			return 1;
		}
	}
}
178
179/*
180 * Check command exit status
181 */
182int
183finish_command(struct cfjail *j)
184{
185	struct cfjail *rj;
186	int error;
187
188	if (!(j->flags & JF_SLEEPQ))
189		return 0;
190	j->flags &= ~JF_SLEEPQ;
191	if (*j->comparam == IP_STOP_TIMEOUT) {
192		j->flags &= ~JF_TIMEOUT;
193		j->pstatus = 0;
194		return 0;
195	}
196	paralimit++;
197	if (!TAILQ_EMPTY(&runnable)) {
198		rj = TAILQ_FIRST(&runnable);
199		rj->flags |= JF_FROM_RUNQ;
200		requeue(rj, &ready);
201	}
202	error = 0;
203	if (j->flags & JF_TIMEOUT) {
204		j->flags &= ~JF_TIMEOUT;
205		if (*j->comparam != IP_STOP_TIMEOUT) {
206			jail_warnx(j, "%s: timed out", j->comline);
207			failed(j);
208			error = -1;
209		} else if (verbose > 0)
210			jail_note(j, "timed out\n");
211	} else if (j->pstatus != 0) {
212		if (WIFSIGNALED(j->pstatus))
213			jail_warnx(j, "%s: exited on signal %d",
214			    j->comline, WTERMSIG(j->pstatus));
215		else
216			jail_warnx(j, "%s: failed", j->comline);
217		j->pstatus = 0;
218		failed(j);
219		error = -1;
220	}
221	free(j->comline);
222	j->comline = NULL;
223	return error;
224}
225
/*
 * Check for finished processes or timeouts.
 *
 * Returns the jail whose wait has ended: either the jail at the head of
 * the sleeping queue once its timeout has expired (JF_TIMEOUT is set and
 * its tracked processes are dropped), or the jail whose last tracked
 * process exited (its pstatus is set from the event data).  Returns NULL
 * when no jail is sleeping, or when nonblock is set and no event is
 * pending.
 */
struct cfjail *
next_proc(int nonblock)
{
	struct kevent ev;
	struct timespec remain;
	struct timespec *wait;
	struct cfjail *head, *owner;
	int nev;

	if (TAILQ_EMPTY(&sleeping))
		return NULL;

	for (;;) {
		wait = NULL;
		head = TAILQ_FIRST(&sleeping);
		if (head != NULL && head->timeout.tv_sec) {
			/* Work out how long until the earliest timeout. */
			clock_gettime(CLOCK_REALTIME, &remain);
			remain.tv_sec = head->timeout.tv_sec - remain.tv_sec;
			remain.tv_nsec = head->timeout.tv_nsec - remain.tv_nsec;
			if (remain.tv_nsec < 0) {
				remain.tv_sec--;
				remain.tv_nsec += 1000000000;
			}
			if (remain.tv_sec < 0 ||
			    (remain.tv_sec == 0 && remain.tv_nsec == 0)) {
				/* Already expired. */
				head->flags |= JF_TIMEOUT;
				clear_procs(head);
				return head;
			}
			wait = &remain;
		}
		if (nonblock) {
			/* Poll only. */
			remain.tv_sec = 0;
			remain.tv_nsec = 0;
			wait = &remain;
		}

		nev = kevent(kq, NULL, 0, &ev, 1, wait);
		if (nev == -1) {
			if (errno != EINTR)
				err(1, "kevent");
			continue;
		}
		if (nev == 0) {
			if (nonblock)
				return NULL;
			/* The wait ran out: the head jail has timed out. */
			head = TAILQ_FIRST(&sleeping);
			head->flags |= JF_TIMEOUT;
			clear_procs(head);
			return head;
		}
		if (nev != 1)
			return NULL;

		/* Reap the process if it is our child. */
		(void)waitpid(ev.ident, NULL, WNOHANG);
		owner = find_proc(ev.ident);
		if (owner != NULL) {
			owner->pstatus = ev.data;
			return owner;
		}
		/* Jail still has other processes outstanding; keep waiting. */
	}
}
285
/*
 * Run a single command for a jail, possibly inside the jail.
 *
 * The command is selected by *j->comparam and j->comstring.
 * IP_STOP_TIMEOUT and IP__OP are carried out directly; every other
 * parameter is turned into an argument vector which is then run in a
 * forked child.  exec.start, command and exec.stop are run after
 * attaching to the jail; all others run outside it.
 *
 * Returns 1 when the jail is now waiting on a process, 0 when there is
 * nothing to wait for (nothing to run, a background command, or a child
 * that had already gone away), and -1 on error.  Errors in the forked
 * child are reported and end the child with exit(1).
 */
static int
run_command(struct cfjail *j)
{
	const struct passwd *pwd;
	const struct cfstring *comstring, *s;
	login_cap_t *lcap;
	const char **argv;
	char *acs, *cs, *comcs, *devpath;
	const char *jidstr, *conslog, *path, *ruleset, *term, *username;
	enum intparam comparam;
	size_t comlen;
	pid_t pid;
	cpusetid_t setid;
	int argc, bg, clean, consfd, down, fib, i, injail, sjuser, timeout;
#if defined(INET) || defined(INET6)
	char *addr, *extrap, *p, *val;
#endif

	static char *cleanenv;

	/* Perform some operations that aren't actually commands */
	comparam = *j->comparam;
	/* Non-zero when the jail is being stopped or unwound after failure. */
	down = j->flags & (JF_STOP | JF_FAILED);
	switch (comparam) {
	case IP_STOP_TIMEOUT:
		return term_procs(j);

	case IP__OP:
		if (down) {
			/* Only EPERM is treated as a hard removal failure. */
			if (jail_remove(j->jid) < 0 && errno == EPERM) {
				jail_warnx(j, "jail_remove: %s",
				    strerror(errno));
				return -1;
			}
			if (verbose > 0 || (verbose == 0 && (j->flags & JF_STOP
			    ? note_remove : j->name != NULL)))
				jail_note(j, "removed\n");
			j->jid = -1;
			if (j->flags & JF_STOP)
				dep_done(j, DF_LIGHT);
			else
				j->flags &= ~JF_PERSIST;
		} else {
			if (create_jail(j) < 0)
				return -1;
			if (iflag)
				printf("%d\n", j->jid);
			if (verbose >= 0 && (j->name || verbose > 0))
				jail_note(j, "created\n");
			dep_done(j, DF_LIGHT);
		}
		return 0;

	default: ;
	}
	/*
	 * Collect exec arguments.  Internal commands for network and
	 * mounting build their own argument lists.
	 */
	comstring = j->comstring;
	bg = 0;
	switch (comparam) {
#ifdef INET
	case IP__IP4_IFADDR:
		/*
		 * Work on a private copy.  Everything after the first space
		 * is extra ifconfig arguments; counting the spaces gives an
		 * upper bound on how many there are.
		 */
		argc = 0;
		val = alloca(strlen(comstring->s) + 1);
		strcpy(val, comstring->s);
		cs = val;
		extrap = NULL;
		while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
			if (extrap == NULL) {
				*p = '\0';
				extrap = p + 1;
			}
			cs = p + 1;
			argc++;
		}

		argv = alloca((8 + argc) * sizeof(char *));
		argv[0] = _PATH_IFCONFIG;
		/* "interface|address" overrides the interface parameter. */
		if ((cs = strchr(val, '|'))) {
			argv[1] = acs = alloca(cs - val + 1);
			strlcpy(acs, val, cs - val + 1);
			addr = cs + 1;
		} else {
			argv[1] = string_param(j->intparams[IP_INTERFACE]);
			addr = val;
		}
		argv[2] = "inet";
		if (!(cs = strchr(addr, '/'))) {
			/* No mask given: use a host mask. */
			argv[3] = addr;
			argv[4] = "netmask";
			argv[5] = "255.255.255.255";
			argc = 6;
		} else if (strchr(cs + 1, '.')) {
			/* "addr/a.b.c.d": pass the dotted mask separately. */
			argv[3] = acs = alloca(cs - addr + 1);
			strlcpy(acs, addr, cs - addr + 1);
			argv[4] = "netmask";
			argv[5] = cs + 1;
			argc = 6;
		} else {
			/* "addr/len" is passed through unchanged. */
			argv[3] = addr;
			argc = 4;
		}

		if (!down && extrap != NULL) {
			for (cs = strtok(extrap, " "); cs;
			     cs = strtok(NULL, " ")) {
				size_t len = strlen(cs) + 1;
				argv[argc++] = acs = alloca(len);
				strlcpy(acs, cs, len);
			}
		}

		argv[argc] = down ? "-alias" : "alias";
		argv[argc + 1] = NULL;
		break;
#endif

#ifdef INET6
	case IP__IP6_IFADDR:
		/* Same splitting of extra arguments as for IPv4 above. */
		argc = 0;
		val = alloca(strlen(comstring->s) + 1);
		strcpy(val, comstring->s);
		cs = val;
		extrap = NULL;
		while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
			if (extrap == NULL) {
				*p = '\0';
				extrap = p + 1;
			}
			cs = p + 1;
			argc++;
		}

		argv = alloca((8 + argc) * sizeof(char *));
		argv[0] = _PATH_IFCONFIG;
		if ((cs = strchr(val, '|'))) {
			argv[1] = acs = alloca(cs - val + 1);
			strlcpy(acs, val, cs - val + 1);
			addr = cs + 1;
		} else {
			argv[1] = string_param(j->intparams[IP_INTERFACE]);
			addr = val;
		}
		argv[2] = "inet6";
		argv[3] = addr;
		if (!(cs = strchr(addr, '/'))) {
			/* No prefix length given: use a host prefix. */
			argv[4] = "prefixlen";
			argv[5] = "128";
			argc = 6;
		} else
			argc = 4;

		/*
		 * Only tokenize when there really are extra arguments:
		 * strtok(NULL, ...) would otherwise resume whatever string
		 * an earlier, unrelated strtok() call left unfinished.
		 */
		if (!down && extrap != NULL) {
			for (cs = strtok(extrap, " "); cs;
			     cs = strtok(NULL, " ")) {
				size_t len = strlen(cs) + 1;
				argv[argc++] = acs = alloca(len);
				strlcpy(acs, cs, len);
			}
		}

		argv[argc] = down ? "-alias" : "alias";
		argv[argc + 1] = NULL;
		break;
#endif

	case IP_VNET_INTERFACE:
		argv = alloca(5 * sizeof(char *));
		argv[0] = _PATH_IFCONFIG;
		argv[1] = comstring->s;
		argv[2] = down ? "-vnet" : "vnet";
		/* Identify the jail by jid if set, else by name. */
		jidstr = string_param(j->intparams[KP_JID]);
		argv[3] = jidstr ? jidstr : string_param(j->intparams[KP_NAME]);
		argv[4] = NULL;
		break;

	case IP_MOUNT:
	case IP__MOUNT_FROM_FSTAB:
		/*
		 * Split up to four whitespace-separated fields: argv[0] and
		 * argv[1] end up as the last two mount(8) arguments, argv[2]
		 * follows "-t" and argv[3] (optional) follows "-o".  The
		 * first two are vis(3)-decoded in place.
		 */
		argv = alloca(8 * sizeof(char *));
		comcs = alloca(comstring->len + 1);
		strcpy(comcs, comstring->s);
		argc = 0;
		for (cs = strtok(comcs, " \t\f\v\r\n"); cs && argc < 4;
		     cs = strtok(NULL, " \t\f\v\r\n")) {
			if (argc <= 1 && strunvis(cs, cs) < 0) {
				jail_warnx(j, "%s: %s: fstab parse error",
				    j->intparams[comparam]->name, comstring->s);
				return -1;
			}
			argv[argc++] = cs;
		}
		if (argc == 0)
			return 0;
		if (argc < 3) {
			jail_warnx(j, "%s: %s: missing information",
			    j->intparams[comparam]->name, comstring->s);
			return -1;
		}
		if (check_path(j, j->intparams[comparam]->name, argv[1], 0,
		    down ? argv[2] : NULL) < 0)
			return -1;
		/* Rearrange in place, highest index first, into the final argv. */
		if (down) {
			/* umount -t <argv[2]> <argv[1]> */
			argv[4] = NULL;
			argv[3] = argv[1];
			argv[0] = "/sbin/umount";
		} else {
			if (argc == 4) {
				/* mount -t <2> -o <3> <0> <1> */
				argv[7] = NULL;
				argv[6] = argv[1];
				argv[5] = argv[0];
				argv[4] = argv[3];
				argv[3] = "-o";
			} else {
				/* mount -t <2> <0> <1> */
				argv[5] = NULL;
				argv[4] = argv[1];
				argv[3] = argv[0];
			}
			argv[0] = _PATH_MOUNT;
		}
		argv[1] = "-t";
		break;

	case IP_MOUNT_DEVFS:
		argv = alloca(7 * sizeof(char *));
		path = string_param(j->intparams[KP_PATH]);
		if (path == NULL) {
			jail_warnx(j, "mount.devfs: no jail root path defined");
			return -1;
		}
		/* Room for "/dev" plus the terminating NUL. */
		devpath = alloca(strlen(path) + 5);
		sprintf(devpath, "%s/dev", path);
		if (check_path(j, "mount.devfs", devpath, 0,
		    down ? "devfs" : NULL) < 0)
			return -1;
		if (down) {
			argv[0] = "/sbin/umount";
			argv[1] = devpath;
			argv[2] = NULL;
		} else {
			argv[0] = _PATH_MOUNT;
			argv[1] = "-t";
			argv[2] = "devfs";
			ruleset = string_param(j->intparams[KP_DEVFS_RULESET]);
			if (!ruleset)
				ruleset = "4";	/* devfsrules_jail */
			/* Room for "-oruleset=" plus the terminating NUL. */
			argv[3] = acs = alloca(11 + strlen(ruleset));
			sprintf(acs, "-oruleset=%s", ruleset);
			argv[4] = ".";
			argv[5] = devpath;
			argv[6] = NULL;
		}
		break;

	case IP_MOUNT_FDESCFS:
		argv = alloca(7 * sizeof(char *));
		path = string_param(j->intparams[KP_PATH]);
		if (path == NULL) {
			jail_warnx(j, "mount.fdescfs: no jail root path defined");
			return -1;
		}
		/* Room for "/dev/fd" plus the terminating NUL. */
		devpath = alloca(strlen(path) + 8);
		sprintf(devpath, "%s/dev/fd", path);
		if (check_path(j, "mount.fdescfs", devpath, 0,
		    down ? "fdescfs" : NULL) < 0)
			return -1;
		if (down) {
			argv[0] = "/sbin/umount";
			argv[1] = devpath;
			argv[2] = NULL;
		} else {
			argv[0] = _PATH_MOUNT;
			argv[1] = "-t";
			argv[2] = "fdescfs";
			argv[3] = ".";
			argv[4] = devpath;
			argv[5] = NULL;
		}
		break;

	case IP_MOUNT_PROCFS:
		argv = alloca(7 * sizeof(char *));
		path = string_param(j->intparams[KP_PATH]);
		if (path == NULL) {
			jail_warnx(j, "mount.procfs: no jail root path defined");
			return -1;
		}
		/* Room for "/proc" plus the terminating NUL. */
		devpath = alloca(strlen(path) + 6);
		sprintf(devpath, "%s/proc", path);
		if (check_path(j, "mount.procfs", devpath, 0,
		    down ? "procfs" : NULL) < 0)
			return -1;
		if (down) {
			argv[0] = "/sbin/umount";
			argv[1] = devpath;
			argv[2] = NULL;
		} else {
			argv[0] = _PATH_MOUNT;
			argv[1] = "-t";
			argv[2] = "procfs";
			argv[3] = ".";
			argv[4] = devpath;
			argv[5] = NULL;
		}
		break;

	case IP_COMMAND:
		if (j->name != NULL)
			goto default_command;
		/*
		 * For a jail without a name the whole string list is one
		 * argument vector, used verbatim.
		 */
		argc = 0;
		TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
			argc++;
		argv = alloca((argc + 1) * sizeof(char *));
		argc = 0;
		TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
			argv[argc++] = s->s;
		argv[argc] = NULL;
		/* All strings are consumed at once; don't iterate them. */
		j->comstring = &dummystring;
		break;

	default:
	default_command:
		/*
		 * Anything containing shell metacharacters goes to the
		 * shell, except that a lone trailing '&' is handled here
		 * as "run in the background".
		 */
		if ((cs = strpbrk(comstring->s, "!\"$&'()*;<>?[\\]`{|}~")) &&
		    !(cs[0] == '&' && cs[1] == '\0')) {
			argv = alloca(4 * sizeof(char *));
			argv[0] = _PATH_BSHELL;
			argv[1] = "-c";
			argv[2] = comstring->s;
			argv[3] = NULL;
		} else {
			if (cs) {
				/* Strip the trailing '&'. */
				*cs = 0;
				bg = 1;
			}
			/*
			 * Tokenize twice: once to count the words, then again
			 * on a fresh copy to fill in argv.
			 */
			comcs = alloca(comstring->len + 1);
			strcpy(comcs, comstring->s);
			argc = 0;
			for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
			     cs = strtok(NULL, " \t\f\v\r\n"))
				argc++;
			argv = alloca((argc + 1) * sizeof(char *));
			strcpy(comcs, comstring->s);
			argc = 0;
			for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
			     cs = strtok(NULL, " \t\f\v\r\n"))
				argv[argc++] = cs;
			argv[argc] = NULL;
		}
	}
	if (argv[0] == NULL)
		return 0;

	/* Record the absolute deadline for this command, if it has one. */
	if (int_param(j->intparams[IP_EXEC_TIMEOUT], &timeout) &&
	    timeout != 0) {
		clock_gettime(CLOCK_REALTIME, &j->timeout);
		j->timeout.tv_sec += timeout;
	} else
		j->timeout.tv_sec = 0;

	injail = comparam == IP_EXEC_START || comparam == IP_COMMAND ||
	    comparam == IP_EXEC_STOP;
	if (injail)
		setid = root_cpuset_id();
	else
		setid = CPUSET_INVALID;
	clean = bool_param(j->intparams[IP_EXEC_CLEAN]);
	username = string_param(j->intparams[injail
	    ? IP_EXEC_JAIL_USER : IP_EXEC_SYSTEM_USER]);
	sjuser = bool_param(j->intparams[IP_EXEC_SYSTEM_JAIL_USER]);

	/* consfd == 0 doubles as "no console log". */
	consfd = 0;
	if (injail &&
	    (conslog = string_param(j->intparams[IP_EXEC_CONSOLELOG]))) {
		if (check_path(j, "exec.consolelog", conslog, 1, NULL) < 0)
			return -1;
		consfd =
		    open(conslog, O_WRONLY | O_CREAT | O_APPEND, DEFFILEMODE);
		if (consfd < 0) {
			jail_warnx(j, "open %s: %s", conslog, strerror(errno));
			return -1;
		}
	}

	/* Join argv with single spaces into j->comline for messages. */
	comlen = 0;
	for (i = 0; argv[i]; i++)
		comlen += strlen(argv[i]) + 1;
	j->comline = cs = emalloc(comlen);
	for (i = 0; argv[i]; i++) {
		strcpy(cs, argv[i]);
		if (argv[i + 1]) {
			cs += strlen(argv[i]) + 1;
			cs[-1] = ' ';
		}
	}
	if (verbose > 0)
		jail_note(j, "run command%s%s%s: %s\n",
		    injail ? " in jail" : "", username ? " as " : "",
		    username ? username : "", j->comline);

	pid = fork();
	if (pid < 0)
		err(1, "fork");
	if (pid > 0) {
		/*
		 * Parent.  The child holds its own copy of the console log
		 * descriptor, so release ours instead of leaking one per
		 * command.
		 */
		if (consfd != 0)
			(void)close(consfd);
		if (bg || !add_proc(j, pid)) {
			/* Nothing to wait for. */
			free(j->comline);
			j->comline = NULL;
			return 0;
		} else {
			paralimit--;
			return 1;
		}
	}
	/* Child from here on. */
	if (bg)
		setsid();

	/* Set up the environment and run the command */
	pwd = NULL;
	lcap = NULL;
	/*
	 * With exec.system_jail_user the user is looked up now, before
	 * attaching to the jail; otherwise the lookup happens afterwards.
	 */
	if ((clean || username) && injail && sjuser &&
	    get_user_info(j, username, &pwd, &lcap) < 0)
		exit(1);
	if (injail) {
		/* jail_attach won't chdir along with its chroot. */
		path = string_param(j->intparams[KP_PATH]);
		if (path && chdir(path) < 0) {
			jail_warnx(j, "chdir %s: %s", path, strerror(errno));
			exit(1);
		}
		if (int_param(j->intparams[IP_EXEC_FIB], &fib) &&
		    setfib(fib) < 0) {
			jail_warnx(j, "setfib: %s", strerror(errno));
			exit(1);
		}

		/*
		 * We wouldn't have specialized our affinity, so just setid to
		 * root.  We do this prior to attaching to avoid the kernel
		 * having to create a transient cpuset that we'll promptly
		 * free up with a reset to the jail's cpuset.
		 *
		 * This is just a best-effort to use as wide of mask as
		 * possible.
		 */
		if (setid != CPUSET_INVALID)
			(void)cpuset_setid(CPU_WHICH_PID, -1, setid);

		if (jail_attach(j->jid) < 0) {
			jail_warnx(j, "jail_attach: %s", strerror(errno));
			exit(1);
		}
	}
	if (clean || username) {
		if (!(injail && sjuser) &&
		    get_user_info(j, username, &pwd, &lcap) < 0)
			exit(1);
		if (clean) {
			/* Start from an empty environment, keeping only TERM. */
			term = getenv("TERM");
			environ = &cleanenv;
			setenv("PATH", "/bin:/usr/bin", 0);
			if (term != NULL)
				setenv("TERM", term, 1);
		}
		if (setgid(pwd->pw_gid) < 0) {
			jail_warnx(j, "setgid %d: %s", pwd->pw_gid,
			    strerror(errno));
			exit(1);
		}
		/*
		 * With a user name, apply the full login context except for
		 * groups and login name; without one, only PATH and the
		 * environment.
		 */
		if (setusercontext(lcap, pwd, pwd->pw_uid, username
		    ? LOGIN_SETALL & ~LOGIN_SETGROUP & ~LOGIN_SETLOGIN
		    : LOGIN_SETPATH | LOGIN_SETENV) < 0) {
			jail_warnx(j, "setusercontext %s: %s", pwd->pw_name,
			    strerror(errno));
			exit(1);
		}
		login_close(lcap);
		setenv("USER", pwd->pw_name, 1);
		setenv("HOME", pwd->pw_dir, 1);
		setenv("SHELL",
		    *pwd->pw_shell ? pwd->pw_shell : _PATH_BSHELL, 1);
		if (clean && chdir(pwd->pw_dir) < 0) {
			jail_warnx(j, "chdir %s: %s",
			    pwd->pw_dir, strerror(errno));
			exit(1);
		}
		endpwent();
	}

	/* Send stdout and stderr to the console log, if there is one. */
	if (consfd != 0 && (dup2(consfd, 1) < 0 || dup2(consfd, 2) < 0)) {
		jail_warnx(j, "exec.consolelog: %s", strerror(errno));
		exit(1);
	}
	closefrom(3);
	execvp(argv[0], __DECONST(char *const*, argv));
	jail_warnx(j, "exec %s: %s", argv[0], strerror(errno));
	exit(1);
}
786
/*
 * Add a process to the hash, tied to a jail.
 *
 * Registers an exit notification for pid on the kqueue (creating the
 * kqueue on first use), records the pid in the hash, and moves the jail
 * onto the sleeping queue.  A jail with a timeout is inserted ahead of
 * the first entry that has no timeout or a timeout that is not earlier;
 * a jail without one is handed to requeue().
 *
 * Returns 1 if the process is now being tracked, or 0 if it no longer
 * exists (ESRCH).  Any other kqueue/kevent failure is fatal.
 */
static int
add_proc(struct cfjail *j, pid_t pid)
{
	struct kevent kev;
	struct cfjail *cur;
	struct phash *entry;

	if (!kq) {
		kq = kqueue();
		if (kq < 0)
			err(1, "kqueue");
	}
	EV_SET(&kev, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0) {
		if (errno != ESRCH)
			err(1, "kevent");
		return 0;
	}

	entry = emalloc(sizeof(*entry));
	entry->j = j;
	entry->pid = pid;
	LIST_INSERT_HEAD(&phash[pid % PHASH_SIZE], entry, le);
	j->nprocs++;
	j->flags |= JF_SLEEPQ;

	if (j->timeout.tv_sec == 0) {
		requeue(j, &sleeping);
		return 1;
	}

	/* File the jail in the sleep queue according to its timeout. */
	TAILQ_REMOVE(j->queue, j, tq);
	for (cur = TAILQ_FIRST(&sleeping); cur != NULL;
	    cur = TAILQ_NEXT(cur, tq)) {
		if (cur->timeout.tv_sec == 0)
			break;
		if (j->timeout.tv_sec < cur->timeout.tv_sec)
			break;
		if (j->timeout.tv_sec == cur->timeout.tv_sec &&
		    j->timeout.tv_nsec <= cur->timeout.tv_nsec)
			break;
	}
	if (cur != NULL)
		TAILQ_INSERT_BEFORE(cur, j, tq);
	else
		TAILQ_INSERT_TAIL(&sleeping, j, tq);
	j->queue = &sleeping;
	return 1;
}
831
/*
 * Remove any processes from the hash that correspond to a jail.
 *
 * Every hash entry owned by j is unregistered from the kqueue (failures
 * are ignored), unlinked and freed, and the jail's process count is
 * reset to zero.
 */
static void
clear_procs(struct cfjail *j)
{
	struct kevent kev;
	struct phash *entry, *next;
	struct phhead *bucket;

	j->nprocs = 0;
	for (bucket = phash; bucket < phash + PHASH_SIZE; bucket++) {
		LIST_FOREACH_SAFE(entry, bucket, le, next) {
			if (entry->j != j)
				continue;
			EV_SET(&kev, entry->pid, EVFILT_PROC, EV_DELETE,
			    NOTE_EXIT, 0, NULL);
			(void)kevent(kq, &kev, 1, NULL, 0, NULL);
			LIST_REMOVE(entry, le);
			free(entry);
		}
	}
}
853
854/*
855 * Find the jail that corresponds to an exited process.
856 */
857static struct cfjail *
858find_proc(pid_t pid)
859{
860	struct cfjail *j;
861	struct phash *ph;
862
863	LIST_FOREACH(ph, &phash[pid % PHASH_SIZE], le)
864		if (ph->pid == pid) {
865			j = ph->j;
866			LIST_REMOVE(ph, le);
867			free(ph);
868			return --j->nprocs ? NULL : j;
869		}
870	return NULL;
871}
872
/*
 * Send SIGTERM to all processes in a jail and wait for them to die.
 *
 * Uses the jail's stop timeout parameter, or DEFAULT_STOP_TIMEOUT when
 * int_param() reports none; a timeout of zero skips the step entirely.
 * Each process that was signalled successfully is handed to add_proc().
 *
 * Returns 1 if the jail now has processes to wait for (its timeout is
 * then set to now plus the stop timeout), or 0 if there is nothing to
 * wait for or the process list could not be read.
 */
static int
term_procs(struct cfjail *j)
{
	static kvm_t *kd;

	struct kinfo_proc *procs, *kp;
	int announced, n, nprocs, timeout;

	if (!int_param(j->intparams[IP_STOP_TIMEOUT], &timeout))
		timeout = DEFAULT_STOP_TIMEOUT;
	else if (timeout == 0)
		return 0;

	/* The kvm handle is opened once and kept for later calls. */
	if (kd == NULL) {
		kd = kvm_open(NULL, NULL, NULL, O_RDONLY, NULL);
		if (kd == NULL)
			return 0;
	}

	procs = kvm_getprocs(kd, KERN_PROC_PROC, 0, &nprocs);
	if (procs == NULL)
		return 0;

	announced = 0;
	for (n = 0; n < nprocs; n++) {
		kp = &procs[n];
		if (kp->ki_jid != j->jid)
			continue;
		if (kill(kp->ki_pid, SIGTERM) != 0)
			continue;
		(void)add_proc(j, kp->ki_pid);
		if (verbose <= 0)
			continue;
		/* Print the heading once, then one pid per process. */
		if (!announced) {
			announced = 1;
			jail_note(j, "sent SIGTERM to:");
		}
		printf(" %d", kp->ki_pid);
	}
	if (announced)
		printf("\n");

	if (j->nprocs <= 0)
		return 0;
	clock_gettime(CLOCK_REALTIME, &j->timeout);
	j->timeout.tv_sec += timeout;
	return 1;
}
920
/*
 * Look up a user in the passwd and login.conf files.
 *
 * With a username the entry is found by name; with NULL the entry for
 * the current uid is used.  On success *pwdp and *lcapp are filled in
 * and the process's group list is initialized for that user.
 *
 * Returns 0 on success, or -1 after printing a warning.  *pwdp is
 * written in either case and is NULL when the lookup failed.
 */
static int
get_user_info(struct cfjail *j, const char *username,
    const struct passwd **pwdp, login_cap_t **lcapp)
{
	const struct passwd *entry;

	/* Cleared so that a lookup error can be told from "not found". */
	errno = 0;
	if (username != NULL)
		entry = getpwnam(username);
	else
		entry = getpwuid(getuid());
	*pwdp = entry;

	if (entry == NULL) {
		if (errno != 0)
			jail_warnx(j, "getpwnam%s%s: %s", username ? " " : "",
			    username ? username : "", strerror(errno));
		else if (username != NULL)
			jail_warnx(j, "%s: no such user", username);
		else
			jail_warnx(j, "unknown uid %d", getuid());
		return -1;
	}

	*lcapp = login_getpwclass(entry);
	if (*lcapp == NULL) {
		jail_warnx(j, "getpwclass %s: %s", entry->pw_name,
		    strerror(errno));
		return -1;
	}

	/* Set the groups while the group file is still available */
	if (initgroups(entry->pw_name, entry->pw_gid) < 0) {
		jail_warnx(j, "initgroups %s: %s", entry->pw_name,
		    strerror(errno));
		return -1;
	}
	return 0;
}
956
957/*
958 * Make sure a mount or consolelog path is a valid absolute pathname
959 * with no symlinks.
960 */
961static int
962check_path(struct cfjail *j, const char *pname, const char *path, int isfile,
963    const char *umount_type)
964{
965	struct stat st, mpst;
966	struct statfs stfs;
967	char *tpath, *p;
968	const char *jailpath;
969	size_t jplen;
970
971	if (path[0] != '/') {
972		jail_warnx(j, "%s: %s: not an absolute pathname",
973		    pname, path);
974		return -1;
975	}
976	/*
977	 * Only check for symlinks in components below the jail's path,
978	 * since that's where the security risk lies.
979	 */
980	jailpath = string_param(j->intparams[KP_PATH]);
981	if (jailpath == NULL)
982		jailpath = "";
983	jplen = strlen(jailpath);
984	if (!strncmp(path, jailpath, jplen) && path[jplen] == '/') {
985		tpath = alloca(strlen(path) + 1);
986		strcpy(tpath, path);
987		for (p = tpath + jplen; p != NULL; ) {
988			p = strchr(p + 1, '/');
989			if (p)
990				*p = '\0';
991			if (lstat(tpath, &st) < 0) {
992				if (errno == ENOENT && isfile && !p)
993					break;
994				jail_warnx(j, "%s: %s: %s", pname, tpath,
995				    strerror(errno));
996				return -1;
997			}
998			if (S_ISLNK(st.st_mode)) {
999				jail_warnx(j, "%s: %s is a symbolic link",
1000				    pname, tpath);
1001				return -1;
1002			}
1003			if (p)
1004				*p = '/';
1005		}
1006	}
1007	if (umount_type != NULL) {
1008		if (stat(path, &st) < 0 || statfs(path, &stfs) < 0) {
1009			jail_warnx(j, "%s: %s: %s", pname, path,
1010			    strerror(errno));
1011			return -1;
1012		}
1013		if (stat(stfs.f_mntonname, &mpst) < 0) {
1014			jail_warnx(j, "%s: %s: %s", pname, stfs.f_mntonname,
1015			    strerror(errno));
1016			return -1;
1017		}
1018		if (st.st_ino != mpst.st_ino) {
1019			jail_warnx(j, "%s: %s: not a mount point",
1020			    pname, path);
1021			return -1;
1022		}
1023		if (strcmp(stfs.f_fstypename, umount_type)) {
1024			jail_warnx(j, "%s: %s: not a %s mount",
1025			    pname, path, umount_type);
1026			return -1;
1027		}
1028	}
1029	return 0;
1030}
1031