1/*-
2 * Copyright (c) 2014 John Baldwin
3 * Copyright (c) 2014, 2016 The FreeBSD Foundation
4 *
5 * Portions of this software were developed by Konstantin Belousov
6 * under sponsorship from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include "opt_ktrace.h"
31
32#include <sys/param.h>
33#include <sys/_unrhdr.h>
34#include <sys/systm.h>
35#include <sys/capsicum.h>
36#include <sys/lock.h>
37#include <sys/malloc.h>
38#include <sys/mman.h>
39#include <sys/mutex.h>
40#include <sys/priv.h>
41#include <sys/proc.h>
42#include <sys/procctl.h>
43#include <sys/sx.h>
44#include <sys/syscallsubr.h>
45#include <sys/sysproto.h>
46#include <sys/taskqueue.h>
47#include <sys/wait.h>
48
49#include <vm/vm.h>
50#include <vm/pmap.h>
51#include <vm/vm_map.h>
52#include <vm/vm_extern.h>
53
54static int
55protect_setchild(struct thread *td, struct proc *p, int flags)
56{
57
58	PROC_LOCK_ASSERT(p, MA_OWNED);
59	if (p->p_flag & P_SYSTEM || p_cansched(td, p) != 0)
60		return (0);
61	if (flags & PPROT_SET) {
62		p->p_flag |= P_PROTECTED;
63		if (flags & PPROT_INHERIT)
64			p->p_flag2 |= P2_INHERIT_PROTECTED;
65	} else {
66		p->p_flag &= ~P_PROTECTED;
67		p->p_flag2 &= ~P2_INHERIT_PROTECTED;
68	}
69	return (1);
70}
71
/*
 * Apply the protection request to 'top' and every descendant in the
 * process tree below it.  Returns nonzero if at least one process was
 * actually updated.  'top' arrives locked and is returned locked; the
 * lock is dropped while moving between processes, with proctree_lock
 * (asserted below) keeping the p_children/p_sibling links stable.
 */
static int
protect_setchildren(struct thread *td, struct proc *top, int flags)
{
	struct proc *p;
	int ret;

	p = top;
	ret = 0;
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= protect_setchild(td, p, flags);
		PROC_UNLOCK(p);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top) {
				/* Back at the root: return it locked. */
				PROC_LOCK(p);
				return (ret);
			}
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
		PROC_LOCK(p);
	}
}
105
106static int
107protect_set(struct thread *td, struct proc *p, void *data)
108{
109	int error, flags, ret;
110
111	flags = *(int *)data;
112	switch (PPROT_OP(flags)) {
113	case PPROT_SET:
114	case PPROT_CLEAR:
115		break;
116	default:
117		return (EINVAL);
118	}
119
120	if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0)
121		return (EINVAL);
122
123	error = priv_check(td, PRIV_VM_MADV_PROTECT);
124	if (error)
125		return (error);
126
127	if (flags & PPROT_DESCEND)
128		ret = protect_setchildren(td, p, flags);
129	else
130		ret = protect_setchild(td, p, flags);
131	if (ret == 0)
132		return (EPERM);
133	return (0);
134}
135
136static int
137reap_acquire(struct thread *td, struct proc *p, void *data __unused)
138{
139
140	sx_assert(&proctree_lock, SX_XLOCKED);
141	if (p != td->td_proc)
142		return (EPERM);
143	if ((p->p_treeflag & P_TREE_REAPER) != 0)
144		return (EBUSY);
145	p->p_treeflag |= P_TREE_REAPER;
146	/*
147	 * We do not reattach existing children and the whole tree
148	 * under them to us, since p->p_reaper already seen them.
149	 */
150	return (0);
151}
152
153static int
154reap_release(struct thread *td, struct proc *p, void *data __unused)
155{
156
157	sx_assert(&proctree_lock, SX_XLOCKED);
158	if (p != td->td_proc)
159		return (EPERM);
160	if (p == initproc)
161		return (EINVAL);
162	if ((p->p_treeflag & P_TREE_REAPER) == 0)
163		return (EINVAL);
164	reaper_abandon_children(p, false);
165	return (0);
166}
167
168static int
169reap_status(struct thread *td, struct proc *p, void *data)
170{
171	struct proc *reap, *p2, *first_p;
172	struct procctl_reaper_status *rs;
173
174	rs = data;
175	sx_assert(&proctree_lock, SX_LOCKED);
176	if ((p->p_treeflag & P_TREE_REAPER) == 0) {
177		reap = p->p_reaper;
178	} else {
179		reap = p;
180		rs->rs_flags |= REAPER_STATUS_OWNED;
181	}
182	if (reap == initproc)
183		rs->rs_flags |= REAPER_STATUS_REALINIT;
184	rs->rs_reaper = reap->p_pid;
185	rs->rs_descendants = 0;
186	rs->rs_children = 0;
187	if (!LIST_EMPTY(&reap->p_reaplist)) {
188		first_p = LIST_FIRST(&reap->p_children);
189		if (first_p == NULL)
190			first_p = LIST_FIRST(&reap->p_reaplist);
191		rs->rs_pid = first_p->p_pid;
192		LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
193			if (proc_realparent(p2) == reap)
194				rs->rs_children++;
195			rs->rs_descendants++;
196		}
197	} else {
198		rs->rs_pid = -1;
199	}
200	return (0);
201}
202
203static int
204reap_getpids(struct thread *td, struct proc *p, void *data)
205{
206	struct proc *reap, *p2;
207	struct procctl_reaper_pidinfo *pi, *pip;
208	struct procctl_reaper_pids *rp;
209	u_int i, n;
210	int error;
211
212	rp = data;
213	sx_assert(&proctree_lock, SX_LOCKED);
214	PROC_UNLOCK(p);
215	reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
216	n = i = 0;
217	error = 0;
218	LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling)
219		n++;
220	sx_unlock(&proctree_lock);
221	if (rp->rp_count < n)
222		n = rp->rp_count;
223	pi = malloc(n * sizeof(*pi), M_TEMP, M_WAITOK);
224	sx_slock(&proctree_lock);
225	LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
226		if (i == n)
227			break;
228		pip = &pi[i];
229		bzero(pip, sizeof(*pip));
230		pip->pi_pid = p2->p_pid;
231		pip->pi_subtree = p2->p_reapsubtree;
232		pip->pi_flags = REAPER_PIDINFO_VALID;
233		if (proc_realparent(p2) == reap)
234			pip->pi_flags |= REAPER_PIDINFO_CHILD;
235		if ((p2->p_treeflag & P_TREE_REAPER) != 0)
236			pip->pi_flags |= REAPER_PIDINFO_REAPER;
237		if ((p2->p_flag & P_STOPPED) != 0)
238			pip->pi_flags |= REAPER_PIDINFO_STOPPED;
239		if (p2->p_state == PRS_ZOMBIE)
240			pip->pi_flags |= REAPER_PIDINFO_ZOMBIE;
241		else if ((p2->p_flag & P_WEXIT) != 0)
242			pip->pi_flags |= REAPER_PIDINFO_EXITING;
243		i++;
244	}
245	sx_sunlock(&proctree_lock);
246	error = copyout(pi, rp->rp_pids, i * sizeof(*pi));
247	free(pi, M_TEMP);
248	sx_slock(&proctree_lock);
249	PROC_LOCK(p);
250	return (error);
251}
252
/*
 * State handed to the taskqueue worker that signals a single process
 * of a reaper subtree on behalf of the requesting thread.
 */
struct reap_kill_proc_work {
	struct ucred *cr;	/* requester's credentials */
	struct proc *target;	/* process to signal; NULLed when done */
	ksiginfo_t *ksi;	/* signal info to deliver */
	struct procctl_reaper_kill *rk;	/* request and result counters */
	int *error;		/* first captured error, starts as ESRCH */
	struct task t;		/* taskqueue linkage */
};
261
/*
 * Signal w->target with w->rk->rk_sig using the stored credentials.
 * Called with the target locked and held.  On a permission failure,
 * record the failing pid and error in the request if no prior error
 * was captured (i.e. *w->error is still the initial ESRCH).
 */
static void
reap_kill_proc_locked(struct reap_kill_proc_work *w)
{
	int error1;
	bool need_stop;

	PROC_LOCK_ASSERT(w->target, MA_OWNED);
	PROC_ASSERT_HELD(w->target);

	error1 = cr_cansignal(w->cr, w->target, w->rk->rk_sig);
	if (error1 != 0) {
		if (*w->error == ESRCH) {
			w->rk->rk_fpid = w->target->p_pid;
			*w->error = error1;
		}
		return;
	}

	/*
	 * The need_stop indicates if the target process needs to be
	 * suspended before being signalled.  This is needed when we
	 * guarantee that all processes in subtree are signalled,
	 * avoiding the race with some process not yet fully linked
	 * into all structures during fork, ignored by iterator, and
	 * then escaping signalling.
	 *
	 * The thread cannot usefully stop itself anyway, and if other
	 * thread of the current process forks while the current
	 * thread signals the whole subtree, it is an application
	 * race.
	 */
	if ((w->target->p_flag & (P_KPROC | P_SYSTEM | P_STOPPED)) == 0)
		need_stop = thread_single(w->target, SINGLE_ALLPROC) == 0;
	else
		need_stop = false;

	(void)pksignal(w->target, w->rk->rk_sig, w->ksi);
	w->rk->rk_killed++;
	*w->error = error1;

	if (need_stop)
		thread_single_end(w->target, SINGLE_ALLPROC);
}
305
/*
 * Taskqueue callback: perform the signal delivery for one target
 * process from a kernel worker context, then clear w->target and wake
 * the requesting thread sleeping on &w->target.  The wakeup is done
 * under proctree_lock, matching the sx_sleep() in
 * reap_kill_subtree_once(), so the handoff cannot be missed.
 */
static void
reap_kill_proc_work(void *arg, int pending __unused)
{
	struct reap_kill_proc_work *w;

	w = arg;
	PROC_LOCK(w->target);
	/* Skip targets that started exiting after they were queued. */
	if ((w->target->p_flag2 & P2_WEXIT) == 0)
		reap_kill_proc_locked(w);
	PROC_UNLOCK(w->target);

	sx_xlock(&proctree_lock);
	w->target = NULL;
	wakeup(&w->target);
	sx_xunlock(&proctree_lock);
}
322
/*
 * Work-list node for the iterative walk of a reaper subtree: each
 * entry holds one (held) reaper process whose descendant list still
 * needs to be processed.
 */
struct reap_kill_tracker {
	struct proc *parent;
	TAILQ_ENTRY(reap_kill_tracker) link;
};

TAILQ_HEAD(reap_kill_tracker_head, reap_kill_tracker);
329
330static void
331reap_kill_sched(struct reap_kill_tracker_head *tracker, struct proc *p2)
332{
333	struct reap_kill_tracker *t;
334
335	PROC_LOCK(p2);
336	if ((p2->p_flag2 & P2_WEXIT) != 0) {
337		PROC_UNLOCK(p2);
338		return;
339	}
340	_PHOLD_LITE(p2);
341	PROC_UNLOCK(p2);
342	t = malloc(sizeof(struct reap_kill_tracker), M_TEMP, M_WAITOK);
343	t->parent = p2;
344	TAILQ_INSERT_TAIL(tracker, t, link);
345}
346
347static void
348reap_kill_sched_free(struct reap_kill_tracker *t)
349{
350	PRELE(t->parent);
351	free(t, M_TEMP);
352}
353
354static void
355reap_kill_children(struct thread *td, struct proc *reaper,
356    struct procctl_reaper_kill *rk, ksiginfo_t *ksi, int *error)
357{
358	struct proc *p2;
359	int error1;
360
361	LIST_FOREACH(p2, &reaper->p_children, p_sibling) {
362		PROC_LOCK(p2);
363		if ((p2->p_flag2 & P2_WEXIT) == 0) {
364			error1 = p_cansignal(td, p2, rk->rk_sig);
365			if (error1 != 0) {
366				if (*error == ESRCH) {
367					rk->rk_fpid = p2->p_pid;
368					*error = error1;
369				}
370
371				/*
372				 * Do not end the loop on error,
373				 * signal everything we can.
374				 */
375			} else {
376				(void)pksignal(p2, rk->rk_sig, ksi);
377				rk->rk_killed++;
378			}
379		}
380		PROC_UNLOCK(p2);
381	}
382}
383
/*
 * Perform one pass over the reaper subtree rooted at 'reaper',
 * signalling every eligible descendant (optionally restricted to one
 * rk_subtree id).  Returns true if any process was handled, in which
 * case the caller repeats the pass to catch processes forked during
 * the walk.  'pids' records pids already signalled so repeated passes
 * do not double-signal.  Entered and exited with proctree_lock held,
 * but the lock may be dropped while single-threading the calling
 * process or while waiting for the taskqueue worker.
 */
static bool
reap_kill_subtree_once(struct thread *td, struct proc *p, struct proc *reaper,
    struct unrhdr *pids, struct reap_kill_proc_work *w)
{
	struct reap_kill_tracker_head tracker;
	struct reap_kill_tracker *t;
	struct proc *p2;
	int r, xlocked;
	bool res, st;

	res = false;
	TAILQ_INIT(&tracker);
	reap_kill_sched(&tracker, reaper);
	while ((t = TAILQ_FIRST(&tracker)) != NULL) {
		TAILQ_REMOVE(&tracker, t, link);

		/*
		 * Since reap_kill_proc() drops proctree_lock sx, it
		 * is possible that the tracked reaper is no longer.
		 * In this case the subtree is reparented to the new
		 * reaper, which should handle it.
		 */
		if ((t->parent->p_treeflag & P_TREE_REAPER) == 0) {
			reap_kill_sched_free(t);
			res = true;
			continue;
		}

		LIST_FOREACH(p2, &t->parent->p_reaplist, p_reapsibling) {
			/* Subtree filtering applies only at the top reaper. */
			if (t->parent == reaper &&
			    (w->rk->rk_flags & REAPER_KILL_SUBTREE) != 0 &&
			    p2->p_reapsubtree != w->rk->rk_subtree)
				continue;
			/* Nested reapers are queued for their own pass. */
			if ((p2->p_treeflag & P_TREE_REAPER) != 0)
				reap_kill_sched(&tracker, p2);

			/*
			 * Handle possible pid reuse.  If we recorded
			 * p2 as killed but its p_flag2 does not
			 * confirm it, that means that the process
			 * terminated and its id was reused by other
			 * process in the reaper subtree.
			 *
			 * Unlocked read of p2->p_flag2 is fine, it is
			 * our thread that set the tested flag.
			 */
			if (alloc_unr_specific(pids, p2->p_pid) != p2->p_pid &&
			    (atomic_load_int(&p2->p_flag2) &
			    (P2_REAPKILLED | P2_WEXIT)) != 0)
				continue;

			if (p2 == td->td_proc) {
				/*
				 * Signalling ourselves: cannot hand
				 * off to the taskqueue worker, which
				 * would deadlock waiting on us.
				 */
				if ((p2->p_flag & P_HADTHREADS) != 0 &&
				    (p2->p_flag2 & P2_WEXIT) == 0) {
					xlocked = sx_xlocked(&proctree_lock);
					sx_unlock(&proctree_lock);
					st = true;
				} else {
					st = false;
				}
				PROC_LOCK(p2);
				/*
				 * sapblk ensures that only one thread
				 * in the system sets this flag.
				 */
				p2->p_flag2 |= P2_REAPKILLED;
				if (st)
					r = thread_single(p2, SINGLE_NO_EXIT);
				(void)pksignal(p2, w->rk->rk_sig, w->ksi);
				w->rk->rk_killed++;
				if (st && r == 0)
					thread_single_end(p2, SINGLE_NO_EXIT);
				PROC_UNLOCK(p2);
				if (st) {
					/* Re-take the lock in the original mode. */
					if (xlocked)
						sx_xlock(&proctree_lock);
					else
						sx_slock(&proctree_lock);
				}
			} else {
				PROC_LOCK(p2);
				if ((p2->p_flag2 & P2_WEXIT) == 0) {
					_PHOLD_LITE(p2);
					p2->p_flag2 |= P2_REAPKILLED;
					PROC_UNLOCK(p2);
					/*
					 * Hand the target to the
					 * taskqueue worker and wait
					 * for it to finish; the sleep
					 * interlocks with the wakeup
					 * in reap_kill_proc_work().
					 */
					w->target = p2;
					taskqueue_enqueue(taskqueue_thread,
					    &w->t);
					while (w->target != NULL) {
						sx_sleep(&w->target,
						    &proctree_lock, PWAIT,
						    "reapst", 0);
					}
					PROC_LOCK(p2);
					_PRELE(p2);
				}
				PROC_UNLOCK(p2);
			}
			res = true;
		}
		reap_kill_sched_free(t);
	}
	return (res);
}
488
/*
 * Signal a whole reaper subtree, repeating the walk until a pass
 * handles nothing new, then clear the P2_REAPKILLED markers left on
 * the signalled processes.
 */
static void
reap_kill_subtree(struct thread *td, struct proc *p, struct proc *reaper,
    struct reap_kill_proc_work *w)
{
	struct unrhdr pids;
	void *ihandle;
	struct proc *p2;
	int pid;

	/*
	 * pids records processes which were already signalled, to
	 * avoid doubling signals to them if iteration needs to be
	 * repeated.
	 */
	init_unrhdr(&pids, 1, PID_MAX, UNR_NO_MTX);
	PROC_LOCK(td->td_proc);
	/* Bail out if we are exiting ourselves. */
	if ((td->td_proc->p_flag2 & P2_WEXIT) != 0) {
		PROC_UNLOCK(td->td_proc);
		goto out;
	}
	PROC_UNLOCK(td->td_proc);
	/* Repeat until a pass finds nothing left to signal. */
	while (reap_kill_subtree_once(td, p, reaper, &pids, w))
	       ;

	/* Clear the per-process markers for every recorded pid. */
	ihandle = create_iter_unr(&pids);
	while ((pid = next_iter_unr(ihandle)) != -1) {
		p2 = pfind(pid);	/* pfind() returns p2 locked */
		if (p2 != NULL) {
			p2->p_flag2 &= ~P2_REAPKILLED;
			PROC_UNLOCK(p2);
		}
	}
	free_iter_unr(ihandle);

out:
	/* Release all memory associated with the pid set. */
	clean_unrhdr(&pids);
	clear_unrhdr(&pids);
}
527
528static bool
529reap_kill_sapblk(struct thread *td __unused, void *data)
530{
531	struct procctl_reaper_kill *rk;
532
533	rk = data;
534	return ((rk->rk_flags & REAPER_KILL_CHILDREN) == 0);
535}
536
/*
 * PROC_REAP_KILL handler: deliver rk_sig either to the reaper's
 * direct children (REAPER_KILL_CHILDREN) or to the whole reaper
 * subtree, optionally limited to one subtree id
 * (REAPER_KILL_SUBTREE).  The two flags are mutually exclusive.
 * Returns ESRCH when nothing was signalled, otherwise the first
 * permission error encountered or 0; rk_killed/rk_fpid carry details
 * back to userspace even on error (copyout_on_error).
 */
static int
reap_kill(struct thread *td, struct proc *p, void *data)
{
	struct reap_kill_proc_work w;
	struct proc *reaper;
	ksiginfo_t ksi;
	struct procctl_reaper_kill *rk;
	int error;

	rk = data;
	sx_assert(&proctree_lock, SX_LOCKED);
	/* Signalling arbitrary pids is not permitted in capability mode. */
	if (CAP_TRACING(td))
		ktrcapfail(CAPFAIL_SIGNAL, &rk->rk_sig);
	if (IN_CAPABILITY_MODE(td))
		return (ECAPMODE);
	if (rk->rk_sig <= 0 || rk->rk_sig > _SIG_MAXSIG ||
	    (rk->rk_flags & ~(REAPER_KILL_CHILDREN |
	    REAPER_KILL_SUBTREE)) != 0 || (rk->rk_flags &
	    (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE)) ==
	    (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE))
		return (EINVAL);
	PROC_UNLOCK(p);
	reaper = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
	/* Build the siginfo describing the sender once, up front. */
	ksiginfo_init(&ksi);
	ksi.ksi_signo = rk->rk_sig;
	ksi.ksi_code = SI_USER;
	ksi.ksi_pid = td->td_proc->p_pid;
	ksi.ksi_uid = td->td_ucred->cr_ruid;
	error = ESRCH;
	rk->rk_killed = 0;
	rk->rk_fpid = -1;
	if ((rk->rk_flags & REAPER_KILL_CHILDREN) != 0) {
		reap_kill_children(td, reaper, rk, &ksi, &error);
	} else {
		w.cr = crhold(td->td_ucred);
		w.ksi = &ksi;
		w.rk = rk;
		w.error = &error;
		TASK_INIT(&w.t, 0, reap_kill_proc_work, &w);

		/*
		 * Prevent swapout, since w, ksi, and possibly rk, are
		 * allocated on the stack.  We sleep in
		 * reap_kill_subtree_once() waiting for task to
		 * complete single-threading.
		 */
		PHOLD(td->td_proc);

		reap_kill_subtree(td, p, reaper, &w);
		PRELE(td->td_proc);
		crfree(w.cr);
	}
	PROC_LOCK(p);
	return (error);
}
592
593static int
594trace_ctl(struct thread *td, struct proc *p, void *data)
595{
596	int state;
597
598	PROC_LOCK_ASSERT(p, MA_OWNED);
599	state = *(int *)data;
600
601	/*
602	 * Ktrace changes p_traceflag from or to zero under the
603	 * process lock, so the test does not need to acquire ktrace
604	 * mutex.
605	 */
606	if ((p->p_flag & P_TRACED) != 0 || p->p_traceflag != 0)
607		return (EBUSY);
608
609	switch (state) {
610	case PROC_TRACE_CTL_ENABLE:
611		if (td->td_proc != p)
612			return (EPERM);
613		p->p_flag2 &= ~(P2_NOTRACE | P2_NOTRACE_EXEC);
614		break;
615	case PROC_TRACE_CTL_DISABLE_EXEC:
616		p->p_flag2 |= P2_NOTRACE_EXEC | P2_NOTRACE;
617		break;
618	case PROC_TRACE_CTL_DISABLE:
619		if ((p->p_flag2 & P2_NOTRACE_EXEC) != 0) {
620			KASSERT((p->p_flag2 & P2_NOTRACE) != 0,
621			    ("dandling P2_NOTRACE_EXEC"));
622			if (td->td_proc != p)
623				return (EPERM);
624			p->p_flag2 &= ~P2_NOTRACE_EXEC;
625		} else {
626			p->p_flag2 |= P2_NOTRACE;
627		}
628		break;
629	default:
630		return (EINVAL);
631	}
632	return (0);
633}
634
635static int
636trace_status(struct thread *td, struct proc *p, void *data)
637{
638	int *status;
639
640	status = data;
641	if ((p->p_flag2 & P2_NOTRACE) != 0) {
642		KASSERT((p->p_flag & P_TRACED) == 0,
643		    ("%d traced but tracing disabled", p->p_pid));
644		*status = -1;
645	} else if ((p->p_flag & P_TRACED) != 0) {
646		*status = p->p_pptr->p_pid;
647	} else {
648		*status = 0;
649	}
650	return (0);
651}
652
653static int
654trapcap_ctl(struct thread *td, struct proc *p, void *data)
655{
656	int state;
657
658	PROC_LOCK_ASSERT(p, MA_OWNED);
659	state = *(int *)data;
660
661	switch (state) {
662	case PROC_TRAPCAP_CTL_ENABLE:
663		p->p_flag2 |= P2_TRAPCAP;
664		break;
665	case PROC_TRAPCAP_CTL_DISABLE:
666		p->p_flag2 &= ~P2_TRAPCAP;
667		break;
668	default:
669		return (EINVAL);
670	}
671	return (0);
672}
673
674static int
675trapcap_status(struct thread *td, struct proc *p, void *data)
676{
677	int *status;
678
679	status = data;
680	*status = (p->p_flag2 & P2_TRAPCAP) != 0 ? PROC_TRAPCAP_CTL_ENABLE :
681	    PROC_TRAPCAP_CTL_DISABLE;
682	return (0);
683}
684
685static int
686no_new_privs_ctl(struct thread *td, struct proc *p, void *data)
687{
688	int state;
689
690	PROC_LOCK_ASSERT(p, MA_OWNED);
691	state = *(int *)data;
692
693	if (state != PROC_NO_NEW_PRIVS_ENABLE)
694		return (EINVAL);
695	p->p_flag2 |= P2_NO_NEW_PRIVS;
696	return (0);
697}
698
699static int
700no_new_privs_status(struct thread *td, struct proc *p, void *data)
701{
702
703	*(int *)data = (p->p_flag2 & P2_NO_NEW_PRIVS) != 0 ?
704	    PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
705	return (0);
706}
707
708static int
709protmax_ctl(struct thread *td, struct proc *p, void *data)
710{
711	int state;
712
713	PROC_LOCK_ASSERT(p, MA_OWNED);
714	state = *(int *)data;
715
716	switch (state) {
717	case PROC_PROTMAX_FORCE_ENABLE:
718		p->p_flag2 &= ~P2_PROTMAX_DISABLE;
719		p->p_flag2 |= P2_PROTMAX_ENABLE;
720		break;
721	case PROC_PROTMAX_FORCE_DISABLE:
722		p->p_flag2 |= P2_PROTMAX_DISABLE;
723		p->p_flag2 &= ~P2_PROTMAX_ENABLE;
724		break;
725	case PROC_PROTMAX_NOFORCE:
726		p->p_flag2 &= ~(P2_PROTMAX_ENABLE | P2_PROTMAX_DISABLE);
727		break;
728	default:
729		return (EINVAL);
730	}
731	return (0);
732}
733
734static int
735protmax_status(struct thread *td, struct proc *p, void *data)
736{
737	int d;
738
739	switch (p->p_flag2 & (P2_PROTMAX_ENABLE | P2_PROTMAX_DISABLE)) {
740	case 0:
741		d = PROC_PROTMAX_NOFORCE;
742		break;
743	case P2_PROTMAX_ENABLE:
744		d = PROC_PROTMAX_FORCE_ENABLE;
745		break;
746	case P2_PROTMAX_DISABLE:
747		d = PROC_PROTMAX_FORCE_DISABLE;
748		break;
749	}
750	if (kern_mmap_maxprot(p, PROT_READ) == PROT_READ)
751		d |= PROC_PROTMAX_ACTIVE;
752	*(int *)data = d;
753	return (0);
754}
755
756static int
757aslr_ctl(struct thread *td, struct proc *p, void *data)
758{
759	int state;
760
761	PROC_LOCK_ASSERT(p, MA_OWNED);
762	state = *(int *)data;
763
764	switch (state) {
765	case PROC_ASLR_FORCE_ENABLE:
766		p->p_flag2 &= ~P2_ASLR_DISABLE;
767		p->p_flag2 |= P2_ASLR_ENABLE;
768		break;
769	case PROC_ASLR_FORCE_DISABLE:
770		p->p_flag2 |= P2_ASLR_DISABLE;
771		p->p_flag2 &= ~P2_ASLR_ENABLE;
772		break;
773	case PROC_ASLR_NOFORCE:
774		p->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE);
775		break;
776	default:
777		return (EINVAL);
778	}
779	return (0);
780}
781
782static int
783aslr_status(struct thread *td, struct proc *p, void *data)
784{
785	struct vmspace *vm;
786	int d;
787
788	switch (p->p_flag2 & (P2_ASLR_ENABLE | P2_ASLR_DISABLE)) {
789	case 0:
790		d = PROC_ASLR_NOFORCE;
791		break;
792	case P2_ASLR_ENABLE:
793		d = PROC_ASLR_FORCE_ENABLE;
794		break;
795	case P2_ASLR_DISABLE:
796		d = PROC_ASLR_FORCE_DISABLE;
797		break;
798	}
799	if ((p->p_flag & P_WEXIT) == 0) {
800		_PHOLD(p);
801		PROC_UNLOCK(p);
802		vm = vmspace_acquire_ref(p);
803		if (vm != NULL) {
804			if ((vm->vm_map.flags & MAP_ASLR) != 0)
805				d |= PROC_ASLR_ACTIVE;
806			vmspace_free(vm);
807		}
808		PROC_LOCK(p);
809		_PRELE(p);
810	}
811	*(int *)data = d;
812	return (0);
813}
814
815static int
816stackgap_ctl(struct thread *td, struct proc *p, void *data)
817{
818	int state;
819
820	PROC_LOCK_ASSERT(p, MA_OWNED);
821	state = *(int *)data;
822
823	if ((state & ~(PROC_STACKGAP_ENABLE | PROC_STACKGAP_DISABLE |
824	    PROC_STACKGAP_ENABLE_EXEC | PROC_STACKGAP_DISABLE_EXEC)) != 0)
825		return (EINVAL);
826	switch (state & (PROC_STACKGAP_ENABLE | PROC_STACKGAP_DISABLE)) {
827	case PROC_STACKGAP_ENABLE:
828		if ((p->p_flag2 & P2_STKGAP_DISABLE) != 0)
829			return (EINVAL);
830		break;
831	case PROC_STACKGAP_DISABLE:
832		p->p_flag2 |= P2_STKGAP_DISABLE;
833		break;
834	case 0:
835		break;
836	default:
837		return (EINVAL);
838	}
839	switch (state & (PROC_STACKGAP_ENABLE_EXEC |
840	    PROC_STACKGAP_DISABLE_EXEC)) {
841	case PROC_STACKGAP_ENABLE_EXEC:
842		p->p_flag2 &= ~P2_STKGAP_DISABLE_EXEC;
843		break;
844	case PROC_STACKGAP_DISABLE_EXEC:
845		p->p_flag2 |= P2_STKGAP_DISABLE_EXEC;
846		break;
847	case 0:
848		break;
849	default:
850		return (EINVAL);
851	}
852	return (0);
853}
854
855static int
856stackgap_status(struct thread *td, struct proc *p, void *data)
857{
858	int d;
859
860	PROC_LOCK_ASSERT(p, MA_OWNED);
861
862	d = (p->p_flag2 & P2_STKGAP_DISABLE) != 0 ? PROC_STACKGAP_DISABLE :
863	    PROC_STACKGAP_ENABLE;
864	d |= (p->p_flag2 & P2_STKGAP_DISABLE_EXEC) != 0 ?
865	    PROC_STACKGAP_DISABLE_EXEC : PROC_STACKGAP_ENABLE_EXEC;
866	*(int *)data = d;
867	return (0);
868}
869
/*
 * PROC_WXMAP_CTL: control W^X mapping policy.  PERMIT clears
 * MAP_WXORX on the current vmspace, allowing writable+executable
 * mappings; DISALLOW_EXEC sets P2_WXORX_ENABLE_EXEC, which is
 * presumably consumed at exec time to re-enforce W^X (flag consumer
 * is outside this file — TODO confirm).  The process is held and
 * unlocked around the vm_map manipulation, which may sleep.
 */
static int
wxmap_ctl(struct thread *td, struct proc *p, void *data)
{
	struct vmspace *vm;
	vm_map_t map;
	int state;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((p->p_flag & P_WEXIT) != 0)
		return (ESRCH);
	state = *(int *)data;

	switch (state) {
	case PROC_WX_MAPPINGS_PERMIT:
		p->p_flag2 |= P2_WXORX_DISABLE;
		_PHOLD(p);
		PROC_UNLOCK(p);
		vm = vmspace_acquire_ref(p);
		if (vm != NULL) {
			map = &vm->vm_map;
			vm_map_lock(map);
			map->flags &= ~MAP_WXORX;
			vm_map_unlock(map);
			vmspace_free(vm);
		}
		PROC_LOCK(p);
		_PRELE(p);
		break;
	case PROC_WX_MAPPINGS_DISALLOW_EXEC:
		p->p_flag2 |= P2_WXORX_ENABLE_EXEC;
		break;
	default:
		return (EINVAL);
	}

	return (0);
}
907
/*
 * PROC_WXMAP_STATUS: report the W^X policy flags and whether W^X is
 * currently enforced on the process vmspace (MAP_WXORX set).  The
 * process is held and unlocked around the vmspace access, which may
 * sleep.
 */
static int
wxmap_status(struct thread *td, struct proc *p, void *data)
{
	struct vmspace *vm;
	int d;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if ((p->p_flag & P_WEXIT) != 0)
		return (ESRCH);

	d = 0;
	if ((p->p_flag2 & P2_WXORX_DISABLE) != 0)
		d |= PROC_WX_MAPPINGS_PERMIT;
	if ((p->p_flag2 & P2_WXORX_ENABLE_EXEC) != 0)
		d |= PROC_WX_MAPPINGS_DISALLOW_EXEC;
	_PHOLD(p);
	PROC_UNLOCK(p);
	vm = vmspace_acquire_ref(p);
	if (vm != NULL) {
		if ((vm->vm_map.flags & MAP_WXORX) != 0)
			d |= PROC_WXORX_ENFORCE;
		vmspace_free(vm);
	}
	PROC_LOCK(p);
	_PRELE(p);
	*(int *)data = d;
	return (0);
}
936
937static int
938pdeathsig_ctl(struct thread *td, struct proc *p, void *data)
939{
940	int signum;
941
942	signum = *(int *)data;
943	if (p != td->td_proc || (signum != 0 && !_SIG_VALID(signum)))
944		return (EINVAL);
945	p->p_pdeathsig = signum;
946	return (0);
947}
948
949static int
950pdeathsig_status(struct thread *td, struct proc *p, void *data)
951{
952	if (p != td->td_proc)
953		return (EINVAL);
954	*(int *)data = p->p_pdeathsig;
955	return (0);
956}
957
/* proctree_lock mode taken around a command's exec handler. */
enum {
	PCTL_SLOCKED,	/* shared proctree_lock */
	PCTL_XLOCKED,	/* exclusive proctree_lock */
	PCTL_UNLOCKED,	/* no proctree_lock */
};
963
/*
 * Dispatch descriptor for one procctl(2) command; see the
 * procctl_cmds_info[] table below for the instances.
 */
struct procctl_cmd_info {
	int lock_tree;			/* PCTL_*: proctree_lock mode */
	bool one_proc : 1;		/* only valid with idtype P_PID */
	bool esrch_is_einval : 1;	/* report missing proc as EINVAL */
	bool copyout_on_error : 1;	/* copy results out even on error */
	bool no_nonnull_data : 1;	/* uap->data must be NULL */
	/*
	 * NOTE(review): presumably selects a stricter access check in
	 * kern_procctl() (not fully visible in this chunk) — confirm.
	 */
	bool need_candebug : 1;
	int copyin_sz;			/* bytes copied in from uap->data */
	int copyout_sz;			/* bytes copied out to uap->data */
	int (*exec)(struct thread *, struct proc *, void *);
	bool (*sapblk)(struct thread *, void *); /* needs stop_all_proc? */
};
/*
 * Table of procctl(2) commands implemented here, indexed by command
 * number.  Each entry gives the proctree_lock regime, single-process
 * restriction, argument copyin/copyout sizes, and the handler.
 */
static const struct procctl_cmd_info procctl_cmds_info[] = {
	[PROC_SPROTECT] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = false,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = protect_set, .copyout_on_error = false, },
	[PROC_REAP_ACQUIRE] =
	    { .lock_tree = PCTL_XLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = true,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = 0,
	      .exec = reap_acquire, .copyout_on_error = false, },
	[PROC_REAP_RELEASE] =
	    { .lock_tree = PCTL_XLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = true,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = 0,
	      .exec = reap_release, .copyout_on_error = false, },
	[PROC_REAP_STATUS] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0,
	      .copyout_sz = sizeof(struct procctl_reaper_status),
	      .exec = reap_status, .copyout_on_error = false, },
	[PROC_REAP_GETPIDS] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = sizeof(struct procctl_reaper_pids),
	      .copyout_sz = 0,
	      .exec = reap_getpids, .copyout_on_error = false, },
	[PROC_REAP_KILL] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = sizeof(struct procctl_reaper_kill),
	      .copyout_sz = sizeof(struct procctl_reaper_kill),
	      .exec = reap_kill, .copyout_on_error = true,
	      .sapblk = reap_kill_sapblk, },
	[PROC_TRACE_CTL] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = false,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = trace_ctl, .copyout_on_error = false, },
	[PROC_TRACE_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = trace_status, .copyout_on_error = false, },
	[PROC_TRAPCAP_CTL] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = false,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = trapcap_ctl, .copyout_on_error = false, },
	[PROC_TRAPCAP_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = trapcap_status, .copyout_on_error = false, },
	[PROC_PDEATHSIG_CTL] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = true, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = pdeathsig_ctl, .copyout_on_error = false, },
	[PROC_PDEATHSIG_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = true, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = pdeathsig_status, .copyout_on_error = false, },
	[PROC_ASLR_CTL] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = aslr_ctl, .copyout_on_error = false, },
	[PROC_ASLR_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = aslr_status, .copyout_on_error = false, },
	[PROC_PROTMAX_CTL] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = protmax_ctl, .copyout_on_error = false, },
	[PROC_PROTMAX_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = protmax_status, .copyout_on_error = false, },
	[PROC_STACKGAP_CTL] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = stackgap_ctl, .copyout_on_error = false, },
	[PROC_STACKGAP_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = stackgap_status, .copyout_on_error = false, },
	[PROC_NO_NEW_PRIVS_CTL] =
	    { .lock_tree = PCTL_SLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = no_new_privs_ctl, .copyout_on_error = false, },
	[PROC_NO_NEW_PRIVS_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = no_new_privs_status, .copyout_on_error = false, },
	[PROC_WXMAP_CTL] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = true,
	      .copyin_sz = sizeof(int), .copyout_sz = 0,
	      .exec = wxmap_ctl, .copyout_on_error = false, },
	[PROC_WXMAP_STATUS] =
	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
	      .esrch_is_einval = false, .no_nonnull_data = false,
	      .need_candebug = false,
	      .copyin_sz = 0, .copyout_sz = sizeof(int),
	      .exec = wxmap_status, .copyout_on_error = false, },
};
1114
1115int
1116sys_procctl(struct thread *td, struct procctl_args *uap)
1117{
1118	union {
1119		struct procctl_reaper_status rs;
1120		struct procctl_reaper_pids rp;
1121		struct procctl_reaper_kill rk;
1122		int flags;
1123	} x;
1124	const struct procctl_cmd_info *cmd_info;
1125	int error, error1;
1126
1127	if (uap->com >= PROC_PROCCTL_MD_MIN)
1128		return (cpu_procctl(td, uap->idtype, uap->id,
1129		    uap->com, uap->data));
1130	if (uap->com <= 0 || uap->com >= nitems(procctl_cmds_info))
1131		return (EINVAL);
1132	cmd_info = &procctl_cmds_info[uap->com];
1133	bzero(&x, sizeof(x));
1134
1135	if (cmd_info->copyin_sz > 0) {
1136		error = copyin(uap->data, &x, cmd_info->copyin_sz);
1137		if (error != 0)
1138			return (error);
1139	} else if (cmd_info->no_nonnull_data && uap->data != NULL) {
1140		return (EINVAL);
1141	}
1142
1143	error = kern_procctl(td, uap->idtype, uap->id, uap->com, &x);
1144
1145	if (cmd_info->copyout_sz > 0 && (error == 0 ||
1146	    cmd_info->copyout_on_error)) {
1147		error1 = copyout(&x, uap->data, cmd_info->copyout_sz);
1148		if (error == 0)
1149			error = error1;
1150	}
1151	return (error);
1152}
1153
1154static int
1155kern_procctl_single(struct thread *td, struct proc *p, int com, void *data)
1156{
1157
1158	PROC_LOCK_ASSERT(p, MA_OWNED);
1159	return (procctl_cmds_info[com].exec(td, p, data));
1160}
1161
/*
 * Common in-kernel implementation of procctl(2): resolve the target
 * (a single process for P_PID, or every visible member of a process
 * group for P_PGID), perform the per-command access check, and run the
 * command handler with the target's proc lock held.
 *
 * Returns 0 on success or an errno value.  For P_PGID the call
 * succeeds if at least one group member accepted the operation.
 */
int
kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
{
	struct pgrp *pg;
	struct proc *p;
	const struct procctl_cmd_info *cmd_info;
	int error, first_error, ok;
	bool sapblk;

	MPASS(com > 0 && com < nitems(procctl_cmds_info));
	cmd_info = &procctl_cmds_info[com];
	/* Commands flagged one_proc only operate on a single pid. */
	if (idtype != P_PID && cmd_info->one_proc)
		return (EINVAL);

	/*
	 * Some commands must exclude a concurrent stop of all
	 * processes while they run; the command's sapblk() callback
	 * decides per-request.  If the blocker cannot be taken, back
	 * out and let the syscall be restarted.
	 */
	sapblk = false;
	if (cmd_info->sapblk != NULL) {
		sapblk = cmd_info->sapblk(td, data);
		if (sapblk && !stop_all_proc_block())
			return (ERESTART);
	}

	/* Take proctree_lock in whichever mode the command requires. */
	switch (cmd_info->lock_tree) {
	case PCTL_XLOCKED:
		sx_xlock(&proctree_lock);
		break;
	case PCTL_SLOCKED:
		sx_slock(&proctree_lock);
		break;
	default:
		break;
	}

	switch (idtype) {
	case P_PID:
		if (id == 0) {
			/*
			 * id 0 means the calling process itself; no
			 * lookup or access check is needed.
			 */
			p = td->td_proc;
			error = 0;
			PROC_LOCK(p);
		} else {
			/* pfind() returns the process locked. */
			p = pfind(id);
			if (p == NULL) {
				error = cmd_info->esrch_is_einval ?
				    EINVAL : ESRCH;
				/* p is NULL; skip PROC_UNLOCK below. */
				break;
			}
			error = cmd_info->need_candebug ? p_candebug(td, p) :
			    p_cansee(td, p);
		}
		if (error == 0)
			error = kern_procctl_single(td, p, com, data);
		PROC_UNLOCK(p);
		break;
	case P_PGID:
		/*
		 * Attempt to apply the operation to all members of the
		 * group.  Ignore processes in the group that can't be
		 * seen.  Ignore errors so long as at least one process is
		 * able to complete the request successfully.
		 */
		pg = pgfind(id);
		if (pg == NULL) {
			error = ESRCH;
			break;
		}
		/*
		 * Drop the pgrp lock returned by pgfind() before
		 * walking the members.  NOTE(review): iterating
		 * pg_members after PGRP_UNLOCK() presumably relies on
		 * proctree_lock being held for group-capable commands —
		 * confirm against the command table's lock_tree values.
		 */
		PGRP_UNLOCK(pg);
		ok = 0;
		first_error = 0;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			/*
			 * Skip processes that are not fully
			 * constructed, already dead, or not visible to
			 * the caller under the command's check.
			 */
			if (p->p_state == PRS_NEW ||
			    p->p_state == PRS_ZOMBIE ||
			    (cmd_info->need_candebug ? p_candebug(td, p) :
			    p_cansee(td, p)) != 0) {
				PROC_UNLOCK(p);
				continue;
			}
			error = kern_procctl_single(td, p, com, data);
			PROC_UNLOCK(p);
			if (error == 0)
				ok = 1;
			else if (first_error == 0)
				first_error = error;
		}
		/* One success wins; otherwise report the first error. */
		if (ok)
			error = 0;
		else if (first_error != 0)
			error = first_error;
		else
			/*
			 * Was not able to see any processes in the
			 * process group.
			 */
			error = ESRCH;
		break;
	default:
		error = EINVAL;
		break;
	}

	/* Release proctree_lock in the mode it was acquired. */
	switch (cmd_info->lock_tree) {
	case PCTL_XLOCKED:
		sx_xunlock(&proctree_lock);
		break;
	case PCTL_SLOCKED:
		sx_sunlock(&proctree_lock);
		break;
	default:
		break;
	}
	if (sapblk)
		stop_all_proc_unblock();
	return (error);
}
1275