1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1989, 1991, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33#include "opt_ddb.h"
34#include "opt_ktrace.h"
35#include "opt_kstack_pages.h"
36#include "opt_stack.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/bitstring.h>
41#include <sys/elf.h>
42#include <sys/eventhandler.h>
43#include <sys/exec.h>
44#include <sys/fcntl.h>
45#include <sys/jail.h>
46#include <sys/kernel.h>
47#include <sys/limits.h>
48#include <sys/lock.h>
49#include <sys/loginclass.h>
50#include <sys/malloc.h>
51#include <sys/mman.h>
52#include <sys/mount.h>
53#include <sys/mutex.h>
54#include <sys/namei.h>
55#include <sys/proc.h>
56#include <sys/ptrace.h>
57#include <sys/refcount.h>
58#include <sys/resourcevar.h>
59#include <sys/rwlock.h>
60#include <sys/sbuf.h>
61#include <sys/sysent.h>
62#include <sys/sched.h>
63#include <sys/smp.h>
64#include <sys/stack.h>
65#include <sys/stat.h>
66#include <sys/dtrace_bsd.h>
67#include <sys/sysctl.h>
68#include <sys/filedesc.h>
69#include <sys/tty.h>
70#include <sys/signalvar.h>
71#include <sys/sdt.h>
72#include <sys/sx.h>
73#include <sys/user.h>
74#include <sys/vnode.h>
75#include <sys/wait.h>
76#ifdef KTRACE
77#include <sys/ktrace.h>
78#endif
79
80#ifdef DDB
81#include <ddb/ddb.h>
82#endif
83
84#include <vm/vm.h>
85#include <vm/vm_param.h>
86#include <vm/vm_extern.h>
87#include <vm/pmap.h>
88#include <vm/vm_map.h>
89#include <vm/vm_object.h>
90#include <vm/vm_page.h>
91#include <vm/uma.h>
92
93#include <fs/devfs/devfs.h>
94
95#ifdef COMPAT_FREEBSD32
96#include <compat/freebsd32/freebsd32.h>
97#include <compat/freebsd32/freebsd32_util.h>
98#endif
99
100SDT_PROVIDER_DEFINE(proc);
101
102MALLOC_DEFINE(M_SESSION, "session", "session header");
103static MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
104MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
105
106static void doenterpgrp(struct proc *, struct pgrp *);
107static void orphanpg(struct pgrp *pg);
108static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp);
109static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp);
110static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp,
111    int preferthread);
112static void pgdelete(struct pgrp *);
113static int pgrp_init(void *mem, int size, int flags);
114static int proc_ctor(void *mem, int size, void *arg, int flags);
115static void proc_dtor(void *mem, int size, void *arg);
116static int proc_init(void *mem, int size, int flags);
117static void proc_fini(void *mem, int size);
118static void pargs_free(struct pargs *pa);
119
120/*
121 * Other process lists
122 */
123struct pidhashhead *pidhashtbl = NULL;
124struct sx *pidhashtbl_lock;
125u_long pidhash;
126u_long pidhashlock;
127struct pgrphashhead *pgrphashtbl;
128u_long pgrphash;
129struct proclist allproc = LIST_HEAD_INITIALIZER(allproc);
130struct sx __exclusive_cache_line allproc_lock;
131struct sx __exclusive_cache_line proctree_lock;
132struct mtx __exclusive_cache_line ppeers_lock;
133struct mtx __exclusive_cache_line procid_lock;
134uma_zone_t proc_zone;
135uma_zone_t pgrp_zone;
136
137/*
138 * The offset of various fields in struct proc and struct thread.
139 * These are used by kernel debuggers to enumerate kernel threads and
140 * processes.
141 */
142const int proc_off_p_pid = offsetof(struct proc, p_pid);
143const int proc_off_p_comm = offsetof(struct proc, p_comm);
144const int proc_off_p_list = offsetof(struct proc, p_list);
145const int proc_off_p_hash = offsetof(struct proc, p_hash);
146const int proc_off_p_threads = offsetof(struct proc, p_threads);
147const int thread_off_td_tid = offsetof(struct thread, td_tid);
148const int thread_off_td_name = offsetof(struct thread, td_name);
149const int thread_off_td_oncpu = offsetof(struct thread, td_oncpu);
150const int thread_off_td_pcb = offsetof(struct thread, td_pcb);
151const int thread_off_td_plist = offsetof(struct thread, td_plist);
152
153EVENTHANDLER_LIST_DEFINE(process_ctor);
154EVENTHANDLER_LIST_DEFINE(process_dtor);
155EVENTHANDLER_LIST_DEFINE(process_init);
156EVENTHANDLER_LIST_DEFINE(process_fini);
157EVENTHANDLER_LIST_DEFINE(process_exit);
158EVENTHANDLER_LIST_DEFINE(process_fork);
159EVENTHANDLER_LIST_DEFINE(process_exec);
160
161int kstack_pages = KSTACK_PAGES;
162SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
163    &kstack_pages, 0,
164    "Kernel stack size in pages");
165static int vmmap_skip_res_cnt = 0;
166SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW,
167    &vmmap_skip_res_cnt, 0,
168    "Skip calculation of the pages resident count in kern.proc.vmmap");
169
170CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
171#ifdef COMPAT_FREEBSD32
172CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE);
173#endif
174
175/*
176 * Initialize global process hashing structures.
177 */
178void
179procinit(void)
180{
181	u_long i;
182
183	sx_init(&allproc_lock, "allproc");
184	sx_init(&proctree_lock, "proctree");
185	mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF);
186	mtx_init(&procid_lock, "procid", NULL, MTX_DEF);
187	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
188	pidhashlock = (pidhash + 1) / 64;
189	if (pidhashlock > 0)
190		pidhashlock--;
191	pidhashtbl_lock = malloc(sizeof(*pidhashtbl_lock) * (pidhashlock + 1),
192	    M_PROC, M_WAITOK | M_ZERO);
193	for (i = 0; i < pidhashlock + 1; i++)
194		sx_init_flags(&pidhashtbl_lock[i], "pidhash", SX_DUPOK);
195	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
196	proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
197	    proc_ctor, proc_dtor, proc_init, proc_fini,
198	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
199	pgrp_zone = uma_zcreate("PGRP", sizeof(struct pgrp), NULL, NULL,
200	    pgrp_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
201	uihashinit();
202}
203
204/*
205 * Prepare a proc for use.
206 */
207static int
208proc_ctor(void *mem, int size, void *arg, int flags)
209{
210	struct proc *p;
211	struct thread *td;
212
213	p = (struct proc *)mem;
214#ifdef KDTRACE_HOOKS
215	kdtrace_proc_ctor(p);
216#endif
217	EVENTHANDLER_DIRECT_INVOKE(process_ctor, p);
218	td = FIRST_THREAD_IN_PROC(p);
219	if (td != NULL) {
220		/* Make sure all thread constructors are executed */
221		EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td);
222	}
223	return (0);
224}
225
226/*
227 * Reclaim a proc after use.
228 */
229static void
230proc_dtor(void *mem, int size, void *arg)
231{
232	struct proc *p;
233	struct thread *td;
234
235	/* INVARIANTS checks go here */
236	p = (struct proc *)mem;
237	td = FIRST_THREAD_IN_PROC(p);
238	if (td != NULL) {
239#ifdef INVARIANTS
240		KASSERT((p->p_numthreads == 1),
241		    ("bad number of threads in exiting process"));
242		KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr"));
243#endif
244		/* Free all OSD associated to this thread. */
245		osd_thread_exit(td);
246		ast_kclear(td);
247
248		/* Make sure all thread destructors are executed */
249		EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td);
250	}
251	EVENTHANDLER_DIRECT_INVOKE(process_dtor, p);
252#ifdef KDTRACE_HOOKS
253	kdtrace_proc_dtor(p);
254#endif
255	if (p->p_ksi != NULL)
256		KASSERT(! KSI_ONQ(p->p_ksi), ("SIGCHLD queue"));
257}
258
259/*
260 * Initialize type-stable parts of a proc (when newly created).
261 */
262static int
263proc_init(void *mem, int size, int flags)
264{
265	struct proc *p;
266
267	p = (struct proc *)mem;
268	mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW);
269	mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_NEW);
270	mtx_init(&p->p_statmtx, "pstatl", NULL, MTX_SPIN | MTX_NEW);
271	mtx_init(&p->p_itimmtx, "pitiml", NULL, MTX_SPIN | MTX_NEW);
272	mtx_init(&p->p_profmtx, "pprofl", NULL, MTX_SPIN | MTX_NEW);
273	cv_init(&p->p_pwait, "ppwait");
274	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
275	EVENTHANDLER_DIRECT_INVOKE(process_init, p);
276	p->p_stats = pstats_alloc();
277	p->p_pgrp = NULL;
278	TAILQ_INIT(&p->p_kqtim_stop);
279	return (0);
280}
281
282/*
283 * UMA should ensure that this function is never called.
284 * Freeing a proc structure would violate type stability.
285 */
286static void
287proc_fini(void *mem, int size)
288{
289#ifdef notnow
290	struct proc *p;
291
292	p = (struct proc *)mem;
293	EVENTHANDLER_DIRECT_INVOKE(process_fini, p);
294	pstats_free(p->p_stats);
295	thread_free(FIRST_THREAD_IN_PROC(p));
296	mtx_destroy(&p->p_mtx);
297	if (p->p_ksi != NULL)
298		ksiginfo_free(p->p_ksi);
299#else
300	panic("proc reclaimed");
301#endif
302}
303
304static int
305pgrp_init(void *mem, int size, int flags)
306{
307	struct pgrp *pg;
308
309	pg = mem;
310	mtx_init(&pg->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
311	sx_init(&pg->pg_killsx, "killpg racer");
312	return (0);
313}
314
315/*
316 * PID space management.
317 *
318 * These bitmaps are used by fork_findpid.
319 */
320bitstr_t bit_decl(proc_id_pidmap, PID_MAX);
321bitstr_t bit_decl(proc_id_grpidmap, PID_MAX);
322bitstr_t bit_decl(proc_id_sessidmap, PID_MAX);
323bitstr_t bit_decl(proc_id_reapmap, PID_MAX);
324
325static bitstr_t *proc_id_array[] = {
326	proc_id_pidmap,
327	proc_id_grpidmap,
328	proc_id_sessidmap,
329	proc_id_reapmap,
330};
331
332void
333proc_id_set(int type, pid_t id)
334{
335
336	KASSERT(type >= 0 && type < nitems(proc_id_array),
337	    ("invalid type %d\n", type));
338	mtx_lock(&procid_lock);
339	KASSERT(bit_test(proc_id_array[type], id) == 0,
340	    ("bit %d already set in %d\n", id, type));
341	bit_set(proc_id_array[type], id);
342	mtx_unlock(&procid_lock);
343}
344
345void
346proc_id_set_cond(int type, pid_t id)
347{
348
349	KASSERT(type >= 0 && type < nitems(proc_id_array),
350	    ("invalid type %d\n", type));
351	if (bit_test(proc_id_array[type], id))
352		return;
353	mtx_lock(&procid_lock);
354	bit_set(proc_id_array[type], id);
355	mtx_unlock(&procid_lock);
356}
357
358void
359proc_id_clear(int type, pid_t id)
360{
361
362	KASSERT(type >= 0 && type < nitems(proc_id_array),
363	    ("invalid type %d\n", type));
364	mtx_lock(&procid_lock);
365	KASSERT(bit_test(proc_id_array[type], id) != 0,
366	    ("bit %d not set in %d\n", id, type));
367	bit_clear(proc_id_array[type], id);
368	mtx_unlock(&procid_lock);
369}
370
371/*
372 * Is p an inferior of the current process?
373 */
374int
375inferior(struct proc *p)
376{
377
378	sx_assert(&proctree_lock, SX_LOCKED);
379	PROC_LOCK_ASSERT(p, MA_OWNED);
380	for (; p != curproc; p = proc_realparent(p)) {
381		if (p->p_pid == 0)
382			return (0);
383	}
384	return (1);
385}
386
387/*
388 * Shared lock all the pid hash lists.
389 */
390void
391pidhash_slockall(void)
392{
393	u_long i;
394
395	for (i = 0; i < pidhashlock + 1; i++)
396		sx_slock(&pidhashtbl_lock[i]);
397}
398
399/*
400 * Shared unlock all the pid hash lists.
401 */
402void
403pidhash_sunlockall(void)
404{
405	u_long i;
406
407	for (i = 0; i < pidhashlock + 1; i++)
408		sx_sunlock(&pidhashtbl_lock[i]);
409}
410
411/*
412 * Similar to pfind_any(), this function finds zombies.
413 */
414struct proc *
415pfind_any_locked(pid_t pid)
416{
417	struct proc *p;
418
419	sx_assert(PIDHASHLOCK(pid), SX_LOCKED);
420	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
421		if (p->p_pid == pid) {
422			PROC_LOCK(p);
423			if (p->p_state == PRS_NEW) {
424				PROC_UNLOCK(p);
425				p = NULL;
426			}
427			break;
428		}
429	}
430	return (p);
431}
432
433/*
434 * Locate a process by number.
435 *
436 * By not returning processes in the PRS_NEW state, we allow callers to avoid
437 * testing for that condition to avoid dereferencing p_ucred, et al.
438 */
439static __always_inline struct proc *
440_pfind(pid_t pid, bool zombie)
441{
442	struct proc *p;
443
444	p = curproc;
445	if (p->p_pid == pid) {
446		PROC_LOCK(p);
447		return (p);
448	}
449	sx_slock(PIDHASHLOCK(pid));
450	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
451		if (p->p_pid == pid) {
452			PROC_LOCK(p);
453			if (p->p_state == PRS_NEW ||
454			    (!zombie && p->p_state == PRS_ZOMBIE)) {
455				PROC_UNLOCK(p);
456				p = NULL;
457			}
458			break;
459		}
460	}
461	sx_sunlock(PIDHASHLOCK(pid));
462	return (p);
463}
464
465struct proc *
466pfind(pid_t pid)
467{
468
469	return (_pfind(pid, false));
470}
471
472/*
473 * Same as pfind but allow zombies.
474 */
475struct proc *
476pfind_any(pid_t pid)
477{
478
479	return (_pfind(pid, true));
480}
481
482/*
483 * Locate a process group by number.
484 * The caller must hold proctree_lock.
485 */
486struct pgrp *
487pgfind(pid_t pgid)
488{
489	struct pgrp *pgrp;
490
491	sx_assert(&proctree_lock, SX_LOCKED);
492
493	LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) {
494		if (pgrp->pg_id == pgid) {
495			PGRP_LOCK(pgrp);
496			return (pgrp);
497		}
498	}
499	return (NULL);
500}
501
502/*
503 * Locate process and do additional manipulations, depending on flags.
504 */
505int
506pget(pid_t pid, int flags, struct proc **pp)
507{
508	struct proc *p;
509	struct thread *td1;
510	int error;
511
512	p = curproc;
513	if (p->p_pid == pid) {
514		PROC_LOCK(p);
515	} else {
516		p = NULL;
517		if (pid <= PID_MAX) {
518			if ((flags & PGET_NOTWEXIT) == 0)
519				p = pfind_any(pid);
520			else
521				p = pfind(pid);
522		} else if ((flags & PGET_NOTID) == 0) {
523			td1 = tdfind(pid, -1);
524			if (td1 != NULL)
525				p = td1->td_proc;
526		}
527		if (p == NULL)
528			return (ESRCH);
529		if ((flags & PGET_CANSEE) != 0) {
530			error = p_cansee(curthread, p);
531			if (error != 0)
532				goto errout;
533		}
534	}
535	if ((flags & PGET_CANDEBUG) != 0) {
536		error = p_candebug(curthread, p);
537		if (error != 0)
538			goto errout;
539	}
540	if ((flags & PGET_ISCURRENT) != 0 && curproc != p) {
541		error = EPERM;
542		goto errout;
543	}
544	if ((flags & PGET_NOTWEXIT) != 0 && (p->p_flag & P_WEXIT) != 0) {
545		error = ESRCH;
546		goto errout;
547	}
548	if ((flags & PGET_NOTINEXEC) != 0 && (p->p_flag & P_INEXEC) != 0) {
549		/*
550		 * XXXRW: Not clear ESRCH is the right error during proc
551		 * execve().
552		 */
553		error = ESRCH;
554		goto errout;
555	}
556	if ((flags & PGET_HOLD) != 0) {
557		_PHOLD(p);
558		PROC_UNLOCK(p);
559	}
560	*pp = p;
561	return (0);
562errout:
563	PROC_UNLOCK(p);
564	return (error);
565}
566
567/*
568 * Create a new process group.
569 * pgid must be equal to the pid of p.
570 * Begin a new session if required.
571 */
572int
573enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess)
574{
575	struct pgrp *old_pgrp;
576
577	sx_assert(&proctree_lock, SX_XLOCKED);
578
579	KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL"));
580	KASSERT(p->p_pid == pgid,
581	    ("enterpgrp: new pgrp and pid != pgid"));
582	KASSERT(pgfind(pgid) == NULL,
583	    ("enterpgrp: pgrp with pgid exists"));
584	KASSERT(!SESS_LEADER(p),
585	    ("enterpgrp: session leader attempted setpgrp"));
586
587	old_pgrp = p->p_pgrp;
588	if (!sx_try_xlock(&old_pgrp->pg_killsx)) {
589		sx_xunlock(&proctree_lock);
590		sx_xlock(&old_pgrp->pg_killsx);
591		sx_xunlock(&old_pgrp->pg_killsx);
592		return (ERESTART);
593	}
594	MPASS(old_pgrp == p->p_pgrp);
595
596	if (sess != NULL) {
597		/*
598		 * new session
599		 */
600		mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF);
601		PROC_LOCK(p);
602		p->p_flag &= ~P_CONTROLT;
603		PROC_UNLOCK(p);
604		PGRP_LOCK(pgrp);
605		sess->s_leader = p;
606		sess->s_sid = p->p_pid;
607		proc_id_set(PROC_ID_SESSION, p->p_pid);
608		refcount_init(&sess->s_count, 1);
609		sess->s_ttyvp = NULL;
610		sess->s_ttydp = NULL;
611		sess->s_ttyp = NULL;
612		bcopy(p->p_session->s_login, sess->s_login,
613			    sizeof(sess->s_login));
614		pgrp->pg_session = sess;
615		KASSERT(p == curproc,
616		    ("enterpgrp: mksession and p != curproc"));
617	} else {
618		pgrp->pg_session = p->p_session;
619		sess_hold(pgrp->pg_session);
620		PGRP_LOCK(pgrp);
621	}
622	pgrp->pg_id = pgid;
623	proc_id_set(PROC_ID_GROUP, p->p_pid);
624	LIST_INIT(&pgrp->pg_members);
625	pgrp->pg_flags = 0;
626
627	/*
628	 * As we have an exclusive lock of proctree_lock,
629	 * this should not deadlock.
630	 */
631	LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
632	SLIST_INIT(&pgrp->pg_sigiolst);
633	PGRP_UNLOCK(pgrp);
634
635	doenterpgrp(p, pgrp);
636
637	sx_xunlock(&old_pgrp->pg_killsx);
638	return (0);
639}
640
641/*
642 * Move p to an existing process group
643 */
644int
645enterthispgrp(struct proc *p, struct pgrp *pgrp)
646{
647	struct pgrp *old_pgrp;
648
649	sx_assert(&proctree_lock, SX_XLOCKED);
650	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
651	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
652	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
653	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
654	KASSERT(pgrp->pg_session == p->p_session,
655	    ("%s: pgrp's session %p, p->p_session %p proc %p\n",
656	    __func__, pgrp->pg_session, p->p_session, p));
657	KASSERT(pgrp != p->p_pgrp,
658	    ("%s: p %p belongs to pgrp %p", __func__, p, pgrp));
659
660	old_pgrp = p->p_pgrp;
661	if (!sx_try_xlock(&old_pgrp->pg_killsx)) {
662		sx_xunlock(&proctree_lock);
663		sx_xlock(&old_pgrp->pg_killsx);
664		sx_xunlock(&old_pgrp->pg_killsx);
665		return (ERESTART);
666	}
667	MPASS(old_pgrp == p->p_pgrp);
668	if (!sx_try_xlock(&pgrp->pg_killsx)) {
669		sx_xunlock(&old_pgrp->pg_killsx);
670		sx_xunlock(&proctree_lock);
671		sx_xlock(&pgrp->pg_killsx);
672		sx_xunlock(&pgrp->pg_killsx);
673		return (ERESTART);
674	}
675
676	doenterpgrp(p, pgrp);
677
678	sx_xunlock(&pgrp->pg_killsx);
679	sx_xunlock(&old_pgrp->pg_killsx);
680	return (0);
681}
682
683/*
684 * If true, any child of q which belongs to group pgrp, qualifies the
685 * process group pgrp as not orphaned.
686 */
687static bool
688isjobproc(struct proc *q, struct pgrp *pgrp)
689{
690	sx_assert(&proctree_lock, SX_LOCKED);
691
692	return (q->p_pgrp != pgrp &&
693	    q->p_pgrp->pg_session == pgrp->pg_session);
694}
695
696static struct proc *
697jobc_reaper(struct proc *p)
698{
699	struct proc *pp;
700
701	sx_assert(&proctree_lock, SA_LOCKED);
702
703	for (pp = p;;) {
704		pp = pp->p_reaper;
705		if (pp->p_reaper == pp ||
706		    (pp->p_treeflag & P_TREE_GRPEXITED) == 0)
707			return (pp);
708	}
709}
710
711static struct proc *
712jobc_parent(struct proc *p, struct proc *p_exiting)
713{
714	struct proc *pp;
715
716	sx_assert(&proctree_lock, SA_LOCKED);
717
718	pp = proc_realparent(p);
719	if (pp->p_pptr == NULL || pp == p_exiting ||
720	    (pp->p_treeflag & P_TREE_GRPEXITED) == 0)
721		return (pp);
722	return (jobc_reaper(pp));
723}
724
725static int
726pgrp_calc_jobc(struct pgrp *pgrp)
727{
728	struct proc *q;
729	int cnt;
730
731#ifdef INVARIANTS
732	if (!mtx_owned(&pgrp->pg_mtx))
733		sx_assert(&proctree_lock, SA_LOCKED);
734#endif
735
736	cnt = 0;
737	LIST_FOREACH(q, &pgrp->pg_members, p_pglist) {
738		if ((q->p_treeflag & P_TREE_GRPEXITED) != 0 ||
739		    q->p_pptr == NULL)
740			continue;
741		if (isjobproc(jobc_parent(q, NULL), pgrp))
742			cnt++;
743	}
744	return (cnt);
745}
746
747/*
748 * Move p to a process group
749 */
750static void
751doenterpgrp(struct proc *p, struct pgrp *pgrp)
752{
753	struct pgrp *savepgrp;
754	struct proc *pp;
755
756	sx_assert(&proctree_lock, SX_XLOCKED);
757	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
758	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
759	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
760	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
761
762	savepgrp = p->p_pgrp;
763	pp = jobc_parent(p, NULL);
764
765	PGRP_LOCK(pgrp);
766	PGRP_LOCK(savepgrp);
767	if (isjobproc(pp, savepgrp) && pgrp_calc_jobc(savepgrp) == 1)
768		orphanpg(savepgrp);
769	PROC_LOCK(p);
770	LIST_REMOVE(p, p_pglist);
771	p->p_pgrp = pgrp;
772	PROC_UNLOCK(p);
773	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
774	if (isjobproc(pp, pgrp))
775		pgrp->pg_flags &= ~PGRP_ORPHANED;
776	PGRP_UNLOCK(savepgrp);
777	PGRP_UNLOCK(pgrp);
778	if (LIST_EMPTY(&savepgrp->pg_members))
779		pgdelete(savepgrp);
780}
781
782/*
783 * remove process from process group
784 */
785int
786leavepgrp(struct proc *p)
787{
788	struct pgrp *savepgrp;
789
790	sx_assert(&proctree_lock, SX_XLOCKED);
791	savepgrp = p->p_pgrp;
792	PGRP_LOCK(savepgrp);
793	PROC_LOCK(p);
794	LIST_REMOVE(p, p_pglist);
795	p->p_pgrp = NULL;
796	PROC_UNLOCK(p);
797	PGRP_UNLOCK(savepgrp);
798	if (LIST_EMPTY(&savepgrp->pg_members))
799		pgdelete(savepgrp);
800	return (0);
801}
802
803/*
804 * delete a process group
805 */
806static void
807pgdelete(struct pgrp *pgrp)
808{
809	struct session *savesess;
810	struct tty *tp;
811
812	sx_assert(&proctree_lock, SX_XLOCKED);
813	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
814	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
815
816	/*
817	 * Reset any sigio structures pointing to us as a result of
818	 * F_SETOWN with our pgid.  The proctree lock ensures that
819	 * new sigio structures will not be added after this point.
820	 */
821	funsetownlst(&pgrp->pg_sigiolst);
822
823	PGRP_LOCK(pgrp);
824	tp = pgrp->pg_session->s_ttyp;
825	LIST_REMOVE(pgrp, pg_hash);
826	savesess = pgrp->pg_session;
827	PGRP_UNLOCK(pgrp);
828
829	/* Remove the reference to the pgrp before deallocating it. */
830	if (tp != NULL) {
831		tty_lock(tp);
832		tty_rel_pgrp(tp, pgrp);
833	}
834
835	proc_id_clear(PROC_ID_GROUP, pgrp->pg_id);
836	uma_zfree(pgrp_zone, pgrp);
837	sess_release(savesess);
838}
839
840
/*
 * Update job control state for an exiting process p.
 *
 * Marks p with P_TREE_GRPEXITED, orphans p's own process group if no
 * qualifying member remains, and then re-evaluates the process groups
 * of p's children and orphans.
 * Requires proctree_lock; the locks of p, its group and its session
 * must not be held.
 */
static void
fixjobc_kill(struct proc *p)
{
	struct proc *q;
	struct pgrp *pgrp;

	sx_assert(&proctree_lock, SX_LOCKED);
	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
	pgrp = p->p_pgrp;
	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);

	/*
	 * p no longer affects process group orphanage for children.
	 * It is marked by the flag because p is only physically
	 * removed from its process group on wait(2).
	 */
	MPASS((p->p_treeflag & P_TREE_GRPEXITED) == 0);
	p->p_treeflag |= P_TREE_GRPEXITED;

	/*
	 * Check if exiting p orphans its own group.
	 */
	pgrp = p->p_pgrp;
	if (isjobproc(jobc_parent(p, NULL), pgrp)) {
		PGRP_LOCK(pgrp);
		/* p is already flagged, so it is not counted here. */
		if (pgrp_calc_jobc(pgrp) == 0)
			orphanpg(pgrp);
		PGRP_UNLOCK(pgrp);
	}

	/*
	 * Check this process' children to see whether they qualify
	 * their process groups after reparenting to reaper.
	 */
	LIST_FOREACH(q, &p->p_children, p_sibling) {
		pgrp = q->p_pgrp;
		PGRP_LOCK(pgrp);
		if (pgrp_calc_jobc(pgrp) == 0) {
			/*
			 * We want to handle exactly the children that
			 * has p as realparent.  Then, when calculating
			 * jobc_parent for children, we should ignore
			 * P_TREE_GRPEXITED flag already set on p.
			 */
			if (jobc_parent(q, p) == p && isjobproc(p, pgrp))
				orphanpg(pgrp);
		} else
			pgrp->pg_flags &= ~PGRP_ORPHANED;
		PGRP_UNLOCK(pgrp);
	}
	/* Same re-evaluation for the processes on p's p_orphans list. */
	LIST_FOREACH(q, &p->p_orphans, p_orphan) {
		pgrp = q->p_pgrp;
		PGRP_LOCK(pgrp);
		if (pgrp_calc_jobc(pgrp) == 0) {
			if (isjobproc(p, pgrp))
				orphanpg(pgrp);
		} else
			pgrp->pg_flags &= ~PGRP_ORPHANED;
		PGRP_UNLOCK(pgrp);
	}
}
903
/*
 * Job control cleanup for the exiting current process (P_WEXIT is
 * asserted).
 *
 * If the process is a session leader, the session is detached from its
 * controlling terminal vnode, SIGHUP is sent to the terminal's
 * foreground process group (if the terminal still belongs to this
 * session), and the terminal vnode is revoked.  Afterwards the job
 * control state is updated via fixjobc_kill().
 *
 * NOTE(review): only SX_LOCKED is asserted for proctree_lock, but the
 * revoke path uses sx_xunlock()/sx_xlock(), so the caller presumably
 * holds it exclusively -- confirm.  proctree_lock is temporarily dropped
 * on that path.
 */
void
killjobc(void)
{
	struct session *sp;
	struct tty *tp;
	struct proc *p;
	struct vnode *ttyvp;

	p = curproc;
	MPASS(p->p_flag & P_WEXIT);
	sx_assert(&proctree_lock, SX_LOCKED);

	if (SESS_LEADER(p)) {
		sp = p->p_session;

		/*
		 * s_ttyp is not zero'd; we use this to indicate that
		 * the session once had a controlling terminal. (for
		 * logging and informational purposes)
		 */
		SESS_LOCK(sp);
		ttyvp = sp->s_ttyvp;
		tp = sp->s_ttyp;
		sp->s_ttyvp = NULL;
		sp->s_ttydp = NULL;
		sp->s_leader = NULL;
		SESS_UNLOCK(sp);

		/*
		 * Signal foreground pgrp and revoke access to
		 * controlling terminal if it has not been revoked
		 * already.
		 *
		 * Because the TTY may have been revoked in the mean
		 * time and could already have a new session associated
		 * with it, make sure we don't send a SIGHUP to a
		 * foreground process group that does not belong to this
		 * session.
		 */

		if (tp != NULL) {
			tty_lock(tp);
			if (tp->t_session == sp)
				tty_signal_pgrp(tp, SIGHUP);
			tty_unlock(tp);
		}

		if (ttyvp != NULL) {
			/* Drop proctree_lock across the vnode operations. */
			sx_xunlock(&proctree_lock);
			if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) {
				VOP_REVOKE(ttyvp, REVOKEALL);
				VOP_UNLOCK(ttyvp);
			}
			/* Released whether or not the vnode lock succeeded. */
			devfs_ctty_unref(ttyvp);
			sx_xlock(&proctree_lock);
		}
	}
	fixjobc_kill(p);
}
963
964/*
965 * A process group has become orphaned, mark it as such for signal
966 * delivery code.  If there are any stopped processes in the group,
967 * hang-up all process in that group.
968 */
969static void
970orphanpg(struct pgrp *pg)
971{
972	struct proc *p;
973
974	PGRP_LOCK_ASSERT(pg, MA_OWNED);
975
976	pg->pg_flags |= PGRP_ORPHANED;
977
978	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
979		PROC_LOCK(p);
980		if (P_SHOULDSTOP(p) == P_STOPPED_SIG) {
981			PROC_UNLOCK(p);
982			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
983				PROC_LOCK(p);
984				kern_psignal(p, SIGHUP);
985				kern_psignal(p, SIGCONT);
986				PROC_UNLOCK(p);
987			}
988			return;
989		}
990		PROC_UNLOCK(p);
991	}
992}
993
/*
 * Acquire an additional reference on session s.
 * Paired with sess_release().
 */
void
sess_hold(struct session *s)
{

	refcount_acquire(&s->s_count);
}
1000
1001void
1002sess_release(struct session *s)
1003{
1004
1005	if (refcount_release(&s->s_count)) {
1006		if (s->s_ttyp != NULL) {
1007			tty_lock(s->s_ttyp);
1008			tty_rel_sess(s->s_ttyp, s);
1009		}
1010		proc_id_clear(PROC_ID_SESSION, s->s_sid);
1011		mtx_destroy(&s->s_mtx);
1012		free(s, M_SESSION);
1013	}
1014}
1015
1016#ifdef DDB
1017
1018static void
1019db_print_pgrp_one(struct pgrp *pgrp, struct proc *p)
1020{
1021	db_printf(
1022	    "    pid %d at %p pr %d pgrp %p e %d jc %d\n",
1023	    p->p_pid, p, p->p_pptr == NULL ? -1 : p->p_pptr->p_pid,
1024	    p->p_pgrp, (p->p_treeflag & P_TREE_GRPEXITED) != 0,
1025	    p->p_pptr == NULL ? 0 : isjobproc(p->p_pptr, pgrp));
1026}
1027
1028DB_SHOW_COMMAND_FLAGS(pgrpdump, pgrpdump, DB_CMD_MEMSAFE)
1029{
1030	struct pgrp *pgrp;
1031	struct proc *p;
1032	int i;
1033
1034	for (i = 0; i <= pgrphash; i++) {
1035		if (!LIST_EMPTY(&pgrphashtbl[i])) {
1036			db_printf("indx %d\n", i);
1037			LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) {
1038				db_printf(
1039			"  pgrp %p, pgid %d, sess %p, sesscnt %d, mem %p\n",
1040				    pgrp, (int)pgrp->pg_id, pgrp->pg_session,
1041				    pgrp->pg_session->s_count,
1042				    LIST_FIRST(&pgrp->pg_members));
1043				LIST_FOREACH(p, &pgrp->pg_members, p_pglist)
1044					db_print_pgrp_one(pgrp, p);
1045			}
1046		}
1047	}
1048}
1049#endif /* DDB */
1050
1051/*
1052 * Calculate the kinfo_proc members which contain process-wide
1053 * informations.
1054 * Must be called with the target process locked.
1055 */
1056static void
1057fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp)
1058{
1059	struct thread *td;
1060
1061	PROC_LOCK_ASSERT(p, MA_OWNED);
1062
1063	kp->ki_estcpu = 0;
1064	kp->ki_pctcpu = 0;
1065	FOREACH_THREAD_IN_PROC(p, td) {
1066		thread_lock(td);
1067		kp->ki_pctcpu += sched_pctcpu(td);
1068		kp->ki_estcpu += sched_estcpu(td);
1069		thread_unlock(td);
1070	}
1071}
1072
1073/*
1074 * Fill in any information that is common to all threads in the process.
1075 * Must be called with the target process locked.
1076 */
1077static void
1078fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
1079{
1080	struct thread *td0;
1081	struct ucred *cred;
1082	struct sigacts *ps;
1083	struct timeval boottime;
1084
1085	PROC_LOCK_ASSERT(p, MA_OWNED);
1086
1087	kp->ki_structsize = sizeof(*kp);
1088	kp->ki_paddr = p;
1089	kp->ki_addr =/* p->p_addr; */0; /* XXX */
1090	kp->ki_args = p->p_args;
1091	kp->ki_textvp = p->p_textvp;
1092#ifdef KTRACE
1093	kp->ki_tracep = ktr_get_tracevp(p, false);
1094	kp->ki_traceflag = p->p_traceflag;
1095#endif
1096	kp->ki_fd = p->p_fd;
1097	kp->ki_pd = p->p_pd;
1098	kp->ki_vmspace = p->p_vmspace;
1099	kp->ki_flag = p->p_flag;
1100	kp->ki_flag2 = p->p_flag2;
1101	cred = p->p_ucred;
1102	if (cred) {
1103		kp->ki_uid = cred->cr_uid;
1104		kp->ki_ruid = cred->cr_ruid;
1105		kp->ki_svuid = cred->cr_svuid;
1106		kp->ki_cr_flags = 0;
1107		if (cred->cr_flags & CRED_FLAG_CAPMODE)
1108			kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE;
1109		/* XXX bde doesn't like KI_NGROUPS */
1110		if (cred->cr_ngroups > KI_NGROUPS) {
1111			kp->ki_ngroups = KI_NGROUPS;
1112			kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
1113		} else
1114			kp->ki_ngroups = cred->cr_ngroups;
1115		bcopy(cred->cr_groups, kp->ki_groups,
1116		    kp->ki_ngroups * sizeof(gid_t));
1117		kp->ki_rgid = cred->cr_rgid;
1118		kp->ki_svgid = cred->cr_svgid;
1119		/* If jailed(cred), emulate the old P_JAILED flag. */
1120		if (jailed(cred)) {
1121			kp->ki_flag |= P_JAILED;
1122			/* If inside the jail, use 0 as a jail ID. */
1123			if (cred->cr_prison != curthread->td_ucred->cr_prison)
1124				kp->ki_jid = cred->cr_prison->pr_id;
1125		}
1126		strlcpy(kp->ki_loginclass, cred->cr_loginclass->lc_name,
1127		    sizeof(kp->ki_loginclass));
1128	}
1129	ps = p->p_sigacts;
1130	if (ps) {
1131		mtx_lock(&ps->ps_mtx);
1132		kp->ki_sigignore = ps->ps_sigignore;
1133		kp->ki_sigcatch = ps->ps_sigcatch;
1134		mtx_unlock(&ps->ps_mtx);
1135	}
1136	if (p->p_state != PRS_NEW &&
1137	    p->p_state != PRS_ZOMBIE &&
1138	    p->p_vmspace != NULL) {
1139		struct vmspace *vm = p->p_vmspace;
1140
1141		kp->ki_size = vm->vm_map.size;
1142		kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/
1143		FOREACH_THREAD_IN_PROC(p, td0) {
1144			if (!TD_IS_SWAPPED(td0))
1145				kp->ki_rssize += td0->td_kstack_pages;
1146		}
1147		kp->ki_swrss = vm->vm_swrss;
1148		kp->ki_tsize = vm->vm_tsize;
1149		kp->ki_dsize = vm->vm_dsize;
1150		kp->ki_ssize = vm->vm_ssize;
1151	} else if (p->p_state == PRS_ZOMBIE)
1152		kp->ki_stat = SZOMB;
1153	if (kp->ki_flag & P_INMEM)
1154		kp->ki_sflag = PS_INMEM;
1155	else
1156		kp->ki_sflag = 0;
1157	/* Calculate legacy swtime as seconds since 'swtick'. */
1158	kp->ki_swtime = (ticks - p->p_swtick) / hz;
1159	kp->ki_pid = p->p_pid;
1160	kp->ki_nice = p->p_nice;
1161	kp->ki_fibnum = p->p_fibnum;
1162	kp->ki_start = p->p_stats->p_start;
1163	getboottime(&boottime);
1164	timevaladd(&kp->ki_start, &boottime);
1165	PROC_STATLOCK(p);
1166	rufetch(p, &kp->ki_rusage);
1167	kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
1168	calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime);
1169	PROC_STATUNLOCK(p);
1170	calccru(p, &kp->ki_childutime, &kp->ki_childstime);
1171	/* Some callers want child times in a single value. */
1172	kp->ki_childtime = kp->ki_childstime;
1173	timevaladd(&kp->ki_childtime, &kp->ki_childutime);
1174
1175	FOREACH_THREAD_IN_PROC(p, td0)
1176		kp->ki_cow += td0->td_cow;
1177
1178	if (p->p_comm[0] != '\0')
1179		strlcpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm));
1180	if (p->p_sysent && p->p_sysent->sv_name != NULL &&
1181	    p->p_sysent->sv_name[0] != '\0')
1182		strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul));
1183	kp->ki_siglist = p->p_siglist;
1184	kp->ki_xstat = KW_EXITCODE(p->p_xexit, p->p_xsig);
1185	kp->ki_acflag = p->p_acflag;
1186	kp->ki_lock = p->p_lock;
1187	if (p->p_pptr) {
1188		kp->ki_ppid = p->p_oppid;
1189		if (p->p_flag & P_TRACED)
1190			kp->ki_tracer = p->p_pptr->p_pid;
1191	}
1192}
1193
1194/*
1195 * Fill job-related process information.
1196 */
1197static void
1198fill_kinfo_proc_pgrp(struct proc *p, struct kinfo_proc *kp)
1199{
1200	struct tty *tp;
1201	struct session *sp;
1202	struct pgrp *pgrp;
1203
1204	sx_assert(&proctree_lock, SA_LOCKED);
1205	PROC_LOCK_ASSERT(p, MA_OWNED);
1206
1207	pgrp = p->p_pgrp;
1208	if (pgrp == NULL)
1209		return;
1210
1211	kp->ki_pgid = pgrp->pg_id;
1212	kp->ki_jobc = pgrp_calc_jobc(pgrp);
1213
1214	sp = pgrp->pg_session;
1215	tp = NULL;
1216
1217	if (sp != NULL) {
1218		kp->ki_sid = sp->s_sid;
1219		SESS_LOCK(sp);
1220		strlcpy(kp->ki_login, sp->s_login, sizeof(kp->ki_login));
1221		if (sp->s_ttyvp)
1222			kp->ki_kiflag |= KI_CTTY;
1223		if (SESS_LEADER(p))
1224			kp->ki_kiflag |= KI_SLEADER;
1225		tp = sp->s_ttyp;
1226		SESS_UNLOCK(sp);
1227	}
1228
1229	if ((p->p_flag & P_CONTROLT) && tp != NULL) {
1230		kp->ki_tdev = tty_udev(tp);
1231		kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */
1232		kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
1233		if (tp->t_session)
1234			kp->ki_tsid = tp->t_session->s_sid;
1235	} else {
1236		kp->ki_tdev = NODEV;
1237		kp->ki_tdev_freebsd11 = kp->ki_tdev; /* truncate */
1238	}
1239}
1240
1241/*
1242 * Fill in information that is thread specific.  Must be called with
1243 * target process locked.  If 'preferthread' is set, overwrite certain
1244 * process-related fields that are maintained for both threads and
1245 * processes.
1246 */
1247static void
1248fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread)
1249{
1250	struct proc *p;
1251
1252	p = td->td_proc;
1253	kp->ki_tdaddr = td;
1254	PROC_LOCK_ASSERT(p, MA_OWNED);
1255
1256	if (preferthread)
1257		PROC_STATLOCK(p);
1258	thread_lock(td);
1259	if (td->td_wmesg != NULL)
1260		strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg));
1261	else
1262		bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg));
1263	if (strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname)) >=
1264	    sizeof(kp->ki_tdname)) {
1265		strlcpy(kp->ki_moretdname,
1266		    td->td_name + sizeof(kp->ki_tdname) - 1,
1267		    sizeof(kp->ki_moretdname));
1268	} else {
1269		bzero(kp->ki_moretdname, sizeof(kp->ki_moretdname));
1270	}
1271	if (TD_ON_LOCK(td)) {
1272		kp->ki_kiflag |= KI_LOCKBLOCK;
1273		strlcpy(kp->ki_lockname, td->td_lockname,
1274		    sizeof(kp->ki_lockname));
1275	} else {
1276		kp->ki_kiflag &= ~KI_LOCKBLOCK;
1277		bzero(kp->ki_lockname, sizeof(kp->ki_lockname));
1278	}
1279
1280	if (p->p_state == PRS_NORMAL) { /* approximate. */
1281		if (TD_ON_RUNQ(td) ||
1282		    TD_CAN_RUN(td) ||
1283		    TD_IS_RUNNING(td)) {
1284			kp->ki_stat = SRUN;
1285		} else if (P_SHOULDSTOP(p)) {
1286			kp->ki_stat = SSTOP;
1287		} else if (TD_IS_SLEEPING(td)) {
1288			kp->ki_stat = SSLEEP;
1289		} else if (TD_ON_LOCK(td)) {
1290			kp->ki_stat = SLOCK;
1291		} else {
1292			kp->ki_stat = SWAIT;
1293		}
1294	} else if (p->p_state == PRS_ZOMBIE) {
1295		kp->ki_stat = SZOMB;
1296	} else {
1297		kp->ki_stat = SIDL;
1298	}
1299
1300	/* Things in the thread */
1301	kp->ki_wchan = td->td_wchan;
1302	kp->ki_pri.pri_level = td->td_priority;
1303	kp->ki_pri.pri_native = td->td_base_pri;
1304
1305	/*
1306	 * Note: legacy fields; clamp at the old NOCPU value and/or
1307	 * the maximum u_char CPU value.
1308	 */
1309	if (td->td_lastcpu == NOCPU)
1310		kp->ki_lastcpu_old = NOCPU_OLD;
1311	else if (td->td_lastcpu > MAXCPU_OLD)
1312		kp->ki_lastcpu_old = MAXCPU_OLD;
1313	else
1314		kp->ki_lastcpu_old = td->td_lastcpu;
1315
1316	if (td->td_oncpu == NOCPU)
1317		kp->ki_oncpu_old = NOCPU_OLD;
1318	else if (td->td_oncpu > MAXCPU_OLD)
1319		kp->ki_oncpu_old = MAXCPU_OLD;
1320	else
1321		kp->ki_oncpu_old = td->td_oncpu;
1322
1323	kp->ki_lastcpu = td->td_lastcpu;
1324	kp->ki_oncpu = td->td_oncpu;
1325	kp->ki_tdflags = td->td_flags;
1326	kp->ki_tid = td->td_tid;
1327	kp->ki_numthreads = p->p_numthreads;
1328	kp->ki_pcb = td->td_pcb;
1329	kp->ki_kstack = (void *)td->td_kstack;
1330	kp->ki_slptime = (ticks - td->td_slptick) / hz;
1331	kp->ki_pri.pri_class = td->td_pri_class;
1332	kp->ki_pri.pri_user = td->td_user_pri;
1333
1334	if (preferthread) {
1335		rufetchtd(td, &kp->ki_rusage);
1336		kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime);
1337		kp->ki_pctcpu = sched_pctcpu(td);
1338		kp->ki_estcpu = sched_estcpu(td);
1339		kp->ki_cow = td->td_cow;
1340	}
1341
1342	/* We can't get this anymore but ps etc never used it anyway. */
1343	kp->ki_rqindex = 0;
1344
1345	if (preferthread)
1346		kp->ki_siglist = td->td_siglist;
1347	kp->ki_sigmask = td->td_sigmask;
1348	thread_unlock(td);
1349	if (preferthread)
1350		PROC_STATUNLOCK(p);
1351}
1352
1353/*
1354 * Fill in a kinfo_proc structure for the specified process.
1355 * Must be called with the target process locked.
1356 */
1357void
1358fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp)
1359{
1360	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
1361
1362	bzero(kp, sizeof(*kp));
1363
1364	fill_kinfo_proc_pgrp(p,kp);
1365	fill_kinfo_proc_only(p, kp);
1366	fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp, 0);
1367	fill_kinfo_aggregate(p, kp);
1368}
1369
1370struct pstats *
1371pstats_alloc(void)
1372{
1373
1374	return (malloc(sizeof(struct pstats), M_SUBPROC, M_ZERO|M_WAITOK));
1375}
1376
1377/*
1378 * Copy parts of p_stats; zero the rest of p_stats (statistics).
1379 */
1380void
1381pstats_fork(struct pstats *src, struct pstats *dst)
1382{
1383
1384	bzero(&dst->pstat_startzero,
1385	    __rangeof(struct pstats, pstat_startzero, pstat_endzero));
1386	bcopy(&src->pstat_startcopy, &dst->pstat_startcopy,
1387	    __rangeof(struct pstats, pstat_startcopy, pstat_endcopy));
1388}
1389
/*
 * Release a pstats structure back to the M_SUBPROC malloc type.
 */
void
pstats_free(struct pstats *ps)
{

	free(ps, M_SUBPROC);
}
1396
1397#ifdef COMPAT_FREEBSD32
1398
1399/*
1400 * This function is typically used to copy out the kernel address, so
1401 * it can be replaced by assignment of zero.
1402 */
1403static inline uint32_t
1404ptr32_trim(const void *ptr)
1405{
1406	uintptr_t uptr;
1407
1408	uptr = (uintptr_t)ptr;
1409	return ((uptr > UINT_MAX) ? 0 : uptr);
1410}
1411
1412#define PTRTRIM_CP(src,dst,fld) \
1413	do { (dst).fld = ptr32_trim((src).fld); } while (0)
1414
/*
 * Convert a native kinfo_proc ('ki') into the 32-bit compat layout
 * ('ki32').  The destination is zeroed first, so any field not copied
 * below reads as zero.  Pointer-valued fields go through PTRTRIM_CP.
 * NOTE(review): CP and TV_CP are defined outside this chunk; they are
 * presumably member-wise copy helpers -- confirm against their definitions.
 */
static void
freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32)
{
	int i;

	bzero(ki32, sizeof(struct kinfo_proc32));
	/* The structure size reported is that of the 32-bit layout. */
	ki32->ki_structsize = sizeof(struct kinfo_proc32);
	CP(*ki, *ki32, ki_layout);
	PTRTRIM_CP(*ki, *ki32, ki_args);
	PTRTRIM_CP(*ki, *ki32, ki_paddr);
	PTRTRIM_CP(*ki, *ki32, ki_addr);
	PTRTRIM_CP(*ki, *ki32, ki_tracep);
	PTRTRIM_CP(*ki, *ki32, ki_textvp);
	PTRTRIM_CP(*ki, *ki32, ki_fd);
	PTRTRIM_CP(*ki, *ki32, ki_vmspace);
	PTRTRIM_CP(*ki, *ki32, ki_wchan);
	CP(*ki, *ki32, ki_pid);
	CP(*ki, *ki32, ki_ppid);
	CP(*ki, *ki32, ki_pgid);
	CP(*ki, *ki32, ki_tpgid);
	CP(*ki, *ki32, ki_sid);
	CP(*ki, *ki32, ki_tsid);
	CP(*ki, *ki32, ki_jobc);
	CP(*ki, *ki32, ki_tdev);
	CP(*ki, *ki32, ki_tdev_freebsd11);
	CP(*ki, *ki32, ki_siglist);
	CP(*ki, *ki32, ki_sigmask);
	CP(*ki, *ki32, ki_sigignore);
	CP(*ki, *ki32, ki_sigcatch);
	CP(*ki, *ki32, ki_uid);
	CP(*ki, *ki32, ki_ruid);
	CP(*ki, *ki32, ki_svuid);
	CP(*ki, *ki32, ki_rgid);
	CP(*ki, *ki32, ki_svgid);
	CP(*ki, *ki32, ki_ngroups);
	/* All KI_NGROUPS slots are copied, not just the first ki_ngroups. */
	for (i = 0; i < KI_NGROUPS; i++)
		CP(*ki, *ki32, ki_groups[i]);
	CP(*ki, *ki32, ki_size);
	CP(*ki, *ki32, ki_rssize);
	CP(*ki, *ki32, ki_swrss);
	CP(*ki, *ki32, ki_tsize);
	CP(*ki, *ki32, ki_dsize);
	CP(*ki, *ki32, ki_ssize);
	CP(*ki, *ki32, ki_xstat);
	CP(*ki, *ki32, ki_acflag);
	CP(*ki, *ki32, ki_pctcpu);
	CP(*ki, *ki32, ki_estcpu);
	CP(*ki, *ki32, ki_slptime);
	CP(*ki, *ki32, ki_swtime);
	CP(*ki, *ki32, ki_cow);
	CP(*ki, *ki32, ki_runtime);
	TV_CP(*ki, *ki32, ki_start);
	TV_CP(*ki, *ki32, ki_childtime);
	CP(*ki, *ki32, ki_flag);
	CP(*ki, *ki32, ki_kiflag);
	CP(*ki, *ki32, ki_traceflag);
	CP(*ki, *ki32, ki_stat);
	CP(*ki, *ki32, ki_nice);
	CP(*ki, *ki32, ki_lock);
	CP(*ki, *ki32, ki_rqindex);
	CP(*ki, *ki32, ki_oncpu);
	CP(*ki, *ki32, ki_lastcpu);

	/* XXX TODO: wrap cpu value as appropriate */
	CP(*ki, *ki32, ki_oncpu_old);
	CP(*ki, *ki32, ki_lastcpu_old);

	/* Fixed-size name buffers are copied whole, terminator included. */
	bcopy(ki->ki_tdname, ki32->ki_tdname, TDNAMLEN + 1);
	bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1);
	bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1);
	bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1);
	bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1);
	bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1);
	bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1);
	bcopy(ki->ki_moretdname, ki32->ki_moretdname, MAXCOMLEN - TDNAMLEN + 1);
	CP(*ki, *ki32, ki_tracer);
	CP(*ki, *ki32, ki_flag2);
	CP(*ki, *ki32, ki_fibnum);
	CP(*ki, *ki32, ki_cr_flags);
	CP(*ki, *ki32, ki_jid);
	CP(*ki, *ki32, ki_numthreads);
	CP(*ki, *ki32, ki_tid);
	CP(*ki, *ki32, ki_pri);
	/* Resource usage has its own 32-bit conversion routine. */
	freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage);
	freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch);
	PTRTRIM_CP(*ki, *ki32, ki_pcb);
	PTRTRIM_CP(*ki, *ki32, ki_kstack);
	PTRTRIM_CP(*ki, *ki32, ki_udata);
	PTRTRIM_CP(*ki, *ki32, ki_tdaddr);
	CP(*ki, *ki32, ki_sflag);
	CP(*ki, *ki32, ki_tdflags);
}
1507#endif
1508
1509static ssize_t
1510kern_proc_out_size(struct proc *p, int flags)
1511{
1512	ssize_t size = 0;
1513
1514	PROC_LOCK_ASSERT(p, MA_OWNED);
1515
1516	if ((flags & KERN_PROC_NOTHREADS) != 0) {
1517#ifdef COMPAT_FREEBSD32
1518		if ((flags & KERN_PROC_MASK32) != 0) {
1519			size += sizeof(struct kinfo_proc32);
1520		} else
1521#endif
1522			size += sizeof(struct kinfo_proc);
1523	} else {
1524#ifdef COMPAT_FREEBSD32
1525		if ((flags & KERN_PROC_MASK32) != 0)
1526			size += sizeof(struct kinfo_proc32) * p->p_numthreads;
1527		else
1528#endif
1529			size += sizeof(struct kinfo_proc) * p->p_numthreads;
1530	}
1531	PROC_UNLOCK(p);
1532	return (size);
1533}
1534
1535int
1536kern_proc_out(struct proc *p, struct sbuf *sb, int flags)
1537{
1538	struct thread *td;
1539	struct kinfo_proc ki;
1540#ifdef COMPAT_FREEBSD32
1541	struct kinfo_proc32 ki32;
1542#endif
1543	int error;
1544
1545	PROC_LOCK_ASSERT(p, MA_OWNED);
1546	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
1547
1548	error = 0;
1549	fill_kinfo_proc(p, &ki);
1550	if ((flags & KERN_PROC_NOTHREADS) != 0) {
1551#ifdef COMPAT_FREEBSD32
1552		if ((flags & KERN_PROC_MASK32) != 0) {
1553			freebsd32_kinfo_proc_out(&ki, &ki32);
1554			if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
1555				error = ENOMEM;
1556		} else
1557#endif
1558			if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
1559				error = ENOMEM;
1560	} else {
1561		FOREACH_THREAD_IN_PROC(p, td) {
1562			fill_kinfo_thread(td, &ki, 1);
1563#ifdef COMPAT_FREEBSD32
1564			if ((flags & KERN_PROC_MASK32) != 0) {
1565				freebsd32_kinfo_proc_out(&ki, &ki32);
1566				if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
1567					error = ENOMEM;
1568			} else
1569#endif
1570				if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
1571					error = ENOMEM;
1572			if (error != 0)
1573				break;
1574		}
1575	}
1576	PROC_UNLOCK(p);
1577	return (error);
1578}
1579
1580static int
1581sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags)
1582{
1583	struct sbuf sb;
1584	struct kinfo_proc ki;
1585	int error, error2;
1586
1587	if (req->oldptr == NULL)
1588		return (SYSCTL_OUT(req, 0, kern_proc_out_size(p, flags)));
1589
1590	sbuf_new_for_sysctl(&sb, (char *)&ki, sizeof(ki), req);
1591	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
1592	error = kern_proc_out(p, &sb, flags);
1593	error2 = sbuf_finish(&sb);
1594	sbuf_delete(&sb);
1595	if (error != 0)
1596		return (error);
1597	else if (error2 != 0)
1598		return (error2);
1599	return (0);
1600}
1601
1602int
1603proc_iterate(int (*cb)(struct proc *, void *), void *cbarg)
1604{
1605	struct proc *p;
1606	int error, i, j;
1607
1608	for (i = 0; i < pidhashlock + 1; i++) {
1609		sx_slock(&proctree_lock);
1610		sx_slock(&pidhashtbl_lock[i]);
1611		for (j = i; j <= pidhash; j += pidhashlock + 1) {
1612			LIST_FOREACH(p, &pidhashtbl[j], p_hash) {
1613				if (p->p_state == PRS_NEW)
1614					continue;
1615				error = cb(p, cbarg);
1616				PROC_LOCK_ASSERT(p, MA_NOTOWNED);
1617				if (error != 0) {
1618					sx_sunlock(&pidhashtbl_lock[i]);
1619					sx_sunlock(&proctree_lock);
1620					return (error);
1621				}
1622			}
1623		}
1624		sx_sunlock(&pidhashtbl_lock[i]);
1625		sx_sunlock(&proctree_lock);
1626	}
1627	return (0);
1628}
1629
/*
 * Argument bundle passed from sysctl_kern_proc() through proc_iterate()
 * to sysctl_kern_proc_iterate().
 */
struct kern_proc_out_args {
	struct sysctl_req *req;		/* request the output is written to */
	int flags;			/* KERN_PROC_* output flags */
	int oid_number;			/* selector (KERN_PROC_UID, ...) */
	int *name;			/* selector argument; name[0] is used */
};
1636
1637static int
1638sysctl_kern_proc_iterate(struct proc *p, void *origarg)
1639{
1640	struct kern_proc_out_args *arg = origarg;
1641	int *name = arg->name;
1642	int oid_number = arg->oid_number;
1643	int flags = arg->flags;
1644	struct sysctl_req *req = arg->req;
1645	int error = 0;
1646
1647	PROC_LOCK(p);
1648
1649	KASSERT(p->p_ucred != NULL,
1650	    ("process credential is NULL for non-NEW proc"));
1651	/*
1652	 * Show a user only appropriate processes.
1653	 */
1654	if (p_cansee(curthread, p))
1655		goto skip;
1656	/*
1657	 * TODO - make more efficient (see notes below).
1658	 * do by session.
1659	 */
1660	switch (oid_number) {
1661	case KERN_PROC_GID:
1662		if (p->p_ucred->cr_gid != (gid_t)name[0])
1663			goto skip;
1664		break;
1665
1666	case KERN_PROC_PGRP:
1667		/* could do this by traversing pgrp */
1668		if (p->p_pgrp == NULL ||
1669		    p->p_pgrp->pg_id != (pid_t)name[0])
1670			goto skip;
1671		break;
1672
1673	case KERN_PROC_RGID:
1674		if (p->p_ucred->cr_rgid != (gid_t)name[0])
1675			goto skip;
1676		break;
1677
1678	case KERN_PROC_SESSION:
1679		if (p->p_session == NULL ||
1680		    p->p_session->s_sid != (pid_t)name[0])
1681			goto skip;
1682		break;
1683
1684	case KERN_PROC_TTY:
1685		if ((p->p_flag & P_CONTROLT) == 0 ||
1686		    p->p_session == NULL)
1687			goto skip;
1688		/* XXX proctree_lock */
1689		SESS_LOCK(p->p_session);
1690		if (p->p_session->s_ttyp == NULL ||
1691		    tty_udev(p->p_session->s_ttyp) !=
1692		    (dev_t)name[0]) {
1693			SESS_UNLOCK(p->p_session);
1694			goto skip;
1695		}
1696		SESS_UNLOCK(p->p_session);
1697		break;
1698
1699	case KERN_PROC_UID:
1700		if (p->p_ucred->cr_uid != (uid_t)name[0])
1701			goto skip;
1702		break;
1703
1704	case KERN_PROC_RUID:
1705		if (p->p_ucred->cr_ruid != (uid_t)name[0])
1706			goto skip;
1707		break;
1708
1709	case KERN_PROC_PROC:
1710		break;
1711
1712	default:
1713		break;
1714	}
1715	error = sysctl_out_proc(p, req, flags);
1716	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
1717	return (error);
1718skip:
1719	PROC_UNLOCK(p);
1720	return (0);
1721}
1722
1723static int
1724sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
1725{
1726	struct kern_proc_out_args iterarg;
1727	int *name = (int *)arg1;
1728	u_int namelen = arg2;
1729	struct proc *p;
1730	int flags, oid_number;
1731	int error = 0;
1732
1733	oid_number = oidp->oid_number;
1734	if (oid_number != KERN_PROC_ALL &&
1735	    (oid_number & KERN_PROC_INC_THREAD) == 0)
1736		flags = KERN_PROC_NOTHREADS;
1737	else {
1738		flags = 0;
1739		oid_number &= ~KERN_PROC_INC_THREAD;
1740	}
1741#ifdef COMPAT_FREEBSD32
1742	if (req->flags & SCTL_MASK32)
1743		flags |= KERN_PROC_MASK32;
1744#endif
1745	if (oid_number == KERN_PROC_PID) {
1746		if (namelen != 1)
1747			return (EINVAL);
1748		error = sysctl_wire_old_buffer(req, 0);
1749		if (error)
1750			return (error);
1751		sx_slock(&proctree_lock);
1752		error = pget((pid_t)name[0], PGET_CANSEE, &p);
1753		if (error == 0)
1754			error = sysctl_out_proc(p, req, flags);
1755		sx_sunlock(&proctree_lock);
1756		return (error);
1757	}
1758
1759	switch (oid_number) {
1760	case KERN_PROC_ALL:
1761		if (namelen != 0)
1762			return (EINVAL);
1763		break;
1764	case KERN_PROC_PROC:
1765		if (namelen != 0 && namelen != 1)
1766			return (EINVAL);
1767		break;
1768	default:
1769		if (namelen != 1)
1770			return (EINVAL);
1771		break;
1772	}
1773
1774	if (req->oldptr == NULL) {
1775		/* overestimate by 5 procs */
1776		error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5);
1777		if (error)
1778			return (error);
1779	} else {
1780		error = sysctl_wire_old_buffer(req, 0);
1781		if (error != 0)
1782			return (error);
1783	}
1784	iterarg.flags = flags;
1785	iterarg.oid_number = oid_number;
1786	iterarg.req = req;
1787	iterarg.name = name;
1788	error = proc_iterate(sysctl_kern_proc_iterate, &iterarg);
1789	return (error);
1790}
1791
1792struct pargs *
1793pargs_alloc(int len)
1794{
1795	struct pargs *pa;
1796
1797	pa = malloc(sizeof(struct pargs) + len, M_PARGS,
1798		M_WAITOK);
1799	refcount_init(&pa->ar_ref, 1);
1800	pa->ar_length = len;
1801	return (pa);
1802}
1803
/*
 * Release the memory of a pargs structure unconditionally, without
 * consulting its reference count.
 */
static void
pargs_free(struct pargs *pa)
{

	free(pa, M_PARGS);
}
1810
1811void
1812pargs_hold(struct pargs *pa)
1813{
1814
1815	if (pa == NULL)
1816		return;
1817	refcount_acquire(&pa->ar_ref);
1818}
1819
1820void
1821pargs_drop(struct pargs *pa)
1822{
1823
1824	if (pa == NULL)
1825		return;
1826	if (refcount_release(&pa->ar_ref))
1827		pargs_free(pa);
1828}
1829
1830static int
1831proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf,
1832    size_t len)
1833{
1834	ssize_t n;
1835
1836	/*
1837	 * This may return a short read if the string is shorter than the chunk
1838	 * and is aligned at the end of the page, and the following page is not
1839	 * mapped.
1840	 */
1841	n = proc_readmem(td, p, (vm_offset_t)sptr, buf, len);
1842	if (n <= 0)
1843		return (ENOMEM);
1844	return (0);
1845}
1846
#define PROC_AUXV_MAX	256	/* Safety limit on auxv size. */

/*
 * Selects which vector of the target process get_proc_vector() fetches:
 * the argument pointers, the environment pointers, or the ELF auxiliary
 * vector.
 */
enum proc_vector_type {
	PROC_ARG,
	PROC_ENV,
	PROC_AUX,
};
1854
1855#ifdef COMPAT_FREEBSD32
/*
 * 32-bit variant of get_proc_vector(): fetch the argv, envv or auxv
 * vector of process 'p' using the 32-bit ps_strings layout.
 *
 * On success, *proc_vectorp points to an M_TEMP allocation and *vsizep
 * holds its entry count.  For PROC_ARG and PROC_ENV the 32-bit pointers
 * read from the target are widened into an array of 'char *'.  For
 * PROC_AUX the raw Elf32_Auxinfo array is returned as read, merely cast
 * to 'char **'.
 *
 * Returns ENOMEM if the target's memory cannot be read in full, ENOEXEC
 * if the data looks implausible, and EINVAL for an unknown type.
 */
static int
get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp,
    size_t *vsizep, enum proc_vector_type type)
{
	struct freebsd32_ps_strings pss;
	Elf32_Auxinfo aux;
	vm_offset_t vptr, ptr;
	uint32_t *proc_vector32;
	char **proc_vector;
	size_t vsize, size;
	int i, error;

	error = 0;
	if (proc_readmem(td, p, PROC_PS_STRINGS(p), &pss, sizeof(pss)) !=
	    sizeof(pss))
		return (ENOMEM);
	switch (type) {
	case PROC_ARG:
		vptr = (vm_offset_t)PTRIN(pss.ps_argvstr);
		vsize = pss.ps_nargvstr;
		/* The count comes from the target; bound it. */
		if (vsize > ARG_MAX)
			return (ENOEXEC);
		size = vsize * sizeof(int32_t);
		break;
	case PROC_ENV:
		vptr = (vm_offset_t)PTRIN(pss.ps_envstr);
		vsize = pss.ps_nenvstr;
		if (vsize > ARG_MAX)
			return (ENOEXEC);
		size = vsize * sizeof(int32_t);
		break;
	case PROC_AUX:
		/* The aux array follows the env array and its terminator. */
		vptr = (vm_offset_t)PTRIN(pss.ps_envstr) +
		    (pss.ps_nenvstr + 1) * sizeof(int32_t);
		if (vptr % 4 != 0)
			return (ENOEXEC);
		/* Count entries up to AT_NULL, at most PROC_AUXV_MAX. */
		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
			if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) !=
			    sizeof(aux))
				return (ENOMEM);
			if (aux.a_type == AT_NULL)
				break;
			ptr += sizeof(aux);
		}
		/* No terminator within the limit: treat the data as bogus. */
		if (aux.a_type != AT_NULL)
			return (ENOEXEC);
		/* The count includes the AT_NULL entry. */
		vsize = i + 1;
		size = vsize * sizeof(aux);
		break;
	default:
		KASSERT(0, ("Wrong proc vector type: %d", type));
		return (EINVAL);
	}
	proc_vector32 = malloc(size, M_TEMP, M_WAITOK);
	if (proc_readmem(td, p, vptr, proc_vector32, size) != size) {
		error = ENOMEM;
		goto done;
	}
	if (type == PROC_AUX) {
		/* Ownership of the raw buffer passes to the caller. */
		*proc_vectorp = (char **)proc_vector32;
		*vsizep = vsize;
		return (0);
	}
	/* Widen each 32-bit pointer into a native pointer. */
	proc_vector = malloc(vsize * sizeof(char *), M_TEMP, M_WAITOK);
	for (i = 0; i < (int)vsize; i++)
		proc_vector[i] = PTRIN(proc_vector32[i]);
	*proc_vectorp = proc_vector;
	*vsizep = vsize;
done:
	/* The 32-bit staging buffer is no longer needed on this path. */
	free(proc_vector32, M_TEMP);
	return (error);
}
1928#endif
1929
1930static int
1931get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp,
1932    size_t *vsizep, enum proc_vector_type type)
1933{
1934	struct ps_strings pss;
1935	Elf_Auxinfo aux;
1936	vm_offset_t vptr, ptr;
1937	char **proc_vector;
1938	size_t vsize, size;
1939	int i;
1940
1941#ifdef COMPAT_FREEBSD32
1942	if (SV_PROC_FLAG(p, SV_ILP32) != 0)
1943		return (get_proc_vector32(td, p, proc_vectorp, vsizep, type));
1944#endif
1945	if (proc_readmem(td, p, PROC_PS_STRINGS(p), &pss, sizeof(pss)) !=
1946	    sizeof(pss))
1947		return (ENOMEM);
1948	switch (type) {
1949	case PROC_ARG:
1950		vptr = (vm_offset_t)pss.ps_argvstr;
1951		vsize = pss.ps_nargvstr;
1952		if (vsize > ARG_MAX)
1953			return (ENOEXEC);
1954		size = vsize * sizeof(char *);
1955		break;
1956	case PROC_ENV:
1957		vptr = (vm_offset_t)pss.ps_envstr;
1958		vsize = pss.ps_nenvstr;
1959		if (vsize > ARG_MAX)
1960			return (ENOEXEC);
1961		size = vsize * sizeof(char *);
1962		break;
1963	case PROC_AUX:
1964		/*
1965		 * The aux array is just above env array on the stack. Check
1966		 * that the address is naturally aligned.
1967		 */
1968		vptr = (vm_offset_t)pss.ps_envstr + (pss.ps_nenvstr + 1)
1969		    * sizeof(char *);
1970#if __ELF_WORD_SIZE == 64
1971		if (vptr % sizeof(uint64_t) != 0)
1972#else
1973		if (vptr % sizeof(uint32_t) != 0)
1974#endif
1975			return (ENOEXEC);
1976		/*
1977		 * We count the array size reading the aux vectors from the
1978		 * stack until AT_NULL vector is returned.  So (to keep the code
1979		 * simple) we read the process stack twice: the first time here
1980		 * to find the size and the second time when copying the vectors
1981		 * to the allocated proc_vector.
1982		 */
1983		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
1984			if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) !=
1985			    sizeof(aux))
1986				return (ENOMEM);
1987			if (aux.a_type == AT_NULL)
1988				break;
1989			ptr += sizeof(aux);
1990		}
1991		/*
1992		 * If the PROC_AUXV_MAX entries are iterated over, and we have
1993		 * not reached AT_NULL, it is most likely we are reading wrong
1994		 * data: either the process doesn't have auxv array or data has
1995		 * been modified. Return the error in this case.
1996		 */
1997		if (aux.a_type != AT_NULL)
1998			return (ENOEXEC);
1999		vsize = i + 1;
2000		size = vsize * sizeof(aux);
2001		break;
2002	default:
2003		KASSERT(0, ("Wrong proc vector type: %d", type));
2004		return (EINVAL); /* In case we are built without INVARIANTS. */
2005	}
2006	proc_vector = malloc(size, M_TEMP, M_WAITOK);
2007	if (proc_readmem(td, p, vptr, proc_vector, size) != size) {
2008		free(proc_vector, M_TEMP);
2009		return (ENOMEM);
2010	}
2011	*proc_vectorp = proc_vector;
2012	*vsizep = vsize;
2013
2014	return (0);
2015}
2016
#define GET_PS_STRINGS_CHUNK_SZ	256	/* Chunk size (bytes) for ps_strings operations. */

/*
 * Append the strings referenced by the argv or envv vector of process 'p'
 * (selected by 'type') to 'sb', each followed by a NUL byte.  Strings are
 * read from the target in chunks of GET_PS_STRINGS_CHUNK_SZ bytes, and
 * the total output is capped at 2 * (PATH_MAX + ARG_MAX) bytes.  The
 * process must be held by the caller.
 *
 * Returns 0 on success, or the error from get_proc_vector() or
 * proc_read_string().  Return values of sbuf_bcat() are not checked here.
 */
static int
get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb,
    enum proc_vector_type type)
{
	size_t done, len, nchr, vsize;
	int error, i;
	char **proc_vector, *sptr;
	char pss_string[GET_PS_STRINGS_CHUNK_SZ];

	PROC_ASSERT_HELD(p);

	/*
	 * We are not going to read more than 2 * (PATH_MAX + ARG_MAX) bytes.
	 */
	nchr = 2 * (PATH_MAX + ARG_MAX);

	error = get_proc_vector(td, p, &proc_vector, &vsize, type);
	if (error != 0)
		return (error);
	/* 'done' counts the bytes accounted for so far, terminators included. */
	for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) {
		/*
		 * The program may have scribbled into its argv array, e.g. to
		 * remove some arguments.  If that has happened, break out
		 * before trying to read from NULL.
		 */
		if (proc_vector[i] == NULL)
			break;
		for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) {
			error = proc_read_string(td, p, sptr, pss_string,
			    sizeof(pss_string));
			if (error != 0)
				goto done;
			len = strnlen(pss_string, GET_PS_STRINGS_CHUNK_SZ);
			/* Truncate so that the terminator still fits the cap. */
			if (done + len >= nchr)
				len = nchr - done - 1;
			sbuf_bcat(sb, pss_string, len);
			/* A partial chunk means the string ended (or was cut). */
			if (len != GET_PS_STRINGS_CHUNK_SZ)
				break;
			done += GET_PS_STRINGS_CHUNK_SZ;
		}
		/* Terminate the string and account for its final chunk. */
		sbuf_bcat(sb, "", 1);
		done += len + 1;
	}
done:
	free(proc_vector, M_TEMP);
	return (error);
}
2066
2067int
2068proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb)
2069{
2070
2071	return (get_ps_strings(curthread, p, sb, PROC_ARG));
2072}
2073
2074int
2075proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb)
2076{
2077
2078	return (get_ps_strings(curthread, p, sb, PROC_ENV));
2079}
2080
2081int
2082proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb)
2083{
2084	size_t vsize, size;
2085	char **auxv;
2086	int error;
2087
2088	error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX);
2089	if (error == 0) {
2090#ifdef COMPAT_FREEBSD32
2091		if (SV_PROC_FLAG(p, SV_ILP32) != 0)
2092			size = vsize * sizeof(Elf32_Auxinfo);
2093		else
2094#endif
2095			size = vsize * sizeof(Elf_Auxinfo);
2096		if (sbuf_bcat(sb, auxv, size) != 0)
2097			error = ENOMEM;
2098		free(auxv, M_TEMP);
2099	}
2100	return (error);
2101}
2102
2103/*
2104 * This sysctl allows a process to retrieve the argument list or process
2105 * title for another process without groping around in the address space
2106 * of the other process.  It also allow a process to set its own "process
2107 * title to a string of its own choice.
2108 */
2109static int
2110sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
2111{
2112	int *name = (int *)arg1;
2113	u_int namelen = arg2;
2114	struct pargs *newpa, *pa;
2115	struct proc *p;
2116	struct sbuf sb;
2117	int flags, error = 0, error2;
2118	pid_t pid;
2119
2120	if (namelen != 1)
2121		return (EINVAL);
2122
2123	p = curproc;
2124	pid = (pid_t)name[0];
2125	if (pid == -1) {
2126		pid = p->p_pid;
2127	}
2128
2129	/*
2130	 * If the query is for this process and it is single-threaded, there
2131	 * is nobody to modify pargs, thus we can just read.
2132	 */
2133	if (pid == p->p_pid && p->p_numthreads == 1 && req->newptr == NULL &&
2134	    (pa = p->p_args) != NULL)
2135		return (SYSCTL_OUT(req, pa->ar_args, pa->ar_length));
2136
2137	flags = PGET_CANSEE;
2138	if (req->newptr != NULL)
2139		flags |= PGET_ISCURRENT;
2140	error = pget(pid, flags, &p);
2141	if (error)
2142		return (error);
2143
2144	pa = p->p_args;
2145	if (pa != NULL) {
2146		pargs_hold(pa);
2147		PROC_UNLOCK(p);
2148		error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
2149		pargs_drop(pa);
2150	} else if ((p->p_flag & (P_WEXIT | P_SYSTEM)) == 0) {
2151		_PHOLD(p);
2152		PROC_UNLOCK(p);
2153		sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
2154		sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
2155		error = proc_getargv(curthread, p, &sb);
2156		error2 = sbuf_finish(&sb);
2157		PRELE(p);
2158		sbuf_delete(&sb);
2159		if (error == 0 && error2 != 0)
2160			error = error2;
2161	} else {
2162		PROC_UNLOCK(p);
2163	}
2164	if (error != 0 || req->newptr == NULL)
2165		return (error);
2166
2167	if (req->newlen > ps_arg_cache_limit - sizeof(struct pargs))
2168		return (ENOMEM);
2169
2170	if (req->newlen == 0) {
2171		/*
2172		 * Clear the argument pointer, so that we'll fetch arguments
2173		 * with proc_getargv() until further notice.
2174		 */
2175		newpa = NULL;
2176	} else {
2177		newpa = pargs_alloc(req->newlen);
2178		error = SYSCTL_IN(req, newpa->ar_args, req->newlen);
2179		if (error != 0) {
2180			pargs_free(newpa);
2181			return (error);
2182		}
2183	}
2184	PROC_LOCK(p);
2185	pa = p->p_args;
2186	p->p_args = newpa;
2187	PROC_UNLOCK(p);
2188	pargs_drop(pa);
2189	return (0);
2190}
2191
2192/*
2193 * This sysctl allows a process to retrieve environment of another process.
2194 */
2195static int
2196sysctl_kern_proc_env(SYSCTL_HANDLER_ARGS)
2197{
2198	int *name = (int *)arg1;
2199	u_int namelen = arg2;
2200	struct proc *p;
2201	struct sbuf sb;
2202	int error, error2;
2203
2204	if (namelen != 1)
2205		return (EINVAL);
2206
2207	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
2208	if (error != 0)
2209		return (error);
2210	if ((p->p_flag & P_SYSTEM) != 0) {
2211		PRELE(p);
2212		return (0);
2213	}
2214
2215	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
2216	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
2217	error = proc_getenvv(curthread, p, &sb);
2218	error2 = sbuf_finish(&sb);
2219	PRELE(p);
2220	sbuf_delete(&sb);
2221	return (error != 0 ? error : error2);
2222}
2223
2224/*
2225 * This sysctl allows a process to retrieve ELF auxiliary vector of
2226 * another process.
2227 */
2228static int
2229sysctl_kern_proc_auxv(SYSCTL_HANDLER_ARGS)
2230{
2231	int *name = (int *)arg1;
2232	u_int namelen = arg2;
2233	struct proc *p;
2234	struct sbuf sb;
2235	int error, error2;
2236
2237	if (namelen != 1)
2238		return (EINVAL);
2239
2240	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
2241	if (error != 0)
2242		return (error);
2243	if ((p->p_flag & P_SYSTEM) != 0) {
2244		PRELE(p);
2245		return (0);
2246	}
2247	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
2248	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
2249	error = proc_getauxv(curthread, p, &sb);
2250	error2 = sbuf_finish(&sb);
2251	PRELE(p);
2252	sbuf_delete(&sb);
2253	return (error != 0 ? error : error2);
2254}
2255
2256/*
2257 * Look up the canonical executable path running in the specified process.
2258 * It tries to return the same hardlink name as was used for execve(2).
2259 * This allows the programs that modify their behavior based on their progname,
2260 * to operate correctly.
2261 *
2262 * Result is returned in retbuf, it must not be freed, similar to vn_fullpath()
2263 *   calling conventions.
2264 * binname is a pointer to temporary string buffer of length MAXPATHLEN,
2265 *   allocated and freed by caller.
2266 * freebuf should be freed by caller, from the M_TEMP malloc type.
2267 */
2268int
2269proc_get_binpath(struct proc *p, char *binname, char **retbuf,
2270    char **freebuf)
2271{
2272	struct nameidata nd;
2273	struct vnode *vp, *dvp;
2274	size_t freepath_size;
2275	int error;
2276	bool do_fullpath;
2277
2278	PROC_LOCK_ASSERT(p, MA_OWNED);
2279
2280	vp = p->p_textvp;
2281	if (vp == NULL) {
2282		PROC_UNLOCK(p);
2283		*retbuf = "";
2284		*freebuf = NULL;
2285		return (0);
2286	}
2287	vref(vp);
2288	dvp = p->p_textdvp;
2289	if (dvp != NULL)
2290		vref(dvp);
2291	if (p->p_binname != NULL)
2292		strlcpy(binname, p->p_binname, MAXPATHLEN);
2293	PROC_UNLOCK(p);
2294
2295	do_fullpath = true;
2296	*freebuf = NULL;
2297	if (dvp != NULL && binname[0] != '\0') {
2298		freepath_size = MAXPATHLEN;
2299		if (vn_fullpath_hardlink(vp, dvp, binname, strlen(binname),
2300		    retbuf, freebuf, &freepath_size) == 0) {
2301			/*
2302			 * Recheck the looked up path.  The binary
2303			 * might have been renamed or replaced, in
2304			 * which case we should not report old name.
2305			 */
2306			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, *retbuf);
2307			error = namei(&nd);
2308			if (error == 0) {
2309				if (nd.ni_vp == vp)
2310					do_fullpath = false;
2311				vrele(nd.ni_vp);
2312				NDFREE_PNBUF(&nd);
2313			}
2314		}
2315	}
2316	if (do_fullpath) {
2317		free(*freebuf, M_TEMP);
2318		*freebuf = NULL;
2319		error = vn_fullpath(vp, retbuf, freebuf);
2320	}
2321	vrele(vp);
2322	if (dvp != NULL)
2323		vrele(dvp);
2324	return (error);
2325}
2326
2327/*
2328 * This sysctl allows a process to retrieve the path of the executable for
2329 * itself or another process.
2330 */
2331static int
2332sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS)
2333{
2334	pid_t *pidp = (pid_t *)arg1;
2335	unsigned int arglen = arg2;
2336	struct proc *p;
2337	char *retbuf, *freebuf, *binname;
2338	int error;
2339
2340	if (arglen != 1)
2341		return (EINVAL);
2342	binname = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
2343	binname[0] = '\0';
2344	if (*pidp == -1) {	/* -1 means this process */
2345		error = 0;
2346		p = req->td->td_proc;
2347		PROC_LOCK(p);
2348	} else {
2349		error = pget(*pidp, PGET_CANSEE, &p);
2350	}
2351
2352	if (error == 0)
2353		error = proc_get_binpath(p, binname, &retbuf, &freebuf);
2354	free(binname, M_TEMP);
2355	if (error != 0)
2356		return (error);
2357	error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1);
2358	free(freebuf, M_TEMP);
2359	return (error);
2360}
2361
2362static int
2363sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS)
2364{
2365	struct proc *p;
2366	char *sv_name;
2367	int *name;
2368	int namelen;
2369	int error;
2370
2371	namelen = arg2;
2372	if (namelen != 1)
2373		return (EINVAL);
2374
2375	name = (int *)arg1;
2376	error = pget((pid_t)name[0], PGET_CANSEE, &p);
2377	if (error != 0)
2378		return (error);
2379	sv_name = p->p_sysent->sv_name;
2380	PROC_UNLOCK(p);
2381	return (sysctl_handle_string(oidp, sv_name, 0, req));
2382}
2383
/*
 * When KINFO_OVMENTRY_SIZE is defined, fail the build if
 * struct kinfo_ovmentry does not have exactly that size.
 */
#ifdef KINFO_OVMENTRY_SIZE
CTASSERT(sizeof(struct kinfo_ovmentry) == KINFO_OVMENTRY_SIZE);
#endif
2387
#ifdef COMPAT_FREEBSD7
/*
 * Sysctl handler (built only with COMPAT_FREEBSD7) that copies out one
 * struct kinfo_ovmentry per non-submap entry of the target process's
 * VM map.  The single MIB argument is the pid.
 *
 * Returns EINVAL for a wrong argument count, the pget() error if the
 * process cannot be obtained, ESRCH if its vmspace cannot be referenced,
 * otherwise the result of the last SYSCTL_OUT().
 */
static int
sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS)
{
	vm_map_entry_t entry, tmp_entry;
	unsigned int last_timestamp, namelen;
	char *fullpath, *freepath;
	struct kinfo_ovmentry *kve;
	struct vattr va;
	struct ucred *cred;
	int error, *name;
	struct vnode *vp;
	struct proc *p;
	vm_map_t map;
	struct vmspace *vm;

	namelen = arg2;
	if (namelen != 1)
		return (EINVAL);

	name = (int *)arg1;
	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
	if (error != 0)
		return (error);
	vm = vmspace_acquire_ref(p);
	if (vm == NULL) {
		/* No vmspace to inspect; undo the hold taken by pget(). */
		PRELE(p);
		return (ESRCH);
	}
	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);

	map = &vm->vm_map;
	vm_map_lock_read(map);
	VM_MAP_ENTRY_FOREACH(entry, map) {
		vm_object_t obj, tobj, lobj;
		vm_offset_t addr;

		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
			continue;

		bzero(kve, sizeof(*kve));
		kve->kve_structsize = sizeof(*kve);

		kve->kve_private_resident = 0;
		obj = entry->object.vm_object;
		if (obj != NULL) {
			/* obj stays read-locked until after the map unlock below. */
			VM_OBJECT_RLOCK(obj);
			if (obj->shadow_count == 1)
				kve->kve_private_resident =
				    obj->resident_page_count;
		}
		/* Count the pages of this entry for which pmap_extract() is non-zero. */
		kve->kve_resident = 0;
		addr = entry->start;
		while (addr < entry->end) {
			if (pmap_extract(map->pmap, addr))
				kve->kve_resident++;
			addr += PAGE_SIZE;
		}

		/*
		 * Walk the backing-object chain hand over hand: lock each
		 * deeper object, accumulate its backing offset, and unlock
		 * the previous one unless it is obj itself.  On exit lobj is
		 * the last object in the chain (NULL if obj is NULL) and is
		 * still read-locked, as is obj.
		 */
		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
			if (tobj != obj) {
				VM_OBJECT_RLOCK(tobj);
				kve->kve_offset += tobj->backing_object_offset;
			}
			if (lobj != obj)
				VM_OBJECT_RUNLOCK(lobj);
			lobj = tobj;
		}

		kve->kve_start = (void*)entry->start;
		kve->kve_end = (void*)entry->end;
		kve->kve_offset += (off_t)entry->offset;

		/* Translate VM protection bits into their KVME_PROT_* equivalents. */
		if (entry->protection & VM_PROT_READ)
			kve->kve_protection |= KVME_PROT_READ;
		if (entry->protection & VM_PROT_WRITE)
			kve->kve_protection |= KVME_PROT_WRITE;
		if (entry->protection & VM_PROT_EXECUTE)
			kve->kve_protection |= KVME_PROT_EXEC;

		/* Translate map entry flags into their KVME_FLAG_* equivalents. */
		if (entry->eflags & MAP_ENTRY_COW)
			kve->kve_flags |= KVME_FLAG_COW;
		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;

		/*
		 * Remember the map timestamp and drop the map lock for the
		 * vnode work and copyout below; the timestamp is compared
		 * after relocking to detect a changed map.
		 */
		last_timestamp = map->timestamp;
		vm_map_unlock_read(map);

		kve->kve_fileid = 0;
		kve->kve_fsid = 0;
		freepath = NULL;
		fullpath = "";
		if (lobj) {
			kve->kve_type = vm_object_kvme_type(lobj, &vp);
			/* KVME_TYPE_MGTDEVICE is reported as UNKNOWN through this interface. */
			if (kve->kve_type == KVME_TYPE_MGTDEVICE)
				kve->kve_type = KVME_TYPE_UNKNOWN;
			/* Reference the vnode before the object locks are released. */
			if (vp != NULL)
				vref(vp);
			if (lobj != obj)
				VM_OBJECT_RUNLOCK(lobj);

			kve->kve_ref_count = obj->ref_count;
			kve->kve_shadow_count = obj->shadow_count;
			VM_OBJECT_RUNLOCK(obj);
			if (vp != NULL) {
				/* The return value is ignored; fullpath was preset to "". */
				vn_fullpath(vp, &fullpath, &freepath);
				cred = curthread->td_ucred;
				vn_lock(vp, LK_SHARED | LK_RETRY);
				if (VOP_GETATTR(vp, &va, cred) == 0) {
					kve->kve_fileid = va.va_fileid;
					/* truncate */
					kve->kve_fsid = va.va_fsid;
				}
				/* Unlocks the vnode and drops the reference taken above. */
				vput(vp);
			}
		} else {
			kve->kve_type = KVME_TYPE_NONE;
			kve->kve_ref_count = 0;
			kve->kve_shadow_count = 0;
		}

		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
		if (freepath != NULL)
			free(freepath, M_TEMP);

		error = SYSCTL_OUT(req, kve, sizeof(*kve));
		/* Relock before testing error so the unlock after the loop is balanced. */
		vm_map_lock_read(map);
		if (error)
			break;
		if (last_timestamp != map->timestamp) {
			/*
			 * The map changed while unlocked: look the position
			 * up again using addr - 1, where addr was left at or
			 * past entry->end by the residency loop above.
			 */
			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
			entry = tmp_entry;
		}
	}
	vm_map_unlock_read(map);
	vmspace_free(vm);
	PRELE(p);
	free(kve, M_TEMP);
	return (error);
}
#endif	/* COMPAT_FREEBSD7 */
2531
/*
 * When KINFO_VMENTRY_SIZE is defined, fail the build if
 * struct kinfo_vmentry does not have exactly that size.
 */
#ifdef KINFO_VMENTRY_SIZE
CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
#endif
2535
/*
 * Count the resident pages backing a map entry.
 *
 * map            - map the entry belongs to; its pmap is queried for
 *                  superpage mappings.
 * entry          - the map entry to examine.
 * resident_count - out: number of pages counted; 0 when the
 *                  vmmap_skip_res_cnt knob is set.
 * super          - out: true if at least one superpage mapping was seen.
 *
 * The entry's object chain is walked here without taking object locks;
 * kern_proc_vmmap_out() calls this with every object of the chain
 * read-locked.
 */
void
kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry,
    int *resident_count, bool *super)
{
	vm_object_t obj, tobj;
	vm_page_t m, m_adv;
	vm_offset_t addr;
	vm_paddr_t pa;
	vm_pindex_t pi, pi_adv, pindex;

	*super = false;
	*resident_count = 0;
	if (vmmap_skip_res_cnt)
		return;

	pa = 0;
	obj = entry->object.vm_object;
	addr = entry->start;
	m_adv = NULL;
	pi = OFF_TO_IDX(entry->offset);
	/* Each iteration advances addr and pi by pi_adv pages. */
	for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) {
		if (m_adv != NULL) {
			/* The previous iteration already located the page at this index. */
			m = m_adv;
		} else {
			/* Default skip: all the way to the end of the entry. */
			pi_adv = atop(entry->end - addr);
			pindex = pi;
			/*
			 * Search the object chain for a page at pindex.  When
			 * an object only has a later page, remember the
			 * nearest such page and the distance to it so the
			 * outer loop can jump straight there.
			 */
			for (tobj = obj;; tobj = tobj->backing_object) {
				m = vm_page_find_least(tobj, pindex);
				if (m != NULL) {
					if (m->pindex == pindex)
						break;
					if (pi_adv > m->pindex - pindex) {
						pi_adv = m->pindex - pindex;
						m_adv = m;
					}
				}
				/* End of chain with no page at this index: skip ahead. */
				if (tobj->backing_object == NULL)
					goto next;
				pindex += OFF_TO_IDX(tobj->
				    backing_object_offset);
			}
		}
		m_adv = NULL;
		/*
		 * Treat the page as part of a superpage only if the page is
		 * marked (psind), the address is aligned to pagesizes[1],
		 * the whole superpage fits in the entry, and pmap_mincore()
		 * reports MINCORE_SUPER for the address.
		 */
		if (m->psind != 0 && addr + pagesizes[1] <= entry->end &&
		    (addr & (pagesizes[1] - 1)) == 0 &&
		    (pmap_mincore(map->pmap, addr, &pa) & MINCORE_SUPER) != 0) {
			*super = true;
			pi_adv = atop(pagesizes[1]);
		} else {
			/*
			 * We do not test the found page on validity.
			 * Either the page is busy and being paged in,
			 * or it was invalidated.  The first case
			 * should be counted as resident, the second
			 * is not so clear; we do account both.
			 */
			pi_adv = 1;
		}
		*resident_count += pi_adv;
next:;
	}
}
2598
2599/*
2600 * Must be called with the process locked and will return unlocked.
2601 */
2602int
2603kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags)
2604{
2605	vm_map_entry_t entry, tmp_entry;
2606	struct vattr va;
2607	vm_map_t map;
2608	vm_object_t lobj, nobj, obj, tobj;
2609	char *fullpath, *freepath;
2610	struct kinfo_vmentry *kve;
2611	struct ucred *cred;
2612	struct vnode *vp;
2613	struct vmspace *vm;
2614	vm_offset_t addr;
2615	unsigned int last_timestamp;
2616	int error;
2617	bool guard, super;
2618
2619	PROC_LOCK_ASSERT(p, MA_OWNED);
2620
2621	_PHOLD(p);
2622	PROC_UNLOCK(p);
2623	vm = vmspace_acquire_ref(p);
2624	if (vm == NULL) {
2625		PRELE(p);
2626		return (ESRCH);
2627	}
2628	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO);
2629
2630	error = 0;
2631	map = &vm->vm_map;
2632	vm_map_lock_read(map);
2633	VM_MAP_ENTRY_FOREACH(entry, map) {
2634		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2635			continue;
2636
2637		addr = entry->end;
2638		bzero(kve, sizeof(*kve));
2639		obj = entry->object.vm_object;
2640		if (obj != NULL) {
2641			if ((obj->flags & OBJ_ANON) != 0)
2642				kve->kve_obj = (uintptr_t)obj;
2643
2644			for (tobj = obj; tobj != NULL;
2645			    tobj = tobj->backing_object) {
2646				VM_OBJECT_RLOCK(tobj);
2647				kve->kve_offset += tobj->backing_object_offset;
2648				lobj = tobj;
2649			}
2650			if (obj->backing_object == NULL)
2651				kve->kve_private_resident =
2652				    obj->resident_page_count;
2653			kern_proc_vmmap_resident(map, entry,
2654			    &kve->kve_resident, &super);
2655			if (super)
2656				kve->kve_flags |= KVME_FLAG_SUPER;
2657			for (tobj = obj; tobj != NULL; tobj = nobj) {
2658				nobj = tobj->backing_object;
2659				if (tobj != obj && tobj != lobj)
2660					VM_OBJECT_RUNLOCK(tobj);
2661			}
2662		} else {
2663			lobj = NULL;
2664		}
2665
2666		kve->kve_start = entry->start;
2667		kve->kve_end = entry->end;
2668		kve->kve_offset += entry->offset;
2669
2670		if (entry->protection & VM_PROT_READ)
2671			kve->kve_protection |= KVME_PROT_READ;
2672		if (entry->protection & VM_PROT_WRITE)
2673			kve->kve_protection |= KVME_PROT_WRITE;
2674		if (entry->protection & VM_PROT_EXECUTE)
2675			kve->kve_protection |= KVME_PROT_EXEC;
2676
2677		if (entry->eflags & MAP_ENTRY_COW)
2678			kve->kve_flags |= KVME_FLAG_COW;
2679		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
2680			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
2681		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
2682			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
2683		if (entry->eflags & MAP_ENTRY_GROWS_UP)
2684			kve->kve_flags |= KVME_FLAG_GROWS_UP;
2685		if (entry->eflags & MAP_ENTRY_GROWS_DOWN)
2686			kve->kve_flags |= KVME_FLAG_GROWS_DOWN;
2687		if (entry->eflags & MAP_ENTRY_USER_WIRED)
2688			kve->kve_flags |= KVME_FLAG_USER_WIRED;
2689
2690		guard = (entry->eflags & MAP_ENTRY_GUARD) != 0;
2691
2692		last_timestamp = map->timestamp;
2693		vm_map_unlock_read(map);
2694
2695		freepath = NULL;
2696		fullpath = "";
2697		if (lobj != NULL) {
2698			kve->kve_type = vm_object_kvme_type(lobj, &vp);
2699			if (vp != NULL)
2700				vref(vp);
2701			if (lobj != obj)
2702				VM_OBJECT_RUNLOCK(lobj);
2703
2704			kve->kve_ref_count = obj->ref_count;
2705			kve->kve_shadow_count = obj->shadow_count;
2706			VM_OBJECT_RUNLOCK(obj);
2707			if (vp != NULL) {
2708				vn_fullpath(vp, &fullpath, &freepath);
2709				kve->kve_vn_type = vntype_to_kinfo(vp->v_type);
2710				cred = curthread->td_ucred;
2711				vn_lock(vp, LK_SHARED | LK_RETRY);
2712				if (VOP_GETATTR(vp, &va, cred) == 0) {
2713					kve->kve_vn_fileid = va.va_fileid;
2714					kve->kve_vn_fsid = va.va_fsid;
2715					kve->kve_vn_fsid_freebsd11 =
2716					    kve->kve_vn_fsid; /* truncate */
2717					kve->kve_vn_mode =
2718					    MAKEIMODE(va.va_type, va.va_mode);
2719					kve->kve_vn_size = va.va_size;
2720					kve->kve_vn_rdev = va.va_rdev;
2721					kve->kve_vn_rdev_freebsd11 =
2722					    kve->kve_vn_rdev; /* truncate */
2723					kve->kve_status = KF_ATTR_VALID;
2724				}
2725				vput(vp);
2726			}
2727		} else {
2728			kve->kve_type = guard ? KVME_TYPE_GUARD :
2729			    KVME_TYPE_NONE;
2730			kve->kve_ref_count = 0;
2731			kve->kve_shadow_count = 0;
2732		}
2733
2734		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
2735		if (freepath != NULL)
2736			free(freepath, M_TEMP);
2737
2738		/* Pack record size down */
2739		if ((flags & KERN_VMMAP_PACK_KINFO) != 0)
2740			kve->kve_structsize =
2741			    offsetof(struct kinfo_vmentry, kve_path) +
2742			    strlen(kve->kve_path) + 1;
2743		else
2744			kve->kve_structsize = sizeof(*kve);
2745		kve->kve_structsize = roundup(kve->kve_structsize,
2746		    sizeof(uint64_t));
2747
2748		/* Halt filling and truncate rather than exceeding maxlen */
2749		if (maxlen != -1 && maxlen < kve->kve_structsize) {
2750			error = 0;
2751			vm_map_lock_read(map);
2752			break;
2753		} else if (maxlen != -1)
2754			maxlen -= kve->kve_structsize;
2755
2756		if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
2757			error = ENOMEM;
2758		vm_map_lock_read(map);
2759		if (error != 0)
2760			break;
2761		if (last_timestamp != map->timestamp) {
2762			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
2763			entry = tmp_entry;
2764		}
2765	}
2766	vm_map_unlock_read(map);
2767	vmspace_free(vm);
2768	PRELE(p);
2769	free(kve, M_TEMP);
2770	return (error);
2771}
2772
2773static int
2774sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
2775{
2776	struct proc *p;
2777	struct sbuf sb;
2778	u_int namelen;
2779	int error, error2, *name;
2780
2781	namelen = arg2;
2782	if (namelen != 1)
2783		return (EINVAL);
2784
2785	name = (int *)arg1;
2786	sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_vmentry), req);
2787	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
2788	error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
2789	if (error != 0) {
2790		sbuf_delete(&sb);
2791		return (error);
2792	}
2793	error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO);
2794	error2 = sbuf_finish(&sb);
2795	sbuf_delete(&sb);
2796	return (error != 0 ? error : error2);
2797}
2798
#if defined(STACK) || defined(DDB)
/*
 * Sysctl handler (built with STACK or DDB) that copies out one
 * struct kinfo_kstack per thread of the process named by the single MIB
 * argument.  Each record carries the thread id, a state code and the
 * textual stack trace.
 *
 * Returns EINVAL for a wrong argument count, the pget() error if the
 * process cannot be obtained, otherwise the result of the last
 * SYSCTL_OUT().
 */
static int
sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS)
{
	struct kinfo_kstack *kkstp;
	int error, i, *name, numthreads;
	lwpid_t *lwpidarray;
	struct thread *td;
	struct stack *st;
	struct sbuf sb;
	struct proc *p;
	u_int namelen;

	namelen = arg2;
	if (namelen != 1)
		return (EINVAL);

	name = (int *)arg1;
	error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p);
	if (error != 0)
		return (error);

	kkstp = malloc(sizeof(*kkstp), M_TEMP, M_WAITOK);
	st = stack_create(M_WAITOK);

	lwpidarray = NULL;
	PROC_LOCK(p);
	/*
	 * Size the tid array.  The allocation is made with the process
	 * unlocked, so the thread count is rechecked after relocking and
	 * the allocation is redone if the process gained threads meanwhile.
	 */
	do {
		if (lwpidarray != NULL) {
			free(lwpidarray, M_TEMP);
			lwpidarray = NULL;
		}
		numthreads = p->p_numthreads;
		PROC_UNLOCK(p);
		lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP,
		    M_WAITOK | M_ZERO);
		PROC_LOCK(p);
	} while (numthreads < p->p_numthreads);

	/*
	 * XXXRW: During the below loop, execve(2) and countless other sorts
	 * of changes could have taken place.  Should we check to see if the
	 * vmspace has been replaced, or the like, in order to prevent
	 * giving a snapshot that spans, say, execve(2), with some threads
	 * before and some after?  Among other things, the credentials could
	 * have changed, in which case the right to extract debug info might
	 * no longer be assured.
	 */
	/* Snapshot the thread ids while the process is locked. */
	i = 0;
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(i < numthreads,
		    ("sysctl_kern_proc_kstack: numthreads"));
		lwpidarray[i] = td->td_tid;
		i++;
	}
	PROC_UNLOCK(p);
	/* Only the ids actually collected are visited below. */
	numthreads = i;
	for (i = 0; i < numthreads; i++) {
		/* Look each thread up again by id; skip the ones that are gone. */
		td = tdfind(lwpidarray[i], p->p_pid);
		if (td == NULL) {
			continue;
		}
		bzero(kkstp, sizeof(*kkstp));
		/* Fixed-length sbuf writing directly into the record's trace buffer. */
		(void)sbuf_new(&sb, kkstp->kkst_trace,
		    sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN);
		thread_lock(td);
		kkstp->kkst_tid = td->td_tid;
		/* A failed stack_save_td() is reported as a running thread. */
		if (TD_IS_SWAPPED(td))
			kkstp->kkst_state = KKST_STATE_SWAPPED;
		else if (stack_save_td(st, td) == 0)
			kkstp->kkst_state = KKST_STATE_STACKOK;
		else
			kkstp->kkst_state = KKST_STATE_RUNNING;
		thread_unlock(td);
		/*
		 * NOTE(review): this unlock has no matching PROC_LOCK in the
		 * loop; presumably tdfind() returns with the process locked
		 * -- confirm.
		 */
		PROC_UNLOCK(p);
		stack_sbuf_print(&sb, st);
		sbuf_finish(&sb);
		sbuf_delete(&sb);
		error = SYSCTL_OUT(req, kkstp, sizeof(*kkstp));
		if (error)
			break;
	}
	PRELE(p);
	if (lwpidarray != NULL)
		free(lwpidarray, M_TEMP);
	stack_destroy(st);
	free(kkstp, M_TEMP);
	return (error);
}
#endif
2889
2890/*
2891 * This sysctl allows a process to retrieve the full list of groups from
2892 * itself or another process.
2893 */
2894static int
2895sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS)
2896{
2897	pid_t *pidp = (pid_t *)arg1;
2898	unsigned int arglen = arg2;
2899	struct proc *p;
2900	struct ucred *cred;
2901	int error;
2902
2903	if (arglen != 1)
2904		return (EINVAL);
2905	if (*pidp == -1) {	/* -1 means this process */
2906		p = req->td->td_proc;
2907		PROC_LOCK(p);
2908	} else {
2909		error = pget(*pidp, PGET_CANSEE, &p);
2910		if (error != 0)
2911			return (error);
2912	}
2913
2914	cred = crhold(p->p_ucred);
2915	PROC_UNLOCK(p);
2916
2917	error = SYSCTL_OUT(req, cred->cr_groups,
2918	    cred->cr_ngroups * sizeof(gid_t));
2919	crfree(cred);
2920	return (error);
2921}
2922
2923/*
2924 * This sysctl allows a process to retrieve or/and set the resource limit for
2925 * another process.
2926 */
2927static int
2928sysctl_kern_proc_rlimit(SYSCTL_HANDLER_ARGS)
2929{
2930	int *name = (int *)arg1;
2931	u_int namelen = arg2;
2932	struct rlimit rlim;
2933	struct proc *p;
2934	u_int which;
2935	int flags, error;
2936
2937	if (namelen != 2)
2938		return (EINVAL);
2939
2940	which = (u_int)name[1];
2941	if (which >= RLIM_NLIMITS)
2942		return (EINVAL);
2943
2944	if (req->newptr != NULL && req->newlen != sizeof(rlim))
2945		return (EINVAL);
2946
2947	flags = PGET_HOLD | PGET_NOTWEXIT;
2948	if (req->newptr != NULL)
2949		flags |= PGET_CANDEBUG;
2950	else
2951		flags |= PGET_CANSEE;
2952	error = pget((pid_t)name[0], flags, &p);
2953	if (error != 0)
2954		return (error);
2955
2956	/*
2957	 * Retrieve limit.
2958	 */
2959	if (req->oldptr != NULL) {
2960		PROC_LOCK(p);
2961		lim_rlimit_proc(p, which, &rlim);
2962		PROC_UNLOCK(p);
2963	}
2964	error = SYSCTL_OUT(req, &rlim, sizeof(rlim));
2965	if (error != 0)
2966		goto errout;
2967
2968	/*
2969	 * Set limit.
2970	 */
2971	if (req->newptr != NULL) {
2972		error = SYSCTL_IN(req, &rlim, sizeof(rlim));
2973		if (error == 0)
2974			error = kern_proc_setrlimit(curthread, p, which, &rlim);
2975	}
2976
2977errout:
2978	PRELE(p);
2979	return (error);
2980}
2981
2982/*
2983 * This sysctl allows a process to retrieve ps_strings structure location of
2984 * another process.
2985 */
2986static int
2987sysctl_kern_proc_ps_strings(SYSCTL_HANDLER_ARGS)
2988{
2989	int *name = (int *)arg1;
2990	u_int namelen = arg2;
2991	struct proc *p;
2992	vm_offset_t ps_strings;
2993	int error;
2994#ifdef COMPAT_FREEBSD32
2995	uint32_t ps_strings32;
2996#endif
2997
2998	if (namelen != 1)
2999		return (EINVAL);
3000
3001	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
3002	if (error != 0)
3003		return (error);
3004#ifdef COMPAT_FREEBSD32
3005	if ((req->flags & SCTL_MASK32) != 0) {
3006		/*
3007		 * We return 0 if the 32 bit emulation request is for a 64 bit
3008		 * process.
3009		 */
3010		ps_strings32 = SV_PROC_FLAG(p, SV_ILP32) != 0 ?
3011		    PTROUT(PROC_PS_STRINGS(p)) : 0;
3012		PROC_UNLOCK(p);
3013		error = SYSCTL_OUT(req, &ps_strings32, sizeof(ps_strings32));
3014		return (error);
3015	}
3016#endif
3017	ps_strings = PROC_PS_STRINGS(p);
3018	PROC_UNLOCK(p);
3019	error = SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings));
3020	return (error);
3021}
3022
3023/*
3024 * This sysctl allows a process to retrieve umask of another process.
3025 */
3026static int
3027sysctl_kern_proc_umask(SYSCTL_HANDLER_ARGS)
3028{
3029	int *name = (int *)arg1;
3030	u_int namelen = arg2;
3031	struct proc *p;
3032	int error;
3033	u_short cmask;
3034	pid_t pid;
3035
3036	if (namelen != 1)
3037		return (EINVAL);
3038
3039	pid = (pid_t)name[0];
3040	p = curproc;
3041	if (pid == p->p_pid || pid == 0) {
3042		cmask = p->p_pd->pd_cmask;
3043		goto out;
3044	}
3045
3046	error = pget(pid, PGET_WANTREAD, &p);
3047	if (error != 0)
3048		return (error);
3049
3050	cmask = p->p_pd->pd_cmask;
3051	PRELE(p);
3052out:
3053	error = SYSCTL_OUT(req, &cmask, sizeof(cmask));
3054	return (error);
3055}
3056
3057/*
3058 * This sysctl allows a process to set and retrieve binary osreldate of
3059 * another process.
3060 */
3061static int
3062sysctl_kern_proc_osrel(SYSCTL_HANDLER_ARGS)
3063{
3064	int *name = (int *)arg1;
3065	u_int namelen = arg2;
3066	struct proc *p;
3067	int flags, error, osrel;
3068
3069	if (namelen != 1)
3070		return (EINVAL);
3071
3072	if (req->newptr != NULL && req->newlen != sizeof(osrel))
3073		return (EINVAL);
3074
3075	flags = PGET_HOLD | PGET_NOTWEXIT;
3076	if (req->newptr != NULL)
3077		flags |= PGET_CANDEBUG;
3078	else
3079		flags |= PGET_CANSEE;
3080	error = pget((pid_t)name[0], flags, &p);
3081	if (error != 0)
3082		return (error);
3083
3084	error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel));
3085	if (error != 0)
3086		goto errout;
3087
3088	if (req->newptr != NULL) {
3089		error = SYSCTL_IN(req, &osrel, sizeof(osrel));
3090		if (error != 0)
3091			goto errout;
3092		if (osrel < 0) {
3093			error = EINVAL;
3094			goto errout;
3095		}
3096		p->p_osrel = osrel;
3097	}
3098errout:
3099	PRELE(p);
3100	return (error);
3101}
3102
/*
 * Sysctl handler returning the start and end addresses of the signal
 * trampoline of the process named by the single MIB argument, as a
 * struct kinfo_sigtramp (or kinfo_sigtramp32 for SCTL_MASK32 requests).
 *
 * Returns EINVAL for a wrong argument count, the pget() error if the
 * process cannot be obtained, otherwise the result of SYSCTL_OUT().
 */
static int
sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *)arg1;
	u_int namelen = arg2;
	struct proc *p;
	struct kinfo_sigtramp kst;
	const struct sysentvec *sv;
	int error;
#ifdef COMPAT_FREEBSD32
	struct kinfo_sigtramp32 kst32;
#endif

	if (namelen != 1)
		return (EINVAL);

	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
	if (error != 0)
		return (error);
	sv = p->p_sysent;
#ifdef COMPAT_FREEBSD32
	if ((req->flags & SCTL_MASK32) != 0) {
		/* The reply stays zeroed unless the target has SV_ILP32 set. */
		bzero(&kst32, sizeof(kst32));
		if (SV_PROC_FLAG(p, SV_ILP32)) {
			if (PROC_HAS_SHP(p)) {
				/*
				 * With SV_DSO_SIG set, sv_szsigcode itself is
				 * used as the size; otherwise it is
				 * dereferenced to obtain it.
				 */
				kst32.ksigtramp_start = PROC_SIGCODE(p);
				kst32.ksigtramp_end = kst32.ksigtramp_start +
				    ((sv->sv_flags & SV_DSO_SIG) == 0 ?
				    *sv->sv_szsigcode :
				    (uintptr_t)sv->sv_szsigcode);
			} else {
				/* No shared page: the range ends at ps_strings. */
				kst32.ksigtramp_start = PROC_PS_STRINGS(p) -
				    *sv->sv_szsigcode;
				kst32.ksigtramp_end = PROC_PS_STRINGS(p);
			}
		}
		PROC_UNLOCK(p);
		error = SYSCTL_OUT(req, &kst32, sizeof(kst32));
		return (error);
	}
#endif
	bzero(&kst, sizeof(kst));
	if (PROC_HAS_SHP(p)) {
		/*
		 * With SV_DSO_SIG set, sv_szsigcode itself is used as the
		 * size; otherwise it is dereferenced to obtain it.
		 */
		kst.ksigtramp_start = (char *)PROC_SIGCODE(p);
		kst.ksigtramp_end = (char *)kst.ksigtramp_start +
		    ((sv->sv_flags & SV_DSO_SIG) == 0 ? *sv->sv_szsigcode :
		    (uintptr_t)sv->sv_szsigcode);
	} else {
		/* No shared page: the range ends at ps_strings. */
		kst.ksigtramp_start = (char *)PROC_PS_STRINGS(p) -
		    *sv->sv_szsigcode;
		kst.ksigtramp_end = (char *)PROC_PS_STRINGS(p);
	}
	PROC_UNLOCK(p);
	error = SYSCTL_OUT(req, &kst, sizeof(kst));
	return (error);
}
3159
3160static int
3161sysctl_kern_proc_sigfastblk(SYSCTL_HANDLER_ARGS)
3162{
3163	int *name = (int *)arg1;
3164	u_int namelen = arg2;
3165	pid_t pid;
3166	struct proc *p;
3167	struct thread *td1;
3168	uintptr_t addr;
3169#ifdef COMPAT_FREEBSD32
3170	uint32_t addr32;
3171#endif
3172	int error;
3173
3174	if (namelen != 1 || req->newptr != NULL)
3175		return (EINVAL);
3176
3177	pid = (pid_t)name[0];
3178	error = pget(pid, PGET_HOLD | PGET_NOTWEXIT | PGET_CANDEBUG, &p);
3179	if (error != 0)
3180		return (error);
3181
3182	PROC_LOCK(p);
3183#ifdef COMPAT_FREEBSD32
3184	if (SV_CURPROC_FLAG(SV_ILP32)) {
3185		if (!SV_PROC_FLAG(p, SV_ILP32)) {
3186			error = EINVAL;
3187			goto errlocked;
3188		}
3189	}
3190#endif
3191	if (pid <= PID_MAX) {
3192		td1 = FIRST_THREAD_IN_PROC(p);
3193	} else {
3194		FOREACH_THREAD_IN_PROC(p, td1) {
3195			if (td1->td_tid == pid)
3196				break;
3197		}
3198	}
3199	if (td1 == NULL) {
3200		error = ESRCH;
3201		goto errlocked;
3202	}
3203	/*
3204	 * The access to the private thread flags.  It is fine as far
3205	 * as no out-of-thin-air values are read from td_pflags, and
3206	 * usermode read of the td_sigblock_ptr is racy inherently,
3207	 * since target process might have already changed it
3208	 * meantime.
3209	 */
3210	if ((td1->td_pflags & TDP_SIGFASTBLOCK) != 0)
3211		addr = (uintptr_t)td1->td_sigblock_ptr;
3212	else
3213		error = ENOTTY;
3214
3215errlocked:
3216	_PRELE(p);
3217	PROC_UNLOCK(p);
3218	if (error != 0)
3219		return (error);
3220
3221#ifdef COMPAT_FREEBSD32
3222	if (SV_CURPROC_FLAG(SV_ILP32)) {
3223		addr32 = addr;
3224		error = SYSCTL_OUT(req, &addr32, sizeof(addr32));
3225	} else
3226#endif
3227		error = SYSCTL_OUT(req, &addr, sizeof(addr));
3228	return (error);
3229}
3230
3231static int
3232sysctl_kern_proc_vm_layout(SYSCTL_HANDLER_ARGS)
3233{
3234	struct kinfo_vm_layout kvm;
3235	struct proc *p;
3236	struct vmspace *vmspace;
3237	int error, *name;
3238
3239	name = (int *)arg1;
3240	if ((u_int)arg2 != 1)
3241		return (EINVAL);
3242
3243	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
3244	if (error != 0)
3245		return (error);
3246#ifdef COMPAT_FREEBSD32
3247	if (SV_CURPROC_FLAG(SV_ILP32)) {
3248		if (!SV_PROC_FLAG(p, SV_ILP32)) {
3249			PROC_UNLOCK(p);
3250			return (EINVAL);
3251		}
3252	}
3253#endif
3254	vmspace = vmspace_acquire_ref(p);
3255	PROC_UNLOCK(p);
3256
3257	memset(&kvm, 0, sizeof(kvm));
3258	kvm.kvm_min_user_addr = vm_map_min(&vmspace->vm_map);
3259	kvm.kvm_max_user_addr = vm_map_max(&vmspace->vm_map);
3260	kvm.kvm_text_addr = (uintptr_t)vmspace->vm_taddr;
3261	kvm.kvm_text_size = vmspace->vm_tsize;
3262	kvm.kvm_data_addr = (uintptr_t)vmspace->vm_daddr;
3263	kvm.kvm_data_size = vmspace->vm_dsize;
3264	kvm.kvm_stack_addr = (uintptr_t)vmspace->vm_maxsaddr;
3265	kvm.kvm_stack_size = vmspace->vm_ssize;
3266	kvm.kvm_shp_addr = vmspace->vm_shp_base;
3267	kvm.kvm_shp_size = p->p_sysent->sv_shared_page_len;
3268	if ((vmspace->vm_map.flags & MAP_WIREFUTURE) != 0)
3269		kvm.kvm_map_flags |= KMAP_FLAG_WIREFUTURE;
3270	if ((vmspace->vm_map.flags & MAP_ASLR) != 0)
3271		kvm.kvm_map_flags |= KMAP_FLAG_ASLR;
3272	if ((vmspace->vm_map.flags & MAP_ASLR_IGNSTART) != 0)
3273		kvm.kvm_map_flags |= KMAP_FLAG_ASLR_IGNSTART;
3274	if ((vmspace->vm_map.flags & MAP_WXORX) != 0)
3275		kvm.kvm_map_flags |= KMAP_FLAG_WXORX;
3276	if ((vmspace->vm_map.flags & MAP_ASLR_STACK) != 0)
3277		kvm.kvm_map_flags |= KMAP_FLAG_ASLR_STACK;
3278	if (vmspace->vm_shp_base != p->p_sysent->sv_shared_page_base &&
3279	    PROC_HAS_SHP(p))
3280		kvm.kvm_map_flags |= KMAP_FLAG_ASLR_SHARED_PAGE;
3281
3282#ifdef COMPAT_FREEBSD32
3283	if (SV_CURPROC_FLAG(SV_ILP32)) {
3284		struct kinfo_vm_layout32 kvm32;
3285
3286		memset(&kvm32, 0, sizeof(kvm32));
3287		kvm32.kvm_min_user_addr = (uint32_t)kvm.kvm_min_user_addr;
3288		kvm32.kvm_max_user_addr = (uint32_t)kvm.kvm_max_user_addr;
3289		kvm32.kvm_text_addr = (uint32_t)kvm.kvm_text_addr;
3290		kvm32.kvm_text_size = (uint32_t)kvm.kvm_text_size;
3291		kvm32.kvm_data_addr = (uint32_t)kvm.kvm_data_addr;
3292		kvm32.kvm_data_size = (uint32_t)kvm.kvm_data_size;
3293		kvm32.kvm_stack_addr = (uint32_t)kvm.kvm_stack_addr;
3294		kvm32.kvm_stack_size = (uint32_t)kvm.kvm_stack_size;
3295		kvm32.kvm_shp_addr = (uint32_t)kvm.kvm_shp_addr;
3296		kvm32.kvm_shp_size = (uint32_t)kvm.kvm_shp_size;
3297		kvm32.kvm_map_flags = kvm.kvm_map_flags;
3298		error = SYSCTL_OUT(req, &kvm32, sizeof(kvm32));
3299		goto out;
3300	}
3301#endif
3302
3303	error = SYSCTL_OUT(req, &kvm, sizeof(kvm));
3304#ifdef COMPAT_FREEBSD32
3305out:
3306#endif
3307	vmspace_free(vmspace);
3308	return (error);
3309}
3310
/*
 * Declare the kern.proc sysctl node and, beneath it, the "all" entry,
 * which is served by sysctl_kern_proc().
 */
SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE,  0,
    "Process table");

SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT|
	CTLFLAG_MPSAFE, 0, 0, sysctl_kern_proc, "S,proc",
	"Return entire process table");
3317
/*
 * Nodes under kern.proc, each binding a KERN_PROC_* identifier to its
 * handler.  The process-table selectors (gid, pgrp, rgid, sid, tty, uid,
 * ruid, pid, proc) are all served by sysctl_kern_proc(); args, env, auxv,
 * pathname and sv_name have dedicated handlers.
 */
static SYSCTL_NODE(_kern_proc, KERN_PROC_GID, gid, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_RGID, rgid, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_SESSION, sid, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Return process table, no threads");

static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args,
	CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE,
	sysctl_kern_proc_args, "Process argument list");

static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc_env, "Process environment");

static SYSCTL_NODE(_kern_proc, KERN_PROC_AUXV, auxv, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_auxv, "Process ELF auxiliary vector");

static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_pathname, "Process executable path");

static SYSCTL_NODE(_kern_proc, KERN_PROC_SV_NAME, sv_name, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_sv_name,
	"Process syscall vector name (ABI type)");

/*
 * The *_td variants below register the same selectors with
 * KERN_PROC_INC_THREAD OR-ed into the identifier.
 */
static SYSCTL_NODE(_kern_proc, (KERN_PROC_GID | KERN_PROC_INC_THREAD), gid_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_INC_THREAD), pgrp_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_RGID | KERN_PROC_INC_THREAD), rgid_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_SESSION | KERN_PROC_INC_THREAD),
	sid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_INC_THREAD), tty_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_INC_THREAD), uid_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_INC_THREAD), ruid_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc,
	"Return process table, including threads");
3389
/*
 * Remaining per-process nodes under kern.proc, each with a dedicated
 * handler.  ovmmap is registered only with COMPAT_FREEBSD7 and kstack
 * only with STACK or DDB, matching the conditional compilation of their
 * handlers.
 */
#ifdef COMPAT_FREEBSD7
static SYSCTL_NODE(_kern_proc, KERN_PROC_OVMMAP, ovmmap, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_ovmmap, "Old Process vm map entries");
#endif

static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_vmmap, "Process vm map entries");

#if defined(STACK) || defined(DDB)
static SYSCTL_NODE(_kern_proc, KERN_PROC_KSTACK, kstack, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_kstack, "Process kernel stacks");
#endif

static SYSCTL_NODE(_kern_proc, KERN_PROC_GROUPS, groups, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_groups, "Process groups");

static SYSCTL_NODE(_kern_proc, KERN_PROC_RLIMIT, rlimit, CTLFLAG_RW |
	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_rlimit,
	"Process resource limits");

static SYSCTL_NODE(_kern_proc, KERN_PROC_PS_STRINGS, ps_strings, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_ps_strings,
	"Process ps_strings location");

static SYSCTL_NODE(_kern_proc, KERN_PROC_UMASK, umask, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_umask, "Process umask");

static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW |
	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel,
	"Process binary osreldate");

static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp,
	"Process signal trampoline location");

static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGFASTBLK, sigfastblk, CTLFLAG_RD |
	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_sigfastblk,
	"Thread sigfastblock address");

static SYSCTL_NODE(_kern_proc, KERN_PROC_VM_LAYOUT, vm_layout, CTLFLAG_RD |
	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_vm_layout,
	"Process virtual address space layout info");
3432
/*
 * Serializes users of the stop-all-processes service.  It is taken
 * exclusively by stop_all_proc_block() and released by
 * stop_all_proc_unblock(); SX_SYSINIT registers its initialization
 * under the name "sapblk".
 */
static struct sx stop_all_proc_blocker;
SX_SYSINIT(stop_all_proc_blocker, &stop_all_proc_blocker, "sapblk");
3435
3436bool
3437stop_all_proc_block(void)
3438{
3439	return (sx_xlock_sig(&stop_all_proc_blocker) == 0);
3440}
3441
/*
 * Release the exclusive hold on stop_all_proc_blocker that a successful
 * stop_all_proc_block() obtained.
 */
void
stop_all_proc_unblock(void)
{
	sx_xunlock(&stop_all_proc_blocker);
}
3447
/*
 * stop_all_proc() samples this value under allproc_lock before scanning
 * allproc and compares it again after the scan; a difference forces a
 * rescan so that newly forked children are not missed.  Presumably a
 * generation counter bumped whenever a process is added to allproc; the
 * code that modifies it is not in this part of the file (TODO confirm).
 */
int allproc_gen;
3449
3450/*
3451 * stop_all_proc() purpose is to stop all process which have usermode,
3452 * except current process for obvious reasons.  This makes it somewhat
3453 * unreliable when invoked from multithreaded process.  The service
3454 * must not be user-callable anyway.
3455 */
3456void
3457stop_all_proc(void)
3458{
3459	struct proc *cp, *p;
3460	int r, gen;
3461	bool restart, seen_stopped, seen_exiting, stopped_some;
3462
3463	if (!stop_all_proc_block())
3464		return;
3465
3466	cp = curproc;
3467allproc_loop:
3468	sx_xlock(&allproc_lock);
3469	gen = allproc_gen;
3470	seen_exiting = seen_stopped = stopped_some = restart = false;
3471	LIST_REMOVE(cp, p_list);
3472	LIST_INSERT_HEAD(&allproc, cp, p_list);
3473	for (;;) {
3474		p = LIST_NEXT(cp, p_list);
3475		if (p == NULL)
3476			break;
3477		LIST_REMOVE(cp, p_list);
3478		LIST_INSERT_AFTER(p, cp, p_list);
3479		PROC_LOCK(p);
3480		if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP |
3481		    P_STOPPED_SIG)) != 0) {
3482			PROC_UNLOCK(p);
3483			continue;
3484		}
3485		if ((p->p_flag2 & P2_WEXIT) != 0) {
3486			seen_exiting = true;
3487			PROC_UNLOCK(p);
3488			continue;
3489		}
3490		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
3491			/*
3492			 * Stopped processes are tolerated when there
3493			 * are no other processes which might continue
3494			 * them.  P_STOPPED_SINGLE but not
3495			 * P_TOTAL_STOP process still has at least one
3496			 * thread running.
3497			 */
3498			seen_stopped = true;
3499			PROC_UNLOCK(p);
3500			continue;
3501		}
3502		if ((p->p_flag & P_TRACED) != 0) {
3503			/*
3504			 * thread_single() below cannot stop traced p,
3505			 * so skip it.  OTOH, we cannot require
3506			 * restart because debugger might be either
3507			 * already stopped or traced as well.
3508			 */
3509			PROC_UNLOCK(p);
3510			continue;
3511		}
3512		sx_xunlock(&allproc_lock);
3513		_PHOLD(p);
3514		r = thread_single(p, SINGLE_ALLPROC);
3515		if (r != 0)
3516			restart = true;
3517		else
3518			stopped_some = true;
3519		_PRELE(p);
3520		PROC_UNLOCK(p);
3521		sx_xlock(&allproc_lock);
3522	}
3523	/* Catch forked children we did not see in iteration. */
3524	if (gen != allproc_gen)
3525		restart = true;
3526	sx_xunlock(&allproc_lock);
3527	if (restart || stopped_some || seen_exiting || seen_stopped) {
3528		kern_yield(PRI_USER);
3529		goto allproc_loop;
3530	}
3531}
3532
3533void
3534resume_all_proc(void)
3535{
3536	struct proc *cp, *p;
3537
3538	cp = curproc;
3539	sx_xlock(&allproc_lock);
3540again:
3541	LIST_REMOVE(cp, p_list);
3542	LIST_INSERT_HEAD(&allproc, cp, p_list);
3543	for (;;) {
3544		p = LIST_NEXT(cp, p_list);
3545		if (p == NULL)
3546			break;
3547		LIST_REMOVE(cp, p_list);
3548		LIST_INSERT_AFTER(p, cp, p_list);
3549		PROC_LOCK(p);
3550		if ((p->p_flag & P_TOTAL_STOP) != 0) {
3551			sx_xunlock(&allproc_lock);
3552			_PHOLD(p);
3553			thread_single_end(p, SINGLE_ALLPROC);
3554			_PRELE(p);
3555			PROC_UNLOCK(p);
3556			sx_xlock(&allproc_lock);
3557		} else {
3558			PROC_UNLOCK(p);
3559		}
3560	}
3561	/*  Did the loop above missed any stopped process ? */
3562	FOREACH_PROC_IN_SYSTEM(p) {
3563		/* No need for proc lock. */
3564		if ((p->p_flag & P_TOTAL_STOP) != 0)
3565			goto again;
3566	}
3567	sx_xunlock(&allproc_lock);
3568
3569	stop_all_proc_unblock();
3570}
3571
/*
 * Uncomment the define below to compile in the debug.stop_all_proc
 * sysctl, a manual test knob for stop_all_proc()/resume_all_proc().
 */
/* #define	TOTAL_STOP_DEBUG	1 */
#ifdef TOTAL_STOP_DEBUG
/*
 * The sysctl handler clears this and then busy-waits until it becomes
 * non-zero before resuming all processes.  Nothing in the visible code
 * sets it to a non-zero value; presumably it is meant to be changed
 * from a debugger (TODO confirm).
 */
volatile static int ap_resume;
#include <sys/mount.h>
3576
3577static int
3578sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS)
3579{
3580	int error, val;
3581
3582	val = 0;
3583	ap_resume = 0;
3584	error = sysctl_handle_int(oidp, &val, 0, req);
3585	if (error != 0 || req->newptr == NULL)
3586		return (error);
3587	if (val != 0) {
3588		stop_all_proc();
3589		syncer_suspend();
3590		while (ap_resume == 0)
3591			;
3592		syncer_resume();
3593		resume_all_proc();
3594	}
3595	return (0);
3596}
3597
/*
 * debug.stop_all_proc: read-write, MP-safe integer OID served by
 * sysctl_debug_stop_all_proc().  The address of ap_resume is passed as
 * the handler's first argument with its volatile qualifier cast away;
 * the description string is empty.
 */
SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc, CTLTYPE_INT | CTLFLAG_RW |
    CTLFLAG_MPSAFE, __DEVOLATILE(int *, &ap_resume), 0,
    sysctl_debug_stop_all_proc, "I",
    "");
3602#endif
3603