kern_proc.c revision 330897
1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1989, 1991, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/11/sys/kern/kern_proc.c 330897 2018-03-14 03:19:51Z eadler $");
36
37#include "opt_compat.h"
38#include "opt_ddb.h"
39#include "opt_ktrace.h"
40#include "opt_kstack_pages.h"
41#include "opt_stack.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/elf.h>
46#include <sys/eventhandler.h>
47#include <sys/exec.h>
48#include <sys/jail.h>
49#include <sys/kernel.h>
50#include <sys/limits.h>
51#include <sys/lock.h>
52#include <sys/loginclass.h>
53#include <sys/malloc.h>
54#include <sys/mman.h>
55#include <sys/mount.h>
56#include <sys/mutex.h>
57#include <sys/proc.h>
58#include <sys/ptrace.h>
59#include <sys/refcount.h>
60#include <sys/resourcevar.h>
61#include <sys/rwlock.h>
62#include <sys/sbuf.h>
63#include <sys/sysent.h>
64#include <sys/sched.h>
65#include <sys/smp.h>
66#include <sys/stack.h>
67#include <sys/stat.h>
68#include <sys/sysctl.h>
69#include <sys/filedesc.h>
70#include <sys/tty.h>
71#include <sys/signalvar.h>
72#include <sys/sdt.h>
73#include <sys/sx.h>
74#include <sys/user.h>
75#include <sys/vnode.h>
76#include <sys/wait.h>
77
78#ifdef DDB
79#include <ddb/ddb.h>
80#endif
81
82#include <vm/vm.h>
83#include <vm/vm_param.h>
84#include <vm/vm_extern.h>
85#include <vm/pmap.h>
86#include <vm/vm_map.h>
87#include <vm/vm_object.h>
88#include <vm/vm_page.h>
89#include <vm/uma.h>
90
91#ifdef COMPAT_FREEBSD32
92#include <compat/freebsd32/freebsd32.h>
93#include <compat/freebsd32/freebsd32_util.h>
94#endif
95
96SDT_PROVIDER_DEFINE(proc);
97SDT_PROBE_DEFINE4(proc, , ctor, entry, "struct proc *", "int", "void *",
98    "int");
99SDT_PROBE_DEFINE4(proc, , ctor, return, "struct proc *", "int", "void *",
100    "int");
101SDT_PROBE_DEFINE4(proc, , dtor, entry, "struct proc *", "int", "void *",
102    "struct thread *");
103SDT_PROBE_DEFINE3(proc, , dtor, return, "struct proc *", "int", "void *");
104SDT_PROBE_DEFINE3(proc, , init, entry, "struct proc *", "int", "int");
105SDT_PROBE_DEFINE3(proc, , init, return, "struct proc *", "int", "int");
106
107MALLOC_DEFINE(M_PGRP, "pgrp", "process group header");
108MALLOC_DEFINE(M_SESSION, "session", "session header");
109static MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
110MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
111
112static void doenterpgrp(struct proc *, struct pgrp *);
113static void orphanpg(struct pgrp *pg);
114static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp);
115static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp);
116static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp,
117    int preferthread);
118static void pgadjustjobc(struct pgrp *pgrp, int entering);
119static void pgdelete(struct pgrp *);
120static int proc_ctor(void *mem, int size, void *arg, int flags);
121static void proc_dtor(void *mem, int size, void *arg);
122static int proc_init(void *mem, int size, int flags);
123static void proc_fini(void *mem, int size);
124static void pargs_free(struct pargs *pa);
125static struct proc *zpfind_locked(pid_t pid);
126
127/*
128 * Other process lists
129 */
130struct pidhashhead *pidhashtbl;
131u_long pidhash;
132struct pgrphashhead *pgrphashtbl;
133u_long pgrphash;
134struct proclist allproc;
135struct proclist zombproc;
136struct sx __exclusive_cache_line allproc_lock;
137struct sx __exclusive_cache_line proctree_lock;
138struct mtx __exclusive_cache_line ppeers_lock;
139uma_zone_t proc_zone;
140
141/*
142 * The offset of various fields in struct proc and struct thread.
143 * These are used by kernel debuggers to enumerate kernel threads and
144 * processes.
145 */
146const int proc_off_p_pid = offsetof(struct proc, p_pid);
147const int proc_off_p_comm = offsetof(struct proc, p_comm);
148const int proc_off_p_list = offsetof(struct proc, p_list);
149const int proc_off_p_threads = offsetof(struct proc, p_threads);
150const int thread_off_td_tid = offsetof(struct thread, td_tid);
151const int thread_off_td_name = offsetof(struct thread, td_name);
152const int thread_off_td_oncpu = offsetof(struct thread, td_oncpu);
153const int thread_off_td_pcb = offsetof(struct thread, td_pcb);
154const int thread_off_td_plist = offsetof(struct thread, td_plist);
155
156int kstack_pages = KSTACK_PAGES;
157SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0,
158    "Kernel stack size in pages");
159static int vmmap_skip_res_cnt = 0;
160SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW,
161    &vmmap_skip_res_cnt, 0,
162    "Skip calculation of the pages resident count in kern.proc.vmmap");
163
164CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
165#ifdef COMPAT_FREEBSD32
166CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE);
167#endif
168
169/*
170 * Initialize global process hashing structures.
171 */
172void
173procinit(void)
174{
175
176	sx_init(&allproc_lock, "allproc");
177	sx_init(&proctree_lock, "proctree");
178	mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF);
179	LIST_INIT(&allproc);
180	LIST_INIT(&zombproc);
181	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
182	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
183	proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
184	    proc_ctor, proc_dtor, proc_init, proc_fini,
185	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
186	uihashinit();
187}
188
189/*
190 * Prepare a proc for use.
191 */
192static int
193proc_ctor(void *mem, int size, void *arg, int flags)
194{
195	struct proc *p;
196	struct thread *td;
197
198	p = (struct proc *)mem;
199	SDT_PROBE4(proc, , ctor , entry, p, size, arg, flags);
200	EVENTHANDLER_INVOKE(process_ctor, p);
201	SDT_PROBE4(proc, , ctor , return, p, size, arg, flags);
202	td = FIRST_THREAD_IN_PROC(p);
203	if (td != NULL) {
204		/* Make sure all thread constructors are executed */
205		EVENTHANDLER_INVOKE(thread_ctor, td);
206	}
207	return (0);
208}
209
210/*
211 * Reclaim a proc after use.
212 */
213static void
214proc_dtor(void *mem, int size, void *arg)
215{
216	struct proc *p;
217	struct thread *td;
218
219	/* INVARIANTS checks go here */
220	p = (struct proc *)mem;
221	td = FIRST_THREAD_IN_PROC(p);
222	SDT_PROBE4(proc, , dtor, entry, p, size, arg, td);
223	if (td != NULL) {
224#ifdef INVARIANTS
225		KASSERT((p->p_numthreads == 1),
226		    ("bad number of threads in exiting process"));
227		KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr"));
228#endif
229		/* Free all OSD associated to this thread. */
230		osd_thread_exit(td);
231		td_softdep_cleanup(td);
232		MPASS(td->td_su == NULL);
233
234		/* Make sure all thread destructors are executed */
235		EVENTHANDLER_INVOKE(thread_dtor, td);
236	}
237	EVENTHANDLER_INVOKE(process_dtor, p);
238	if (p->p_ksi != NULL)
239		KASSERT(! KSI_ONQ(p->p_ksi), ("SIGCHLD queue"));
240	SDT_PROBE3(proc, , dtor, return, p, size, arg);
241}
242
243/*
244 * Initialize type-stable parts of a proc (when newly created).
245 */
246static int
247proc_init(void *mem, int size, int flags)
248{
249	struct proc *p;
250
251	p = (struct proc *)mem;
252	SDT_PROBE3(proc, , init, entry, p, size, flags);
253	mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW);
254	mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_NEW);
255	mtx_init(&p->p_statmtx, "pstatl", NULL, MTX_SPIN | MTX_NEW);
256	mtx_init(&p->p_itimmtx, "pitiml", NULL, MTX_SPIN | MTX_NEW);
257	mtx_init(&p->p_profmtx, "pprofl", NULL, MTX_SPIN | MTX_NEW);
258	cv_init(&p->p_pwait, "ppwait");
259	cv_init(&p->p_dbgwait, "dbgwait");
260	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
261	EVENTHANDLER_INVOKE(process_init, p);
262	p->p_stats = pstats_alloc();
263	p->p_pgrp = NULL;
264	SDT_PROBE3(proc, , init, return, p, size, flags);
265	return (0);
266}
267
/*
 * UMA fini callback for proc_zone.  Releasing a struct proc back to the
 * system would break type stability, so UMA is expected never to call
 * this; unless "notnow" is defined the function simply panics.
 */
static void
proc_fini(void *mem, int size)
{
#ifndef notnow
	panic("proc reclaimed");
#else
	struct proc *p;

	p = (struct proc *)mem;
	EVENTHANDLER_INVOKE(process_fini, p);
	pstats_free(p->p_stats);
	thread_free(FIRST_THREAD_IN_PROC(p));
	mtx_destroy(&p->p_mtx);
	if (p->p_ksi != NULL)
		ksiginfo_free(p->p_ksi);
#endif
}
289
290/*
291 * Is p an inferior of the current process?
292 */
293int
294inferior(struct proc *p)
295{
296
297	sx_assert(&proctree_lock, SX_LOCKED);
298	PROC_LOCK_ASSERT(p, MA_OWNED);
299	for (; p != curproc; p = proc_realparent(p)) {
300		if (p->p_pid == 0)
301			return (0);
302	}
303	return (1);
304}
305
306struct proc *
307pfind_locked(pid_t pid)
308{
309	struct proc *p;
310
311	sx_assert(&allproc_lock, SX_LOCKED);
312	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
313		if (p->p_pid == pid) {
314			PROC_LOCK(p);
315			if (p->p_state == PRS_NEW) {
316				PROC_UNLOCK(p);
317				p = NULL;
318			}
319			break;
320		}
321	}
322	return (p);
323}
324
325/*
326 * Locate a process by number; return only "live" processes -- i.e., neither
327 * zombies nor newly born but incompletely initialized processes.  By not
328 * returning processes in the PRS_NEW state, we allow callers to avoid
329 * testing for that condition to avoid dereferencing p_ucred, et al.
330 */
331struct proc *
332pfind(pid_t pid)
333{
334	struct proc *p;
335
336	sx_slock(&allproc_lock);
337	p = pfind_locked(pid);
338	sx_sunlock(&allproc_lock);
339	return (p);
340}
341
342static struct proc *
343pfind_tid_locked(pid_t tid)
344{
345	struct proc *p;
346	struct thread *td;
347
348	sx_assert(&allproc_lock, SX_LOCKED);
349	FOREACH_PROC_IN_SYSTEM(p) {
350		PROC_LOCK(p);
351		if (p->p_state == PRS_NEW) {
352			PROC_UNLOCK(p);
353			continue;
354		}
355		FOREACH_THREAD_IN_PROC(p, td) {
356			if (td->td_tid == tid)
357				goto found;
358		}
359		PROC_UNLOCK(p);
360	}
361found:
362	return (p);
363}
364
365/*
366 * Locate a process group by number.
367 * The caller must hold proctree_lock.
368 */
369struct pgrp *
370pgfind(pgid)
371	register pid_t pgid;
372{
373	register struct pgrp *pgrp;
374
375	sx_assert(&proctree_lock, SX_LOCKED);
376
377	LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) {
378		if (pgrp->pg_id == pgid) {
379			PGRP_LOCK(pgrp);
380			return (pgrp);
381		}
382	}
383	return (NULL);
384}
385
386/*
387 * Locate process and do additional manipulations, depending on flags.
388 */
389int
390pget(pid_t pid, int flags, struct proc **pp)
391{
392	struct proc *p;
393	int error;
394
395	sx_slock(&allproc_lock);
396	if (pid <= PID_MAX) {
397		p = pfind_locked(pid);
398		if (p == NULL && (flags & PGET_NOTWEXIT) == 0)
399			p = zpfind_locked(pid);
400	} else if ((flags & PGET_NOTID) == 0) {
401		p = pfind_tid_locked(pid);
402	} else {
403		p = NULL;
404	}
405	sx_sunlock(&allproc_lock);
406	if (p == NULL)
407		return (ESRCH);
408	if ((flags & PGET_CANSEE) != 0) {
409		error = p_cansee(curthread, p);
410		if (error != 0)
411			goto errout;
412	}
413	if ((flags & PGET_CANDEBUG) != 0) {
414		error = p_candebug(curthread, p);
415		if (error != 0)
416			goto errout;
417	}
418	if ((flags & PGET_ISCURRENT) != 0 && curproc != p) {
419		error = EPERM;
420		goto errout;
421	}
422	if ((flags & PGET_NOTWEXIT) != 0 && (p->p_flag & P_WEXIT) != 0) {
423		error = ESRCH;
424		goto errout;
425	}
426	if ((flags & PGET_NOTINEXEC) != 0 && (p->p_flag & P_INEXEC) != 0) {
427		/*
428		 * XXXRW: Not clear ESRCH is the right error during proc
429		 * execve().
430		 */
431		error = ESRCH;
432		goto errout;
433	}
434	if ((flags & PGET_HOLD) != 0) {
435		_PHOLD(p);
436		PROC_UNLOCK(p);
437	}
438	*pp = p;
439	return (0);
440errout:
441	PROC_UNLOCK(p);
442	return (error);
443}
444
445/*
446 * Create a new process group.
447 * pgid must be equal to the pid of p.
448 * Begin a new session if required.
449 */
450int
451enterpgrp(p, pgid, pgrp, sess)
452	register struct proc *p;
453	pid_t pgid;
454	struct pgrp *pgrp;
455	struct session *sess;
456{
457
458	sx_assert(&proctree_lock, SX_XLOCKED);
459
460	KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL"));
461	KASSERT(p->p_pid == pgid,
462	    ("enterpgrp: new pgrp and pid != pgid"));
463	KASSERT(pgfind(pgid) == NULL,
464	    ("enterpgrp: pgrp with pgid exists"));
465	KASSERT(!SESS_LEADER(p),
466	    ("enterpgrp: session leader attempted setpgrp"));
467
468	mtx_init(&pgrp->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
469
470	if (sess != NULL) {
471		/*
472		 * new session
473		 */
474		mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF);
475		PROC_LOCK(p);
476		p->p_flag &= ~P_CONTROLT;
477		PROC_UNLOCK(p);
478		PGRP_LOCK(pgrp);
479		sess->s_leader = p;
480		sess->s_sid = p->p_pid;
481		refcount_init(&sess->s_count, 1);
482		sess->s_ttyvp = NULL;
483		sess->s_ttydp = NULL;
484		sess->s_ttyp = NULL;
485		bcopy(p->p_session->s_login, sess->s_login,
486			    sizeof(sess->s_login));
487		pgrp->pg_session = sess;
488		KASSERT(p == curproc,
489		    ("enterpgrp: mksession and p != curproc"));
490	} else {
491		pgrp->pg_session = p->p_session;
492		sess_hold(pgrp->pg_session);
493		PGRP_LOCK(pgrp);
494	}
495	pgrp->pg_id = pgid;
496	LIST_INIT(&pgrp->pg_members);
497
498	/*
499	 * As we have an exclusive lock of proctree_lock,
500	 * this should not deadlock.
501	 */
502	LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
503	pgrp->pg_jobc = 0;
504	SLIST_INIT(&pgrp->pg_sigiolst);
505	PGRP_UNLOCK(pgrp);
506
507	doenterpgrp(p, pgrp);
508
509	return (0);
510}
511
512/*
513 * Move p to an existing process group
514 */
515int
516enterthispgrp(p, pgrp)
517	register struct proc *p;
518	struct pgrp *pgrp;
519{
520
521	sx_assert(&proctree_lock, SX_XLOCKED);
522	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
523	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
524	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
525	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
526	KASSERT(pgrp->pg_session == p->p_session,
527		("%s: pgrp's session %p, p->p_session %p.\n",
528		__func__,
529		pgrp->pg_session,
530		p->p_session));
531	KASSERT(pgrp != p->p_pgrp,
532		("%s: p belongs to pgrp.", __func__));
533
534	doenterpgrp(p, pgrp);
535
536	return (0);
537}
538
539/*
540 * Move p to a process group
541 */
542static void
543doenterpgrp(p, pgrp)
544	struct proc *p;
545	struct pgrp *pgrp;
546{
547	struct pgrp *savepgrp;
548
549	sx_assert(&proctree_lock, SX_XLOCKED);
550	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
551	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
552	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
553	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
554
555	savepgrp = p->p_pgrp;
556
557	/*
558	 * Adjust eligibility of affected pgrps to participate in job control.
559	 * Increment eligibility counts before decrementing, otherwise we
560	 * could reach 0 spuriously during the first call.
561	 */
562	fixjobc(p, pgrp, 1);
563	fixjobc(p, p->p_pgrp, 0);
564
565	PGRP_LOCK(pgrp);
566	PGRP_LOCK(savepgrp);
567	PROC_LOCK(p);
568	LIST_REMOVE(p, p_pglist);
569	p->p_pgrp = pgrp;
570	PROC_UNLOCK(p);
571	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
572	PGRP_UNLOCK(savepgrp);
573	PGRP_UNLOCK(pgrp);
574	if (LIST_EMPTY(&savepgrp->pg_members))
575		pgdelete(savepgrp);
576}
577
578/*
579 * remove process from process group
580 */
581int
582leavepgrp(p)
583	register struct proc *p;
584{
585	struct pgrp *savepgrp;
586
587	sx_assert(&proctree_lock, SX_XLOCKED);
588	savepgrp = p->p_pgrp;
589	PGRP_LOCK(savepgrp);
590	PROC_LOCK(p);
591	LIST_REMOVE(p, p_pglist);
592	p->p_pgrp = NULL;
593	PROC_UNLOCK(p);
594	PGRP_UNLOCK(savepgrp);
595	if (LIST_EMPTY(&savepgrp->pg_members))
596		pgdelete(savepgrp);
597	return (0);
598}
599
600/*
601 * delete a process group
602 */
603static void
604pgdelete(pgrp)
605	register struct pgrp *pgrp;
606{
607	struct session *savesess;
608	struct tty *tp;
609
610	sx_assert(&proctree_lock, SX_XLOCKED);
611	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
612	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
613
614	/*
615	 * Reset any sigio structures pointing to us as a result of
616	 * F_SETOWN with our pgid.
617	 */
618	funsetownlst(&pgrp->pg_sigiolst);
619
620	PGRP_LOCK(pgrp);
621	tp = pgrp->pg_session->s_ttyp;
622	LIST_REMOVE(pgrp, pg_hash);
623	savesess = pgrp->pg_session;
624	PGRP_UNLOCK(pgrp);
625
626	/* Remove the reference to the pgrp before deallocating it. */
627	if (tp != NULL) {
628		tty_lock(tp);
629		tty_rel_pgrp(tp, pgrp);
630	}
631
632	mtx_destroy(&pgrp->pg_mtx);
633	free(pgrp, M_PGRP);
634	sess_release(savesess);
635}
636
637static void
638pgadjustjobc(pgrp, entering)
639	struct pgrp *pgrp;
640	int entering;
641{
642
643	PGRP_LOCK(pgrp);
644	if (entering)
645		pgrp->pg_jobc++;
646	else {
647		--pgrp->pg_jobc;
648		if (pgrp->pg_jobc == 0)
649			orphanpg(pgrp);
650	}
651	PGRP_UNLOCK(pgrp);
652}
653
654/*
655 * Adjust pgrp jobc counters when specified process changes process group.
656 * We count the number of processes in each process group that "qualify"
657 * the group for terminal job control (those with a parent in a different
658 * process group of the same session).  If that count reaches zero, the
659 * process group becomes orphaned.  Check both the specified process'
660 * process group and that of its children.
661 * entering == 0 => p is leaving specified group.
662 * entering == 1 => p is entering specified group.
663 */
664void
665fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
666{
667	struct pgrp *hispgrp;
668	struct session *mysession;
669	struct proc *q;
670
671	sx_assert(&proctree_lock, SX_LOCKED);
672	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
673	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
674	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
675
676	/*
677	 * Check p's parent to see whether p qualifies its own process
678	 * group; if so, adjust count for p's process group.
679	 */
680	mysession = pgrp->pg_session;
681	if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
682	    hispgrp->pg_session == mysession)
683		pgadjustjobc(pgrp, entering);
684
685	/*
686	 * Check this process' children to see whether they qualify
687	 * their process groups; if so, adjust counts for children's
688	 * process groups.
689	 */
690	LIST_FOREACH(q, &p->p_children, p_sibling) {
691		hispgrp = q->p_pgrp;
692		if (hispgrp == pgrp ||
693		    hispgrp->pg_session != mysession)
694			continue;
695		if (q->p_state == PRS_ZOMBIE)
696			continue;
697		pgadjustjobc(hispgrp, entering);
698	}
699}
700
701void
702killjobc(void)
703{
704	struct session *sp;
705	struct tty *tp;
706	struct proc *p;
707	struct vnode *ttyvp;
708
709	p = curproc;
710	MPASS(p->p_flag & P_WEXIT);
711	/*
712	 * Do a quick check to see if there is anything to do with the
713	 * proctree_lock held. pgrp and LIST_EMPTY checks are for fixjobc().
714	 */
715	PROC_LOCK(p);
716	if (!SESS_LEADER(p) &&
717	    (p->p_pgrp == p->p_pptr->p_pgrp) &&
718	    LIST_EMPTY(&p->p_children)) {
719		PROC_UNLOCK(p);
720		return;
721	}
722	PROC_UNLOCK(p);
723
724	sx_xlock(&proctree_lock);
725	if (SESS_LEADER(p)) {
726		sp = p->p_session;
727
728		/*
729		 * s_ttyp is not zero'd; we use this to indicate that
730		 * the session once had a controlling terminal. (for
731		 * logging and informational purposes)
732		 */
733		SESS_LOCK(sp);
734		ttyvp = sp->s_ttyvp;
735		tp = sp->s_ttyp;
736		sp->s_ttyvp = NULL;
737		sp->s_ttydp = NULL;
738		sp->s_leader = NULL;
739		SESS_UNLOCK(sp);
740
741		/*
742		 * Signal foreground pgrp and revoke access to
743		 * controlling terminal if it has not been revoked
744		 * already.
745		 *
746		 * Because the TTY may have been revoked in the mean
747		 * time and could already have a new session associated
748		 * with it, make sure we don't send a SIGHUP to a
749		 * foreground process group that does not belong to this
750		 * session.
751		 */
752
753		if (tp != NULL) {
754			tty_lock(tp);
755			if (tp->t_session == sp)
756				tty_signal_pgrp(tp, SIGHUP);
757			tty_unlock(tp);
758		}
759
760		if (ttyvp != NULL) {
761			sx_xunlock(&proctree_lock);
762			if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) {
763				VOP_REVOKE(ttyvp, REVOKEALL);
764				VOP_UNLOCK(ttyvp, 0);
765			}
766			vrele(ttyvp);
767			sx_xlock(&proctree_lock);
768		}
769	}
770	fixjobc(p, p->p_pgrp, 0);
771	sx_xunlock(&proctree_lock);
772}
773
774/*
775 * A process group has become orphaned;
776 * if there are any stopped processes in the group,
777 * hang-up all process in that group.
778 */
779static void
780orphanpg(pg)
781	struct pgrp *pg;
782{
783	register struct proc *p;
784
785	PGRP_LOCK_ASSERT(pg, MA_OWNED);
786
787	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
788		PROC_LOCK(p);
789		if (P_SHOULDSTOP(p) == P_STOPPED_SIG) {
790			PROC_UNLOCK(p);
791			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
792				PROC_LOCK(p);
793				kern_psignal(p, SIGHUP);
794				kern_psignal(p, SIGCONT);
795				PROC_UNLOCK(p);
796			}
797			return;
798		}
799		PROC_UNLOCK(p);
800	}
801}
802
803void
804sess_hold(struct session *s)
805{
806
807	refcount_acquire(&s->s_count);
808}
809
810void
811sess_release(struct session *s)
812{
813
814	if (refcount_release(&s->s_count)) {
815		if (s->s_ttyp != NULL) {
816			tty_lock(s->s_ttyp);
817			tty_rel_sess(s->s_ttyp, s);
818		}
819		mtx_destroy(&s->s_mtx);
820		free(s, M_SESSION);
821	}
822}
823
#ifdef DDB

/*
 * DDB "show pgrpdump": print every non-empty bucket of the process
 * group hash, each group in it, and each member process of the group.
 */
DB_SHOW_COMMAND(pgrpdump, pgrpdump)
{
	struct pgrp *pg;
	struct proc *member;
	int i;

	for (i = 0; i <= pgrphash; i++) {
		if (LIST_EMPTY(&pgrphashtbl[i]))
			continue;

		printf("\tindx %d\n", i);
		LIST_FOREACH(pg, &pgrphashtbl[i], pg_hash) {
			printf(
			    "\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n",
			    (void *)pg, (long)pg->pg_id,
			    (void *)pg->pg_session,
			    pg->pg_session->s_count,
			    (void *)LIST_FIRST(&pg->pg_members));
			LIST_FOREACH(member, &pg->pg_members, p_pglist) {
				printf("\t\tpid %ld addr %p pgrp %p\n",
				    (long)member->p_pid, (void *)member,
				    (void *)member->p_pgrp);
			}
		}
	}
}
#endif /* DDB */
852
853/*
854 * Calculate the kinfo_proc members which contain process-wide
855 * informations.
856 * Must be called with the target process locked.
857 */
858static void
859fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp)
860{
861	struct thread *td;
862
863	PROC_LOCK_ASSERT(p, MA_OWNED);
864
865	kp->ki_estcpu = 0;
866	kp->ki_pctcpu = 0;
867	FOREACH_THREAD_IN_PROC(p, td) {
868		thread_lock(td);
869		kp->ki_pctcpu += sched_pctcpu(td);
870		kp->ki_estcpu += sched_estcpu(td);
871		thread_unlock(td);
872	}
873}
874
875/*
876 * Clear kinfo_proc and fill in any information that is common
877 * to all threads in the process.
878 * Must be called with the target process locked.
879 */
880static void
881fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
882{
883	struct thread *td0;
884	struct tty *tp;
885	struct session *sp;
886	struct ucred *cred;
887	struct sigacts *ps;
888	struct timeval boottime;
889
890	/* For proc_realparent. */
891	sx_assert(&proctree_lock, SX_LOCKED);
892	PROC_LOCK_ASSERT(p, MA_OWNED);
893	bzero(kp, sizeof(*kp));
894
895	kp->ki_structsize = sizeof(*kp);
896	kp->ki_paddr = p;
897	kp->ki_addr =/* p->p_addr; */0; /* XXX */
898	kp->ki_args = p->p_args;
899	kp->ki_textvp = p->p_textvp;
900#ifdef KTRACE
901	kp->ki_tracep = p->p_tracevp;
902	kp->ki_traceflag = p->p_traceflag;
903#endif
904	kp->ki_fd = p->p_fd;
905	kp->ki_vmspace = p->p_vmspace;
906	kp->ki_flag = p->p_flag;
907	kp->ki_flag2 = p->p_flag2;
908	cred = p->p_ucred;
909	if (cred) {
910		kp->ki_uid = cred->cr_uid;
911		kp->ki_ruid = cred->cr_ruid;
912		kp->ki_svuid = cred->cr_svuid;
913		kp->ki_cr_flags = 0;
914		if (cred->cr_flags & CRED_FLAG_CAPMODE)
915			kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE;
916		/* XXX bde doesn't like KI_NGROUPS */
917		if (cred->cr_ngroups > KI_NGROUPS) {
918			kp->ki_ngroups = KI_NGROUPS;
919			kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
920		} else
921			kp->ki_ngroups = cred->cr_ngroups;
922		bcopy(cred->cr_groups, kp->ki_groups,
923		    kp->ki_ngroups * sizeof(gid_t));
924		kp->ki_rgid = cred->cr_rgid;
925		kp->ki_svgid = cred->cr_svgid;
926		/* If jailed(cred), emulate the old P_JAILED flag. */
927		if (jailed(cred)) {
928			kp->ki_flag |= P_JAILED;
929			/* If inside the jail, use 0 as a jail ID. */
930			if (cred->cr_prison != curthread->td_ucred->cr_prison)
931				kp->ki_jid = cred->cr_prison->pr_id;
932		}
933		strlcpy(kp->ki_loginclass, cred->cr_loginclass->lc_name,
934		    sizeof(kp->ki_loginclass));
935	}
936	ps = p->p_sigacts;
937	if (ps) {
938		mtx_lock(&ps->ps_mtx);
939		kp->ki_sigignore = ps->ps_sigignore;
940		kp->ki_sigcatch = ps->ps_sigcatch;
941		mtx_unlock(&ps->ps_mtx);
942	}
943	if (p->p_state != PRS_NEW &&
944	    p->p_state != PRS_ZOMBIE &&
945	    p->p_vmspace != NULL) {
946		struct vmspace *vm = p->p_vmspace;
947
948		kp->ki_size = vm->vm_map.size;
949		kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/
950		FOREACH_THREAD_IN_PROC(p, td0) {
951			if (!TD_IS_SWAPPED(td0))
952				kp->ki_rssize += td0->td_kstack_pages;
953		}
954		kp->ki_swrss = vm->vm_swrss;
955		kp->ki_tsize = vm->vm_tsize;
956		kp->ki_dsize = vm->vm_dsize;
957		kp->ki_ssize = vm->vm_ssize;
958	} else if (p->p_state == PRS_ZOMBIE)
959		kp->ki_stat = SZOMB;
960	if (kp->ki_flag & P_INMEM)
961		kp->ki_sflag = PS_INMEM;
962	else
963		kp->ki_sflag = 0;
964	/* Calculate legacy swtime as seconds since 'swtick'. */
965	kp->ki_swtime = (ticks - p->p_swtick) / hz;
966	kp->ki_pid = p->p_pid;
967	kp->ki_nice = p->p_nice;
968	kp->ki_fibnum = p->p_fibnum;
969	kp->ki_start = p->p_stats->p_start;
970	getboottime(&boottime);
971	timevaladd(&kp->ki_start, &boottime);
972	PROC_STATLOCK(p);
973	rufetch(p, &kp->ki_rusage);
974	kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
975	calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime);
976	PROC_STATUNLOCK(p);
977	calccru(p, &kp->ki_childutime, &kp->ki_childstime);
978	/* Some callers want child times in a single value. */
979	kp->ki_childtime = kp->ki_childstime;
980	timevaladd(&kp->ki_childtime, &kp->ki_childutime);
981
982	FOREACH_THREAD_IN_PROC(p, td0)
983		kp->ki_cow += td0->td_cow;
984
985	tp = NULL;
986	if (p->p_pgrp) {
987		kp->ki_pgid = p->p_pgrp->pg_id;
988		kp->ki_jobc = p->p_pgrp->pg_jobc;
989		sp = p->p_pgrp->pg_session;
990
991		if (sp != NULL) {
992			kp->ki_sid = sp->s_sid;
993			SESS_LOCK(sp);
994			strlcpy(kp->ki_login, sp->s_login,
995			    sizeof(kp->ki_login));
996			if (sp->s_ttyvp)
997				kp->ki_kiflag |= KI_CTTY;
998			if (SESS_LEADER(p))
999				kp->ki_kiflag |= KI_SLEADER;
1000			/* XXX proctree_lock */
1001			tp = sp->s_ttyp;
1002			SESS_UNLOCK(sp);
1003		}
1004	}
1005	if ((p->p_flag & P_CONTROLT) && tp != NULL) {
1006		kp->ki_tdev = tty_udev(tp);
1007		kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
1008		if (tp->t_session)
1009			kp->ki_tsid = tp->t_session->s_sid;
1010	} else
1011		kp->ki_tdev = NODEV;
1012	if (p->p_comm[0] != '\0')
1013		strlcpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm));
1014	if (p->p_sysent && p->p_sysent->sv_name != NULL &&
1015	    p->p_sysent->sv_name[0] != '\0')
1016		strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul));
1017	kp->ki_siglist = p->p_siglist;
1018	kp->ki_xstat = KW_EXITCODE(p->p_xexit, p->p_xsig);
1019	kp->ki_acflag = p->p_acflag;
1020	kp->ki_lock = p->p_lock;
1021	if (p->p_pptr) {
1022		kp->ki_ppid = proc_realparent(p)->p_pid;
1023		if (p->p_flag & P_TRACED)
1024			kp->ki_tracer = p->p_pptr->p_pid;
1025	}
1026}
1027
1028/*
1029 * Fill in information that is thread specific.  Must be called with
1030 * target process locked.  If 'preferthread' is set, overwrite certain
1031 * process-related fields that are maintained for both threads and
1032 * processes.
1033 */
1034static void
1035fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread)
1036{
1037	struct proc *p;
1038
1039	p = td->td_proc;
1040	kp->ki_tdaddr = td;
1041	PROC_LOCK_ASSERT(p, MA_OWNED);
1042
1043	if (preferthread)
1044		PROC_STATLOCK(p);
1045	thread_lock(td);
1046	if (td->td_wmesg != NULL)
1047		strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg));
1048	else
1049		bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg));
1050	if (strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname)) >=
1051	    sizeof(kp->ki_tdname)) {
1052		strlcpy(kp->ki_moretdname,
1053		    td->td_name + sizeof(kp->ki_tdname) - 1,
1054		    sizeof(kp->ki_moretdname));
1055	} else {
1056		bzero(kp->ki_moretdname, sizeof(kp->ki_moretdname));
1057	}
1058	if (TD_ON_LOCK(td)) {
1059		kp->ki_kiflag |= KI_LOCKBLOCK;
1060		strlcpy(kp->ki_lockname, td->td_lockname,
1061		    sizeof(kp->ki_lockname));
1062	} else {
1063		kp->ki_kiflag &= ~KI_LOCKBLOCK;
1064		bzero(kp->ki_lockname, sizeof(kp->ki_lockname));
1065	}
1066
1067	if (p->p_state == PRS_NORMAL) { /* approximate. */
1068		if (TD_ON_RUNQ(td) ||
1069		    TD_CAN_RUN(td) ||
1070		    TD_IS_RUNNING(td)) {
1071			kp->ki_stat = SRUN;
1072		} else if (P_SHOULDSTOP(p)) {
1073			kp->ki_stat = SSTOP;
1074		} else if (TD_IS_SLEEPING(td)) {
1075			kp->ki_stat = SSLEEP;
1076		} else if (TD_ON_LOCK(td)) {
1077			kp->ki_stat = SLOCK;
1078		} else {
1079			kp->ki_stat = SWAIT;
1080		}
1081	} else if (p->p_state == PRS_ZOMBIE) {
1082		kp->ki_stat = SZOMB;
1083	} else {
1084		kp->ki_stat = SIDL;
1085	}
1086
1087	/* Things in the thread */
1088	kp->ki_wchan = td->td_wchan;
1089	kp->ki_pri.pri_level = td->td_priority;
1090	kp->ki_pri.pri_native = td->td_base_pri;
1091
1092	/*
1093	 * Note: legacy fields; clamp at the old NOCPU value and/or
1094	 * the maximum u_char CPU value.
1095	 */
1096	if (td->td_lastcpu == NOCPU)
1097		kp->ki_lastcpu_old = NOCPU_OLD;
1098	else if (td->td_lastcpu > MAXCPU_OLD)
1099		kp->ki_lastcpu_old = MAXCPU_OLD;
1100	else
1101		kp->ki_lastcpu_old = td->td_lastcpu;
1102
1103	if (td->td_oncpu == NOCPU)
1104		kp->ki_oncpu_old = NOCPU_OLD;
1105	else if (td->td_oncpu > MAXCPU_OLD)
1106		kp->ki_oncpu_old = MAXCPU_OLD;
1107	else
1108		kp->ki_oncpu_old = td->td_oncpu;
1109
1110	kp->ki_lastcpu = td->td_lastcpu;
1111	kp->ki_oncpu = td->td_oncpu;
1112	kp->ki_tdflags = td->td_flags;
1113	kp->ki_tid = td->td_tid;
1114	kp->ki_numthreads = p->p_numthreads;
1115	kp->ki_pcb = td->td_pcb;
1116	kp->ki_kstack = (void *)td->td_kstack;
1117	kp->ki_slptime = (ticks - td->td_slptick) / hz;
1118	kp->ki_pri.pri_class = td->td_pri_class;
1119	kp->ki_pri.pri_user = td->td_user_pri;
1120
1121	if (preferthread) {
1122		rufetchtd(td, &kp->ki_rusage);
1123		kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime);
1124		kp->ki_pctcpu = sched_pctcpu(td);
1125		kp->ki_estcpu = sched_estcpu(td);
1126		kp->ki_cow = td->td_cow;
1127	}
1128
1129	/* We can't get this anymore but ps etc never used it anyway. */
1130	kp->ki_rqindex = 0;
1131
1132	if (preferthread)
1133		kp->ki_siglist = td->td_siglist;
1134	kp->ki_sigmask = td->td_sigmask;
1135	thread_unlock(td);
1136	if (preferthread)
1137		PROC_STATUNLOCK(p);
1138}
1139
1140/*
1141 * Fill in a kinfo_proc structure for the specified process.
1142 * Must be called with the target process locked.
1143 */
1144void
1145fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp)
1146{
1147
1148	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
1149
1150	fill_kinfo_proc_only(p, kp);
1151	fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp, 0);
1152	fill_kinfo_aggregate(p, kp);
1153}
1154
1155struct pstats *
1156pstats_alloc(void)
1157{
1158
1159	return (malloc(sizeof(struct pstats), M_SUBPROC, M_ZERO|M_WAITOK));
1160}
1161
1162/*
1163 * Copy parts of p_stats; zero the rest of p_stats (statistics).
1164 */
1165void
1166pstats_fork(struct pstats *src, struct pstats *dst)
1167{
1168
1169	bzero(&dst->pstat_startzero,
1170	    __rangeof(struct pstats, pstat_startzero, pstat_endzero));
1171	bcopy(&src->pstat_startcopy, &dst->pstat_startcopy,
1172	    __rangeof(struct pstats, pstat_startcopy, pstat_endcopy));
1173}
1174
1175void
1176pstats_free(struct pstats *ps)
1177{
1178
1179	free(ps, M_SUBPROC);
1180}
1181
1182static struct proc *
1183zpfind_locked(pid_t pid)
1184{
1185	struct proc *p;
1186
1187	sx_assert(&allproc_lock, SX_LOCKED);
1188	LIST_FOREACH(p, &zombproc, p_list) {
1189		if (p->p_pid == pid) {
1190			PROC_LOCK(p);
1191			break;
1192		}
1193	}
1194	return (p);
1195}
1196
1197/*
1198 * Locate a zombie process by number
1199 */
1200struct proc *
1201zpfind(pid_t pid)
1202{
1203	struct proc *p;
1204
1205	sx_slock(&allproc_lock);
1206	p = zpfind_locked(pid);
1207	sx_sunlock(&allproc_lock);
1208	return (p);
1209}
1210
1211#ifdef COMPAT_FREEBSD32
1212
/*
 * Squeeze a pointer into 32 bits.  A value that does not fit (anything
 * above UINT_MAX) is reported as zero instead of being truncated.  This is
 * typically used when copying out kernel addresses, which is why replacing
 * the value by zero is acceptable.
 */
static inline uint32_t
ptr32_trim(void *ptr)
{
	uintptr_t addr;

	addr = (uintptr_t)ptr;
	if (addr > UINT_MAX)
		return (0);
	return ((uint32_t)addr);
}

/* Copy pointer field fld from src to dst, passing it through ptr32_trim(). */
#define PTRTRIM_CP(src,dst,fld) \
	do { (dst).fld = ptr32_trim((src).fld); } while (0)
1228
1229static void
1230freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32)
1231{
1232	int i;
1233
1234	bzero(ki32, sizeof(struct kinfo_proc32));
1235	ki32->ki_structsize = sizeof(struct kinfo_proc32);
1236	CP(*ki, *ki32, ki_layout);
1237	PTRTRIM_CP(*ki, *ki32, ki_args);
1238	PTRTRIM_CP(*ki, *ki32, ki_paddr);
1239	PTRTRIM_CP(*ki, *ki32, ki_addr);
1240	PTRTRIM_CP(*ki, *ki32, ki_tracep);
1241	PTRTRIM_CP(*ki, *ki32, ki_textvp);
1242	PTRTRIM_CP(*ki, *ki32, ki_fd);
1243	PTRTRIM_CP(*ki, *ki32, ki_vmspace);
1244	PTRTRIM_CP(*ki, *ki32, ki_wchan);
1245	CP(*ki, *ki32, ki_pid);
1246	CP(*ki, *ki32, ki_ppid);
1247	CP(*ki, *ki32, ki_pgid);
1248	CP(*ki, *ki32, ki_tpgid);
1249	CP(*ki, *ki32, ki_sid);
1250	CP(*ki, *ki32, ki_tsid);
1251	CP(*ki, *ki32, ki_jobc);
1252	CP(*ki, *ki32, ki_tdev);
1253	CP(*ki, *ki32, ki_siglist);
1254	CP(*ki, *ki32, ki_sigmask);
1255	CP(*ki, *ki32, ki_sigignore);
1256	CP(*ki, *ki32, ki_sigcatch);
1257	CP(*ki, *ki32, ki_uid);
1258	CP(*ki, *ki32, ki_ruid);
1259	CP(*ki, *ki32, ki_svuid);
1260	CP(*ki, *ki32, ki_rgid);
1261	CP(*ki, *ki32, ki_svgid);
1262	CP(*ki, *ki32, ki_ngroups);
1263	for (i = 0; i < KI_NGROUPS; i++)
1264		CP(*ki, *ki32, ki_groups[i]);
1265	CP(*ki, *ki32, ki_size);
1266	CP(*ki, *ki32, ki_rssize);
1267	CP(*ki, *ki32, ki_swrss);
1268	CP(*ki, *ki32, ki_tsize);
1269	CP(*ki, *ki32, ki_dsize);
1270	CP(*ki, *ki32, ki_ssize);
1271	CP(*ki, *ki32, ki_xstat);
1272	CP(*ki, *ki32, ki_acflag);
1273	CP(*ki, *ki32, ki_pctcpu);
1274	CP(*ki, *ki32, ki_estcpu);
1275	CP(*ki, *ki32, ki_slptime);
1276	CP(*ki, *ki32, ki_swtime);
1277	CP(*ki, *ki32, ki_cow);
1278	CP(*ki, *ki32, ki_runtime);
1279	TV_CP(*ki, *ki32, ki_start);
1280	TV_CP(*ki, *ki32, ki_childtime);
1281	CP(*ki, *ki32, ki_flag);
1282	CP(*ki, *ki32, ki_kiflag);
1283	CP(*ki, *ki32, ki_traceflag);
1284	CP(*ki, *ki32, ki_stat);
1285	CP(*ki, *ki32, ki_nice);
1286	CP(*ki, *ki32, ki_lock);
1287	CP(*ki, *ki32, ki_rqindex);
1288	CP(*ki, *ki32, ki_oncpu);
1289	CP(*ki, *ki32, ki_lastcpu);
1290
1291	/* XXX TODO: wrap cpu value as appropriate */
1292	CP(*ki, *ki32, ki_oncpu_old);
1293	CP(*ki, *ki32, ki_lastcpu_old);
1294
1295	bcopy(ki->ki_tdname, ki32->ki_tdname, TDNAMLEN + 1);
1296	bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1);
1297	bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1);
1298	bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1);
1299	bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1);
1300	bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1);
1301	bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1);
1302	bcopy(ki->ki_moretdname, ki32->ki_moretdname, MAXCOMLEN - TDNAMLEN + 1);
1303	CP(*ki, *ki32, ki_tracer);
1304	CP(*ki, *ki32, ki_flag2);
1305	CP(*ki, *ki32, ki_fibnum);
1306	CP(*ki, *ki32, ki_cr_flags);
1307	CP(*ki, *ki32, ki_jid);
1308	CP(*ki, *ki32, ki_numthreads);
1309	CP(*ki, *ki32, ki_tid);
1310	CP(*ki, *ki32, ki_pri);
1311	freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage);
1312	freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch);
1313	PTRTRIM_CP(*ki, *ki32, ki_pcb);
1314	PTRTRIM_CP(*ki, *ki32, ki_kstack);
1315	PTRTRIM_CP(*ki, *ki32, ki_udata);
1316	PTRTRIM_CP(*ki, *ki32, ki_tdaddr);
1317	CP(*ki, *ki32, ki_sflag);
1318	CP(*ki, *ki32, ki_tdflags);
1319}
1320#endif
1321
1322int
1323kern_proc_out(struct proc *p, struct sbuf *sb, int flags)
1324{
1325	struct thread *td;
1326	struct kinfo_proc ki;
1327#ifdef COMPAT_FREEBSD32
1328	struct kinfo_proc32 ki32;
1329#endif
1330	int error;
1331
1332	PROC_LOCK_ASSERT(p, MA_OWNED);
1333	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
1334
1335	error = 0;
1336	fill_kinfo_proc(p, &ki);
1337	if ((flags & KERN_PROC_NOTHREADS) != 0) {
1338#ifdef COMPAT_FREEBSD32
1339		if ((flags & KERN_PROC_MASK32) != 0) {
1340			freebsd32_kinfo_proc_out(&ki, &ki32);
1341			if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
1342				error = ENOMEM;
1343		} else
1344#endif
1345			if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
1346				error = ENOMEM;
1347	} else {
1348		FOREACH_THREAD_IN_PROC(p, td) {
1349			fill_kinfo_thread(td, &ki, 1);
1350#ifdef COMPAT_FREEBSD32
1351			if ((flags & KERN_PROC_MASK32) != 0) {
1352				freebsd32_kinfo_proc_out(&ki, &ki32);
1353				if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
1354					error = ENOMEM;
1355			} else
1356#endif
1357				if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
1358					error = ENOMEM;
1359			if (error != 0)
1360				break;
1361		}
1362	}
1363	PROC_UNLOCK(p);
1364	return (error);
1365}
1366
1367static int
1368sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags,
1369    int doingzomb)
1370{
1371	struct sbuf sb;
1372	struct kinfo_proc ki;
1373	struct proc *np;
1374	int error, error2;
1375	pid_t pid;
1376
1377	pid = p->p_pid;
1378	sbuf_new_for_sysctl(&sb, (char *)&ki, sizeof(ki), req);
1379	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
1380	error = kern_proc_out(p, &sb, flags);
1381	error2 = sbuf_finish(&sb);
1382	sbuf_delete(&sb);
1383	if (error != 0)
1384		return (error);
1385	else if (error2 != 0)
1386		return (error2);
1387	if (doingzomb)
1388		np = zpfind(pid);
1389	else {
1390		if (pid == 0)
1391			return (0);
1392		np = pfind(pid);
1393	}
1394	if (np == NULL)
1395		return (ESRCH);
1396	if (np != p) {
1397		PROC_UNLOCK(np);
1398		return (ESRCH);
1399	}
1400	PROC_UNLOCK(np);
1401	return (0);
1402}
1403
1404static int
1405sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
1406{
1407	int *name = (int *)arg1;
1408	u_int namelen = arg2;
1409	struct proc *p;
1410	int flags, doingzomb, oid_number;
1411	int error = 0;
1412
1413	oid_number = oidp->oid_number;
1414	if (oid_number != KERN_PROC_ALL &&
1415	    (oid_number & KERN_PROC_INC_THREAD) == 0)
1416		flags = KERN_PROC_NOTHREADS;
1417	else {
1418		flags = 0;
1419		oid_number &= ~KERN_PROC_INC_THREAD;
1420	}
1421#ifdef COMPAT_FREEBSD32
1422	if (req->flags & SCTL_MASK32)
1423		flags |= KERN_PROC_MASK32;
1424#endif
1425	if (oid_number == KERN_PROC_PID) {
1426		if (namelen != 1)
1427			return (EINVAL);
1428		error = sysctl_wire_old_buffer(req, 0);
1429		if (error)
1430			return (error);
1431		sx_slock(&proctree_lock);
1432		error = pget((pid_t)name[0], PGET_CANSEE, &p);
1433		if (error == 0)
1434			error = sysctl_out_proc(p, req, flags, 0);
1435		sx_sunlock(&proctree_lock);
1436		return (error);
1437	}
1438
1439	switch (oid_number) {
1440	case KERN_PROC_ALL:
1441		if (namelen != 0)
1442			return (EINVAL);
1443		break;
1444	case KERN_PROC_PROC:
1445		if (namelen != 0 && namelen != 1)
1446			return (EINVAL);
1447		break;
1448	default:
1449		if (namelen != 1)
1450			return (EINVAL);
1451		break;
1452	}
1453
1454	if (!req->oldptr) {
1455		/* overestimate by 5 procs */
1456		error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5);
1457		if (error)
1458			return (error);
1459	}
1460	error = sysctl_wire_old_buffer(req, 0);
1461	if (error != 0)
1462		return (error);
1463	sx_slock(&proctree_lock);
1464	sx_slock(&allproc_lock);
1465	for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) {
1466		if (!doingzomb)
1467			p = LIST_FIRST(&allproc);
1468		else
1469			p = LIST_FIRST(&zombproc);
1470		for (; p != NULL; p = LIST_NEXT(p, p_list)) {
1471			/*
1472			 * Skip embryonic processes.
1473			 */
1474			PROC_LOCK(p);
1475			if (p->p_state == PRS_NEW) {
1476				PROC_UNLOCK(p);
1477				continue;
1478			}
1479			KASSERT(p->p_ucred != NULL,
1480			    ("process credential is NULL for non-NEW proc"));
1481			/*
1482			 * Show a user only appropriate processes.
1483			 */
1484			if (p_cansee(curthread, p)) {
1485				PROC_UNLOCK(p);
1486				continue;
1487			}
1488			/*
1489			 * TODO - make more efficient (see notes below).
1490			 * do by session.
1491			 */
1492			switch (oid_number) {
1493
1494			case KERN_PROC_GID:
1495				if (p->p_ucred->cr_gid != (gid_t)name[0]) {
1496					PROC_UNLOCK(p);
1497					continue;
1498				}
1499				break;
1500
1501			case KERN_PROC_PGRP:
1502				/* could do this by traversing pgrp */
1503				if (p->p_pgrp == NULL ||
1504				    p->p_pgrp->pg_id != (pid_t)name[0]) {
1505					PROC_UNLOCK(p);
1506					continue;
1507				}
1508				break;
1509
1510			case KERN_PROC_RGID:
1511				if (p->p_ucred->cr_rgid != (gid_t)name[0]) {
1512					PROC_UNLOCK(p);
1513					continue;
1514				}
1515				break;
1516
1517			case KERN_PROC_SESSION:
1518				if (p->p_session == NULL ||
1519				    p->p_session->s_sid != (pid_t)name[0]) {
1520					PROC_UNLOCK(p);
1521					continue;
1522				}
1523				break;
1524
1525			case KERN_PROC_TTY:
1526				if ((p->p_flag & P_CONTROLT) == 0 ||
1527				    p->p_session == NULL) {
1528					PROC_UNLOCK(p);
1529					continue;
1530				}
1531				/* XXX proctree_lock */
1532				SESS_LOCK(p->p_session);
1533				if (p->p_session->s_ttyp == NULL ||
1534				    tty_udev(p->p_session->s_ttyp) !=
1535				    (dev_t)name[0]) {
1536					SESS_UNLOCK(p->p_session);
1537					PROC_UNLOCK(p);
1538					continue;
1539				}
1540				SESS_UNLOCK(p->p_session);
1541				break;
1542
1543			case KERN_PROC_UID:
1544				if (p->p_ucred->cr_uid != (uid_t)name[0]) {
1545					PROC_UNLOCK(p);
1546					continue;
1547				}
1548				break;
1549
1550			case KERN_PROC_RUID:
1551				if (p->p_ucred->cr_ruid != (uid_t)name[0]) {
1552					PROC_UNLOCK(p);
1553					continue;
1554				}
1555				break;
1556
1557			case KERN_PROC_PROC:
1558				break;
1559
1560			default:
1561				break;
1562
1563			}
1564
1565			error = sysctl_out_proc(p, req, flags, doingzomb);
1566			if (error) {
1567				sx_sunlock(&allproc_lock);
1568				sx_sunlock(&proctree_lock);
1569				return (error);
1570			}
1571		}
1572	}
1573	sx_sunlock(&allproc_lock);
1574	sx_sunlock(&proctree_lock);
1575	return (0);
1576}
1577
1578struct pargs *
1579pargs_alloc(int len)
1580{
1581	struct pargs *pa;
1582
1583	pa = malloc(sizeof(struct pargs) + len, M_PARGS,
1584		M_WAITOK);
1585	refcount_init(&pa->ar_ref, 1);
1586	pa->ar_length = len;
1587	return (pa);
1588}
1589
1590static void
1591pargs_free(struct pargs *pa)
1592{
1593
1594	free(pa, M_PARGS);
1595}
1596
1597void
1598pargs_hold(struct pargs *pa)
1599{
1600
1601	if (pa == NULL)
1602		return;
1603	refcount_acquire(&pa->ar_ref);
1604}
1605
1606void
1607pargs_drop(struct pargs *pa)
1608{
1609
1610	if (pa == NULL)
1611		return;
1612	if (refcount_release(&pa->ar_ref))
1613		pargs_free(pa);
1614}
1615
1616static int
1617proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf,
1618    size_t len)
1619{
1620	ssize_t n;
1621
1622	/*
1623	 * This may return a short read if the string is shorter than the chunk
1624	 * and is aligned at the end of the page, and the following page is not
1625	 * mapped.
1626	 */
1627	n = proc_readmem(td, p, (vm_offset_t)sptr, buf, len);
1628	if (n <= 0)
1629		return (ENOMEM);
1630	return (0);
1631}
1632
1633#define PROC_AUXV_MAX	256	/* Safety limit on auxv size. */
1634
/*
 * Selects which per-process vector get_proc_vector() and
 * get_proc_vector32() fetch.
 */
1635enum proc_vector_type {
1636	PROC_ARG,	/* argument vector (ps_argvstr / ps_nargvstr) */
1637	PROC_ENV,	/* environment vector (ps_envstr / ps_nenvstr) */
1638	PROC_AUX,	/* ELF auxiliary vector, read from just past the env array */
1639};
1640
1641#ifdef COMPAT_FREEBSD32
1642static int
1643get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp,
1644    size_t *vsizep, enum proc_vector_type type)
1645{
1646	struct freebsd32_ps_strings pss;
1647	Elf32_Auxinfo aux;
1648	vm_offset_t vptr, ptr;
1649	uint32_t *proc_vector32;
1650	char **proc_vector;
1651	size_t vsize, size;
1652	int i, error;
1653
1654	error = 0;
1655	if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss,
1656	    sizeof(pss)) != sizeof(pss))
1657		return (ENOMEM);
1658	switch (type) {
1659	case PROC_ARG:
1660		vptr = (vm_offset_t)PTRIN(pss.ps_argvstr);
1661		vsize = pss.ps_nargvstr;
1662		if (vsize > ARG_MAX)
1663			return (ENOEXEC);
1664		size = vsize * sizeof(int32_t);
1665		break;
1666	case PROC_ENV:
1667		vptr = (vm_offset_t)PTRIN(pss.ps_envstr);
1668		vsize = pss.ps_nenvstr;
1669		if (vsize > ARG_MAX)
1670			return (ENOEXEC);
1671		size = vsize * sizeof(int32_t);
1672		break;
1673	case PROC_AUX:
1674		vptr = (vm_offset_t)PTRIN(pss.ps_envstr) +
1675		    (pss.ps_nenvstr + 1) * sizeof(int32_t);
1676		if (vptr % 4 != 0)
1677			return (ENOEXEC);
1678		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
1679			if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) !=
1680			    sizeof(aux))
1681				return (ENOMEM);
1682			if (aux.a_type == AT_NULL)
1683				break;
1684			ptr += sizeof(aux);
1685		}
1686		if (aux.a_type != AT_NULL)
1687			return (ENOEXEC);
1688		vsize = i + 1;
1689		size = vsize * sizeof(aux);
1690		break;
1691	default:
1692		KASSERT(0, ("Wrong proc vector type: %d", type));
1693		return (EINVAL);
1694	}
1695	proc_vector32 = malloc(size, M_TEMP, M_WAITOK);
1696	if (proc_readmem(td, p, vptr, proc_vector32, size) != size) {
1697		error = ENOMEM;
1698		goto done;
1699	}
1700	if (type == PROC_AUX) {
1701		*proc_vectorp = (char **)proc_vector32;
1702		*vsizep = vsize;
1703		return (0);
1704	}
1705	proc_vector = malloc(vsize * sizeof(char *), M_TEMP, M_WAITOK);
1706	for (i = 0; i < (int)vsize; i++)
1707		proc_vector[i] = PTRIN(proc_vector32[i]);
1708	*proc_vectorp = proc_vector;
1709	*vsizep = vsize;
1710done:
1711	free(proc_vector32, M_TEMP);
1712	return (error);
1713}
1714#endif
1715
1716static int
1717get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp,
1718    size_t *vsizep, enum proc_vector_type type)
1719{
1720	struct ps_strings pss;
1721	Elf_Auxinfo aux;
1722	vm_offset_t vptr, ptr;
1723	char **proc_vector;
1724	size_t vsize, size;
1725	int i;
1726
1727#ifdef COMPAT_FREEBSD32
1728	if (SV_PROC_FLAG(p, SV_ILP32) != 0)
1729		return (get_proc_vector32(td, p, proc_vectorp, vsizep, type));
1730#endif
1731	if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss,
1732	    sizeof(pss)) != sizeof(pss))
1733		return (ENOMEM);
1734	switch (type) {
1735	case PROC_ARG:
1736		vptr = (vm_offset_t)pss.ps_argvstr;
1737		vsize = pss.ps_nargvstr;
1738		if (vsize > ARG_MAX)
1739			return (ENOEXEC);
1740		size = vsize * sizeof(char *);
1741		break;
1742	case PROC_ENV:
1743		vptr = (vm_offset_t)pss.ps_envstr;
1744		vsize = pss.ps_nenvstr;
1745		if (vsize > ARG_MAX)
1746			return (ENOEXEC);
1747		size = vsize * sizeof(char *);
1748		break;
1749	case PROC_AUX:
1750		/*
1751		 * The aux array is just above env array on the stack. Check
1752		 * that the address is naturally aligned.
1753		 */
1754		vptr = (vm_offset_t)pss.ps_envstr + (pss.ps_nenvstr + 1)
1755		    * sizeof(char *);
1756#if __ELF_WORD_SIZE == 64
1757		if (vptr % sizeof(uint64_t) != 0)
1758#else
1759		if (vptr % sizeof(uint32_t) != 0)
1760#endif
1761			return (ENOEXEC);
1762		/*
1763		 * We count the array size reading the aux vectors from the
1764		 * stack until AT_NULL vector is returned.  So (to keep the code
1765		 * simple) we read the process stack twice: the first time here
1766		 * to find the size and the second time when copying the vectors
1767		 * to the allocated proc_vector.
1768		 */
1769		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
1770			if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) !=
1771			    sizeof(aux))
1772				return (ENOMEM);
1773			if (aux.a_type == AT_NULL)
1774				break;
1775			ptr += sizeof(aux);
1776		}
1777		/*
1778		 * If the PROC_AUXV_MAX entries are iterated over, and we have
1779		 * not reached AT_NULL, it is most likely we are reading wrong
1780		 * data: either the process doesn't have auxv array or data has
1781		 * been modified. Return the error in this case.
1782		 */
1783		if (aux.a_type != AT_NULL)
1784			return (ENOEXEC);
1785		vsize = i + 1;
1786		size = vsize * sizeof(aux);
1787		break;
1788	default:
1789		KASSERT(0, ("Wrong proc vector type: %d", type));
1790		return (EINVAL); /* In case we are built without INVARIANTS. */
1791	}
1792	proc_vector = malloc(size, M_TEMP, M_WAITOK);
1793	if (proc_readmem(td, p, vptr, proc_vector, size) != size) {
1794		free(proc_vector, M_TEMP);
1795		return (ENOMEM);
1796	}
1797	*proc_vectorp = proc_vector;
1798	*vsizep = vsize;
1799
1800	return (0);
1801}
1802
1803#define GET_PS_STRINGS_CHUNK_SZ	256	/* Chunk size (bytes) for ps_strings operations. */
1804
1805static int
1806get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb,
1807    enum proc_vector_type type)
1808{
1809	size_t done, len, nchr, vsize;
1810	int error, i;
1811	char **proc_vector, *sptr;
1812	char pss_string[GET_PS_STRINGS_CHUNK_SZ];
1813
1814	PROC_ASSERT_HELD(p);
1815
1816	/*
1817	 * We are not going to read more than 2 * (PATH_MAX + ARG_MAX) bytes.
1818	 */
1819	nchr = 2 * (PATH_MAX + ARG_MAX);
1820
1821	error = get_proc_vector(td, p, &proc_vector, &vsize, type);
1822	if (error != 0)
1823		return (error);
1824	for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) {
1825		/*
1826		 * The program may have scribbled into its argv array, e.g. to
1827		 * remove some arguments.  If that has happened, break out
1828		 * before trying to read from NULL.
1829		 */
1830		if (proc_vector[i] == NULL)
1831			break;
1832		for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) {
1833			error = proc_read_string(td, p, sptr, pss_string,
1834			    sizeof(pss_string));
1835			if (error != 0)
1836				goto done;
1837			len = strnlen(pss_string, GET_PS_STRINGS_CHUNK_SZ);
1838			if (done + len >= nchr)
1839				len = nchr - done - 1;
1840			sbuf_bcat(sb, pss_string, len);
1841			if (len != GET_PS_STRINGS_CHUNK_SZ)
1842				break;
1843			done += GET_PS_STRINGS_CHUNK_SZ;
1844		}
1845		sbuf_bcat(sb, "", 1);
1846		done += len + 1;
1847	}
1848done:
1849	free(proc_vector, M_TEMP);
1850	return (error);
1851}
1852
1853int
1854proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb)
1855{
1856
1857	return (get_ps_strings(curthread, p, sb, PROC_ARG));
1858}
1859
1860int
1861proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb)
1862{
1863
1864	return (get_ps_strings(curthread, p, sb, PROC_ENV));
1865}
1866
1867int
1868proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb)
1869{
1870	size_t vsize, size;
1871	char **auxv;
1872	int error;
1873
1874	error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX);
1875	if (error == 0) {
1876#ifdef COMPAT_FREEBSD32
1877		if (SV_PROC_FLAG(p, SV_ILP32) != 0)
1878			size = vsize * sizeof(Elf32_Auxinfo);
1879		else
1880#endif
1881			size = vsize * sizeof(Elf_Auxinfo);
1882		if (sbuf_bcat(sb, auxv, size) != 0)
1883			error = ENOMEM;
1884		free(auxv, M_TEMP);
1885	}
1886	return (error);
1887}
1888
1889/*
1890 * This sysctl allows a process to retrieve the argument list or process
1891 * title for another process without groping around in the address space
1892 * of the other process.  It also allow a process to set its own "process
1893 * title to a string of its own choice.
1894 */
1895static int
1896sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
1897{
1898	int *name = (int *)arg1;
1899	u_int namelen = arg2;
1900	struct pargs *newpa, *pa;
1901	struct proc *p;
1902	struct sbuf sb;
1903	int flags, error = 0, error2;
1904
1905	if (namelen != 1)
1906		return (EINVAL);
1907
1908	flags = PGET_CANSEE;
1909	if (req->newptr != NULL)
1910		flags |= PGET_ISCURRENT;
1911	error = pget((pid_t)name[0], flags, &p);
1912	if (error)
1913		return (error);
1914
1915	pa = p->p_args;
1916	if (pa != NULL) {
1917		pargs_hold(pa);
1918		PROC_UNLOCK(p);
1919		error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
1920		pargs_drop(pa);
1921	} else if ((p->p_flag & (P_WEXIT | P_SYSTEM)) == 0) {
1922		_PHOLD(p);
1923		PROC_UNLOCK(p);
1924		sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
1925		sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
1926		error = proc_getargv(curthread, p, &sb);
1927		error2 = sbuf_finish(&sb);
1928		PRELE(p);
1929		sbuf_delete(&sb);
1930		if (error == 0 && error2 != 0)
1931			error = error2;
1932	} else {
1933		PROC_UNLOCK(p);
1934	}
1935	if (error != 0 || req->newptr == NULL)
1936		return (error);
1937
1938	if (req->newlen > ps_arg_cache_limit - sizeof(struct pargs))
1939		return (ENOMEM);
1940	newpa = pargs_alloc(req->newlen);
1941	error = SYSCTL_IN(req, newpa->ar_args, req->newlen);
1942	if (error != 0) {
1943		pargs_free(newpa);
1944		return (error);
1945	}
1946	PROC_LOCK(p);
1947	pa = p->p_args;
1948	p->p_args = newpa;
1949	PROC_UNLOCK(p);
1950	pargs_drop(pa);
1951	return (0);
1952}
1953
1954/*
1955 * This sysctl allows a process to retrieve environment of another process.
1956 */
1957static int
1958sysctl_kern_proc_env(SYSCTL_HANDLER_ARGS)
1959{
1960	int *name = (int *)arg1;
1961	u_int namelen = arg2;
1962	struct proc *p;
1963	struct sbuf sb;
1964	int error, error2;
1965
1966	if (namelen != 1)
1967		return (EINVAL);
1968
1969	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
1970	if (error != 0)
1971		return (error);
1972	if ((p->p_flag & P_SYSTEM) != 0) {
1973		PRELE(p);
1974		return (0);
1975	}
1976
1977	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
1978	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
1979	error = proc_getenvv(curthread, p, &sb);
1980	error2 = sbuf_finish(&sb);
1981	PRELE(p);
1982	sbuf_delete(&sb);
1983	return (error != 0 ? error : error2);
1984}
1985
1986/*
1987 * This sysctl allows a process to retrieve ELF auxiliary vector of
1988 * another process.
1989 */
1990static int
1991sysctl_kern_proc_auxv(SYSCTL_HANDLER_ARGS)
1992{
1993	int *name = (int *)arg1;
1994	u_int namelen = arg2;
1995	struct proc *p;
1996	struct sbuf sb;
1997	int error, error2;
1998
1999	if (namelen != 1)
2000		return (EINVAL);
2001
2002	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
2003	if (error != 0)
2004		return (error);
2005	if ((p->p_flag & P_SYSTEM) != 0) {
2006		PRELE(p);
2007		return (0);
2008	}
2009	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
2010	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
2011	error = proc_getauxv(curthread, p, &sb);
2012	error2 = sbuf_finish(&sb);
2013	PRELE(p);
2014	sbuf_delete(&sb);
2015	return (error != 0 ? error : error2);
2016}
2017
2018/*
2019 * This sysctl allows a process to retrieve the path of the executable for
2020 * itself or another process.
2021 */
2022static int
2023sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS)
2024{
2025	pid_t *pidp = (pid_t *)arg1;
2026	unsigned int arglen = arg2;
2027	struct proc *p;
2028	struct vnode *vp;
2029	char *retbuf, *freebuf;
2030	int error;
2031
2032	if (arglen != 1)
2033		return (EINVAL);
2034	if (*pidp == -1) {	/* -1 means this process */
2035		p = req->td->td_proc;
2036	} else {
2037		error = pget(*pidp, PGET_CANSEE, &p);
2038		if (error != 0)
2039			return (error);
2040	}
2041
2042	vp = p->p_textvp;
2043	if (vp == NULL) {
2044		if (*pidp != -1)
2045			PROC_UNLOCK(p);
2046		return (0);
2047	}
2048	vref(vp);
2049	if (*pidp != -1)
2050		PROC_UNLOCK(p);
2051	error = vn_fullpath(req->td, vp, &retbuf, &freebuf);
2052	vrele(vp);
2053	if (error)
2054		return (error);
2055	error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1);
2056	free(freebuf, M_TEMP);
2057	return (error);
2058}
2059
2060static int
2061sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS)
2062{
2063	struct proc *p;
2064	char *sv_name;
2065	int *name;
2066	int namelen;
2067	int error;
2068
2069	namelen = arg2;
2070	if (namelen != 1)
2071		return (EINVAL);
2072
2073	name = (int *)arg1;
2074	error = pget((pid_t)name[0], PGET_CANSEE, &p);
2075	if (error != 0)
2076		return (error);
2077	sv_name = p->p_sysent->sv_name;
2078	PROC_UNLOCK(p);
2079	return (sysctl_handle_string(oidp, sv_name, 0, req));
2080}
2081
/*
 * When KINFO_OVMENTRY_SIZE is defined, fail the build if
 * struct kinfo_ovmentry does not have exactly that size.
 */
2082#ifdef KINFO_OVMENTRY_SIZE
2083CTASSERT(sizeof(struct kinfo_ovmentry) == KINFO_OVMENTRY_SIZE);
2084#endif
2085
2086#ifdef COMPAT_FREEBSD7
2087static int
2088sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS)
2089{
2090	vm_map_entry_t entry, tmp_entry;
2091	unsigned int last_timestamp;
2092	char *fullpath, *freepath;
2093	struct kinfo_ovmentry *kve;
2094	struct vattr va;
2095	struct ucred *cred;
2096	int error, *name;
2097	struct vnode *vp;
2098	struct proc *p;
2099	vm_map_t map;
2100	struct vmspace *vm;
2101
2102	name = (int *)arg1;
2103	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
2104	if (error != 0)
2105		return (error);
2106	vm = vmspace_acquire_ref(p);
2107	if (vm == NULL) {
2108		PRELE(p);
2109		return (ESRCH);
2110	}
2111	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
2112
2113	map = &vm->vm_map;
2114	vm_map_lock_read(map);
2115	for (entry = map->header.next; entry != &map->header;
2116	    entry = entry->next) {
2117		vm_object_t obj, tobj, lobj;
2118		vm_offset_t addr;
2119
2120		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2121			continue;
2122
2123		bzero(kve, sizeof(*kve));
2124		kve->kve_structsize = sizeof(*kve);
2125
2126		kve->kve_private_resident = 0;
2127		obj = entry->object.vm_object;
2128		if (obj != NULL) {
2129			VM_OBJECT_RLOCK(obj);
2130			if (obj->shadow_count == 1)
2131				kve->kve_private_resident =
2132				    obj->resident_page_count;
2133		}
2134		kve->kve_resident = 0;
2135		addr = entry->start;
2136		while (addr < entry->end) {
2137			if (pmap_extract(map->pmap, addr))
2138				kve->kve_resident++;
2139			addr += PAGE_SIZE;
2140		}
2141
2142		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
2143			if (tobj != obj) {
2144				VM_OBJECT_RLOCK(tobj);
2145				kve->kve_offset += tobj->backing_object_offset;
2146			}
2147			if (lobj != obj)
2148				VM_OBJECT_RUNLOCK(lobj);
2149			lobj = tobj;
2150		}
2151
2152		kve->kve_start = (void*)entry->start;
2153		kve->kve_end = (void*)entry->end;
2154		kve->kve_offset += (off_t)entry->offset;
2155
2156		if (entry->protection & VM_PROT_READ)
2157			kve->kve_protection |= KVME_PROT_READ;
2158		if (entry->protection & VM_PROT_WRITE)
2159			kve->kve_protection |= KVME_PROT_WRITE;
2160		if (entry->protection & VM_PROT_EXECUTE)
2161			kve->kve_protection |= KVME_PROT_EXEC;
2162
2163		if (entry->eflags & MAP_ENTRY_COW)
2164			kve->kve_flags |= KVME_FLAG_COW;
2165		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
2166			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
2167		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
2168			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
2169
2170		last_timestamp = map->timestamp;
2171		vm_map_unlock_read(map);
2172
2173		kve->kve_fileid = 0;
2174		kve->kve_fsid = 0;
2175		freepath = NULL;
2176		fullpath = "";
2177		if (lobj) {
2178			vp = NULL;
2179			switch (lobj->type) {
2180			case OBJT_DEFAULT:
2181				kve->kve_type = KVME_TYPE_DEFAULT;
2182				break;
2183			case OBJT_VNODE:
2184				kve->kve_type = KVME_TYPE_VNODE;
2185				vp = lobj->handle;
2186				vref(vp);
2187				break;
2188			case OBJT_SWAP:
2189				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
2190					kve->kve_type = KVME_TYPE_VNODE;
2191					if ((lobj->flags & OBJ_TMPFS) != 0) {
2192						vp = lobj->un_pager.swp.swp_tmpfs;
2193						vref(vp);
2194					}
2195				} else {
2196					kve->kve_type = KVME_TYPE_SWAP;
2197				}
2198				break;
2199			case OBJT_DEVICE:
2200				kve->kve_type = KVME_TYPE_DEVICE;
2201				break;
2202			case OBJT_PHYS:
2203				kve->kve_type = KVME_TYPE_PHYS;
2204				break;
2205			case OBJT_DEAD:
2206				kve->kve_type = KVME_TYPE_DEAD;
2207				break;
2208			case OBJT_SG:
2209				kve->kve_type = KVME_TYPE_SG;
2210				break;
2211			default:
2212				kve->kve_type = KVME_TYPE_UNKNOWN;
2213				break;
2214			}
2215			if (lobj != obj)
2216				VM_OBJECT_RUNLOCK(lobj);
2217
2218			kve->kve_ref_count = obj->ref_count;
2219			kve->kve_shadow_count = obj->shadow_count;
2220			VM_OBJECT_RUNLOCK(obj);
2221			if (vp != NULL) {
2222				vn_fullpath(curthread, vp, &fullpath,
2223				    &freepath);
2224				cred = curthread->td_ucred;
2225				vn_lock(vp, LK_SHARED | LK_RETRY);
2226				if (VOP_GETATTR(vp, &va, cred) == 0) {
2227					kve->kve_fileid = va.va_fileid;
2228					kve->kve_fsid = va.va_fsid;
2229				}
2230				vput(vp);
2231			}
2232		} else {
2233			kve->kve_type = KVME_TYPE_NONE;
2234			kve->kve_ref_count = 0;
2235			kve->kve_shadow_count = 0;
2236		}
2237
2238		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
2239		if (freepath != NULL)
2240			free(freepath, M_TEMP);
2241
2242		error = SYSCTL_OUT(req, kve, sizeof(*kve));
2243		vm_map_lock_read(map);
2244		if (error)
2245			break;
2246		if (last_timestamp != map->timestamp) {
2247			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
2248			entry = tmp_entry;
2249		}
2250	}
2251	vm_map_unlock_read(map);
2252	vmspace_free(vm);
2253	PRELE(p);
2254	free(kve, M_TEMP);
2255	return (error);
2256}
2257#endif	/* COMPAT_FREEBSD7 */
2258
/*
 * When KINFO_VMENTRY_SIZE is defined, fail the build if
 * struct kinfo_vmentry does not have exactly that size.
 */
2259#ifdef KINFO_VMENTRY_SIZE
2260CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
2261#endif
2262
/*
 * Count the resident pages backing a map entry.
 *
 * On return *resident_count holds the number of pages found and *super is
 * true if at least one superpage mapping was seen.  Both are reset first,
 * and nothing else is done when vmmap_skip_res_cnt is set.
 *
 * For every page position in [entry->start, entry->end) the entry's object
 * and then its backing objects are searched for a page at the matching
 * index.  When none is found, the walk jumps ahead to the nearest later
 * page seen during that search.
 *
 * NOTE(review): the visible caller read-locks the whole object chain before
 * calling; presumably that is a requirement - confirm.
 */
2263void
2264kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry,
2265    int *resident_count, bool *super)
2266{
2267	vm_object_t obj, tobj;
2268	vm_page_t m, m_adv;
2269	vm_offset_t addr;
2270	vm_paddr_t locked_pa;
2271	vm_pindex_t pi, pi_adv, pindex;
2272
2273	*super = false;
2274	*resident_count = 0;
2275	if (vmmap_skip_res_cnt)
2276		return;
2277
2278	locked_pa = 0;
2279	obj = entry->object.vm_object;
2280	addr = entry->start;
2281	m_adv = NULL;
2282	pi = OFF_TO_IDX(entry->offset);
	/* addr and pi advance together by pi_adv pages per iteration. */
2283	for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) {
2284		if (m_adv != NULL) {
			/* The previous iteration skipped ahead to this page. */
2285			m = m_adv;
2286		} else {
			/* Default skip: everything left in the entry. */
2287			pi_adv = atop(entry->end - addr);
2288			pindex = pi;
2289			for (tobj = obj;; tobj = tobj->backing_object) {
2290				m = vm_page_find_least(tobj, pindex);
2291				if (m != NULL) {
					/* Exact hit: the page at this index. */
2292					if (m->pindex == pindex)
2293						break;
					/*
					 * Otherwise remember the closest later
					 * page and the distance to it.
					 */
2294					if (pi_adv > m->pindex - pindex) {
2295						pi_adv = m->pindex - pindex;
2296						m_adv = m;
2297					}
2298				}
				/* End of chain: nothing resident here. */
2299				if (tobj->backing_object == NULL)
2300					goto next;
				/* Translate the index into the backing object. */
2301				pindex += OFF_TO_IDX(tobj->
2302				    backing_object_offset);
2303			}
2304		}
2305		m_adv = NULL;
		/*
		 * Count a whole superpage only if the page is marked as
		 * part of one (psind != 0), the superpage-sized range is
		 * aligned and fits inside the entry, and pmap_mincore()
		 * reports MINCORE_SUPER for the address.
		 */
2306		if (m->psind != 0 && addr + pagesizes[1] <= entry->end &&
2307		    (addr & (pagesizes[1] - 1)) == 0 &&
2308		    (pmap_mincore(map->pmap, addr, &locked_pa) &
2309		    MINCORE_SUPER) != 0) {
2310			*super = true;
2311			pi_adv = atop(pagesizes[1]);
2312		} else {
2313			/*
2314			 * We do not test the found page on validity.
2315			 * Either the page is busy and being paged in,
2316			 * or it was invalidated.  The first case
2317			 * should be counted as resident, the second
2318			 * is not so clear; we do account both.
2319			 */
2320			pi_adv = 1;
2321		}
2322		*resident_count += pi_adv;
	/* Reached by goto when no page exists here; only the skip applies. */
2323next:;
2324	}
2325	PA_UNLOCK_COND(locked_pa);
2326}
2327
/*
 * Append one struct kinfo_vmentry record per (non-submap) entry of the
 * address space of process p to the sbuf sb.
 *
 * Must be called with the process locked and will return unlocked.
 *
 * maxlen limits the number of bytes appended; -1 means no limit.  When
 * the next record would not fit, output stops and 0 is returned.
 * If flags contains KERN_VMMAP_PACK_KINFO each record is cut right after
 * the NUL terminator of kve_path; in either case the record size is
 * rounded up to a multiple of sizeof(uint64_t).
 *
 * Returns 0 on success, ESRCH if vmspace_acquire_ref() yields no vmspace
 * and ENOMEM if sbuf_bcat() fails.
 */
int
kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags)
{
	vm_map_entry_t entry, tmp_entry;
	struct vattr va;
	vm_map_t map;
	vm_object_t obj, tobj, lobj;
	char *fullpath, *freepath;
	struct kinfo_vmentry *kve;
	struct ucred *cred;
	struct vnode *vp;
	struct vmspace *vm;
	vm_offset_t addr;
	unsigned int last_timestamp;
	int error;
	bool super;

	PROC_LOCK_ASSERT(p, MA_OWNED);

	/* Hold the process so the proc lock can be dropped for the walk. */
	_PHOLD(p);
	PROC_UNLOCK(p);
	vm = vmspace_acquire_ref(p);
	if (vm == NULL) {
		PRELE(p);
		return (ESRCH);
	}
	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO);

	error = 0;
	map = &vm->vm_map;
	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
			continue;

		/*
		 * Remember the end of this entry; it is used to find our
		 * place again if the map changes while it is unlocked below.
		 */
		addr = entry->end;
		bzero(kve, sizeof(*kve));
		obj = entry->object.vm_object;
		if (obj != NULL) {
			/*
			 * Read-lock every object of the backing chain,
			 * summing the backing offsets.  After the loop lobj
			 * is the last (bottom-most) object of the chain.
			 */
			for (tobj = obj; tobj != NULL;
			    tobj = tobj->backing_object) {
				VM_OBJECT_RLOCK(tobj);
				kve->kve_offset += tobj->backing_object_offset;
				lobj = tobj;
			}
			if (obj->backing_object == NULL)
				kve->kve_private_resident =
				    obj->resident_page_count;
			kern_proc_vmmap_resident(map, entry,
			    &kve->kve_resident, &super);
			if (super)
				kve->kve_flags |= KVME_FLAG_SUPER;
			/*
			 * Unlock the intermediate objects only; obj and lobj
			 * stay locked until their fields are read further
			 * down.
			 */
			for (tobj = obj; tobj != NULL;
			    tobj = tobj->backing_object) {
				if (tobj != obj && tobj != lobj)
					VM_OBJECT_RUNLOCK(tobj);
			}
		} else {
			lobj = NULL;
		}

		kve->kve_start = entry->start;
		kve->kve_end = entry->end;
		kve->kve_offset += entry->offset;

		/* Translate VM protection bits into their KVME_PROT_* form. */
		if (entry->protection & VM_PROT_READ)
			kve->kve_protection |= KVME_PROT_READ;
		if (entry->protection & VM_PROT_WRITE)
			kve->kve_protection |= KVME_PROT_WRITE;
		if (entry->protection & VM_PROT_EXECUTE)
			kve->kve_protection |= KVME_PROT_EXEC;

		/* Translate map entry flags into their KVME_FLAG_* form. */
		if (entry->eflags & MAP_ENTRY_COW)
			kve->kve_flags |= KVME_FLAG_COW;
		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
		if (entry->eflags & MAP_ENTRY_GROWS_UP)
			kve->kve_flags |= KVME_FLAG_GROWS_UP;
		if (entry->eflags & MAP_ENTRY_GROWS_DOWN)
			kve->kve_flags |= KVME_FLAG_GROWS_DOWN;

		/*
		 * The map lock is dropped for the vnode and sbuf work below;
		 * the timestamp is compared afterwards to detect changes.
		 * entry itself is not dereferenced while the map is unlocked.
		 */
		last_timestamp = map->timestamp;
		vm_map_unlock_read(map);

		freepath = NULL;
		fullpath = "";
		if (lobj != NULL) {
			vp = NULL;
			/* The reported type is that of the bottom-most object. */
			switch (lobj->type) {
			case OBJT_DEFAULT:
				kve->kve_type = KVME_TYPE_DEFAULT;
				break;
			case OBJT_VNODE:
				kve->kve_type = KVME_TYPE_VNODE;
				vp = lobj->handle;
				vref(vp);
				break;
			case OBJT_SWAP:
				/* tmpfs-backed swap objects are reported as vnodes. */
				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
					kve->kve_type = KVME_TYPE_VNODE;
					if ((lobj->flags & OBJ_TMPFS) != 0) {
						vp = lobj->un_pager.swp.swp_tmpfs;
						vref(vp);
					}
				} else {
					kve->kve_type = KVME_TYPE_SWAP;
				}
				break;
			case OBJT_DEVICE:
				kve->kve_type = KVME_TYPE_DEVICE;
				break;
			case OBJT_PHYS:
				kve->kve_type = KVME_TYPE_PHYS;
				break;
			case OBJT_DEAD:
				kve->kve_type = KVME_TYPE_DEAD;
				break;
			case OBJT_SG:
				kve->kve_type = KVME_TYPE_SG;
				break;
			case OBJT_MGTDEVICE:
				kve->kve_type = KVME_TYPE_MGTDEVICE;
				break;
			default:
				kve->kve_type = KVME_TYPE_UNKNOWN;
				break;
			}
			/* Avoid unlocking twice when the chain has one object. */
			if (lobj != obj)
				VM_OBJECT_RUNLOCK(lobj);

			kve->kve_ref_count = obj->ref_count;
			kve->kve_shadow_count = obj->shadow_count;
			VM_OBJECT_RUNLOCK(obj);
			if (vp != NULL) {
				/*
				 * The vn_fullpath() result is not checked;
				 * fullpath was preset to "" above.
				 */
				vn_fullpath(curthread, vp, &fullpath,
				    &freepath);
				kve->kve_vn_type = vntype_to_kinfo(vp->v_type);
				cred = curthread->td_ucred;
				vn_lock(vp, LK_SHARED | LK_RETRY);
				/* Attributes are filled in only if getattr succeeds. */
				if (VOP_GETATTR(vp, &va, cred) == 0) {
					kve->kve_vn_fileid = va.va_fileid;
					kve->kve_vn_fsid = va.va_fsid;
					kve->kve_vn_mode =
					    MAKEIMODE(va.va_type, va.va_mode);
					kve->kve_vn_size = va.va_size;
					kve->kve_vn_rdev = va.va_rdev;
					kve->kve_status = KF_ATTR_VALID;
				}
				/* Pairs with the vn_lock() and the vref() above. */
				vput(vp);
			}
		} else {
			kve->kve_type = KVME_TYPE_NONE;
			kve->kve_ref_count = 0;
			kve->kve_shadow_count = 0;
		}

		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
		if (freepath != NULL)
			free(freepath, M_TEMP);

		/* Pack record size down */
		if ((flags & KERN_VMMAP_PACK_KINFO) != 0)
			kve->kve_structsize =
			    offsetof(struct kinfo_vmentry, kve_path) +
			    strlen(kve->kve_path) + 1;
		else
			kve->kve_structsize = sizeof(*kve);
		kve->kve_structsize = roundup(kve->kve_structsize,
		    sizeof(uint64_t));

		/* Halt filling and truncate rather than exceeding maxlen */
		if (maxlen != -1 && maxlen < kve->kve_structsize) {
			error = 0;
			/* The common exit path expects the map read-locked. */
			vm_map_lock_read(map);
			break;
		} else if (maxlen != -1)
			maxlen -= kve->kve_structsize;

		if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
			error = ENOMEM;
		vm_map_lock_read(map);
		if (error != 0)
			break;
		/*
		 * The map changed while it was unlocked: look up the last
		 * address of the entry just reported so that the loop's
		 * entry->next step continues from the matching place.
		 */
		if (last_timestamp != map->timestamp) {
			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
			entry = tmp_entry;
		}
	}
	vm_map_unlock_read(map);
	vmspace_free(vm);
	PRELE(p);
	free(kve, M_TEMP);
	return (error);
}
2528
2529static int
2530sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
2531{
2532	struct proc *p;
2533	struct sbuf sb;
2534	int error, error2, *name;
2535
2536	name = (int *)arg1;
2537	sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_vmentry), req);
2538	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
2539	error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
2540	if (error != 0) {
2541		sbuf_delete(&sb);
2542		return (error);
2543	}
2544	error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO);
2545	error2 = sbuf_finish(&sb);
2546	sbuf_delete(&sb);
2547	return (error != 0 ? error : error2);
2548}
2549
2550#if defined(STACK) || defined(DDB)
2551static int
2552sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS)
2553{
2554	struct kinfo_kstack *kkstp;
2555	int error, i, *name, numthreads;
2556	lwpid_t *lwpidarray;
2557	struct thread *td;
2558	struct stack *st;
2559	struct sbuf sb;
2560	struct proc *p;
2561
2562	name = (int *)arg1;
2563	error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p);
2564	if (error != 0)
2565		return (error);
2566
2567	kkstp = malloc(sizeof(*kkstp), M_TEMP, M_WAITOK);
2568	st = stack_create();
2569
2570	lwpidarray = NULL;
2571	PROC_LOCK(p);
2572	do {
2573		if (lwpidarray != NULL) {
2574			free(lwpidarray, M_TEMP);
2575			lwpidarray = NULL;
2576		}
2577		numthreads = p->p_numthreads;
2578		PROC_UNLOCK(p);
2579		lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP,
2580		    M_WAITOK | M_ZERO);
2581		PROC_LOCK(p);
2582	} while (numthreads < p->p_numthreads);
2583
2584	/*
2585	 * XXXRW: During the below loop, execve(2) and countless other sorts
2586	 * of changes could have taken place.  Should we check to see if the
2587	 * vmspace has been replaced, or the like, in order to prevent
2588	 * giving a snapshot that spans, say, execve(2), with some threads
2589	 * before and some after?  Among other things, the credentials could
2590	 * have changed, in which case the right to extract debug info might
2591	 * no longer be assured.
2592	 */
2593	i = 0;
2594	FOREACH_THREAD_IN_PROC(p, td) {
2595		KASSERT(i < numthreads,
2596		    ("sysctl_kern_proc_kstack: numthreads"));
2597		lwpidarray[i] = td->td_tid;
2598		i++;
2599	}
2600	numthreads = i;
2601	for (i = 0; i < numthreads; i++) {
2602		td = thread_find(p, lwpidarray[i]);
2603		if (td == NULL) {
2604			continue;
2605		}
2606		bzero(kkstp, sizeof(*kkstp));
2607		(void)sbuf_new(&sb, kkstp->kkst_trace,
2608		    sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN);
2609		thread_lock(td);
2610		kkstp->kkst_tid = td->td_tid;
2611		if (TD_IS_SWAPPED(td)) {
2612			kkstp->kkst_state = KKST_STATE_SWAPPED;
2613		} else if (TD_IS_RUNNING(td)) {
2614			if (stack_save_td_running(st, td) == 0)
2615				kkstp->kkst_state = KKST_STATE_STACKOK;
2616			else
2617				kkstp->kkst_state = KKST_STATE_RUNNING;
2618		} else {
2619			kkstp->kkst_state = KKST_STATE_STACKOK;
2620			stack_save_td(st, td);
2621		}
2622		thread_unlock(td);
2623		PROC_UNLOCK(p);
2624		stack_sbuf_print(&sb, st);
2625		sbuf_finish(&sb);
2626		sbuf_delete(&sb);
2627		error = SYSCTL_OUT(req, kkstp, sizeof(*kkstp));
2628		PROC_LOCK(p);
2629		if (error)
2630			break;
2631	}
2632	_PRELE(p);
2633	PROC_UNLOCK(p);
2634	if (lwpidarray != NULL)
2635		free(lwpidarray, M_TEMP);
2636	stack_destroy(st);
2637	free(kkstp, M_TEMP);
2638	return (error);
2639}
2640#endif
2641
2642/*
2643 * This sysctl allows a process to retrieve the full list of groups from
2644 * itself or another process.
2645 */
2646static int
2647sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS)
2648{
2649	pid_t *pidp = (pid_t *)arg1;
2650	unsigned int arglen = arg2;
2651	struct proc *p;
2652	struct ucred *cred;
2653	int error;
2654
2655	if (arglen != 1)
2656		return (EINVAL);
2657	if (*pidp == -1) {	/* -1 means this process */
2658		p = req->td->td_proc;
2659		PROC_LOCK(p);
2660	} else {
2661		error = pget(*pidp, PGET_CANSEE, &p);
2662		if (error != 0)
2663			return (error);
2664	}
2665
2666	cred = crhold(p->p_ucred);
2667	PROC_UNLOCK(p);
2668
2669	error = SYSCTL_OUT(req, cred->cr_groups,
2670	    cred->cr_ngroups * sizeof(gid_t));
2671	crfree(cred);
2672	return (error);
2673}
2674
2675/*
2676 * This sysctl allows a process to retrieve or/and set the resource limit for
2677 * another process.
2678 */
2679static int
2680sysctl_kern_proc_rlimit(SYSCTL_HANDLER_ARGS)
2681{
2682	int *name = (int *)arg1;
2683	u_int namelen = arg2;
2684	struct rlimit rlim;
2685	struct proc *p;
2686	u_int which;
2687	int flags, error;
2688
2689	if (namelen != 2)
2690		return (EINVAL);
2691
2692	which = (u_int)name[1];
2693	if (which >= RLIM_NLIMITS)
2694		return (EINVAL);
2695
2696	if (req->newptr != NULL && req->newlen != sizeof(rlim))
2697		return (EINVAL);
2698
2699	flags = PGET_HOLD | PGET_NOTWEXIT;
2700	if (req->newptr != NULL)
2701		flags |= PGET_CANDEBUG;
2702	else
2703		flags |= PGET_CANSEE;
2704	error = pget((pid_t)name[0], flags, &p);
2705	if (error != 0)
2706		return (error);
2707
2708	/*
2709	 * Retrieve limit.
2710	 */
2711	if (req->oldptr != NULL) {
2712		PROC_LOCK(p);
2713		lim_rlimit_proc(p, which, &rlim);
2714		PROC_UNLOCK(p);
2715	}
2716	error = SYSCTL_OUT(req, &rlim, sizeof(rlim));
2717	if (error != 0)
2718		goto errout;
2719
2720	/*
2721	 * Set limit.
2722	 */
2723	if (req->newptr != NULL) {
2724		error = SYSCTL_IN(req, &rlim, sizeof(rlim));
2725		if (error == 0)
2726			error = kern_proc_setrlimit(curthread, p, which, &rlim);
2727	}
2728
2729errout:
2730	PRELE(p);
2731	return (error);
2732}
2733
2734/*
2735 * This sysctl allows a process to retrieve ps_strings structure location of
2736 * another process.
2737 */
2738static int
2739sysctl_kern_proc_ps_strings(SYSCTL_HANDLER_ARGS)
2740{
2741	int *name = (int *)arg1;
2742	u_int namelen = arg2;
2743	struct proc *p;
2744	vm_offset_t ps_strings;
2745	int error;
2746#ifdef COMPAT_FREEBSD32
2747	uint32_t ps_strings32;
2748#endif
2749
2750	if (namelen != 1)
2751		return (EINVAL);
2752
2753	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
2754	if (error != 0)
2755		return (error);
2756#ifdef COMPAT_FREEBSD32
2757	if ((req->flags & SCTL_MASK32) != 0) {
2758		/*
2759		 * We return 0 if the 32 bit emulation request is for a 64 bit
2760		 * process.
2761		 */
2762		ps_strings32 = SV_PROC_FLAG(p, SV_ILP32) != 0 ?
2763		    PTROUT(p->p_sysent->sv_psstrings) : 0;
2764		PROC_UNLOCK(p);
2765		error = SYSCTL_OUT(req, &ps_strings32, sizeof(ps_strings32));
2766		return (error);
2767	}
2768#endif
2769	ps_strings = p->p_sysent->sv_psstrings;
2770	PROC_UNLOCK(p);
2771	error = SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings));
2772	return (error);
2773}
2774
2775/*
2776 * This sysctl allows a process to retrieve umask of another process.
2777 */
2778static int
2779sysctl_kern_proc_umask(SYSCTL_HANDLER_ARGS)
2780{
2781	int *name = (int *)arg1;
2782	u_int namelen = arg2;
2783	struct proc *p;
2784	int error;
2785	u_short fd_cmask;
2786
2787	if (namelen != 1)
2788		return (EINVAL);
2789
2790	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
2791	if (error != 0)
2792		return (error);
2793
2794	FILEDESC_SLOCK(p->p_fd);
2795	fd_cmask = p->p_fd->fd_cmask;
2796	FILEDESC_SUNLOCK(p->p_fd);
2797	PRELE(p);
2798	error = SYSCTL_OUT(req, &fd_cmask, sizeof(fd_cmask));
2799	return (error);
2800}
2801
2802/*
2803 * This sysctl allows a process to set and retrieve binary osreldate of
2804 * another process.
2805 */
2806static int
2807sysctl_kern_proc_osrel(SYSCTL_HANDLER_ARGS)
2808{
2809	int *name = (int *)arg1;
2810	u_int namelen = arg2;
2811	struct proc *p;
2812	int flags, error, osrel;
2813
2814	if (namelen != 1)
2815		return (EINVAL);
2816
2817	if (req->newptr != NULL && req->newlen != sizeof(osrel))
2818		return (EINVAL);
2819
2820	flags = PGET_HOLD | PGET_NOTWEXIT;
2821	if (req->newptr != NULL)
2822		flags |= PGET_CANDEBUG;
2823	else
2824		flags |= PGET_CANSEE;
2825	error = pget((pid_t)name[0], flags, &p);
2826	if (error != 0)
2827		return (error);
2828
2829	error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel));
2830	if (error != 0)
2831		goto errout;
2832
2833	if (req->newptr != NULL) {
2834		error = SYSCTL_IN(req, &osrel, sizeof(osrel));
2835		if (error != 0)
2836			goto errout;
2837		if (osrel < 0) {
2838			error = EINVAL;
2839			goto errout;
2840		}
2841		p->p_osrel = osrel;
2842	}
2843errout:
2844	PRELE(p);
2845	return (error);
2846}
2847
2848static int
2849sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS)
2850{
2851	int *name = (int *)arg1;
2852	u_int namelen = arg2;
2853	struct proc *p;
2854	struct kinfo_sigtramp kst;
2855	const struct sysentvec *sv;
2856	int error;
2857#ifdef COMPAT_FREEBSD32
2858	struct kinfo_sigtramp32 kst32;
2859#endif
2860
2861	if (namelen != 1)
2862		return (EINVAL);
2863
2864	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
2865	if (error != 0)
2866		return (error);
2867	sv = p->p_sysent;
2868#ifdef COMPAT_FREEBSD32
2869	if ((req->flags & SCTL_MASK32) != 0) {
2870		bzero(&kst32, sizeof(kst32));
2871		if (SV_PROC_FLAG(p, SV_ILP32)) {
2872			if (sv->sv_sigcode_base != 0) {
2873				kst32.ksigtramp_start = sv->sv_sigcode_base;
2874				kst32.ksigtramp_end = sv->sv_sigcode_base +
2875				    *sv->sv_szsigcode;
2876			} else {
2877				kst32.ksigtramp_start = sv->sv_psstrings -
2878				    *sv->sv_szsigcode;
2879				kst32.ksigtramp_end = sv->sv_psstrings;
2880			}
2881		}
2882		PROC_UNLOCK(p);
2883		error = SYSCTL_OUT(req, &kst32, sizeof(kst32));
2884		return (error);
2885	}
2886#endif
2887	bzero(&kst, sizeof(kst));
2888	if (sv->sv_sigcode_base != 0) {
2889		kst.ksigtramp_start = (char *)sv->sv_sigcode_base;
2890		kst.ksigtramp_end = (char *)sv->sv_sigcode_base +
2891		    *sv->sv_szsigcode;
2892	} else {
2893		kst.ksigtramp_start = (char *)sv->sv_psstrings -
2894		    *sv->sv_szsigcode;
2895		kst.ksigtramp_end = (char *)sv->sv_psstrings;
2896	}
2897	PROC_UNLOCK(p);
2898	error = SYSCTL_OUT(req, &kst, sizeof(kst));
2899	return (error);
2900}
2901
/* Root of the kern.proc sysctl subtree. */
SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD,  0, "Process table");

/* Whole-table query, served by sysctl_kern_proc. */
SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT|
	CTLFLAG_MPSAFE, 0, 0, sysctl_kern_proc, "S,proc",
	"Return entire process table");

/*
 * Process-table queries keyed by one selector each (the KERN_PROC_*
 * constant used as OID); all of them are served by sysctl_kern_proc.
 */
static SYSCTL_NODE(_kern_proc, KERN_PROC_GID, gid, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_RGID, rgid, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_SESSION, sid, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc, "Return process table, no threads");

/*
 * Per-process string/vector queries.  The args node is the only one in
 * this group registered read-write and with CTLFLAG_ANYBODY.
 */
static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args,
	CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE,
	sysctl_kern_proc_args, "Process argument list");

static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE,
	sysctl_kern_proc_env, "Process environment");

static SYSCTL_NODE(_kern_proc, KERN_PROC_AUXV, auxv, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_auxv, "Process ELF auxiliary vector");

static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_pathname, "Process executable path");

static SYSCTL_NODE(_kern_proc, KERN_PROC_SV_NAME, sv_name, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_sv_name,
	"Process syscall vector name (ABI type)");

/*
 * The same selectors as above with KERN_PROC_INC_THREAD or'ed into the
 * OID, registered under "<selector>_td" names and served by the same
 * handler.
 * NOTE(review): proc_td reuses the "no threads" description although its
 * OID includes KERN_PROC_INC_THREAD -- confirm whether that is intended.
 */
static SYSCTL_NODE(_kern_proc, (KERN_PROC_GID | KERN_PROC_INC_THREAD), gid_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_INC_THREAD), pgrp_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_RGID | KERN_PROC_INC_THREAD), rgid_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_SESSION | KERN_PROC_INC_THREAD),
	sid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_INC_THREAD), tty_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_INC_THREAD), uid_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_INC_THREAD), ruid_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");

static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td,
	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc,
	"Return process table, no threads");

/* VM map queries; the old record format exists only with COMPAT_FREEBSD7. */
#ifdef COMPAT_FREEBSD7
static SYSCTL_NODE(_kern_proc, KERN_PROC_OVMMAP, ovmmap, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_ovmmap, "Old Process vm map entries");
#endif

static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_vmmap, "Process vm map entries");

/* Kernel stack traces are available only when STACK or DDB is configured. */
#if defined(STACK) || defined(DDB)
static SYSCTL_NODE(_kern_proc, KERN_PROC_KSTACK, kstack, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_kstack, "Process kernel stacks");
#endif

/*
 * Remaining per-process attributes.  rlimit and osrel are registered
 * read-write with CTLFLAG_ANYBODY; their handlers require PGET_CANDEBUG
 * access for writes.
 */
static SYSCTL_NODE(_kern_proc, KERN_PROC_GROUPS, groups, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_groups, "Process groups");

static SYSCTL_NODE(_kern_proc, KERN_PROC_RLIMIT, rlimit, CTLFLAG_RW |
	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_rlimit,
	"Process resource limits");

static SYSCTL_NODE(_kern_proc, KERN_PROC_PS_STRINGS, ps_strings, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_ps_strings,
	"Process ps_strings location");

static SYSCTL_NODE(_kern_proc, KERN_PROC_UMASK, umask, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_umask, "Process umask");

static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW |
	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel,
	"Process binary osreldate");

static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD |
	CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp,
	"Process signal trampoline location");
3014
/*
 * Generation counter for the allproc list.  stop_all_proc() samples it
 * under allproc_lock before its scan and compares it afterwards to
 * notice processes it did not see.  Presumably it is incremented where
 * processes are added (not visible in this part of the file).
 */
int allproc_gen;
3016
/*
 * stop_all_proc() purpose is to stop all process which have usermode,
 * except current process for obvious reasons.  This makes it somewhat
 * unreliable when invoked from multithreaded process.  The service
 * must not be user-callable anyway.
 *
 * The current process's own allproc linkage serves as the iteration
 * cursor: it is moved to the head of the list and then stepped past one
 * process at a time, so the position survives dropping allproc_lock
 * around thread_single().  Passes are repeated until one completes
 * without stopping anything, without failures, without seeing an exiting
 * or single-threading process, and with allproc_gen unchanged.
 */
void
stop_all_proc(void)
{
	struct proc *cp, *p;
	int r, gen;
	bool restart, seen_stopped, seen_exiting, stopped_some;

	cp = curproc;
allproc_loop:
	sx_xlock(&allproc_lock);
	gen = allproc_gen;
	seen_exiting = seen_stopped = stopped_some = restart = false;
	/* Put the cursor (ourselves) in front of every other process. */
	LIST_REMOVE(cp, p_list);
	LIST_INSERT_HEAD(&allproc, cp, p_list);
	for (;;) {
		p = LIST_NEXT(cp, p_list);
		if (p == NULL)
			break;
		/* Step the cursor past p before possibly unlocking. */
		LIST_REMOVE(cp, p_list);
		LIST_INSERT_AFTER(p, cp, p_list);
		PROC_LOCK(p);
		/* Skip kernel/system processes and those already stopped. */
		if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP)) != 0) {
			PROC_UNLOCK(p);
			continue;
		}
		/* Exiting processes are not stopped, but force another pass. */
		if ((p->p_flag & P_WEXIT) != 0) {
			seen_exiting = true;
			PROC_UNLOCK(p);
			continue;
		}
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			/*
			 * Stopped processes are tolerated when there
			 * are no other processes which might continue
			 * them.  P_STOPPED_SINGLE but not
			 * P_TOTAL_STOP process still has at least one
			 * thread running.
			 */
			seen_stopped = true;
			PROC_UNLOCK(p);
			continue;
		}
		/*
		 * Hold p so it remains usable while allproc_lock is dropped
		 * for thread_single().
		 */
		_PHOLD(p);
		sx_xunlock(&allproc_lock);
		r = thread_single(p, SINGLE_ALLPROC);
		if (r != 0)
			restart = true;
		else
			stopped_some = true;
		_PRELE(p);
		PROC_UNLOCK(p);
		sx_xlock(&allproc_lock);
	}
	/* Catch forked children we did not see in iteration. */
	if (gen != allproc_gen)
		restart = true;
	sx_xunlock(&allproc_lock);
	/* Anything noteworthy in this pass means another pass is needed. */
	if (restart || stopped_some || seen_exiting || seen_stopped) {
		kern_yield(PRI_USER);
		goto allproc_loop;
	}
}
3085
/*
 * Counterpart of stop_all_proc(): end the SINGLE_ALLPROC single-threading
 * state of every process that has P_TOTAL_STOP set.
 *
 * As in stop_all_proc(), the current process's allproc linkage is used
 * as the iteration cursor so that allproc_lock can be dropped around
 * thread_single_end().  After each pass the list is rescanned and the
 * pass is repeated while any process still has P_TOTAL_STOP set.
 */
void
resume_all_proc(void)
{
	struct proc *cp, *p;

	cp = curproc;
	sx_xlock(&allproc_lock);
again:
	/* Put the cursor (ourselves) in front of every other process. */
	LIST_REMOVE(cp, p_list);
	LIST_INSERT_HEAD(&allproc, cp, p_list);
	for (;;) {
		p = LIST_NEXT(cp, p_list);
		if (p == NULL)
			break;
		/* Step the cursor past p before possibly unlocking. */
		LIST_REMOVE(cp, p_list);
		LIST_INSERT_AFTER(p, cp, p_list);
		PROC_LOCK(p);
		if ((p->p_flag & P_TOTAL_STOP) != 0) {
			/* p stays locked, and is held, across the unlock. */
			sx_xunlock(&allproc_lock);
			_PHOLD(p);
			thread_single_end(p, SINGLE_ALLPROC);
			_PRELE(p);
			PROC_UNLOCK(p);
			sx_xlock(&allproc_lock);
		} else {
			PROC_UNLOCK(p);
		}
	}
	/* Did the loop above miss any stopped process? */
	LIST_FOREACH(p, &allproc, p_list) {
		/* No need for proc lock. */
		if ((p->p_flag & P_TOTAL_STOP) != 0)
			goto again;
	}
	sx_xunlock(&allproc_lock);
}
3122
/*
 * Debugging aid, compiled only when TOTAL_STOP_DEBUG is defined (enable
 * the define below by hand).
 */
/* #define	TOTAL_STOP_DEBUG	1 */
#ifdef TOTAL_STOP_DEBUG
/* Spin flag polled by the handler below while everything is stopped. */
volatile static int ap_resume;
#include <sys/mount.h>

/*
 * Handler for the debug.stop_all_proc sysctl.  Writing a non-zero value
 * stops all processes and suspends the syncer, busy-waits until
 * ap_resume becomes non-zero, then resumes the syncer and all processes.
 * Reads return 0.
 *
 * NOTE(review): nothing in the visible code sets ap_resume to non-zero;
 * presumably it is changed externally (e.g. from a debugger) -- confirm.
 */
static int
sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = 0;
	ap_resume = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	/* Nothing more to do on error or for a plain read. */
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (val != 0) {
		stop_all_proc();
		syncer_suspend();
		/* Busy-wait; ap_resume is volatile so it is re-read each time. */
		while (ap_resume == 0)
			;
		syncer_resume();
		resume_all_proc();
	}
	return (0);
}

SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc, CTLTYPE_INT | CTLFLAG_RW |
    CTLFLAG_MPSAFE, __DEVOLATILE(int *, &ap_resume), 0,
    sysctl_debug_stop_all_proc, "I",
    "");
#endif
3154