kern_proc.c revision 293473
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/kern/kern_proc.c 293473 2016-01-09 14:08:10Z dchagin $");
34
35#include "opt_compat.h"
36#include "opt_ddb.h"
37#include "opt_kdtrace.h"
38#include "opt_ktrace.h"
39#include "opt_kstack_pages.h"
40#include "opt_stack.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/elf.h>
45#include <sys/exec.h>
46#include <sys/kernel.h>
47#include <sys/limits.h>
48#include <sys/lock.h>
49#include <sys/loginclass.h>
50#include <sys/malloc.h>
51#include <sys/mman.h>
52#include <sys/mount.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/ptrace.h>
56#include <sys/refcount.h>
57#include <sys/resourcevar.h>
58#include <sys/rwlock.h>
59#include <sys/sbuf.h>
60#include <sys/sysent.h>
61#include <sys/sched.h>
62#include <sys/smp.h>
63#include <sys/stack.h>
64#include <sys/stat.h>
65#include <sys/sysctl.h>
66#include <sys/filedesc.h>
67#include <sys/tty.h>
68#include <sys/signalvar.h>
69#include <sys/sdt.h>
70#include <sys/sx.h>
71#include <sys/user.h>
72#include <sys/jail.h>
73#include <sys/vnode.h>
74#include <sys/eventhandler.h>
75
76#ifdef DDB
77#include <ddb/ddb.h>
78#endif
79
80#include <vm/vm.h>
81#include <vm/vm_param.h>
82#include <vm/vm_extern.h>
83#include <vm/pmap.h>
84#include <vm/vm_map.h>
85#include <vm/vm_object.h>
86#include <vm/vm_page.h>
87#include <vm/uma.h>
88
89#ifdef COMPAT_FREEBSD32
90#include <compat/freebsd32/freebsd32.h>
91#include <compat/freebsd32/freebsd32_util.h>
92#endif
93
/*
 * Statically defined tracing (SDT) provider "proc" and its probes.
 * There is one entry/return pair for each of the zone callbacks defined
 * later in this file (proc_ctor, proc_dtor, proc_init).  The string
 * arguments name the C type of each probe argument, in order.
 */
SDT_PROVIDER_DEFINE(proc);
SDT_PROBE_DEFINE4(proc, kernel, ctor, entry, "struct proc *", "int",
    "void *", "int");
SDT_PROBE_DEFINE4(proc, kernel, ctor, return, "struct proc *", "int",
    "void *", "int");
/* The dtor entry probe additionally carries the process' first thread. */
SDT_PROBE_DEFINE4(proc, kernel, dtor, entry, "struct proc *", "int",
    "void *", "struct thread *");
SDT_PROBE_DEFINE3(proc, kernel, dtor, return, "struct proc *", "int",
    "void *");
SDT_PROBE_DEFINE3(proc, kernel, init, entry, "struct proc *", "int",
    "int");
SDT_PROBE_DEFINE3(proc, kernel, init, return, "struct proc *", "int",
    "int");
107
/*
 * malloc types used for process-related allocations.  M_PROC is private
 * to this file (it backs the hash tables created in procinit()); the
 * other three are not declared static.
 */
MALLOC_DEFINE(M_PGRP, "pgrp", "process group header");
MALLOC_DEFINE(M_SESSION, "session", "session header");
static MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
112
/* Prototypes for the functions that are private to this file. */
static void doenterpgrp(struct proc *, struct pgrp *);
static void orphanpg(struct pgrp *pg);
static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp);
static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp);
static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp,
    int preferthread);
static void pgadjustjobc(struct pgrp *pgrp, int entering);
static void pgdelete(struct pgrp *);
/* Callbacks handed to uma_zcreate() in procinit(). */
static int proc_ctor(void *mem, int size, void *arg, int flags);
static void proc_dtor(void *mem, int size, void *arg);
static int proc_init(void *mem, int size, int flags);
static void proc_fini(void *mem, int size);
static void pargs_free(struct pargs *pa);
static struct proc *zpfind_locked(pid_t pid);
127
/*
 * Other process lists
 *
 * Global process bookkeeping.  Everything here is set up by procinit().
 */
struct pidhashhead *pidhashtbl;		/* pid hash table (from hashinit()) */
u_long pidhash;				/* value stored by hashinit() for pidhashtbl */
struct pgrphashhead *pgrphashtbl;	/* pgrp hash table (from hashinit()) */
u_long pgrphash;			/* value stored by hashinit() for pgrphashtbl */
struct proclist allproc;		/* walked by FOREACH_PROC_IN_SYSTEM users */
struct proclist zombproc;		/* searched by zpfind_locked() */
struct sx allproc_lock;			/* asserted by the pid lookup helpers */
struct sx proctree_lock;		/* asserted by the pgrp/session code */
struct mtx ppeers_lock;			/* "p_peers" mutex */
uma_zone_t proc_zone;			/* "PROC" UMA zone for struct proc */
141
/* Read-only sysctl kern.kstack_pages, initialised from KSTACK_PAGES. */
int kstack_pages = KSTACK_PAGES;
SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0,
    "Kernel stack size in pages");
/* Read/write sysctl kern.proc_vmmap_skip_resident_count; off by default. */
static int vmmap_skip_res_cnt = 0;
SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW,
    &vmmap_skip_res_cnt, 0,
    "Skip calculation of the pages resident count in kern.proc.vmmap");

/*
 * Compile-time checks that the kinfo_proc structures have exactly the
 * sizes recorded in the KINFO_PROC*_SIZE constants.
 */
CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
#ifdef COMPAT_FREEBSD32
CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE);
#endif
154
155/*
156 * Initialize global process hashing structures.
157 */
158void
159procinit()
160{
161
162	sx_init(&allproc_lock, "allproc");
163	sx_init(&proctree_lock, "proctree");
164	mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF);
165	LIST_INIT(&allproc);
166	LIST_INIT(&zombproc);
167	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
168	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
169	proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
170	    proc_ctor, proc_dtor, proc_init, proc_fini,
171	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
172	uihashinit();
173}
174
175/*
176 * Prepare a proc for use.
177 */
178static int
179proc_ctor(void *mem, int size, void *arg, int flags)
180{
181	struct proc *p;
182
183	p = (struct proc *)mem;
184	SDT_PROBE4(proc, kernel, ctor , entry, p, size, arg, flags);
185	EVENTHANDLER_INVOKE(process_ctor, p);
186	SDT_PROBE4(proc, kernel, ctor , return, p, size, arg, flags);
187	return (0);
188}
189
190/*
191 * Reclaim a proc after use.
192 */
193static void
194proc_dtor(void *mem, int size, void *arg)
195{
196	struct proc *p;
197	struct thread *td;
198
199	/* INVARIANTS checks go here */
200	p = (struct proc *)mem;
201	td = FIRST_THREAD_IN_PROC(p);
202	SDT_PROBE4(proc, kernel, dtor, entry, p, size, arg, td);
203	if (td != NULL) {
204#ifdef INVARIANTS
205		KASSERT((p->p_numthreads == 1),
206		    ("bad number of threads in exiting process"));
207		KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr"));
208#endif
209		/* Free all OSD associated to this thread. */
210		osd_thread_exit(td);
211	}
212	EVENTHANDLER_INVOKE(process_dtor, p);
213	if (p->p_ksi != NULL)
214		KASSERT(! KSI_ONQ(p->p_ksi), ("SIGCHLD queue"));
215	SDT_PROBE3(proc, kernel, dtor, return, p, size, arg);
216}
217
218/*
219 * Initialize type-stable parts of a proc (when newly created).
220 */
221static int
222proc_init(void *mem, int size, int flags)
223{
224	struct proc *p;
225
226	p = (struct proc *)mem;
227	SDT_PROBE3(proc, kernel, init, entry, p, size, flags);
228	p->p_sched = (struct p_sched *)&p[1];
229	bzero(&p->p_mtx, sizeof(struct mtx));
230	mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
231	mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_RECURSE);
232	cv_init(&p->p_pwait, "ppwait");
233	cv_init(&p->p_dbgwait, "dbgwait");
234	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
235	EVENTHANDLER_INVOKE(process_init, p);
236	p->p_stats = pstats_alloc();
237	p->p_pgrp = NULL;
238	SDT_PROBE3(proc, kernel, init, return, p, size, flags);
239	return (0);
240}
241
/*
 * Zone fini callback.  UMA should never call this: giving a proc
 * structure back would break type stability, so the live build simply
 * panics.  The teardown sequence is retained under "notnow" for
 * reference only.
 */
static void
proc_fini(void *mem, int size)
{
#ifndef notnow
	panic("proc reclaimed");
#else
	struct proc *p;

	p = (struct proc *)mem;
	EVENTHANDLER_INVOKE(process_fini, p);
	pstats_free(p->p_stats);
	thread_free(FIRST_THREAD_IN_PROC(p));
	mtx_destroy(&p->p_mtx);
	if (p->p_ksi != NULL)
		ksiginfo_free(p->p_ksi);
#endif
}
263
264/*
265 * Is p an inferior of the current process?
266 */
267int
268inferior(struct proc *p)
269{
270
271	sx_assert(&proctree_lock, SX_LOCKED);
272	PROC_LOCK_ASSERT(p, MA_OWNED);
273	for (; p != curproc; p = proc_realparent(p)) {
274		if (p->p_pid == 0)
275			return (0);
276	}
277	return (1);
278}
279
280struct proc *
281pfind_locked(pid_t pid)
282{
283	struct proc *p;
284
285	sx_assert(&allproc_lock, SX_LOCKED);
286	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
287		if (p->p_pid == pid) {
288			PROC_LOCK(p);
289			if (p->p_state == PRS_NEW) {
290				PROC_UNLOCK(p);
291				p = NULL;
292			}
293			break;
294		}
295	}
296	return (p);
297}
298
299/*
300 * Locate a process by number; return only "live" processes -- i.e., neither
301 * zombies nor newly born but incompletely initialized processes.  By not
302 * returning processes in the PRS_NEW state, we allow callers to avoid
303 * testing for that condition to avoid dereferencing p_ucred, et al.
304 */
305struct proc *
306pfind(pid_t pid)
307{
308	struct proc *p;
309
310	sx_slock(&allproc_lock);
311	p = pfind_locked(pid);
312	sx_sunlock(&allproc_lock);
313	return (p);
314}
315
316static struct proc *
317pfind_tid_locked(pid_t tid)
318{
319	struct proc *p;
320	struct thread *td;
321
322	sx_assert(&allproc_lock, SX_LOCKED);
323	FOREACH_PROC_IN_SYSTEM(p) {
324		PROC_LOCK(p);
325		if (p->p_state == PRS_NEW) {
326			PROC_UNLOCK(p);
327			continue;
328		}
329		FOREACH_THREAD_IN_PROC(p, td) {
330			if (td->td_tid == tid)
331				goto found;
332		}
333		PROC_UNLOCK(p);
334	}
335found:
336	return (p);
337}
338
339/*
340 * Locate a process group by number.
341 * The caller must hold proctree_lock.
342 */
343struct pgrp *
344pgfind(pgid)
345	register pid_t pgid;
346{
347	register struct pgrp *pgrp;
348
349	sx_assert(&proctree_lock, SX_LOCKED);
350
351	LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) {
352		if (pgrp->pg_id == pgid) {
353			PGRP_LOCK(pgrp);
354			return (pgrp);
355		}
356	}
357	return (NULL);
358}
359
360/*
361 * Locate process and do additional manipulations, depending on flags.
362 */
363int
364pget(pid_t pid, int flags, struct proc **pp)
365{
366	struct proc *p;
367	int error;
368
369	sx_slock(&allproc_lock);
370	if (pid <= PID_MAX) {
371		p = pfind_locked(pid);
372		if (p == NULL && (flags & PGET_NOTWEXIT) == 0)
373			p = zpfind_locked(pid);
374	} else if ((flags & PGET_NOTID) == 0) {
375		p = pfind_tid_locked(pid);
376	} else {
377		p = NULL;
378	}
379	sx_sunlock(&allproc_lock);
380	if (p == NULL)
381		return (ESRCH);
382	if ((flags & PGET_CANSEE) != 0) {
383		error = p_cansee(curthread, p);
384		if (error != 0)
385			goto errout;
386	}
387	if ((flags & PGET_CANDEBUG) != 0) {
388		error = p_candebug(curthread, p);
389		if (error != 0)
390			goto errout;
391	}
392	if ((flags & PGET_ISCURRENT) != 0 && curproc != p) {
393		error = EPERM;
394		goto errout;
395	}
396	if ((flags & PGET_NOTWEXIT) != 0 && (p->p_flag & P_WEXIT) != 0) {
397		error = ESRCH;
398		goto errout;
399	}
400	if ((flags & PGET_NOTINEXEC) != 0 && (p->p_flag & P_INEXEC) != 0) {
401		/*
402		 * XXXRW: Not clear ESRCH is the right error during proc
403		 * execve().
404		 */
405		error = ESRCH;
406		goto errout;
407	}
408	if ((flags & PGET_HOLD) != 0) {
409		_PHOLD(p);
410		PROC_UNLOCK(p);
411	}
412	*pp = p;
413	return (0);
414errout:
415	PROC_UNLOCK(p);
416	return (error);
417}
418
419/*
420 * Create a new process group.
421 * pgid must be equal to the pid of p.
422 * Begin a new session if required.
423 */
424int
425enterpgrp(p, pgid, pgrp, sess)
426	register struct proc *p;
427	pid_t pgid;
428	struct pgrp *pgrp;
429	struct session *sess;
430{
431
432	sx_assert(&proctree_lock, SX_XLOCKED);
433
434	KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL"));
435	KASSERT(p->p_pid == pgid,
436	    ("enterpgrp: new pgrp and pid != pgid"));
437	KASSERT(pgfind(pgid) == NULL,
438	    ("enterpgrp: pgrp with pgid exists"));
439	KASSERT(!SESS_LEADER(p),
440	    ("enterpgrp: session leader attempted setpgrp"));
441
442	mtx_init(&pgrp->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
443
444	if (sess != NULL) {
445		/*
446		 * new session
447		 */
448		mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF);
449		PROC_LOCK(p);
450		p->p_flag &= ~P_CONTROLT;
451		PROC_UNLOCK(p);
452		PGRP_LOCK(pgrp);
453		sess->s_leader = p;
454		sess->s_sid = p->p_pid;
455		refcount_init(&sess->s_count, 1);
456		sess->s_ttyvp = NULL;
457		sess->s_ttydp = NULL;
458		sess->s_ttyp = NULL;
459		bcopy(p->p_session->s_login, sess->s_login,
460			    sizeof(sess->s_login));
461		pgrp->pg_session = sess;
462		KASSERT(p == curproc,
463		    ("enterpgrp: mksession and p != curproc"));
464	} else {
465		pgrp->pg_session = p->p_session;
466		sess_hold(pgrp->pg_session);
467		PGRP_LOCK(pgrp);
468	}
469	pgrp->pg_id = pgid;
470	LIST_INIT(&pgrp->pg_members);
471
472	/*
473	 * As we have an exclusive lock of proctree_lock,
474	 * this should not deadlock.
475	 */
476	LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
477	pgrp->pg_jobc = 0;
478	SLIST_INIT(&pgrp->pg_sigiolst);
479	PGRP_UNLOCK(pgrp);
480
481	doenterpgrp(p, pgrp);
482
483	return (0);
484}
485
486/*
487 * Move p to an existing process group
488 */
489int
490enterthispgrp(p, pgrp)
491	register struct proc *p;
492	struct pgrp *pgrp;
493{
494
495	sx_assert(&proctree_lock, SX_XLOCKED);
496	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
497	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
498	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
499	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
500	KASSERT(pgrp->pg_session == p->p_session,
501		("%s: pgrp's session %p, p->p_session %p.\n",
502		__func__,
503		pgrp->pg_session,
504		p->p_session));
505	KASSERT(pgrp != p->p_pgrp,
506		("%s: p belongs to pgrp.", __func__));
507
508	doenterpgrp(p, pgrp);
509
510	return (0);
511}
512
513/*
514 * Move p to a process group
515 */
516static void
517doenterpgrp(p, pgrp)
518	struct proc *p;
519	struct pgrp *pgrp;
520{
521	struct pgrp *savepgrp;
522
523	sx_assert(&proctree_lock, SX_XLOCKED);
524	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
525	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
526	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
527	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
528
529	savepgrp = p->p_pgrp;
530
531	/*
532	 * Adjust eligibility of affected pgrps to participate in job control.
533	 * Increment eligibility counts before decrementing, otherwise we
534	 * could reach 0 spuriously during the first call.
535	 */
536	fixjobc(p, pgrp, 1);
537	fixjobc(p, p->p_pgrp, 0);
538
539	PGRP_LOCK(pgrp);
540	PGRP_LOCK(savepgrp);
541	PROC_LOCK(p);
542	LIST_REMOVE(p, p_pglist);
543	p->p_pgrp = pgrp;
544	PROC_UNLOCK(p);
545	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
546	PGRP_UNLOCK(savepgrp);
547	PGRP_UNLOCK(pgrp);
548	if (LIST_EMPTY(&savepgrp->pg_members))
549		pgdelete(savepgrp);
550}
551
552/*
553 * remove process from process group
554 */
555int
556leavepgrp(p)
557	register struct proc *p;
558{
559	struct pgrp *savepgrp;
560
561	sx_assert(&proctree_lock, SX_XLOCKED);
562	savepgrp = p->p_pgrp;
563	PGRP_LOCK(savepgrp);
564	PROC_LOCK(p);
565	LIST_REMOVE(p, p_pglist);
566	p->p_pgrp = NULL;
567	PROC_UNLOCK(p);
568	PGRP_UNLOCK(savepgrp);
569	if (LIST_EMPTY(&savepgrp->pg_members))
570		pgdelete(savepgrp);
571	return (0);
572}
573
574/*
575 * delete a process group
576 */
577static void
578pgdelete(pgrp)
579	register struct pgrp *pgrp;
580{
581	struct session *savesess;
582	struct tty *tp;
583
584	sx_assert(&proctree_lock, SX_XLOCKED);
585	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
586	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
587
588	/*
589	 * Reset any sigio structures pointing to us as a result of
590	 * F_SETOWN with our pgid.
591	 */
592	funsetownlst(&pgrp->pg_sigiolst);
593
594	PGRP_LOCK(pgrp);
595	tp = pgrp->pg_session->s_ttyp;
596	LIST_REMOVE(pgrp, pg_hash);
597	savesess = pgrp->pg_session;
598	PGRP_UNLOCK(pgrp);
599
600	/* Remove the reference to the pgrp before deallocating it. */
601	if (tp != NULL) {
602		tty_lock(tp);
603		tty_rel_pgrp(tp, pgrp);
604	}
605
606	mtx_destroy(&pgrp->pg_mtx);
607	free(pgrp, M_PGRP);
608	sess_release(savesess);
609}
610
611static void
612pgadjustjobc(pgrp, entering)
613	struct pgrp *pgrp;
614	int entering;
615{
616
617	PGRP_LOCK(pgrp);
618	if (entering)
619		pgrp->pg_jobc++;
620	else {
621		--pgrp->pg_jobc;
622		if (pgrp->pg_jobc == 0)
623			orphanpg(pgrp);
624	}
625	PGRP_UNLOCK(pgrp);
626}
627
628/*
629 * Adjust pgrp jobc counters when specified process changes process group.
630 * We count the number of processes in each process group that "qualify"
631 * the group for terminal job control (those with a parent in a different
632 * process group of the same session).  If that count reaches zero, the
633 * process group becomes orphaned.  Check both the specified process'
634 * process group and that of its children.
635 * entering == 0 => p is leaving specified group.
636 * entering == 1 => p is entering specified group.
637 */
638void
639fixjobc(p, pgrp, entering)
640	register struct proc *p;
641	register struct pgrp *pgrp;
642	int entering;
643{
644	register struct pgrp *hispgrp;
645	register struct session *mysession;
646
647	sx_assert(&proctree_lock, SX_LOCKED);
648	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
649	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
650	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
651
652	/*
653	 * Check p's parent to see whether p qualifies its own process
654	 * group; if so, adjust count for p's process group.
655	 */
656	mysession = pgrp->pg_session;
657	if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
658	    hispgrp->pg_session == mysession)
659		pgadjustjobc(pgrp, entering);
660
661	/*
662	 * Check this process' children to see whether they qualify
663	 * their process groups; if so, adjust counts for children's
664	 * process groups.
665	 */
666	LIST_FOREACH(p, &p->p_children, p_sibling) {
667		hispgrp = p->p_pgrp;
668		if (hispgrp == pgrp ||
669		    hispgrp->pg_session != mysession)
670			continue;
671		PROC_LOCK(p);
672		if (p->p_state == PRS_ZOMBIE) {
673			PROC_UNLOCK(p);
674			continue;
675		}
676		PROC_UNLOCK(p);
677		pgadjustjobc(hispgrp, entering);
678	}
679}
680
681/*
682 * A process group has become orphaned;
683 * if there are any stopped processes in the group,
684 * hang-up all process in that group.
685 */
686static void
687orphanpg(pg)
688	struct pgrp *pg;
689{
690	register struct proc *p;
691
692	PGRP_LOCK_ASSERT(pg, MA_OWNED);
693
694	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
695		PROC_LOCK(p);
696		if (P_SHOULDSTOP(p) == P_STOPPED_SIG) {
697			PROC_UNLOCK(p);
698			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
699				PROC_LOCK(p);
700				kern_psignal(p, SIGHUP);
701				kern_psignal(p, SIGCONT);
702				PROC_UNLOCK(p);
703			}
704			return;
705		}
706		PROC_UNLOCK(p);
707	}
708}
709
/*
 * Take an additional reference on session s.
 */
void
sess_hold(struct session *s)
{

	refcount_acquire(&s->s_count);
}
716
717void
718sess_release(struct session *s)
719{
720
721	if (refcount_release(&s->s_count)) {
722		if (s->s_ttyp != NULL) {
723			tty_lock(s->s_ttyp);
724			tty_rel_sess(s->s_ttyp, s);
725		}
726		mtx_destroy(&s->s_mtx);
727		free(s, M_SESSION);
728	}
729}
730
731#ifdef DDB
732
733DB_SHOW_COMMAND(pgrpdump, pgrpdump)
734{
735	register struct pgrp *pgrp;
736	register struct proc *p;
737	register int i;
738
739	for (i = 0; i <= pgrphash; i++) {
740		if (!LIST_EMPTY(&pgrphashtbl[i])) {
741			printf("\tindx %d\n", i);
742			LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) {
743				printf(
744			"\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n",
745				    (void *)pgrp, (long)pgrp->pg_id,
746				    (void *)pgrp->pg_session,
747				    pgrp->pg_session->s_count,
748				    (void *)LIST_FIRST(&pgrp->pg_members));
749				LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
750					printf("\t\tpid %ld addr %p pgrp %p\n",
751					    (long)p->p_pid, (void *)p,
752					    (void *)p->p_pgrp);
753				}
754			}
755		}
756	}
757}
758#endif /* DDB */
759
760/*
761 * Calculate the kinfo_proc members which contain process-wide
762 * informations.
763 * Must be called with the target process locked.
764 */
765static void
766fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp)
767{
768	struct thread *td;
769
770	PROC_LOCK_ASSERT(p, MA_OWNED);
771
772	kp->ki_estcpu = 0;
773	kp->ki_pctcpu = 0;
774	FOREACH_THREAD_IN_PROC(p, td) {
775		thread_lock(td);
776		kp->ki_pctcpu += sched_pctcpu(td);
777		kp->ki_estcpu += td->td_estcpu;
778		thread_unlock(td);
779	}
780}
781
/*
 * Clear kinfo_proc and fill in any information that is common
 * to all threads in the process.
 * Must be called with the target process locked.
 *
 * The structure is zeroed first, so any field not assigned below is
 * left as zero.
 */
static void
fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
{
	struct thread *td0;
	struct tty *tp;
	struct session *sp;
	struct ucred *cred;
	struct sigacts *ps;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	bzero(kp, sizeof(*kp));

	/* Kernel addresses and flags copied straight from the proc. */
	kp->ki_structsize = sizeof(*kp);
	kp->ki_paddr = p;
	kp->ki_addr =/* p->p_addr; */0; /* XXX */
	kp->ki_args = p->p_args;
	kp->ki_textvp = p->p_textvp;
#ifdef KTRACE
	kp->ki_tracep = p->p_tracevp;
	kp->ki_traceflag = p->p_traceflag;
#endif
	kp->ki_fd = p->p_fd;
	kp->ki_vmspace = p->p_vmspace;
	kp->ki_flag = p->p_flag;
	kp->ki_flag2 = p->p_flag2;
	/* Credentials: ids, groups, jail and login class. */
	cred = p->p_ucred;
	if (cred) {
		kp->ki_uid = cred->cr_uid;
		kp->ki_ruid = cred->cr_ruid;
		kp->ki_svuid = cred->cr_svuid;
		kp->ki_cr_flags = 0;
		if (cred->cr_flags & CRED_FLAG_CAPMODE)
			kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE;
		/* XXX bde doesn't like KI_NGROUPS */
		/* Export at most KI_NGROUPS groups and flag the truncation. */
		if (cred->cr_ngroups > KI_NGROUPS) {
			kp->ki_ngroups = KI_NGROUPS;
			kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
		} else
			kp->ki_ngroups = cred->cr_ngroups;
		bcopy(cred->cr_groups, kp->ki_groups,
		    kp->ki_ngroups * sizeof(gid_t));
		kp->ki_rgid = cred->cr_rgid;
		kp->ki_svgid = cred->cr_svgid;
		/* If jailed(cred), emulate the old P_JAILED flag. */
		if (jailed(cred)) {
			kp->ki_flag |= P_JAILED;
			/* If inside the jail, use 0 as a jail ID. */
			if (cred->cr_prison != curthread->td_ucred->cr_prison)
				kp->ki_jid = cred->cr_prison->pr_id;
		}
		strlcpy(kp->ki_loginclass, cred->cr_loginclass->lc_name,
		    sizeof(kp->ki_loginclass));
	}
	/* Signal dispositions, sampled under the sigacts mutex. */
	ps = p->p_sigacts;
	if (ps) {
		mtx_lock(&ps->ps_mtx);
		kp->ki_sigignore = ps->ps_sigignore;
		kp->ki_sigcatch = ps->ps_sigcatch;
		mtx_unlock(&ps->ps_mtx);
	}
	/*
	 * Address space sizes are only reported for processes that are
	 * neither new nor zombies and that have a vmspace.
	 */
	if (p->p_state != PRS_NEW &&
	    p->p_state != PRS_ZOMBIE &&
	    p->p_vmspace != NULL) {
		struct vmspace *vm = p->p_vmspace;

		kp->ki_size = vm->vm_map.size;
		kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/
		/* Add the kernel stack pages of threads not swapped out. */
		FOREACH_THREAD_IN_PROC(p, td0) {
			if (!TD_IS_SWAPPED(td0))
				kp->ki_rssize += td0->td_kstack_pages;
		}
		kp->ki_swrss = vm->vm_swrss;
		kp->ki_tsize = vm->vm_tsize;
		kp->ki_dsize = vm->vm_dsize;
		kp->ki_ssize = vm->vm_ssize;
	} else if (p->p_state == PRS_ZOMBIE)
		kp->ki_stat = SZOMB;
	if (kp->ki_flag & P_INMEM)
		kp->ki_sflag = PS_INMEM;
	else
		kp->ki_sflag = 0;
	/* Calculate legacy swtime as seconds since 'swtick'. */
	kp->ki_swtime = (ticks - p->p_swtick) / hz;
	kp->ki_pid = p->p_pid;
	kp->ki_nice = p->p_nice;
	kp->ki_fibnum = p->p_fibnum;
	/* p_start has boottime added to it before being exported. */
	kp->ki_start = p->p_stats->p_start;
	timevaladd(&kp->ki_start, &boottime);
	/* Resource usage and run time are sampled under the stat lock. */
	PROC_STATLOCK(p);
	rufetch(p, &kp->ki_rusage);
	kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
	calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime);
	PROC_STATUNLOCK(p);
	calccru(p, &kp->ki_childutime, &kp->ki_childstime);
	/* Some callers want child times in a single value. */
	kp->ki_childtime = kp->ki_childstime;
	timevaladd(&kp->ki_childtime, &kp->ki_childutime);

	/* ki_cow is the sum of td_cow over all threads. */
	FOREACH_THREAD_IN_PROC(p, td0)
		kp->ki_cow += td0->td_cow;

	/* Process group, session and controlling terminal. */
	tp = NULL;
	if (p->p_pgrp) {
		kp->ki_pgid = p->p_pgrp->pg_id;
		kp->ki_jobc = p->p_pgrp->pg_jobc;
		sp = p->p_pgrp->pg_session;

		if (sp != NULL) {
			kp->ki_sid = sp->s_sid;
			SESS_LOCK(sp);
			strlcpy(kp->ki_login, sp->s_login,
			    sizeof(kp->ki_login));
			if (sp->s_ttyvp)
				kp->ki_kiflag |= KI_CTTY;
			if (SESS_LEADER(p))
				kp->ki_kiflag |= KI_SLEADER;
			/* XXX proctree_lock */
			tp = sp->s_ttyp;
			SESS_UNLOCK(sp);
		}
	}
	/* Terminal details only if the process has a controlling tty. */
	if ((p->p_flag & P_CONTROLT) && tp != NULL) {
		kp->ki_tdev = tty_udev(tp);
		kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
		if (tp->t_session)
			kp->ki_tsid = tp->t_session->s_sid;
	} else
		kp->ki_tdev = NODEV;
	if (p->p_comm[0] != '\0')
		strlcpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm));
	if (p->p_sysent && p->p_sysent->sv_name != NULL &&
	    p->p_sysent->sv_name[0] != '\0')
		strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul));
	kp->ki_siglist = p->p_siglist;
	kp->ki_xstat = p->p_xstat;
	kp->ki_acflag = p->p_acflag;
	kp->ki_lock = p->p_lock;
	if (p->p_pptr)
		kp->ki_ppid = p->p_pptr->p_pid;
}
927
/*
 * Fill in information that is thread specific.  Must be called with
 * target process locked.  If 'preferthread' is set, overwrite certain
 * process-related fields that are maintained for both threads and
 * processes.
 *
 * The fields overwritten in that case are ki_rusage, ki_runtime,
 * ki_pctcpu, ki_estcpu, ki_cow and ki_siglist.
 */
static void
fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread)
{
	struct proc *p;

	p = td->td_proc;
	kp->ki_tdaddr = td;
	PROC_LOCK_ASSERT(p, MA_OWNED);

	/* The stat lock is only taken when per-thread usage is fetched. */
	if (preferthread)
		PROC_STATLOCK(p);
	thread_lock(td);
	if (td->td_wmesg != NULL)
		strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg));
	else
		bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg));
	strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname));
	/* Report the lock name only while the thread is blocked on a lock. */
	if (TD_ON_LOCK(td)) {
		kp->ki_kiflag |= KI_LOCKBLOCK;
		strlcpy(kp->ki_lockname, td->td_lockname,
		    sizeof(kp->ki_lockname));
	} else {
		kp->ki_kiflag &= ~KI_LOCKBLOCK;
		bzero(kp->ki_lockname, sizeof(kp->ki_lockname));
	}

	/* Derive ki_stat from the process state and this thread's state. */
	if (p->p_state == PRS_NORMAL) { /* approximate. */
		if (TD_ON_RUNQ(td) ||
		    TD_CAN_RUN(td) ||
		    TD_IS_RUNNING(td)) {
			kp->ki_stat = SRUN;
		} else if (P_SHOULDSTOP(p)) {
			kp->ki_stat = SSTOP;
		} else if (TD_IS_SLEEPING(td)) {
			kp->ki_stat = SSLEEP;
		} else if (TD_ON_LOCK(td)) {
			kp->ki_stat = SLOCK;
		} else {
			kp->ki_stat = SWAIT;
		}
	} else if (p->p_state == PRS_ZOMBIE) {
		kp->ki_stat = SZOMB;
	} else {
		kp->ki_stat = SIDL;
	}

	/* Things in the thread */
	kp->ki_wchan = td->td_wchan;
	kp->ki_pri.pri_level = td->td_priority;
	kp->ki_pri.pri_native = td->td_base_pri;
	kp->ki_lastcpu = td->td_lastcpu;
	kp->ki_oncpu = td->td_oncpu;
	kp->ki_tdflags = td->td_flags;
	kp->ki_tid = td->td_tid;
	kp->ki_numthreads = p->p_numthreads;
	kp->ki_pcb = td->td_pcb;
	kp->ki_kstack = (void *)td->td_kstack;
	/* Sleep time in seconds, derived from the tick counter. */
	kp->ki_slptime = (ticks - td->td_slptick) / hz;
	kp->ki_pri.pri_class = td->td_pri_class;
	kp->ki_pri.pri_user = td->td_user_pri;

	/* Replace the process-wide figures with this thread's own. */
	if (preferthread) {
		rufetchtd(td, &kp->ki_rusage);
		kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime);
		kp->ki_pctcpu = sched_pctcpu(td);
		kp->ki_estcpu = td->td_estcpu;
		kp->ki_cow = td->td_cow;
	}

	/* We can't get this anymore but ps etc never used it anyway. */
	kp->ki_rqindex = 0;

	if (preferthread)
		kp->ki_siglist = td->td_siglist;
	kp->ki_sigmask = td->td_sigmask;
	thread_unlock(td);
	if (preferthread)
		PROC_STATUNLOCK(p);
}
1013
1014/*
1015 * Fill in a kinfo_proc structure for the specified process.
1016 * Must be called with the target process locked.
1017 */
1018void
1019fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp)
1020{
1021
1022	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
1023
1024	fill_kinfo_proc_only(p, kp);
1025	fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp, 0);
1026	fill_kinfo_aggregate(p, kp);
1027}
1028
1029struct pstats *
1030pstats_alloc(void)
1031{
1032
1033	return (malloc(sizeof(struct pstats), M_SUBPROC, M_ZERO|M_WAITOK));
1034}
1035
/*
 * Copy parts of p_stats; zero the rest of p_stats (statistics).
 *
 * The region of dst between pstat_startzero and pstat_endzero is
 * cleared, and the region between pstat_startcopy and pstat_endcopy is
 * copied from src to dst.
 */
void
pstats_fork(struct pstats *src, struct pstats *dst)
{

	bzero(&dst->pstat_startzero,
	    __rangeof(struct pstats, pstat_startzero, pstat_endzero));
	bcopy(&src->pstat_startcopy, &dst->pstat_startcopy,
	    __rangeof(struct pstats, pstat_startcopy, pstat_endcopy));
}
1048
/*
 * Release a struct pstats back to the M_SUBPROC malloc type.
 */
void
pstats_free(struct pstats *ps)
{

	free(ps, M_SUBPROC);
}
1055
1056static struct proc *
1057zpfind_locked(pid_t pid)
1058{
1059	struct proc *p;
1060
1061	sx_assert(&allproc_lock, SX_LOCKED);
1062	LIST_FOREACH(p, &zombproc, p_list) {
1063		if (p->p_pid == pid) {
1064			PROC_LOCK(p);
1065			break;
1066		}
1067	}
1068	return (p);
1069}
1070
1071/*
1072 * Locate a zombie process by number
1073 */
1074struct proc *
1075zpfind(pid_t pid)
1076{
1077	struct proc *p;
1078
1079	sx_slock(&allproc_lock);
1080	p = zpfind_locked(pid);
1081	sx_sunlock(&allproc_lock);
1082	return (p);
1083}
1084
1085#ifdef COMPAT_FREEBSD32
1086
/*
 * Squeeze a pointer into 32 bits for the 32-bit compat structures.
 * A pointer whose value fits in 32 bits is passed through unchanged;
 * anything larger is replaced by zero.  This is typically applied to
 * kernel addresses being copied out, where losing the value is
 * acceptable.
 */
static inline uint32_t
ptr32_trim(void *ptr)
{
	uintptr_t addr = (uintptr_t)ptr;

	if (addr > UINT_MAX)
		return (0);
	return ((uint32_t)addr);
}
1099
/*
 * Copy pointer field "fld" from src to dst, passing it through
 * ptr32_trim() so that values that do not fit in 32 bits become zero.
 */
#define PTRTRIM_CP(src,dst,fld) \
	do { (dst).fld = ptr32_trim((src).fld); } while (0)
1102
/*
 * Convert a native kinfo_proc into the 32-bit compat layout.
 *
 * ki32 is zeroed first, so any field not copied below stays zero.
 * Scalar fields are copied with CP(), timevals with TV_CP(), pointer
 * fields with PTRTRIM_CP(), strings with bcopy() of the full buffer,
 * and the rusage structures with freebsd32_rusage_out().
 */
static void
freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32)
{
	int i;

	bzero(ki32, sizeof(struct kinfo_proc32));
	/* The size reported is that of the 32-bit structure. */
	ki32->ki_structsize = sizeof(struct kinfo_proc32);
	CP(*ki, *ki32, ki_layout);
	PTRTRIM_CP(*ki, *ki32, ki_args);
	PTRTRIM_CP(*ki, *ki32, ki_paddr);
	PTRTRIM_CP(*ki, *ki32, ki_addr);
	PTRTRIM_CP(*ki, *ki32, ki_tracep);
	PTRTRIM_CP(*ki, *ki32, ki_textvp);
	PTRTRIM_CP(*ki, *ki32, ki_fd);
	PTRTRIM_CP(*ki, *ki32, ki_vmspace);
	PTRTRIM_CP(*ki, *ki32, ki_wchan);
	CP(*ki, *ki32, ki_pid);
	CP(*ki, *ki32, ki_ppid);
	CP(*ki, *ki32, ki_pgid);
	CP(*ki, *ki32, ki_tpgid);
	CP(*ki, *ki32, ki_sid);
	CP(*ki, *ki32, ki_tsid);
	CP(*ki, *ki32, ki_jobc);
	CP(*ki, *ki32, ki_tdev);
	CP(*ki, *ki32, ki_siglist);
	CP(*ki, *ki32, ki_sigmask);
	CP(*ki, *ki32, ki_sigignore);
	CP(*ki, *ki32, ki_sigcatch);
	CP(*ki, *ki32, ki_uid);
	CP(*ki, *ki32, ki_ruid);
	CP(*ki, *ki32, ki_svuid);
	CP(*ki, *ki32, ki_rgid);
	CP(*ki, *ki32, ki_svgid);
	CP(*ki, *ki32, ki_ngroups);
	/* All KI_NGROUPS slots are copied, regardless of ki_ngroups. */
	for (i = 0; i < KI_NGROUPS; i++)
		CP(*ki, *ki32, ki_groups[i]);
	CP(*ki, *ki32, ki_size);
	CP(*ki, *ki32, ki_rssize);
	CP(*ki, *ki32, ki_swrss);
	CP(*ki, *ki32, ki_tsize);
	CP(*ki, *ki32, ki_dsize);
	CP(*ki, *ki32, ki_ssize);
	CP(*ki, *ki32, ki_xstat);
	CP(*ki, *ki32, ki_acflag);
	CP(*ki, *ki32, ki_pctcpu);
	CP(*ki, *ki32, ki_estcpu);
	CP(*ki, *ki32, ki_slptime);
	CP(*ki, *ki32, ki_swtime);
	CP(*ki, *ki32, ki_cow);
	CP(*ki, *ki32, ki_runtime);
	TV_CP(*ki, *ki32, ki_start);
	TV_CP(*ki, *ki32, ki_childtime);
	CP(*ki, *ki32, ki_flag);
	CP(*ki, *ki32, ki_kiflag);
	CP(*ki, *ki32, ki_traceflag);
	CP(*ki, *ki32, ki_stat);
	CP(*ki, *ki32, ki_nice);
	CP(*ki, *ki32, ki_lock);
	CP(*ki, *ki32, ki_rqindex);
	CP(*ki, *ki32, ki_oncpu);
	CP(*ki, *ki32, ki_lastcpu);
	/* Fixed-size name buffers, copied whole. */
	bcopy(ki->ki_tdname, ki32->ki_tdname, TDNAMLEN + 1);
	bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1);
	bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1);
	bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1);
	bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1);
	bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1);
	bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1);
	CP(*ki, *ki32, ki_flag2);
	CP(*ki, *ki32, ki_fibnum);
	CP(*ki, *ki32, ki_cr_flags);
	CP(*ki, *ki32, ki_jid);
	CP(*ki, *ki32, ki_numthreads);
	CP(*ki, *ki32, ki_tid);
	CP(*ki, *ki32, ki_pri);
	freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage);
	freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch);
	PTRTRIM_CP(*ki, *ki32, ki_pcb);
	PTRTRIM_CP(*ki, *ki32, ki_kstack);
	PTRTRIM_CP(*ki, *ki32, ki_udata);
	CP(*ki, *ki32, ki_sflag);
	CP(*ki, *ki32, ki_tdflags);
}
1186#endif
1187
1188int
1189kern_proc_out(struct proc *p, struct sbuf *sb, int flags)
1190{
1191	struct thread *td;
1192	struct kinfo_proc ki;
1193#ifdef COMPAT_FREEBSD32
1194	struct kinfo_proc32 ki32;
1195#endif
1196	int error;
1197
1198	PROC_LOCK_ASSERT(p, MA_OWNED);
1199	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
1200
1201	error = 0;
1202	fill_kinfo_proc(p, &ki);
1203	if ((flags & KERN_PROC_NOTHREADS) != 0) {
1204#ifdef COMPAT_FREEBSD32
1205		if ((flags & KERN_PROC_MASK32) != 0) {
1206			freebsd32_kinfo_proc_out(&ki, &ki32);
1207			if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
1208				error = ENOMEM;
1209		} else
1210#endif
1211			if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
1212				error = ENOMEM;
1213	} else {
1214		FOREACH_THREAD_IN_PROC(p, td) {
1215			fill_kinfo_thread(td, &ki, 1);
1216#ifdef COMPAT_FREEBSD32
1217			if ((flags & KERN_PROC_MASK32) != 0) {
1218				freebsd32_kinfo_proc_out(&ki, &ki32);
1219				if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
1220					error = ENOMEM;
1221			} else
1222#endif
1223				if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
1224					error = ENOMEM;
1225			if (error != 0)
1226				break;
1227		}
1228	}
1229	PROC_UNLOCK(p);
1230	return (error);
1231}
1232
1233static int
1234sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags,
1235    int doingzomb)
1236{
1237	struct sbuf sb;
1238	struct kinfo_proc ki;
1239	struct proc *np;
1240	int error, error2;
1241	pid_t pid;
1242
1243	pid = p->p_pid;
1244	sbuf_new_for_sysctl(&sb, (char *)&ki, sizeof(ki), req);
1245	error = kern_proc_out(p, &sb, flags);
1246	error2 = sbuf_finish(&sb);
1247	sbuf_delete(&sb);
1248	if (error != 0)
1249		return (error);
1250	else if (error2 != 0)
1251		return (error2);
1252	if (doingzomb)
1253		np = zpfind(pid);
1254	else {
1255		if (pid == 0)
1256			return (0);
1257		np = pfind(pid);
1258	}
1259	if (np == NULL)
1260		return (ESRCH);
1261	if (np != p) {
1262		PROC_UNLOCK(np);
1263		return (ESRCH);
1264	}
1265	PROC_UNLOCK(np);
1266	return (0);
1267}
1268
/*
 * Sysctl handler that reports kinfo_proc records, either for a single pid
 * (KERN_PROC_PID) or for every process on the allproc and zombproc lists
 * that passes the filter selected by the oid number.  Returns 0 or an
 * errno value.
 */
static int
sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *)arg1;
	u_int namelen = arg2;
	struct proc *p;
	int flags, doingzomb, oid_number;
	int error = 0;

	/*
	 * Per-thread output is produced for KERN_PROC_ALL and for any oid
	 * carrying the KERN_PROC_INC_THREAD bit; that bit is stripped so the
	 * comparisons below see the base selector.
	 */
	oid_number = oidp->oid_number;
	if (oid_number != KERN_PROC_ALL &&
	    (oid_number & KERN_PROC_INC_THREAD) == 0)
		flags = KERN_PROC_NOTHREADS;
	else {
		flags = 0;
		oid_number &= ~KERN_PROC_INC_THREAD;
	}
#ifdef COMPAT_FREEBSD32
	if (req->flags & SCTL_MASK32)
		flags |= KERN_PROC_MASK32;
#endif
	/* Single-pid request: look the process up directly, no list walk. */
	if (oid_number == KERN_PROC_PID) {
		if (namelen != 1)
			return (EINVAL);
		error = sysctl_wire_old_buffer(req, 0);
		if (error)
			return (error);
		error = pget((pid_t)name[0], PGET_CANSEE, &p);
		if (error != 0)
			return (error);
		error = sysctl_out_proc(p, req, flags, 0);
		return (error);
	}

	/* Validate the number of name components for the other selectors. */
	switch (oid_number) {
	case KERN_PROC_ALL:
		if (namelen != 0)
			return (EINVAL);
		break;
	case KERN_PROC_PROC:
		if (namelen != 0 && namelen != 1)
			return (EINVAL);
		break;
	default:
		if (namelen != 1)
			return (EINVAL);
		break;
	}

	if (!req->oldptr) {
		/* overestimate by 5 procs */
		error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5);
		if (error)
			return (error);
	}
	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sx_slock(&allproc_lock);
	/* Pass 0 walks allproc, pass 1 walks zombproc. */
	for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) {
		if (!doingzomb)
			p = LIST_FIRST(&allproc);
		else
			p = LIST_FIRST(&zombproc);
		for (; p != 0; p = LIST_NEXT(p, p_list)) {
			/*
			 * Skip embryonic processes.
			 */
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW) {
				PROC_UNLOCK(p);
				continue;
			}
			KASSERT(p->p_ucred != NULL,
			    ("process credential is NULL for non-NEW proc"));
			/*
			 * Show a user only appropriate processes.
			 */
			if (p_cansee(curthread, p)) {
				PROC_UNLOCK(p);
				continue;
			}
			/*
			 * TODO - make more efficient (see notes below).
			 * do by session.
			 */
			/*
			 * Every rejecting case below unlocks p before
			 * continuing; an accepted process stays locked for
			 * sysctl_out_proc().
			 */
			switch (oid_number) {

			case KERN_PROC_GID:
				if (p->p_ucred->cr_gid != (gid_t)name[0]) {
					PROC_UNLOCK(p);
					continue;
				}
				break;

			case KERN_PROC_PGRP:
				/* could do this by traversing pgrp */
				if (p->p_pgrp == NULL ||
				    p->p_pgrp->pg_id != (pid_t)name[0]) {
					PROC_UNLOCK(p);
					continue;
				}
				break;

			case KERN_PROC_RGID:
				if (p->p_ucred->cr_rgid != (gid_t)name[0]) {
					PROC_UNLOCK(p);
					continue;
				}
				break;

			case KERN_PROC_SESSION:
				if (p->p_session == NULL ||
				    p->p_session->s_sid != (pid_t)name[0]) {
					PROC_UNLOCK(p);
					continue;
				}
				break;

			case KERN_PROC_TTY:
				if ((p->p_flag & P_CONTROLT) == 0 ||
				    p->p_session == NULL) {
					PROC_UNLOCK(p);
					continue;
				}
				/* XXX proctree_lock */
				SESS_LOCK(p->p_session);
				if (p->p_session->s_ttyp == NULL ||
				    tty_udev(p->p_session->s_ttyp) !=
				    (dev_t)name[0]) {
					SESS_UNLOCK(p->p_session);
					PROC_UNLOCK(p);
					continue;
				}
				SESS_UNLOCK(p->p_session);
				break;

			case KERN_PROC_UID:
				if (p->p_ucred->cr_uid != (uid_t)name[0]) {
					PROC_UNLOCK(p);
					continue;
				}
				break;

			case KERN_PROC_RUID:
				if (p->p_ucred->cr_ruid != (uid_t)name[0]) {
					PROC_UNLOCK(p);
					continue;
				}
				break;

			case KERN_PROC_PROC:
				break;

			default:
				break;

			}

			error = sysctl_out_proc(p, req, flags, doingzomb);
			if (error) {
				sx_sunlock(&allproc_lock);
				return (error);
			}
		}
	}
	sx_sunlock(&allproc_lock);
	return (0);
}
1438
1439struct pargs *
1440pargs_alloc(int len)
1441{
1442	struct pargs *pa;
1443
1444	pa = malloc(sizeof(struct pargs) + len, M_PARGS,
1445		M_WAITOK);
1446	refcount_init(&pa->ar_ref, 1);
1447	pa->ar_length = len;
1448	return (pa);
1449}
1450
/*
 * Release the memory of a pargs structure back to M_PARGS.  The reference
 * count is not consulted here.
 */
static void
pargs_free(struct pargs *pa)
{

	free(pa, M_PARGS);
}
1457
1458void
1459pargs_hold(struct pargs *pa)
1460{
1461
1462	if (pa == NULL)
1463		return;
1464	refcount_acquire(&pa->ar_ref);
1465}
1466
1467void
1468pargs_drop(struct pargs *pa)
1469{
1470
1471	if (pa == NULL)
1472		return;
1473	if (refcount_release(&pa->ar_ref))
1474		pargs_free(pa);
1475}
1476
1477static int
1478proc_read_mem(struct thread *td, struct proc *p, vm_offset_t offset, void* buf,
1479    size_t len)
1480{
1481	struct iovec iov;
1482	struct uio uio;
1483
1484	iov.iov_base = (caddr_t)buf;
1485	iov.iov_len = len;
1486	uio.uio_iov = &iov;
1487	uio.uio_iovcnt = 1;
1488	uio.uio_offset = offset;
1489	uio.uio_resid = (ssize_t)len;
1490	uio.uio_segflg = UIO_SYSSPACE;
1491	uio.uio_rw = UIO_READ;
1492	uio.uio_td = td;
1493
1494	return (proc_rwmem(p, &uio));
1495}
1496
1497static int
1498proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf,
1499    size_t len)
1500{
1501	size_t i;
1502	int error;
1503
1504	error = proc_read_mem(td, p, (vm_offset_t)sptr, buf, len);
1505	/*
1506	 * Reading the chunk may validly return EFAULT if the string is shorter
1507	 * than the chunk and is aligned at the end of the page, assuming the
1508	 * next page is not mapped.  So if EFAULT is returned do a fallback to
1509	 * one byte read loop.
1510	 */
1511	if (error == EFAULT) {
1512		for (i = 0; i < len; i++, buf++, sptr++) {
1513			error = proc_read_mem(td, p, (vm_offset_t)sptr, buf, 1);
1514			if (error != 0)
1515				return (error);
1516			if (*buf == '\0')
1517				break;
1518		}
1519		error = 0;
1520	}
1521	return (error);
1522}
1523
#define PROC_AUXV_MAX	256	/* Safety limit on auxv size. */

/* Selects which per-process vector the get_proc_vector*() helpers fetch. */
enum proc_vector_type {
	PROC_ARG,	/* argument vector (ps_argvstr / ps_nargvstr) */
	PROC_ENV,	/* environment vector (ps_envstr / ps_nenvstr) */
	PROC_AUX,	/* auxiliary vector located after the env array */
};
1531
1532#ifdef COMPAT_FREEBSD32
1533static int
1534get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp,
1535    size_t *vsizep, enum proc_vector_type type)
1536{
1537	struct freebsd32_ps_strings pss;
1538	Elf32_Auxinfo aux;
1539	vm_offset_t vptr, ptr;
1540	uint32_t *proc_vector32;
1541	char **proc_vector;
1542	size_t vsize, size;
1543	int i, error;
1544
1545	error = proc_read_mem(td, p, (vm_offset_t)(p->p_sysent->sv_psstrings),
1546	    &pss, sizeof(pss));
1547	if (error != 0)
1548		return (error);
1549	switch (type) {
1550	case PROC_ARG:
1551		vptr = (vm_offset_t)PTRIN(pss.ps_argvstr);
1552		vsize = pss.ps_nargvstr;
1553		if (vsize > ARG_MAX)
1554			return (ENOEXEC);
1555		size = vsize * sizeof(int32_t);
1556		break;
1557	case PROC_ENV:
1558		vptr = (vm_offset_t)PTRIN(pss.ps_envstr);
1559		vsize = pss.ps_nenvstr;
1560		if (vsize > ARG_MAX)
1561			return (ENOEXEC);
1562		size = vsize * sizeof(int32_t);
1563		break;
1564	case PROC_AUX:
1565		vptr = (vm_offset_t)PTRIN(pss.ps_envstr) +
1566		    (pss.ps_nenvstr + 1) * sizeof(int32_t);
1567		if (vptr % 4 != 0)
1568			return (ENOEXEC);
1569		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
1570			error = proc_read_mem(td, p, ptr, &aux, sizeof(aux));
1571			if (error != 0)
1572				return (error);
1573			if (aux.a_type == AT_NULL)
1574				break;
1575			ptr += sizeof(aux);
1576		}
1577		if (aux.a_type != AT_NULL)
1578			return (ENOEXEC);
1579		vsize = i + 1;
1580		size = vsize * sizeof(aux);
1581		break;
1582	default:
1583		KASSERT(0, ("Wrong proc vector type: %d", type));
1584		return (EINVAL);
1585	}
1586	proc_vector32 = malloc(size, M_TEMP, M_WAITOK);
1587	error = proc_read_mem(td, p, vptr, proc_vector32, size);
1588	if (error != 0)
1589		goto done;
1590	if (type == PROC_AUX) {
1591		*proc_vectorp = (char **)proc_vector32;
1592		*vsizep = vsize;
1593		return (0);
1594	}
1595	proc_vector = malloc(vsize * sizeof(char *), M_TEMP, M_WAITOK);
1596	for (i = 0; i < (int)vsize; i++)
1597		proc_vector[i] = PTRIN(proc_vector32[i]);
1598	*proc_vectorp = proc_vector;
1599	*vsizep = vsize;
1600done:
1601	free(proc_vector32, M_TEMP);
1602	return (error);
1603}
1604#endif
1605
1606static int
1607get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp,
1608    size_t *vsizep, enum proc_vector_type type)
1609{
1610	struct ps_strings pss;
1611	Elf_Auxinfo aux;
1612	vm_offset_t vptr, ptr;
1613	char **proc_vector;
1614	size_t vsize, size;
1615	int error, i;
1616
1617#ifdef COMPAT_FREEBSD32
1618	if (SV_PROC_FLAG(p, SV_ILP32) != 0)
1619		return (get_proc_vector32(td, p, proc_vectorp, vsizep, type));
1620#endif
1621	error = proc_read_mem(td, p, (vm_offset_t)(p->p_sysent->sv_psstrings),
1622	    &pss, sizeof(pss));
1623	if (error != 0)
1624		return (error);
1625	switch (type) {
1626	case PROC_ARG:
1627		vptr = (vm_offset_t)pss.ps_argvstr;
1628		vsize = pss.ps_nargvstr;
1629		if (vsize > ARG_MAX)
1630			return (ENOEXEC);
1631		size = vsize * sizeof(char *);
1632		break;
1633	case PROC_ENV:
1634		vptr = (vm_offset_t)pss.ps_envstr;
1635		vsize = pss.ps_nenvstr;
1636		if (vsize > ARG_MAX)
1637			return (ENOEXEC);
1638		size = vsize * sizeof(char *);
1639		break;
1640	case PROC_AUX:
1641		/*
1642		 * The aux array is just above env array on the stack. Check
1643		 * that the address is naturally aligned.
1644		 */
1645		vptr = (vm_offset_t)pss.ps_envstr + (pss.ps_nenvstr + 1)
1646		    * sizeof(char *);
1647#if __ELF_WORD_SIZE == 64
1648		if (vptr % sizeof(uint64_t) != 0)
1649#else
1650		if (vptr % sizeof(uint32_t) != 0)
1651#endif
1652			return (ENOEXEC);
1653		/*
1654		 * We count the array size reading the aux vectors from the
1655		 * stack until AT_NULL vector is returned.  So (to keep the code
1656		 * simple) we read the process stack twice: the first time here
1657		 * to find the size and the second time when copying the vectors
1658		 * to the allocated proc_vector.
1659		 */
1660		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
1661			error = proc_read_mem(td, p, ptr, &aux, sizeof(aux));
1662			if (error != 0)
1663				return (error);
1664			if (aux.a_type == AT_NULL)
1665				break;
1666			ptr += sizeof(aux);
1667		}
1668		/*
1669		 * If the PROC_AUXV_MAX entries are iterated over, and we have
1670		 * not reached AT_NULL, it is most likely we are reading wrong
1671		 * data: either the process doesn't have auxv array or data has
1672		 * been modified. Return the error in this case.
1673		 */
1674		if (aux.a_type != AT_NULL)
1675			return (ENOEXEC);
1676		vsize = i + 1;
1677		size = vsize * sizeof(aux);
1678		break;
1679	default:
1680		KASSERT(0, ("Wrong proc vector type: %d", type));
1681		return (EINVAL); /* In case we are built without INVARIANTS. */
1682	}
1683	proc_vector = malloc(size, M_TEMP, M_WAITOK);
1684	if (proc_vector == NULL)
1685		return (ENOMEM);
1686	error = proc_read_mem(td, p, vptr, proc_vector, size);
1687	if (error != 0) {
1688		free(proc_vector, M_TEMP);
1689		return (error);
1690	}
1691	*proc_vectorp = proc_vector;
1692	*vsizep = vsize;
1693
1694	return (0);
1695}
1696
1697#define GET_PS_STRINGS_CHUNK_SZ	256	/* Chunk size (bytes) for ps_strings operations. */
1698
/*
 * Append the strings of the selected vector (type) of process p to sb,
 * each followed by a NUL byte.  Strings are read from the process in
 * GET_PS_STRINGS_CHUNK_SZ pieces.  The process must be held (asserted
 * below).  Return values of sbuf_bcat() are not checked here.
 *
 * Returns 0 or the error from get_proc_vector() / proc_read_string().
 */
static int
get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb,
    enum proc_vector_type type)
{
	size_t done, len, nchr, vsize;
	int error, i;
	char **proc_vector, *sptr;
	char pss_string[GET_PS_STRINGS_CHUNK_SZ];

	PROC_ASSERT_HELD(p);

	/*
	 * We are not going to read more than 2 * (PATH_MAX + ARG_MAX) bytes.
	 */
	nchr = 2 * (PATH_MAX + ARG_MAX);

	error = get_proc_vector(td, p, &proc_vector, &vsize, type);
	if (error != 0)
		return (error);
	/* 'done' counts the bytes accounted for so far, terminators included. */
	for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) {
		/*
		 * The program may have scribbled into its argv array, e.g. to
		 * remove some arguments.  If that has happened, break out
		 * before trying to read from NULL.
		 */
		if (proc_vector[i] == NULL)
			break;
		for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) {
			error = proc_read_string(td, p, sptr, pss_string,
			    sizeof(pss_string));
			if (error != 0)
				goto done;
			len = strnlen(pss_string, GET_PS_STRINGS_CHUNK_SZ);
			/* Clamp so the total, plus a terminator, stays within nchr. */
			if (done + len >= nchr)
				len = nchr - done - 1;
			sbuf_bcat(sb, pss_string, len);
			/* A short piece means the string ended or was clamped. */
			if (len != GET_PS_STRINGS_CHUNK_SZ)
				break;
			done += GET_PS_STRINGS_CHUNK_SZ;
		}
		/* Terminate this string in the output. */
		sbuf_bcat(sb, "", 1);
		done += len + 1;
	}
done:
	free(proc_vector, M_TEMP);
	return (error);
}
1746
/*
 * Append the argument strings of process p to sb via get_ps_strings().
 * NOTE(review): the td argument is not used; curthread is passed instead.
 */
int
proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb)
{

	return (get_ps_strings(curthread, p, sb, PROC_ARG));
}
1753
/*
 * Append the environment strings of process p to sb via get_ps_strings().
 * NOTE(review): the td argument is not used; curthread is passed instead.
 */
int
proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb)
{

	return (get_ps_strings(curthread, p, sb, PROC_ENV));
}
1760
1761int
1762proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb)
1763{
1764	size_t vsize, size;
1765	char **auxv;
1766	int error;
1767
1768	error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX);
1769	if (error == 0) {
1770#ifdef COMPAT_FREEBSD32
1771		if (SV_PROC_FLAG(p, SV_ILP32) != 0)
1772			size = vsize * sizeof(Elf32_Auxinfo);
1773		else
1774#endif
1775			size = vsize * sizeof(Elf_Auxinfo);
1776		if (sbuf_bcat(sb, auxv, size) != 0)
1777			error = ENOMEM;
1778		free(auxv, M_TEMP);
1779	}
1780	return (error);
1781}
1782
/*
 * This sysctl allows a process to retrieve the argument list or process
 * title for another process without groping around in the address space
 * of the other process.  It also allows a process to set its own "process
 * title" to a string of its own choice.
 */
static int
sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *)arg1;
	u_int namelen = arg2;
	struct pargs *newpa, *pa;
	struct proc *p;
	struct sbuf sb;
	int flags, error = 0, error2;

	/* Exactly one name component, the pid, is expected. */
	if (namelen != 1)
		return (EINVAL);

	/*
	 * A request that supplies a new value adds PGET_ISCURRENT to the
	 * lookup flags (presumably restricting it to the calling process;
	 * confirm against pget()).
	 */
	flags = PGET_CANSEE;
	if (req->newptr != NULL)
		flags |= PGET_ISCURRENT;
	error = pget((pid_t)name[0], flags, &p);
	if (error)
		return (error);

	pa = p->p_args;
	if (pa != NULL) {
		/*
		 * Cached arguments exist: keep them alive with a reference
		 * while copying out without the process lock.
		 */
		pargs_hold(pa);
		PROC_UNLOCK(p);
		error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
		pargs_drop(pa);
	} else if ((p->p_flag & (P_WEXIT | P_SYSTEM)) == 0) {
		/*
		 * No cached copy: hold the process and read the argument
		 * strings from its address space.
		 */
		_PHOLD(p);
		PROC_UNLOCK(p);
		sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
		error = proc_getargv(curthread, p, &sb);
		error2 = sbuf_finish(&sb);
		PRELE(p);
		sbuf_delete(&sb);
		if (error == 0 && error2 != 0)
			error = error2;
	} else {
		/* Exiting or system process with no cached args: no output. */
		PROC_UNLOCK(p);
	}
	/* Done unless the read succeeded and a new value was supplied. */
	if (error != 0 || req->newptr == NULL)
		return (error);

	/* Refuse new argument strings larger than ps_arg_cache_limit. */
	if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit)
		return (ENOMEM);
	newpa = pargs_alloc(req->newlen);
	error = SYSCTL_IN(req, newpa->ar_args, req->newlen);
	if (error != 0) {
		pargs_free(newpa);
		return (error);
	}
	/* Install the new args under the lock, then drop the old ones. */
	PROC_LOCK(p);
	pa = p->p_args;
	p->p_args = newpa;
	PROC_UNLOCK(p);
	pargs_drop(pa);
	return (0);
}
1846
1847/*
1848 * This sysctl allows a process to retrieve environment of another process.
1849 */
1850static int
1851sysctl_kern_proc_env(SYSCTL_HANDLER_ARGS)
1852{
1853	int *name = (int *)arg1;
1854	u_int namelen = arg2;
1855	struct proc *p;
1856	struct sbuf sb;
1857	int error, error2;
1858
1859	if (namelen != 1)
1860		return (EINVAL);
1861
1862	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
1863	if (error != 0)
1864		return (error);
1865	if ((p->p_flag & P_SYSTEM) != 0) {
1866		PRELE(p);
1867		return (0);
1868	}
1869
1870	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
1871	error = proc_getenvv(curthread, p, &sb);
1872	error2 = sbuf_finish(&sb);
1873	PRELE(p);
1874	sbuf_delete(&sb);
1875	return (error != 0 ? error : error2);
1876}
1877
1878/*
1879 * This sysctl allows a process to retrieve ELF auxiliary vector of
1880 * another process.
1881 */
1882static int
1883sysctl_kern_proc_auxv(SYSCTL_HANDLER_ARGS)
1884{
1885	int *name = (int *)arg1;
1886	u_int namelen = arg2;
1887	struct proc *p;
1888	struct sbuf sb;
1889	int error, error2;
1890
1891	if (namelen != 1)
1892		return (EINVAL);
1893
1894	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
1895	if (error != 0)
1896		return (error);
1897	if ((p->p_flag & P_SYSTEM) != 0) {
1898		PRELE(p);
1899		return (0);
1900	}
1901	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
1902	error = proc_getauxv(curthread, p, &sb);
1903	error2 = sbuf_finish(&sb);
1904	PRELE(p);
1905	sbuf_delete(&sb);
1906	return (error != 0 ? error : error2);
1907}
1908
1909/*
1910 * This sysctl allows a process to retrieve the path of the executable for
1911 * itself or another process.
1912 */
1913static int
1914sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS)
1915{
1916	pid_t *pidp = (pid_t *)arg1;
1917	unsigned int arglen = arg2;
1918	struct proc *p;
1919	struct vnode *vp;
1920	char *retbuf, *freebuf;
1921	int error;
1922
1923	if (arglen != 1)
1924		return (EINVAL);
1925	if (*pidp == -1) {	/* -1 means this process */
1926		p = req->td->td_proc;
1927	} else {
1928		error = pget(*pidp, PGET_CANSEE, &p);
1929		if (error != 0)
1930			return (error);
1931	}
1932
1933	vp = p->p_textvp;
1934	if (vp == NULL) {
1935		if (*pidp != -1)
1936			PROC_UNLOCK(p);
1937		return (0);
1938	}
1939	vref(vp);
1940	if (*pidp != -1)
1941		PROC_UNLOCK(p);
1942	error = vn_fullpath(req->td, vp, &retbuf, &freebuf);
1943	vrele(vp);
1944	if (error)
1945		return (error);
1946	error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1);
1947	free(freebuf, M_TEMP);
1948	return (error);
1949}
1950
1951static int
1952sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS)
1953{
1954	struct proc *p;
1955	char *sv_name;
1956	int *name;
1957	int namelen;
1958	int error;
1959
1960	namelen = arg2;
1961	if (namelen != 1)
1962		return (EINVAL);
1963
1964	name = (int *)arg1;
1965	error = pget((pid_t)name[0], PGET_CANSEE, &p);
1966	if (error != 0)
1967		return (error);
1968	sv_name = p->p_sysent->sv_name;
1969	PROC_UNLOCK(p);
1970	return (sysctl_handle_string(oidp, sv_name, 0, req));
1971}
1972
/*
 * When KINFO_OVMENTRY_SIZE is defined, check at compile time that
 * struct kinfo_ovmentry has exactly that size.
 */
#ifdef KINFO_OVMENTRY_SIZE
CTASSERT(sizeof(struct kinfo_ovmentry) == KINFO_OVMENTRY_SIZE);
#endif
1976
1977#ifdef COMPAT_FREEBSD7
1978static int
1979sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS)
1980{
1981	vm_map_entry_t entry, tmp_entry;
1982	unsigned int last_timestamp;
1983	char *fullpath, *freepath;
1984	struct kinfo_ovmentry *kve;
1985	struct vattr va;
1986	struct ucred *cred;
1987	int error, *name;
1988	struct vnode *vp;
1989	struct proc *p;
1990	vm_map_t map;
1991	struct vmspace *vm;
1992
1993	name = (int *)arg1;
1994	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
1995	if (error != 0)
1996		return (error);
1997	vm = vmspace_acquire_ref(p);
1998	if (vm == NULL) {
1999		PRELE(p);
2000		return (ESRCH);
2001	}
2002	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
2003
2004	map = &vm->vm_map;
2005	vm_map_lock_read(map);
2006	for (entry = map->header.next; entry != &map->header;
2007	    entry = entry->next) {
2008		vm_object_t obj, tobj, lobj;
2009		vm_offset_t addr;
2010
2011		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2012			continue;
2013
2014		bzero(kve, sizeof(*kve));
2015		kve->kve_structsize = sizeof(*kve);
2016
2017		kve->kve_private_resident = 0;
2018		obj = entry->object.vm_object;
2019		if (obj != NULL) {
2020			VM_OBJECT_RLOCK(obj);
2021			if (obj->shadow_count == 1)
2022				kve->kve_private_resident =
2023				    obj->resident_page_count;
2024		}
2025		kve->kve_resident = 0;
2026		addr = entry->start;
2027		while (addr < entry->end) {
2028			if (pmap_extract(map->pmap, addr))
2029				kve->kve_resident++;
2030			addr += PAGE_SIZE;
2031		}
2032
2033		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
2034			if (tobj != obj)
2035				VM_OBJECT_RLOCK(tobj);
2036			if (lobj != obj)
2037				VM_OBJECT_RUNLOCK(lobj);
2038			lobj = tobj;
2039		}
2040
2041		kve->kve_start = (void*)entry->start;
2042		kve->kve_end = (void*)entry->end;
2043		kve->kve_offset = (off_t)entry->offset;
2044
2045		if (entry->protection & VM_PROT_READ)
2046			kve->kve_protection |= KVME_PROT_READ;
2047		if (entry->protection & VM_PROT_WRITE)
2048			kve->kve_protection |= KVME_PROT_WRITE;
2049		if (entry->protection & VM_PROT_EXECUTE)
2050			kve->kve_protection |= KVME_PROT_EXEC;
2051
2052		if (entry->eflags & MAP_ENTRY_COW)
2053			kve->kve_flags |= KVME_FLAG_COW;
2054		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
2055			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
2056		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
2057			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
2058
2059		last_timestamp = map->timestamp;
2060		vm_map_unlock_read(map);
2061
2062		kve->kve_fileid = 0;
2063		kve->kve_fsid = 0;
2064		freepath = NULL;
2065		fullpath = "";
2066		if (lobj) {
2067			vp = NULL;
2068			switch (lobj->type) {
2069			case OBJT_DEFAULT:
2070				kve->kve_type = KVME_TYPE_DEFAULT;
2071				break;
2072			case OBJT_VNODE:
2073				kve->kve_type = KVME_TYPE_VNODE;
2074				vp = lobj->handle;
2075				vref(vp);
2076				break;
2077			case OBJT_SWAP:
2078				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
2079					kve->kve_type = KVME_TYPE_VNODE;
2080					if ((lobj->flags & OBJ_TMPFS) != 0) {
2081						vp = lobj->un_pager.swp.swp_tmpfs;
2082						vref(vp);
2083					}
2084				} else {
2085					kve->kve_type = KVME_TYPE_SWAP;
2086				}
2087				break;
2088			case OBJT_DEVICE:
2089				kve->kve_type = KVME_TYPE_DEVICE;
2090				break;
2091			case OBJT_PHYS:
2092				kve->kve_type = KVME_TYPE_PHYS;
2093				break;
2094			case OBJT_DEAD:
2095				kve->kve_type = KVME_TYPE_DEAD;
2096				break;
2097			case OBJT_SG:
2098				kve->kve_type = KVME_TYPE_SG;
2099				break;
2100			default:
2101				kve->kve_type = KVME_TYPE_UNKNOWN;
2102				break;
2103			}
2104			if (lobj != obj)
2105				VM_OBJECT_RUNLOCK(lobj);
2106
2107			kve->kve_ref_count = obj->ref_count;
2108			kve->kve_shadow_count = obj->shadow_count;
2109			VM_OBJECT_RUNLOCK(obj);
2110			if (vp != NULL) {
2111				vn_fullpath(curthread, vp, &fullpath,
2112				    &freepath);
2113				cred = curthread->td_ucred;
2114				vn_lock(vp, LK_SHARED | LK_RETRY);
2115				if (VOP_GETATTR(vp, &va, cred) == 0) {
2116					kve->kve_fileid = va.va_fileid;
2117					kve->kve_fsid = va.va_fsid;
2118				}
2119				vput(vp);
2120			}
2121		} else {
2122			kve->kve_type = KVME_TYPE_NONE;
2123			kve->kve_ref_count = 0;
2124			kve->kve_shadow_count = 0;
2125		}
2126
2127		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
2128		if (freepath != NULL)
2129			free(freepath, M_TEMP);
2130
2131		error = SYSCTL_OUT(req, kve, sizeof(*kve));
2132		vm_map_lock_read(map);
2133		if (error)
2134			break;
2135		if (last_timestamp != map->timestamp) {
2136			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
2137			entry = tmp_entry;
2138		}
2139	}
2140	vm_map_unlock_read(map);
2141	vmspace_free(vm);
2142	PRELE(p);
2143	free(kve, M_TEMP);
2144	return (error);
2145}
2146#endif	/* COMPAT_FREEBSD7 */
2147
/*
 * When KINFO_VMENTRY_SIZE is defined, check at compile time that
 * struct kinfo_vmentry has exactly that size.
 */
#ifdef KINFO_VMENTRY_SIZE
CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
#endif
2151
/*
 * Add the number of resident pages backing map entry 'entry' to
 * kve->kve_resident, and set KVME_FLAG_SUPER in kve->kve_flags when a
 * superpage mapping is detected.  The entry's object and its chain of
 * backing objects are searched for each page index.
 *
 * NOTE(review): the caller in this file holds read locks on the whole
 * object chain around this call; confirm that is a requirement.
 */
static void
kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry,
    struct kinfo_vmentry *kve)
{
	vm_object_t obj, tobj;
	vm_page_t m, m_adv;
	vm_offset_t addr;
	vm_paddr_t locked_pa;
	vm_pindex_t pi, pi_adv, pindex;

	locked_pa = 0;
	obj = entry->object.vm_object;
	addr = entry->start;
	m_adv = NULL;
	pi = OFF_TO_IDX(entry->offset);
	/* Each iteration advances addr and pi by pi_adv pages. */
	for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) {
		if (m_adv != NULL) {
			/* Reuse the page found ahead by the previous search. */
			m = m_adv;
		} else {
			/* Start by assuming nothing up to the entry's end. */
			pi_adv = OFF_TO_IDX(entry->end - addr);
			pindex = pi;
			for (tobj = obj;; tobj = tobj->backing_object) {
				m = vm_page_find_least(tobj, pindex);
				if (m != NULL) {
					/* A page exactly at this index: count it. */
					if (m->pindex == pindex)
						break;
					/*
					 * Otherwise remember the nearest later
					 * page so the gap can be skipped.
					 */
					if (pi_adv > m->pindex - pindex) {
						pi_adv = m->pindex - pindex;
						m_adv = m;
					}
				}
				if (tobj->backing_object == NULL)
					goto next;
				pindex += OFF_TO_IDX(tobj->
				    backing_object_offset);
			}
		}
		m_adv = NULL;
		if (m->psind != 0 && addr + pagesizes[1] <= entry->end &&
		    (addr & (pagesizes[1] - 1)) == 0 &&
		    (pmap_mincore(map->pmap, addr, &locked_pa) &
		    MINCORE_SUPER) != 0) {
			kve->kve_flags |= KVME_FLAG_SUPER;
			pi_adv = OFF_TO_IDX(pagesizes[1]);
		} else {
			/*
			 * We do not test the found page on validity.
			 * Either the page is busy and being paged in,
			 * or it was invalidated.  The first case
			 * should be counted as resident, the second
			 * is not so clear; we do account both.
			 */
			pi_adv = 1;
		}
		kve->kve_resident += pi_adv;
next:;
	}
	PA_UNLOCK_COND(locked_pa);
}
2211
/*
 * Append one packed struct kinfo_vmentry to sb for every non-submap entry
 * in the VM map of process p.
 *
 * Must be called with the process locked and will return unlocked.
 * Returns 0, ESRCH if the address space cannot be referenced, or ENOMEM if
 * appending to the sbuf failed.
 */
int
kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
{
	vm_map_entry_t entry, tmp_entry;
	struct vattr va;
	vm_map_t map;
	vm_object_t obj, tobj, lobj;
	char *fullpath, *freepath;
	struct kinfo_vmentry *kve;
	struct ucred *cred;
	struct vnode *vp;
	struct vmspace *vm;
	vm_offset_t addr;
	unsigned int last_timestamp;
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);

	/* Swap the process lock for a hold before doing the map walk. */
	_PHOLD(p);
	PROC_UNLOCK(p);
	vm = vmspace_acquire_ref(p);
	if (vm == NULL) {
		PRELE(p);
		return (ESRCH);
	}
	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);

	error = 0;
	map = &vm->vm_map;
	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
			continue;

		/* Saved for re-locating our position if the map changes. */
		addr = entry->end;
		bzero(kve, sizeof(*kve));
		obj = entry->object.vm_object;
		if (obj != NULL) {
			/*
			 * Read-lock the whole backing chain; lobj ends up as
			 * the last object in it.
			 */
			for (tobj = obj; tobj != NULL;
			    tobj = tobj->backing_object) {
				VM_OBJECT_RLOCK(tobj);
				lobj = tobj;
			}
			if (obj->backing_object == NULL)
				kve->kve_private_resident =
				    obj->resident_page_count;
			if (!vmmap_skip_res_cnt)
				kern_proc_vmmap_resident(map, entry, kve);
			/* Keep only obj and lobj locked from here on. */
			for (tobj = obj; tobj != NULL;
			    tobj = tobj->backing_object) {
				if (tobj != obj && tobj != lobj)
					VM_OBJECT_RUNLOCK(tobj);
			}
		} else {
			lobj = NULL;
		}

		kve->kve_start = entry->start;
		kve->kve_end = entry->end;
		kve->kve_offset = entry->offset;

		if (entry->protection & VM_PROT_READ)
			kve->kve_protection |= KVME_PROT_READ;
		if (entry->protection & VM_PROT_WRITE)
			kve->kve_protection |= KVME_PROT_WRITE;
		if (entry->protection & VM_PROT_EXECUTE)
			kve->kve_protection |= KVME_PROT_EXEC;

		if (entry->eflags & MAP_ENTRY_COW)
			kve->kve_flags |= KVME_FLAG_COW;
		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
		if (entry->eflags & MAP_ENTRY_GROWS_UP)
			kve->kve_flags |= KVME_FLAG_GROWS_UP;
		if (entry->eflags & MAP_ENTRY_GROWS_DOWN)
			kve->kve_flags |= KVME_FLAG_GROWS_DOWN;

		/* Remember the map generation before dropping the lock. */
		last_timestamp = map->timestamp;
		vm_map_unlock_read(map);

		freepath = NULL;
		fullpath = "";
		if (lobj != NULL) {
			vp = NULL;
			switch (lobj->type) {
			case OBJT_DEFAULT:
				kve->kve_type = KVME_TYPE_DEFAULT;
				break;
			case OBJT_VNODE:
				kve->kve_type = KVME_TYPE_VNODE;
				vp = lobj->handle;
				vref(vp);
				break;
			case OBJT_SWAP:
				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
					kve->kve_type = KVME_TYPE_VNODE;
					if ((lobj->flags & OBJ_TMPFS) != 0) {
						vp = lobj->un_pager.swp.swp_tmpfs;
						vref(vp);
					}
				} else {
					kve->kve_type = KVME_TYPE_SWAP;
				}
				break;
			case OBJT_DEVICE:
				kve->kve_type = KVME_TYPE_DEVICE;
				break;
			case OBJT_PHYS:
				kve->kve_type = KVME_TYPE_PHYS;
				break;
			case OBJT_DEAD:
				kve->kve_type = KVME_TYPE_DEAD;
				break;
			case OBJT_SG:
				kve->kve_type = KVME_TYPE_SG;
				break;
			case OBJT_MGTDEVICE:
				kve->kve_type = KVME_TYPE_MGTDEVICE;
				break;
			default:
				kve->kve_type = KVME_TYPE_UNKNOWN;
				break;
			}
			if (lobj != obj)
				VM_OBJECT_RUNLOCK(lobj);

			kve->kve_ref_count = obj->ref_count;
			kve->kve_shadow_count = obj->shadow_count;
			VM_OBJECT_RUNLOCK(obj);
			if (vp != NULL) {
				/*
				 * The path lookup result is not checked;
				 * fullpath stays "" unless it was updated.
				 */
				vn_fullpath(curthread, vp, &fullpath,
				    &freepath);
				kve->kve_vn_type = vntype_to_kinfo(vp->v_type);
				cred = curthread->td_ucred;
				vn_lock(vp, LK_SHARED | LK_RETRY);
				if (VOP_GETATTR(vp, &va, cred) == 0) {
					kve->kve_vn_fileid = va.va_fileid;
					kve->kve_vn_fsid = va.va_fsid;
					kve->kve_vn_mode =
					    MAKEIMODE(va.va_type, va.va_mode);
					kve->kve_vn_size = va.va_size;
					kve->kve_vn_rdev = va.va_rdev;
					kve->kve_status = KF_ATTR_VALID;
				}
				vput(vp);
			}
		} else {
			kve->kve_type = KVME_TYPE_NONE;
			kve->kve_ref_count = 0;
			kve->kve_shadow_count = 0;
		}

		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
		if (freepath != NULL)
			free(freepath, M_TEMP);

		/* Pack record size down */
		kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) +
		    strlen(kve->kve_path) + 1;
		kve->kve_structsize = roundup(kve->kve_structsize,
		    sizeof(uint64_t));
		if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
			error = ENOMEM;
		/* Re-take the lock before leaving so the unlock below pairs. */
		vm_map_lock_read(map);
		if (error != 0)
			break;
		if (last_timestamp != map->timestamp) {
			/* The map changed: resume from the saved end address. */
			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
			entry = tmp_entry;
		}
	}
	vm_map_unlock_read(map);
	vmspace_free(vm);
	PRELE(p);
	free(kve, M_TEMP);
	return (error);
}
2395
2396static int
2397sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
2398{
2399	struct proc *p;
2400	struct sbuf sb;
2401	int error, error2, *name;
2402
2403	name = (int *)arg1;
2404	sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_vmentry), req);
2405	error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
2406	if (error != 0) {
2407		sbuf_delete(&sb);
2408		return (error);
2409	}
2410	error = kern_proc_vmmap_out(p, &sb);
2411	error2 = sbuf_finish(&sb);
2412	sbuf_delete(&sb);
2413	return (error != 0 ? error : error2);
2414}
2415
#if defined(STACK) || defined(DDB)
/*
 * Sysctl handler that exports one struct kinfo_kstack record per thread of
 * the process whose pid is the single remaining MIB component.
 *
 * The thread ids are first snapshotted into lwpidarray under the process
 * lock; each thread is then looked up again by id, because the process
 * lock is dropped around the copy-out of every record.  Threads that have
 * disappeared in the meantime are skipped.
 *
 * Each record carries the thread id, a KKST_STATE_* value and, only when
 * the state is KKST_STATE_STACKOK, the textual stack trace.
 *
 * Returns EINVAL for a malformed name, the pget() error if the lookup
 * fails, otherwise the result of the last SYSCTL_OUT() performed (0 if no
 * record was written).
 */
static int
sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS)
{
	struct kinfo_kstack *kkstp;
	int error, i, *name, numthreads;
	lwpid_t *lwpidarray;
	struct thread *td;
	struct stack *st;
	struct sbuf sb;
	struct proc *p;
	u_int namelen;

	/*
	 * Require exactly one pid component, matching the sibling
	 * handlers, so name[0] is not read when nothing was supplied.
	 */
	namelen = arg2;
	if (namelen != 1)
		return (EINVAL);

	name = (int *)arg1;
	error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p);
	if (error != 0)
		return (error);

	kkstp = malloc(sizeof(*kkstp), M_TEMP, M_WAITOK);
	st = stack_create();

	lwpidarray = NULL;
	numthreads = 0;
	PROC_LOCK(p);
repeat:
	/*
	 * The array is allocated with the process unlocked (M_WAITOK), so
	 * the thread count is re-checked after relocking and the array is
	 * regrown until it is large enough.
	 */
	if (numthreads < p->p_numthreads) {
		if (lwpidarray != NULL) {
			free(lwpidarray, M_TEMP);
			lwpidarray = NULL;
		}
		numthreads = p->p_numthreads;
		PROC_UNLOCK(p);
		lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP,
		    M_WAITOK | M_ZERO);
		PROC_LOCK(p);
		goto repeat;
	}
	i = 0;

	/*
	 * XXXRW: During the below loop, execve(2) and countless other sorts
	 * of changes could have taken place.  Should we check to see if the
	 * vmspace has been replaced, or the like, in order to prevent
	 * giving a snapshot that spans, say, execve(2), with some threads
	 * before and some after?  Among other things, the credentials could
	 * have changed, in which case the right to extract debug info might
	 * no longer be assured.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(i < numthreads,
		    ("sysctl_kern_proc_kstack: numthreads"));
		lwpidarray[i] = td->td_tid;
		i++;
	}
	numthreads = i;
	for (i = 0; i < numthreads; i++) {
		td = thread_find(p, lwpidarray[i]);
		if (td == NULL) {
			continue;
		}
		bzero(kkstp, sizeof(*kkstp));
		(void)sbuf_new(&sb, kkstp->kkst_trace,
		    sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN);
		thread_lock(td);
		kkstp->kkst_tid = td->td_tid;
		if (TD_IS_SWAPPED(td))
			kkstp->kkst_state = KKST_STATE_SWAPPED;
		else if (TD_IS_RUNNING(td))
			kkstp->kkst_state = KKST_STATE_RUNNING;
		else {
			kkstp->kkst_state = KKST_STATE_STACKOK;
			stack_save_td(st, td);
		}
		thread_unlock(td);
		PROC_UNLOCK(p);
		/*
		 * Only format a trace that was captured for this thread.
		 * For swapped or running threads "st" was not refreshed and
		 * still holds whatever an earlier iteration saved, which
		 * must not be reported under this thread's id.
		 */
		if (kkstp->kkst_state == KKST_STATE_STACKOK)
			stack_sbuf_print(&sb, st);
		sbuf_finish(&sb);
		sbuf_delete(&sb);
		error = SYSCTL_OUT(req, kkstp, sizeof(*kkstp));
		PROC_LOCK(p);
		if (error)
			break;
	}
	_PRELE(p);
	PROC_UNLOCK(p);
	if (lwpidarray != NULL)
		free(lwpidarray, M_TEMP);
	stack_destroy(st);
	free(kkstp, M_TEMP);
	return (error);
}
#endif
2507
2508/*
2509 * This sysctl allows a process to retrieve the full list of groups from
2510 * itself or another process.
2511 */
2512static int
2513sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS)
2514{
2515	pid_t *pidp = (pid_t *)arg1;
2516	unsigned int arglen = arg2;
2517	struct proc *p;
2518	struct ucred *cred;
2519	int error;
2520
2521	if (arglen != 1)
2522		return (EINVAL);
2523	if (*pidp == -1) {	/* -1 means this process */
2524		p = req->td->td_proc;
2525	} else {
2526		error = pget(*pidp, PGET_CANSEE, &p);
2527		if (error != 0)
2528			return (error);
2529	}
2530
2531	cred = crhold(p->p_ucred);
2532	if (*pidp != -1)
2533		PROC_UNLOCK(p);
2534
2535	error = SYSCTL_OUT(req, cred->cr_groups,
2536	    cred->cr_ngroups * sizeof(gid_t));
2537	crfree(cred);
2538	return (error);
2539}
2540
2541/*
2542 * This sysctl allows a process to retrieve or/and set the resource limit for
2543 * another process.
2544 */
2545static int
2546sysctl_kern_proc_rlimit(SYSCTL_HANDLER_ARGS)
2547{
2548	int *name = (int *)arg1;
2549	u_int namelen = arg2;
2550	struct rlimit rlim;
2551	struct proc *p;
2552	u_int which;
2553	int flags, error;
2554
2555	if (namelen != 2)
2556		return (EINVAL);
2557
2558	which = (u_int)name[1];
2559	if (which >= RLIM_NLIMITS)
2560		return (EINVAL);
2561
2562	if (req->newptr != NULL && req->newlen != sizeof(rlim))
2563		return (EINVAL);
2564
2565	flags = PGET_HOLD | PGET_NOTWEXIT;
2566	if (req->newptr != NULL)
2567		flags |= PGET_CANDEBUG;
2568	else
2569		flags |= PGET_CANSEE;
2570	error = pget((pid_t)name[0], flags, &p);
2571	if (error != 0)
2572		return (error);
2573
2574	/*
2575	 * Retrieve limit.
2576	 */
2577	if (req->oldptr != NULL) {
2578		PROC_LOCK(p);
2579		lim_rlimit(p, which, &rlim);
2580		PROC_UNLOCK(p);
2581	}
2582	error = SYSCTL_OUT(req, &rlim, sizeof(rlim));
2583	if (error != 0)
2584		goto errout;
2585
2586	/*
2587	 * Set limit.
2588	 */
2589	if (req->newptr != NULL) {
2590		error = SYSCTL_IN(req, &rlim, sizeof(rlim));
2591		if (error == 0)
2592			error = kern_proc_setrlimit(curthread, p, which, &rlim);
2593	}
2594
2595errout:
2596	PRELE(p);
2597	return (error);
2598}
2599
2600/*
2601 * This sysctl allows a process to retrieve ps_strings structure location of
2602 * another process.
2603 */
2604static int
2605sysctl_kern_proc_ps_strings(SYSCTL_HANDLER_ARGS)
2606{
2607	int *name = (int *)arg1;
2608	u_int namelen = arg2;
2609	struct proc *p;
2610	vm_offset_t ps_strings;
2611	int error;
2612#ifdef COMPAT_FREEBSD32
2613	uint32_t ps_strings32;
2614#endif
2615
2616	if (namelen != 1)
2617		return (EINVAL);
2618
2619	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
2620	if (error != 0)
2621		return (error);
2622#ifdef COMPAT_FREEBSD32
2623	if ((req->flags & SCTL_MASK32) != 0) {
2624		/*
2625		 * We return 0 if the 32 bit emulation request is for a 64 bit
2626		 * process.
2627		 */
2628		ps_strings32 = SV_PROC_FLAG(p, SV_ILP32) != 0 ?
2629		    PTROUT(p->p_sysent->sv_psstrings) : 0;
2630		PROC_UNLOCK(p);
2631		error = SYSCTL_OUT(req, &ps_strings32, sizeof(ps_strings32));
2632		return (error);
2633	}
2634#endif
2635	ps_strings = p->p_sysent->sv_psstrings;
2636	PROC_UNLOCK(p);
2637	error = SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings));
2638	return (error);
2639}
2640
2641/*
2642 * This sysctl allows a process to retrieve umask of another process.
2643 */
2644static int
2645sysctl_kern_proc_umask(SYSCTL_HANDLER_ARGS)
2646{
2647	int *name = (int *)arg1;
2648	u_int namelen = arg2;
2649	struct proc *p;
2650	int error;
2651	u_short fd_cmask;
2652
2653	if (namelen != 1)
2654		return (EINVAL);
2655
2656	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
2657	if (error != 0)
2658		return (error);
2659
2660	FILEDESC_SLOCK(p->p_fd);
2661	fd_cmask = p->p_fd->fd_cmask;
2662	FILEDESC_SUNLOCK(p->p_fd);
2663	PRELE(p);
2664	error = SYSCTL_OUT(req, &fd_cmask, sizeof(fd_cmask));
2665	return (error);
2666}
2667
2668/*
2669 * This sysctl allows a process to set and retrieve binary osreldate of
2670 * another process.
2671 */
2672static int
2673sysctl_kern_proc_osrel(SYSCTL_HANDLER_ARGS)
2674{
2675	int *name = (int *)arg1;
2676	u_int namelen = arg2;
2677	struct proc *p;
2678	int flags, error, osrel;
2679
2680	if (namelen != 1)
2681		return (EINVAL);
2682
2683	if (req->newptr != NULL && req->newlen != sizeof(osrel))
2684		return (EINVAL);
2685
2686	flags = PGET_HOLD | PGET_NOTWEXIT;
2687	if (req->newptr != NULL)
2688		flags |= PGET_CANDEBUG;
2689	else
2690		flags |= PGET_CANSEE;
2691	error = pget((pid_t)name[0], flags, &p);
2692	if (error != 0)
2693		return (error);
2694
2695	error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel));
2696	if (error != 0)
2697		goto errout;
2698
2699	if (req->newptr != NULL) {
2700		error = SYSCTL_IN(req, &osrel, sizeof(osrel));
2701		if (error != 0)
2702			goto errout;
2703		if (osrel < 0) {
2704			error = EINVAL;
2705			goto errout;
2706		}
2707		p->p_osrel = osrel;
2708	}
2709errout:
2710	PRELE(p);
2711	return (error);
2712}
2713
2714static int
2715sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS)
2716{
2717	int *name = (int *)arg1;
2718	u_int namelen = arg2;
2719	struct proc *p;
2720	struct kinfo_sigtramp kst;
2721	const struct sysentvec *sv;
2722	int error;
2723#ifdef COMPAT_FREEBSD32
2724	struct kinfo_sigtramp32 kst32;
2725#endif
2726
2727	if (namelen != 1)
2728		return (EINVAL);
2729
2730	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
2731	if (error != 0)
2732		return (error);
2733	sv = p->p_sysent;
2734#ifdef COMPAT_FREEBSD32
2735	if ((req->flags & SCTL_MASK32) != 0) {
2736		bzero(&kst32, sizeof(kst32));
2737		if (SV_PROC_FLAG(p, SV_ILP32)) {
2738			if (sv->sv_sigcode_base != 0) {
2739				kst32.ksigtramp_start = sv->sv_sigcode_base;
2740				kst32.ksigtramp_end = sv->sv_sigcode_base +
2741				    *sv->sv_szsigcode;
2742			} else {
2743				kst32.ksigtramp_start = sv->sv_psstrings -
2744				    *sv->sv_szsigcode;
2745				kst32.ksigtramp_end = sv->sv_psstrings;
2746			}
2747		}
2748		PROC_UNLOCK(p);
2749		error = SYSCTL_OUT(req, &kst32, sizeof(kst32));
2750		return (error);
2751	}
2752#endif
2753	bzero(&kst, sizeof(kst));
2754	if (sv->sv_sigcode_base != 0) {
2755		kst.ksigtramp_start = (char *)sv->sv_sigcode_base;
2756		kst.ksigtramp_end = (char *)sv->sv_sigcode_base +
2757		    *sv->sv_szsigcode;
2758	} else {
2759		kst.ksigtramp_start = (char *)sv->sv_psstrings -
2760		    *sv->sv_szsigcode;
2761		kst.ksigtramp_end = (char *)sv->sv_psstrings;
2762	}
2763	PROC_UNLOCK(p);
2764	error = SYSCTL_OUT(req, &kst, sizeof(kst));
2765	return (error);
2766}
2767
2768SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD,  0, "Process table");
2769
2770SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT|
2771	CTLFLAG_MPSAFE, 0, 0, sysctl_kern_proc, "S,proc",
2772	"Return entire process table");
2773
2774static SYSCTL_NODE(_kern_proc, KERN_PROC_GID, gid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2775	sysctl_kern_proc, "Process table");
2776
2777static SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD | CTLFLAG_MPSAFE,
2778	sysctl_kern_proc, "Process table");
2779
2780static SYSCTL_NODE(_kern_proc, KERN_PROC_RGID, rgid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2781	sysctl_kern_proc, "Process table");
2782
2783static SYSCTL_NODE(_kern_proc, KERN_PROC_SESSION, sid, CTLFLAG_RD |
2784	CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2785
2786static SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD | CTLFLAG_MPSAFE,
2787	sysctl_kern_proc, "Process table");
2788
2789static SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2790	sysctl_kern_proc, "Process table");
2791
2792static SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2793	sysctl_kern_proc, "Process table");
2794
2795static SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2796	sysctl_kern_proc, "Process table");
2797
2798static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE,
2799	sysctl_kern_proc, "Return process table, no threads");
2800
2801static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args,
2802	CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE,
2803	sysctl_kern_proc_args, "Process argument list");
2804
2805static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE,
2806	sysctl_kern_proc_env, "Process environment");
2807
2808static SYSCTL_NODE(_kern_proc, KERN_PROC_AUXV, auxv, CTLFLAG_RD |
2809	CTLFLAG_MPSAFE, sysctl_kern_proc_auxv, "Process ELF auxiliary vector");
2810
2811static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD |
2812	CTLFLAG_MPSAFE, sysctl_kern_proc_pathname, "Process executable path");
2813
2814static SYSCTL_NODE(_kern_proc, KERN_PROC_SV_NAME, sv_name, CTLFLAG_RD |
2815	CTLFLAG_MPSAFE, sysctl_kern_proc_sv_name,
2816	"Process syscall vector name (ABI type)");
2817
2818static SYSCTL_NODE(_kern_proc, (KERN_PROC_GID | KERN_PROC_INC_THREAD), gid_td,
2819	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2820
2821static SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_INC_THREAD), pgrp_td,
2822	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2823
2824static SYSCTL_NODE(_kern_proc, (KERN_PROC_RGID | KERN_PROC_INC_THREAD), rgid_td,
2825	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2826
2827static SYSCTL_NODE(_kern_proc, (KERN_PROC_SESSION | KERN_PROC_INC_THREAD),
2828	sid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2829
2830static SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_INC_THREAD), tty_td,
2831	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2832
2833static SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_INC_THREAD), uid_td,
2834	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2835
2836static SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_INC_THREAD), ruid_td,
2837	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2838
2839static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td,
2840	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2841
2842static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td,
2843	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc,
2844	"Return process table, no threads");
2845
2846#ifdef COMPAT_FREEBSD7
2847static SYSCTL_NODE(_kern_proc, KERN_PROC_OVMMAP, ovmmap, CTLFLAG_RD |
2848	CTLFLAG_MPSAFE, sysctl_kern_proc_ovmmap, "Old Process vm map entries");
2849#endif
2850
2851static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD |
2852	CTLFLAG_MPSAFE, sysctl_kern_proc_vmmap, "Process vm map entries");
2853
2854#if defined(STACK) || defined(DDB)
2855static SYSCTL_NODE(_kern_proc, KERN_PROC_KSTACK, kstack, CTLFLAG_RD |
2856	CTLFLAG_MPSAFE, sysctl_kern_proc_kstack, "Process kernel stacks");
2857#endif
2858
2859static SYSCTL_NODE(_kern_proc, KERN_PROC_GROUPS, groups, CTLFLAG_RD |
2860	CTLFLAG_MPSAFE, sysctl_kern_proc_groups, "Process groups");
2861
2862static SYSCTL_NODE(_kern_proc, KERN_PROC_RLIMIT, rlimit, CTLFLAG_RW |
2863	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_rlimit,
2864	"Process resource limits");
2865
2866static SYSCTL_NODE(_kern_proc, KERN_PROC_PS_STRINGS, ps_strings, CTLFLAG_RD |
2867	CTLFLAG_MPSAFE, sysctl_kern_proc_ps_strings,
2868	"Process ps_strings location");
2869
2870static SYSCTL_NODE(_kern_proc, KERN_PROC_UMASK, umask, CTLFLAG_RD |
2871	CTLFLAG_MPSAFE, sysctl_kern_proc_umask, "Process umask");
2872
2873static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW |
2874	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel,
2875	"Process binary osreldate");
2876
2877static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD |
2878	CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp,
2879	"Process signal trampoline location");
2880
2881int allproc_gen;
2882
2883void
2884stop_all_proc(void)
2885{
2886	struct proc *cp, *p;
2887	int r, gen;
2888	bool restart, seen_stopped, seen_exiting, stopped_some;
2889
2890	cp = curproc;
2891	/*
2892	 * stop_all_proc() assumes that all process which have
2893	 * usermode must be stopped, except current process, for
2894	 * obvious reasons.  Since other threads in the process
2895	 * establishing global stop could unstop something, disable
2896	 * calls from multithreaded processes as precaution.  The
2897	 * service must not be user-callable anyway.
2898	 */
2899	KASSERT((cp->p_flag & P_HADTHREADS) == 0 ||
2900	    (cp->p_flag & P_KTHREAD) != 0, ("mt stop_all_proc"));
2901
2902allproc_loop:
2903	sx_xlock(&allproc_lock);
2904	gen = allproc_gen;
2905	seen_exiting = seen_stopped = stopped_some = restart = false;
2906	LIST_REMOVE(cp, p_list);
2907	LIST_INSERT_HEAD(&allproc, cp, p_list);
2908	for (;;) {
2909		p = LIST_NEXT(cp, p_list);
2910		if (p == NULL)
2911			break;
2912		LIST_REMOVE(cp, p_list);
2913		LIST_INSERT_AFTER(p, cp, p_list);
2914		PROC_LOCK(p);
2915		if ((p->p_flag & (P_KTHREAD | P_SYSTEM |
2916		    P_TOTAL_STOP)) != 0) {
2917			PROC_UNLOCK(p);
2918			continue;
2919		}
2920		if ((p->p_flag & P_WEXIT) != 0) {
2921			seen_exiting = true;
2922			PROC_UNLOCK(p);
2923			continue;
2924		}
2925		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
2926			/*
2927			 * Stopped processes are tolerated when there
2928			 * are no other processes which might continue
2929			 * them.  P_STOPPED_SINGLE but not
2930			 * P_TOTAL_STOP process still has at least one
2931			 * thread running.
2932			 */
2933			seen_stopped = true;
2934			PROC_UNLOCK(p);
2935			continue;
2936		}
2937		_PHOLD(p);
2938		sx_xunlock(&allproc_lock);
2939		r = thread_single(p, SINGLE_ALLPROC);
2940		if (r != 0)
2941			restart = true;
2942		else
2943			stopped_some = true;
2944		_PRELE(p);
2945		PROC_UNLOCK(p);
2946		sx_xlock(&allproc_lock);
2947	}
2948	/* Catch forked children we did not see in iteration. */
2949	if (gen != allproc_gen)
2950		restart = true;
2951	sx_xunlock(&allproc_lock);
2952	if (restart || stopped_some || seen_exiting || seen_stopped) {
2953		kern_yield(PRI_USER);
2954		goto allproc_loop;
2955	}
2956}
2957
2958void
2959resume_all_proc(void)
2960{
2961	struct proc *cp, *p;
2962
2963	cp = curproc;
2964	sx_xlock(&allproc_lock);
2965	LIST_REMOVE(cp, p_list);
2966	LIST_INSERT_HEAD(&allproc, cp, p_list);
2967	for (;;) {
2968		p = LIST_NEXT(cp, p_list);
2969		if (p == NULL)
2970			break;
2971		LIST_REMOVE(cp, p_list);
2972		LIST_INSERT_AFTER(p, cp, p_list);
2973		PROC_LOCK(p);
2974		if ((p->p_flag & P_TOTAL_STOP) != 0) {
2975			sx_xunlock(&allproc_lock);
2976			_PHOLD(p);
2977			thread_single_end(p, SINGLE_ALLPROC);
2978			_PRELE(p);
2979			PROC_UNLOCK(p);
2980			sx_xlock(&allproc_lock);
2981		} else {
2982			PROC_UNLOCK(p);
2983		}
2984	}
2985	sx_xunlock(&allproc_lock);
2986}
2987
2988#define	TOTAL_STOP_DEBUG	1
2989#ifdef TOTAL_STOP_DEBUG
2990volatile static int ap_resume;
2991#include <sys/mount.h>
2992
2993static int
2994sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS)
2995{
2996	int error, val;
2997
2998	val = 0;
2999	ap_resume = 0;
3000	error = sysctl_handle_int(oidp, &val, 0, req);
3001	if (error != 0 || req->newptr == NULL)
3002		return (error);
3003	if (val != 0) {
3004		stop_all_proc();
3005		syncer_suspend();
3006		while (ap_resume == 0)
3007			;
3008		syncer_resume();
3009		resume_all_proc();
3010	}
3011	return (0);
3012}
3013
3014SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc, CTLTYPE_INT | CTLFLAG_RW |
3015    CTLFLAG_MPSAFE, __DEVOLATILE(int *, &ap_resume), 0,
3016    sysctl_debug_stop_all_proc, "I",
3017    "");
3018#endif
3019