1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1990, 1991, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Copyright (c) 2002 Networks Associates Technologies, Inc.
13 * All rights reserved.
14 *
15 * Portions of this software were developed for the FreeBSD Project by
16 * ThinkSec AS and NAI Labs, the Security Research Division of Network
17 * Associates, Inc.  under DARPA/SPAWAR contract N66001-01-C-8035
18 * ("CBOSS"), as part of the DARPA CHATS research program.
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
22 * are met:
23 * 1. Redistributions of source code must retain the above copyright
24 *    notice, this list of conditions and the following disclaimer.
25 * 2. Redistributions in binary form must reproduce the above copyright
26 *    notice, this list of conditions and the following disclaimer in the
27 *    documentation and/or other materials provided with the distribution.
28 * 3. Neither the name of the University nor the names of its contributors
29 *    may be used to endorse or promote products derived from this software
30 *    without specific prior written permission.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
33 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
36 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
38 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
39 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
40 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42 * SUCH DAMAGE.
43 */
44
45#include <sys/cdefs.h>
46#include "opt_stack.h"
47
48#include <sys/param.h>
49#include <sys/cons.h>
50#include <sys/kdb.h>
51#include <sys/lock.h>
52#include <sys/malloc.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/sbuf.h>
57#include <sys/sched.h>
58#include <sys/stack.h>
59#include <sys/sysctl.h>
60#include <sys/systm.h>
61#include <sys/tty.h>
62
63#include <vm/vm.h>
64#include <vm/pmap.h>
65#include <vm/vm_map.h>
66
67/*
68 * Returns 1 if p2 is "better" than p1
69 *
70 * The algorithm for picking the "interesting" process is thus:
71 *
72 *	1) Only foreground processes are eligible - implied.
73 *	2) Runnable processes are favored over anything else.  The runner
74 *	   with the highest cpu utilization is picked (p_estcpu).  Ties are
75 *	   broken by picking the highest pid.
76 *	3) The sleeper with the shortest sleep time is next.  With ties,
77 *	   we pick out just "short-term" sleepers (P_SINTR == 0).
78 *	4) Further ties are broken by picking the highest pid.
79 */
80
/*
 * TESTAB() packs two truth values into a two-bit code: a is shifted into
 * bit 1 and b is OR-ed into bit 0.  The result can be compared against
 * ONLYA (only a set), ONLYB (only b set) and BOTH (both set); 0 means
 * neither is set.
 *
 * NOTE: the codes below only match when a and b are each exactly 0 or 1.
 * A raw bit-mask result (e.g. "flags & SOME_FLAG") must be normalized
 * with "!= 0" before being passed in.
 */
#define TESTAB(a, b)    ((a)<<1 | (b))
#define ONLYA   2
#define ONLYB   1
#define BOTH    3
85
86static int
87proc_sum(struct proc *p, fixpt_t *estcpup)
88{
89	struct thread *td;
90	int estcpu;
91	int val;
92
93	val = 0;
94	estcpu = 0;
95	FOREACH_THREAD_IN_PROC(p, td) {
96		thread_lock(td);
97		if (TD_ON_RUNQ(td) ||
98		    TD_IS_RUNNING(td))
99			val = 1;
100		estcpu += sched_pctcpu(td);
101		thread_unlock(td);
102	}
103	*estcpup = estcpu;
104
105	return (val);
106}
107
108static int
109thread_compare(struct thread *td, struct thread *td2)
110{
111	int runa, runb;
112	int slpa, slpb;
113	fixpt_t esta, estb;
114
115	if (td == NULL)
116		return (1);
117
118	/*
119	 * Fetch running stats, pctcpu usage, and interruptable flag.
120	 */
121	thread_lock(td);
122	runa = TD_IS_RUNNING(td) || TD_ON_RUNQ(td);
123	slpa = td->td_flags & TDF_SINTR;
124	esta = sched_pctcpu(td);
125	thread_unlock(td);
126	thread_lock(td2);
127	runb = TD_IS_RUNNING(td2) || TD_ON_RUNQ(td2);
128	estb = sched_pctcpu(td2);
129	slpb = td2->td_flags & TDF_SINTR;
130	thread_unlock(td2);
131	/*
132	 * see if at least one of them is runnable
133	 */
134	switch (TESTAB(runa, runb)) {
135	case ONLYA:
136		return (0);
137	case ONLYB:
138		return (1);
139	case BOTH:
140		break;
141	}
142	/*
143	 *  favor one with highest recent cpu utilization
144	 */
145	if (estb > esta)
146		return (1);
147	if (esta > estb)
148		return (0);
149	/*
150	 * favor one sleeping in a non-interruptible sleep
151	 */
152	switch (TESTAB(slpa, slpb)) {
153	case ONLYA:
154		return (0);
155	case ONLYB:
156		return (1);
157	case BOTH:
158		break;
159	}
160
161	return (td < td2);
162}
163
164static int
165proc_compare(struct proc *p1, struct proc *p2)
166{
167
168	int runa, runb;
169	fixpt_t esta, estb;
170
171	if (p1 == NULL)
172		return (1);
173
174	/*
175	 * Fetch various stats about these processes.  After we drop the
176	 * lock the information could be stale but the race is unimportant.
177	 */
178	PROC_LOCK(p1);
179	runa = proc_sum(p1, &esta);
180	PROC_UNLOCK(p1);
181	PROC_LOCK(p2);
182	runb = proc_sum(p2, &estb);
183	PROC_UNLOCK(p2);
184
185	/*
186	 * see if at least one of them is runnable
187	 */
188	switch (TESTAB(runa, runb)) {
189	case ONLYA:
190		return (0);
191	case ONLYB:
192		return (1);
193	case BOTH:
194		break;
195	}
196	/*
197	 *  favor one with highest recent cpu utilization
198	 */
199	if (estb > esta)
200		return (1);
201	if (esta > estb)
202		return (0);
203	/*
204	 * weed out zombies
205	 */
206	switch (TESTAB(p1->p_state == PRS_ZOMBIE, p2->p_state == PRS_ZOMBIE)) {
207	case ONLYA:
208		return (1);
209	case ONLYB:
210		return (0);
211	case BOTH:
212		break;
213	}
214
215	return (p2->p_pid > p1->p_pid);		/* tie - return highest pid */
216}
217
218static int
219sbuf_tty_drain(void *a, const char *d, int len)
220{
221	struct tty *tp;
222	int rc;
223
224	tp = a;
225
226	if (kdb_active) {
227		cnputsn(d, len);
228		return (len);
229	}
230	if (tp != NULL && !KERNEL_PANICKED()) {
231		rc = tty_putstrn(tp, d, len);
232		if (rc != 0)
233			return (-ENXIO);
234		return (len);
235	}
236	return (-ENXIO);
237}
238
#ifdef STACK
/*
 * Format of the kernel stack trace appended to the tty info report.
 * Defaults to the compact format on INVARIANTS kernels and to no trace
 * otherwise.
 */
#ifdef INVARIANTS
static int tty_info_kstacks = STACK_SBUF_FMT_COMPACT;
#else
static int tty_info_kstacks = STACK_SBUF_FMT_NONE;
#endif

/*
 * Handler for the kern.tty_info_kstacks sysctl.
 *
 * Reports the current setting and, when a new value is supplied, accepts
 * it only if it is one of STACK_SBUF_FMT_NONE, STACK_SBUF_FMT_LONG or
 * STACK_SBUF_FMT_COMPACT.
 *
 * Returns 0 on success, the error from sysctl_handle_int() if that call
 * fails, or EINVAL for an unrecognized format value.
 */
static int
sysctl_tty_info_kstacks(SYSCTL_HANDLER_ARGS)
{
	enum stack_sbuf_fmt fmt;
	int rc;

	fmt = tty_info_kstacks;
	rc = sysctl_handle_int(oidp, &fmt, 0, req);
	/* Stop here on failure or when this was a read-only request. */
	if (rc != 0 || req->newptr == NULL)
		return (rc);

	if (fmt != STACK_SBUF_FMT_NONE && fmt != STACK_SBUF_FMT_LONG &&
	    fmt != STACK_SBUF_FMT_COMPACT)
		return (EINVAL);

	tty_info_kstacks = fmt;
	return (0);
}
SYSCTL_PROC(_kern, OID_AUTO, tty_info_kstacks,
    CTLFLAG_RWTUN | CTLFLAG_MPSAFE | CTLTYPE_INT, NULL, 0,
    sysctl_tty_info_kstacks, "I",
    "Adjust format of kernel stack(9) traces on ^T (tty info): "
    "0 - disabled; 1 - long; 2 - compact");
#endif
275
/*
 * Report on state of foreground process group.
 *
 * Writes a status report to the terminal tp: first the load average,
 * then, for the "most interesting" process in the foreground process
 * group (as chosen by proc_compare()) and its most interesting thread
 * (as chosen by thread_compare()), the command name, pid, thread state,
 * real/user/system times, %cpu and resident set size.  When the kernel
 * is built with STACK and tty_info_kstacks is not STACK_SBUF_FMT_NONE,
 * a kernel stack trace of that thread is appended.
 *
 * The tty must be locked by the caller (asserted below).
 */
void
tty_info(struct tty *tp)
{
	struct timeval rtime, utime, stime;
#ifdef STACK
	struct stack stack;
	int sterr, kstacks_val;
	bool print_kstacks;
#endif
	struct proc *p, *ppick;
	struct thread *td, *tdpick;
	const char *stateprefix, *state;
	struct sbuf sb;
	long rss;
	int load, pctcpu;
	pid_t pid;
	char comm[MAXCOMLEN + 1];
	struct rusage ru;

	tty_assert_locked(tp);

	/*
	 * Nothing is printed when tty_checkoutq() returns 0.
	 * NOTE(review): presumably this means the output queue cannot
	 * accept data right now - confirm against tty_checkoutq().
	 */
	if (tty_checkoutq(tp) == 0)
		return;

	/*
	 * Format into the tty's own fixed-size print buffer; the drain
	 * callback forwards the formatted data to the terminal.
	 */
	(void)sbuf_new(&sb, tp->t_prbuf, tp->t_prbufsz, SBUF_FIXEDLEN);
	sbuf_set_drain(&sb, sbuf_tty_drain, tp);

	/*
	 * Print load average.  The fixed-point value is converted to
	 * hundredths (rounded) so it can be shown with two decimals.  A
	 * leading newline is emitted unless the cursor is in column 0.
	 */
	load = ((int64_t)averunnable.ldavg[0] * 100 + FSCALE / 2) >> FSHIFT;
	sbuf_printf(&sb, "%sload: %d.%02d ", tp->t_column == 0 ? "" : "\n",
	    load / 100, load % 100);

	if (tp->t_session == NULL) {
		sbuf_cat(&sb, "not a controlling terminal\n");
		goto out;
	}
	if (tp->t_pgrp == NULL) {
		sbuf_cat(&sb, "no foreground process group\n");
		goto out;
	}
	PGRP_LOCK(tp->t_pgrp);
	if (LIST_EMPTY(&tp->t_pgrp->pg_members)) {
		PGRP_UNLOCK(tp->t_pgrp);
		sbuf_cat(&sb, "empty foreground process group\n");
		goto out;
	}

	/*
	 * Pick the most interesting process and copy some of its
	 * state for printing later.  This operation could rely on stale
	 * data as we can't hold the proc slock or thread locks over the
	 * whole list. However, we're guaranteed not to reference an exited
	 * thread or proc since we hold the tty locked.
	 */
	p = NULL;
	LIST_FOREACH(ppick, &tp->t_pgrp->pg_members, p_pglist)
		if (proc_compare(p, ppick))
			p = ppick;

	/*
	 * The list was checked to be non-empty above and proc_compare()
	 * accepts the first candidate when p is NULL, so p is set here.
	 * The proc lock is taken before the pgrp lock is released.
	 */
	PROC_LOCK(p);
	PGRP_UNLOCK(tp->t_pgrp);
	/* Now pick the most interesting thread within that process. */
	td = NULL;
	FOREACH_THREAD_IN_PROC(p, tdpick)
		if (thread_compare(td, tdpick))
			td = tdpick;
	/*
	 * Translate the thread's state into a short description while
	 * holding the thread lock.  stateprefix is "*" only when the
	 * thread is blocked on a lock, in which case the lock name is shown.
	 */
	stateprefix = "";
	thread_lock(td);
	if (TD_IS_RUNNING(td))
		state = "running";
	else if (TD_ON_RUNQ(td) || TD_CAN_RUN(td))
		state = "runnable";
	else if (TD_IS_SLEEPING(td)) {
		/* XXX: If we're sleeping, are we ever not in a queue? */
		if (TD_ON_SLEEPQ(td))
			state = td->td_wmesg;
		else
			state = "sleeping without queue";
	} else if (TD_ON_LOCK(td)) {
		state = td->td_lockname;
		stateprefix = "*";
	} else if (TD_IS_SUSPENDED(td))
		state = "suspended";
	else if (TD_AWAITING_INTR(td))
		state = "intrwait";
	else if (p->p_state == PRS_ZOMBIE)
		state = "zombie";
	else
		state = "unknown";
	/*
	 * Scale the fixed-point cpu usage by 10000 (rounded); it is
	 * divided by 100 again when printed as a whole percentage.
	 */
	pctcpu = (sched_pctcpu(td) * 10000 + FSCALE / 2) >> FSHIFT;
#ifdef STACK
	/*
	 * Read the configured format once so that the capture decision
	 * here and the print call below use the same value.
	 */
	kstacks_val = atomic_load_int(&tty_info_kstacks);
	print_kstacks = (kstacks_val != STACK_SBUF_FMT_NONE);

	/*
	 * Capture the stack while the thread lock is still held; it is
	 * printed later, after the locks are dropped.  sterr is only
	 * assigned, and only consulted, when print_kstacks is true.
	 */
	if (print_kstacks) {
		if (TD_IS_SWAPPED(td))
			sterr = ENOENT;
		else
			sterr = stack_save_td(&stack, td);
	}
#endif
	thread_unlock(td);
	/*
	 * New and zombie processes are reported with an rss of 0 and
	 * their vmspace is not consulted.
	 */
	if (p->p_state == PRS_NEW || p->p_state == PRS_ZOMBIE)
		rss = 0;
	else
		rss = pgtok(vmspace_resident_count(p->p_vmspace));
	/* Real time is the current uptime minus the process start time. */
	microuptime(&rtime);
	timevalsub(&rtime, &p->p_stats->p_start);
	rufetchcalc(p, &ru, &utime, &stime);
	/* Copy pid and command name so they can be used after unlocking. */
	pid = p->p_pid;
	strlcpy(comm, p->p_comm, sizeof comm);
	PROC_UNLOCK(p);

	/*
	 * Print command, pid, state, rtime, utime, stime, %cpu, and rss.
	 * Times are shown as seconds plus hundredths (tv_usec / 10000).
	 */
	sbuf_printf(&sb,
	    " cmd: %s %d [%s%s] %ld.%02ldr %ld.%02ldu %ld.%02lds %d%% %ldk\n",
	    comm, pid, stateprefix, state,
	    (long)rtime.tv_sec, rtime.tv_usec / 10000,
	    (long)utime.tv_sec, utime.tv_usec / 10000,
	    (long)stime.tv_sec, stime.tv_usec / 10000,
	    pctcpu / 100, rss);

#ifdef STACK
	/* Append the stack trace only if it was captured successfully. */
	if (print_kstacks && sterr == 0)
		stack_sbuf_print_flags(&sb, &stack, M_NOWAIT, kstacks_val);
#endif

out:
	/* Every exit path after sbuf_new() finishes and releases the sbuf. */
	sbuf_finish(&sb);
	sbuf_delete(&sb);
}
409