1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1990, 1991, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Copyright (c) 2002 Networks Associates Technologies, Inc.
13 * All rights reserved.
14 *
15 * Portions of this software were developed for the FreeBSD Project by
16 * ThinkSec AS and NAI Labs, the Security Research Division of Network
17 * Associates, Inc.  under DARPA/SPAWAR contract N66001-01-C-8035
18 * ("CBOSS"), as part of the DARPA CHATS research program.
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
22 * are met:
23 * 1. Redistributions of source code must retain the above copyright
24 *    notice, this list of conditions and the following disclaimer.
25 * 2. Redistributions in binary form must reproduce the above copyright
26 *    notice, this list of conditions and the following disclaimer in the
27 *    documentation and/or other materials provided with the distribution.
28 * 3. Neither the name of the University nor the names of its contributors
29 *    may be used to endorse or promote products derived from this software
30 *    without specific prior written permission.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
33 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
36 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
38 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
39 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
40 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42 * SUCH DAMAGE.
43 */
44
45#include <sys/cdefs.h>
46__FBSDID("$FreeBSD$");
47
48#include "opt_stack.h"
49
50#include <sys/param.h>
51#include <sys/cons.h>
52#include <sys/kdb.h>
53#include <sys/lock.h>
54#include <sys/malloc.h>
55#include <sys/mutex.h>
56#include <sys/proc.h>
57#include <sys/resourcevar.h>
58#include <sys/sbuf.h>
59#include <sys/sched.h>
60#include <sys/stack.h>
61#include <sys/sysctl.h>
62#include <sys/systm.h>
63#include <sys/tty.h>
64
65#include <vm/vm.h>
66#include <vm/pmap.h>
67#include <vm/vm_map.h>
68
69/*
70 * Returns 1 if p2 is "better" than p1
71 *
72 * The algorithm for picking the "interesting" process is thus:
73 *
74 *	1) Only foreground processes are eligible - implied.
75 *	2) Runnable processes are favored over anything else.  The runner
76 *	   with the highest cpu utilization is picked (p_estcpu).  Ties are
77 *	   broken by picking the highest pid.
78 *	3) The sleeper with the shortest sleep time is next.  With ties,
79 *	   we pick out just "short-term" sleepers (P_SINTR == 0).
80 *	4) Further ties are broken by picking the highest pid.
81 */
82
/*
 * Helpers for classifying a pair of boolean properties in one switch.
 * TESTAB() places "a" in bit 1 and "b" in bit 0, so its result equals
 * ONLYA, ONLYB or BOTH only when both operands are exactly 0 or 1;
 * callers must normalize masked flag values before passing them in.
 */
#define TESTAB(a, b)    ((a)<<1 | (b))
#define ONLYA   2
#define ONLYB   1
#define BOTH    3
87
88static int
89proc_sum(struct proc *p, fixpt_t *estcpup)
90{
91	struct thread *td;
92	int estcpu;
93	int val;
94
95	val = 0;
96	estcpu = 0;
97	FOREACH_THREAD_IN_PROC(p, td) {
98		thread_lock(td);
99		if (TD_ON_RUNQ(td) ||
100		    TD_IS_RUNNING(td))
101			val = 1;
102		estcpu += sched_pctcpu(td);
103		thread_unlock(td);
104	}
105	*estcpup = estcpu;
106
107	return (val);
108}
109
110static int
111thread_compare(struct thread *td, struct thread *td2)
112{
113	int runa, runb;
114	int slpa, slpb;
115	fixpt_t esta, estb;
116
117	if (td == NULL)
118		return (1);
119
120	/*
121	 * Fetch running stats, pctcpu usage, and interruptable flag.
122	 */
123	thread_lock(td);
124	runa = TD_IS_RUNNING(td) | TD_ON_RUNQ(td);
125	slpa = td->td_flags & TDF_SINTR;
126	esta = sched_pctcpu(td);
127	thread_unlock(td);
128	thread_lock(td2);
129	runb = TD_IS_RUNNING(td2) | TD_ON_RUNQ(td2);
130	estb = sched_pctcpu(td2);
131	slpb = td2->td_flags & TDF_SINTR;
132	thread_unlock(td2);
133	/*
134	 * see if at least one of them is runnable
135	 */
136	switch (TESTAB(runa, runb)) {
137	case ONLYA:
138		return (0);
139	case ONLYB:
140		return (1);
141	case BOTH:
142		break;
143	}
144	/*
145	 *  favor one with highest recent cpu utilization
146	 */
147	if (estb > esta)
148		return (1);
149	if (esta > estb)
150		return (0);
151	/*
152	 * favor one sleeping in a non-interruptible sleep
153	 */
154	switch (TESTAB(slpa, slpb)) {
155	case ONLYA:
156		return (0);
157	case ONLYB:
158		return (1);
159	case BOTH:
160		break;
161	}
162
163	return (td < td2);
164}
165
166static int
167proc_compare(struct proc *p1, struct proc *p2)
168{
169
170	int runa, runb;
171	fixpt_t esta, estb;
172
173	if (p1 == NULL)
174		return (1);
175
176	/*
177	 * Fetch various stats about these processes.  After we drop the
178	 * lock the information could be stale but the race is unimportant.
179	 */
180	PROC_LOCK(p1);
181	runa = proc_sum(p1, &esta);
182	PROC_UNLOCK(p1);
183	PROC_LOCK(p2);
184	runb = proc_sum(p2, &estb);
185	PROC_UNLOCK(p2);
186
187	/*
188	 * see if at least one of them is runnable
189	 */
190	switch (TESTAB(runa, runb)) {
191	case ONLYA:
192		return (0);
193	case ONLYB:
194		return (1);
195	case BOTH:
196		break;
197	}
198	/*
199	 *  favor one with highest recent cpu utilization
200	 */
201	if (estb > esta)
202		return (1);
203	if (esta > estb)
204		return (0);
205	/*
206	 * weed out zombies
207	 */
208	switch (TESTAB(p1->p_state == PRS_ZOMBIE, p2->p_state == PRS_ZOMBIE)) {
209	case ONLYA:
210		return (1);
211	case ONLYB:
212		return (0);
213	case BOTH:
214		break;
215	}
216
217	return (p2->p_pid > p1->p_pid);		/* tie - return highest pid */
218}
219
220static int
221sbuf_tty_drain(void *a, const char *d, int len)
222{
223	struct tty *tp;
224	int rc;
225
226	tp = a;
227
228	if (kdb_active) {
229		cnputsn(d, len);
230		return (len);
231	}
232	if (tp != NULL && !KERNEL_PANICKED()) {
233		rc = tty_putstrn(tp, d, len);
234		if (rc != 0)
235			return (-ENXIO);
236		return (len);
237	}
238	return (-ENXIO);
239}
240
241#ifdef STACK
/*
 * Format of the kernel stack trace that tty_info() appends to its report.
 * Holds one of the STACK_SBUF_FMT_* values and is changed through
 * sysctl_tty_info_kstacks().
 */
static int tty_info_kstacks = STACK_SBUF_FMT_COMPACT;
243
244static int
245sysctl_tty_info_kstacks(SYSCTL_HANDLER_ARGS)
246{
247	enum stack_sbuf_fmt val;
248	int error;
249
250	val = tty_info_kstacks;
251	error = sysctl_handle_int(oidp, &val, 0, req);
252	if (error != 0 || req->newptr == NULL)
253		return (error);
254
255	switch (val) {
256	case STACK_SBUF_FMT_NONE:
257	case STACK_SBUF_FMT_LONG:
258	case STACK_SBUF_FMT_COMPACT:
259		tty_info_kstacks = val;
260		break;
261	default:
262		error = EINVAL;
263	}
264
265	return (error);
266}
/*
 * Register the "tty_info_kstacks" node under _kern.  Accesses go through
 * sysctl_tty_info_kstacks() so that new values are validated before they
 * are stored.
 */
SYSCTL_PROC(_kern, OID_AUTO, tty_info_kstacks,
    CTLFLAG_RWTUN | CTLFLAG_MPSAFE | CTLTYPE_INT, NULL, 0,
    sysctl_tty_info_kstacks, "I",
    "Adjust format of kernel stack(9) traces on ^T (tty info): "
    "0 - disabled; 1 - long; 2 - compact");
272#endif
273
/*
 * Report on state of foreground process group.
 *
 * Writes a short status report to the tty tp: the load average, followed
 * by the command name, pid, state, real/user/system time, CPU percentage
 * and resident set size of the process chosen by proc_compare() from the
 * foreground process group, described through the thread chosen by
 * thread_compare().  When built with STACK and tty_info_kstacks is not
 * STACK_SBUF_FMT_NONE, that thread's kernel stack is printed as well.
 *
 * The tty lock must be held by the caller (checked with
 * tty_assert_locked()).  Nothing is printed when tty_checkoutq() returns 0.
 */
void
tty_info(struct tty *tp)
{
	struct timeval rtime, utime, stime;
#ifdef STACK
	struct stack stack;
	int sterr, kstacks_val;
	bool print_kstacks;
#endif
	struct proc *p, *ppick;
	struct thread *td, *tdpick;
	const char *stateprefix, *state;
	struct sbuf sb;
	long rss;
	int load, pctcpu;
	pid_t pid;
	char comm[MAXCOMLEN + 1];
	struct rusage ru;

	tty_assert_locked(tp);

	/* NOTE(review): presumably 0 means the output queue has no room. */
	if (tty_checkoutq(tp) == 0)
		return;

	/*
	 * Format into the tty's own t_prbuf; sbuf_tty_drain() is installed
	 * as the drain routine with tp as its argument.
	 */
	(void)sbuf_new(&sb, tp->t_prbuf, tp->t_prbufsz, SBUF_FIXEDLEN);
	sbuf_set_drain(&sb, sbuf_tty_drain, tp);

	/*
	 * Print load average.  The fixed-point value is scaled to
	 * hundredths and rounded to nearest before the FSHIFT bits are
	 * dropped.  A leading newline is emitted unless the cursor is
	 * already in column 0.
	 */
	load = (averunnable.ldavg[0] * 100 + FSCALE / 2) >> FSHIFT;
	sbuf_printf(&sb, "%sload: %d.%02d ", tp->t_column == 0 ? "" : "\n",
	    load / 100, load % 100);

	if (tp->t_session == NULL) {
		sbuf_printf(&sb, "not a controlling terminal\n");
		goto out;
	}
	if (tp->t_pgrp == NULL) {
		sbuf_printf(&sb, "no foreground process group\n");
		goto out;
	}
	PGRP_LOCK(tp->t_pgrp);
	if (LIST_EMPTY(&tp->t_pgrp->pg_members)) {
		PGRP_UNLOCK(tp->t_pgrp);
		sbuf_printf(&sb, "empty foreground process group\n");
		goto out;
	}

	/*
	 * Pick the most interesting process and copy some of its
	 * state for printing later.  This operation could rely on stale
	 * data as we can't hold the proc slock or thread locks over the
	 * whole list. However, we're guaranteed not to reference an exited
	 * thread or proc since we hold the tty locked.
	 */
	p = NULL;
	LIST_FOREACH(ppick, &tp->t_pgrp->pg_members, p_pglist)
		if (proc_compare(p, ppick))
			p = ppick;

	/* Take the proc lock on the winner before the pgrp lock is dropped. */
	PROC_LOCK(p);
	PGRP_UNLOCK(tp->t_pgrp);
	/* Pick the most interesting thread of that process. */
	td = NULL;
	FOREACH_THREAD_IN_PROC(p, tdpick)
		if (thread_compare(td, tdpick))
			td = tdpick;
	stateprefix = "";
	/* Translate the thread's state into a string under the thread lock. */
	thread_lock(td);
	if (TD_IS_RUNNING(td))
		state = "running";
	else if (TD_ON_RUNQ(td) || TD_CAN_RUN(td))
		state = "runnable";
	else if (TD_IS_SLEEPING(td)) {
		/* XXX: If we're sleeping, are we ever not in a queue? */
		if (TD_ON_SLEEPQ(td))
			state = td->td_wmesg;
		else
			state = "sleeping without queue";
	} else if (TD_ON_LOCK(td)) {
		/* Blocked on a lock: show the lock name prefixed with "*". */
		state = td->td_lockname;
		stateprefix = "*";
	} else if (TD_IS_SUSPENDED(td))
		state = "suspended";
	else if (TD_AWAITING_INTR(td))
		state = "intrwait";
	else if (p->p_state == PRS_ZOMBIE)
		state = "zombie";
	else
		state = "unknown";
	/*
	 * Scaled by 10000 with rounding before the FSHIFT bits are dropped;
	 * the report below prints pctcpu / 100 as the percentage.
	 */
	pctcpu = (sched_pctcpu(td) * 10000 + FSCALE / 2) >> FSHIFT;
#ifdef STACK
	/* Read the format once so the capture and the print below agree. */
	kstacks_val = atomic_load_int(&tty_info_kstacks);
	print_kstacks = (kstacks_val != STACK_SBUF_FMT_NONE);

	/* sterr is only assigned, and only read, when print_kstacks is set. */
	if (print_kstacks) {
		if (TD_IS_SWAPPED(td))
			sterr = ENOENT;
		else
			sterr = stack_save_td(&stack, td);
	}
#endif
	thread_unlock(td);
	/* p_vmspace is not consulted for PRS_NEW or PRS_ZOMBIE processes. */
	if (p->p_state == PRS_NEW || p->p_state == PRS_ZOMBIE)
		rss = 0;
	else
		rss = pgtok(vmspace_resident_count(p->p_vmspace));
	/* Real time is the current uptime minus the process start time. */
	microuptime(&rtime);
	timevalsub(&rtime, &p->p_stats->p_start);
	rufetchcalc(p, &ru, &utime, &stime);
	/* Copy pid and command name so they can be printed after unlocking. */
	pid = p->p_pid;
	strlcpy(comm, p->p_comm, sizeof comm);
	PROC_UNLOCK(p);

	/*
	 * Print command, pid, state, rtime, utime, stime, %cpu, and rss.
	 * Times are shown as seconds plus hundredths (tv_usec / 10000).
	 */
	sbuf_printf(&sb,
	    " cmd: %s %d [%s%s] %ld.%02ldr %ld.%02ldu %ld.%02lds %d%% %ldk\n",
	    comm, pid, stateprefix, state,
	    (long)rtime.tv_sec, rtime.tv_usec / 10000,
	    (long)utime.tv_sec, utime.tv_usec / 10000,
	    (long)stime.tv_sec, stime.tv_usec / 10000,
	    pctcpu / 100, rss);

#ifdef STACK
	/* Append the stack captured above, if one was requested and saved. */
	if (print_kstacks && sterr == 0)
		stack_sbuf_print_flags(&sb, &stack, M_NOWAIT, kstacks_val);
#endif

out:
	/* Common exit: finish the sbuf and release it. */
	sbuf_finish(&sb);
	sbuf_delete(&sb);
}
407