1/* vim: set noexpandtab tabstop=4 shiftwidth=4 : */
2/*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23/*
24 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
25 * Use is subject to license terms.
26 */
27
28#pragma ident	"%Z%%M%	%I%	%E% SMI"
29
30#include <assert.h>
31#include <dtrace.h>
32#include <limits.h>
33#if !defined(__APPLE__)
34#include <link.h>
35#include <priv.h>
36#endif
37#include <signal.h>
38#include <stdlib.h>
39#include <stdarg.h>
40#include <stdio.h>
41#include <string.h>
42#include <strings.h>
43#include <errno.h>
44#include <sys/wait.h>
45#include <libgen.h>
46#include <libproc.h>
47#if defined(__APPLE__)
48#include <getopt.h>
49#endif
50
static char *g_pname;		/* basename of argv[0]; prefixes every message */
static dtrace_hdl_t *g_dtp;	/* libdtrace handle opened in main() */
struct ps_prochandle *g_pr;	/* target process grabbed or created in main() */

/* Process exit codes. */
#define	E_SUCCESS	0
#define	E_ERROR		1
#define	E_USAGE		2
58
/*
 * For hold times we use a global associative array since for mutexes, in
 * user-land, it's not invalid to release a synchronization primitive that
 * another thread acquired; rwlocks require a thread-local associative array
 * since multiple threads can hold the same lock for reading. Note that we
 * ignore recursive mutex acquisitions and releases as they don't truly
 * affect lock contention.
 */
/*
 * D clauses that record the acquisition timestamp used for hold-time
 * tracing: per-thread in self->rwhold[] for rw-acquire, and in the global
 * mtxhold[] array for mutex-acquire firings whose arg1 is 0.  The pointer
 * itself is const so the program text cannot be retargeted at run time.
 */
static const char *const g_hold_init =
"plockstat$target:::rw-acquire\n"
"{\n"
"	self->rwhold[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-acquire\n"
"/arg1 == 0/\n"
"{\n"
"	mtxhold[arg0] = timestamp;\n"
"}\n";
77
/*
 * Hold-time clauses for histogram output.  On rw-release and mutex-release
 * the elapsed time since the recorded acquisition is fed to quantize(),
 * keyed by lock address (arg0) and the full ustack(); the rw-release clause
 * with arg1 == 1 feeds the writer aggregation, the other one the reader
 * aggregation.  Each END clause prints its aggregation only if the matching
 * *_found flag was set.  The pointer itself is const so it cannot be
 * retargeted at run time.
 */
static const char *const g_hold_histogram =
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0] && arg1 == 1/\n"
"{\n"
"	@rw_w_hold[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwhold[arg0]);\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_w_hold_found = 1;\n"
"}\n"
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0]/\n"
"{\n"
"	@rw_r_hold[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwhold[arg0]);\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_r_hold_found = 1;\n"
"}\n"
"plockstat$target:::mutex-release\n"
"/mtxhold[arg0] && arg1 == 0/\n"
"{\n"
"	@mtx_hold[arg0, ustack()] = quantize(timestamp - mtxhold[arg0]);\n"
"	mtxhold[arg0] = 0;\n"
"	mtx_hold_found = 1;\n"
"}\n"
"\n"
"END\n"
"/mtx_hold_found/\n"
"{\n"
"	trace(\"Mutex hold\");\n"
"	printa(@mtx_hold);\n"
"}\n"
"END\n"
"/rw_r_hold_found/\n"
"{\n"
"	trace(\"R/W reader hold\");\n"
"	printa(@rw_r_hold);\n"
"}\n"
"END\n"
"/rw_w_hold_found/\n"
"{\n"
"	trace(\"R/W writer hold\");\n"
"	printa(@rw_w_hold);\n"
"}\n";
121
/*
 * Hold-time clauses for tabular output.  Each release probe adds the
 * elapsed hold time to a sum() aggregation and bumps a parallel count()
 * aggregation, both keyed by lock address (arg0) and ustack(5).  The END
 * clauses print each sum/count pair with a single printa() when the
 * matching *_found flag was set.  The pointer itself is const so it cannot
 * be retargeted at run time.
 */
static const char *const g_hold_times =
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0] && arg1 == 1/\n"
"{\n"
"	@rw_w_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
"	@rw_w_hold_count[arg0, ustack(5)] = count();\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_w_hold_found = 1;\n"
"}\n"
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0]/\n"
"{\n"
"	@rw_r_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
"	@rw_r_hold_count[arg0, ustack(5)] = count();\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_r_hold_found = 1;\n"
"}\n"
"plockstat$target:::mutex-release\n"
"/mtxhold[arg0] && arg1 == 0/\n"
"{\n"
"	@mtx_hold[arg0, ustack(5)] = sum(timestamp - mtxhold[arg0]);\n"
"	@mtx_hold_count[arg0, ustack(5)] = count();\n"
"	mtxhold[arg0] = 0;\n"
"	mtx_hold_found = 1;\n"
"}\n"
"\n"
"END\n"
"/mtx_hold_found/\n"
"{\n"
"	trace(\"Mutex hold\");\n"
"	printa(@mtx_hold, @mtx_hold_count);\n"
"}\n"
"END\n"
"/rw_r_hold_found/\n"
"{\n"
"	trace(\"R/W reader hold\");\n"
"	printa(@rw_r_hold, @rw_r_hold_count);\n"
"}\n"
"END\n"
"/rw_w_hold_found/\n"
"{\n"
"	trace(\"R/W writer hold\");\n"
"	printa(@rw_w_hold, @rw_w_hold_count);\n"
"}\n";
166
167
/*
 * For contention, we use thread-local associative arrays since we're tracing
 * a single thread's activity in libc and multiple threads can be blocking or
 * spinning on the same synchronization primitive.
 */
/*
 * D clauses that stamp the start of a contention event in thread-local
 * arrays: rw-block into self->rwblock[], mutex-block into self->mtxblock[]
 * and mutex-spin into self->mtxspin[], each indexed by arg0.  The pointer
 * itself is const so it cannot be retargeted at run time.
 */
static const char *const g_ctnd_init =
"plockstat$target:::rw-block\n"
"{\n"
"	self->rwblock[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-block\n"
"{\n"
"	self->mtxblock[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-spin\n"
"{\n"
"	self->mtxspin[arg0] = timestamp;\n"
"}\n";
186
/*
 * Contention clauses for histogram output.  The rw-blocked, mutex-spun and
 * mutex-blocked probes feed the time since the matching start stamp into
 * quantize() aggregations keyed by lock address (arg0) and the full
 * ustack().  Clauses are ordered so that the first matching predicate
 * consumes the stamp (it is reset to 0), and trailing catch-all clauses for
 * rw-blocked and mutex-blocked discard stamps that matched nothing.  Each
 * END clause prints its aggregation only when the matching *_found flag was
 * set.  The pointer itself is const so it cannot be retargeted at run time.
 */
static const char *const g_ctnd_histogram =
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
"{\n"
"	@rw_w_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwblock[arg0]);\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_w_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg2 != 0/\n"
"{\n"
"	@rw_r_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwblock[arg0]);\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_r_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0]/\n"
"{\n"
"	self->rwblock[arg0] = 0;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_spin[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxspin[arg0]);\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0]/\n"
"{\n"
"	@mtx_vain_spin[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxspin[arg0]);\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_vain_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxblock[arg0]);\n"
"	self->mtxblock[arg0] = 0;\n"
"	mtx_block_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0]/\n"
"{\n"
"	self->mtxblock[arg0] = 0;\n"
"}\n"
"\n"
"END\n"
"/mtx_block_found/\n"
"{\n"
"	trace(\"Mutex block\");\n"
"	printa(@mtx_block);\n"
"}\n"
"END\n"
"/mtx_spin_found/\n"
"{\n"
"	trace(\"Mutex spin\");\n"
"	printa(@mtx_spin);\n"
"}\n"
"END\n"
"/mtx_vain_spin_found/\n"
"{\n"
"	trace(\"Mutex unsuccessful spin\");\n"
"	printa(@mtx_vain_spin);\n"
"}\n"
"END\n"
"/rw_r_block_found/\n"
"{\n"
"	trace(\"R/W reader block\");\n"
"	printa(@rw_r_block);\n"
"}\n"
"END\n"
"/rw_w_block_found/\n"
"{\n"
"	trace(\"R/W writer block\");\n"
"	printa(@rw_w_block);\n"
"}\n";
269
270
/*
 * Contention clauses for tabular output.  Same probe/predicate structure
 * as the histogram variant, but each event adds its duration to a sum()
 * aggregation and bumps a parallel count() aggregation, both keyed by lock
 * address (arg0) and ustack(5).  The END clauses print each sum/count pair
 * with a single printa() when the matching *_found flag was set.  The
 * pointer itself is const so it cannot be retargeted at run time.
 */
static const char *const g_ctnd_times =
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
"{\n"
"	@rw_w_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->rwblock[arg0]);\n"
"	@rw_w_block_count[arg0, ustack(5)] = count();\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_w_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg2 != 0/\n"
"{\n"
"	@rw_r_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->rwblock[arg0]);\n"
"	@rw_r_block_count[arg0, ustack(5)] = count();\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_r_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0]/\n"
"{\n"
"	self->rwblock[arg0] = 0;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_spin[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxspin[arg0]);\n"
"	@mtx_spin_count[arg0, ustack(5)] = count();\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0]/\n"
"{\n"
"	@mtx_vain_spin[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxspin[arg0]);\n"
"	@mtx_vain_spin_count[arg0, ustack(5)] = count();\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_vain_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxblock[arg0]);\n"
"	@mtx_block_count[arg0, ustack(5)] = count();\n"
"	self->mtxblock[arg0] = 0;\n"
"	mtx_block_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0]/\n"
"{\n"
"	self->mtxblock[arg0] = 0;\n"
"}\n"
"\n"
"END\n"
"/mtx_block_found/\n"
"{\n"
"	trace(\"Mutex block\");\n"
"	printa(@mtx_block, @mtx_block_count);\n"
"}\n"
"END\n"
"/mtx_spin_found/\n"
"{\n"
"	trace(\"Mutex spin\");\n"
"	printa(@mtx_spin, @mtx_spin_count);\n"
"}\n"
"END\n"
"/mtx_vain_spin_found/\n"
"{\n"
"	trace(\"Mutex unsuccessful spin\");\n"
"	printa(@mtx_vain_spin, @mtx_vain_spin_count);\n"
"}\n"
"END\n"
"/rw_r_block_found/\n"
"{\n"
"	trace(\"R/W reader block\");\n"
"	printa(@rw_r_block, @rw_r_block_count);\n"
"}\n"
"END\n"
"/rw_w_block_found/\n"
"{\n"
"	trace(\"R/W writer block\");\n"
"	printa(@rw_w_block, @rw_w_block_count);\n"
"}\n";
358
359static char g_prog[4096];
360static size_t g_proglen;
361static int g_opt_V, g_opt_s;
362static int g_intr;
363static int g_exited;
364static dtrace_optval_t g_nframes;
365static ulong_t g_nent = ULONG_MAX;
366
367#define	PLOCKSTAT_OPTSTR	"n:ps:e:vx:ACHV"
368
369static void
370usage(void)
371{
372	(void) fprintf(stderr, "Usage:\n"
373	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
374	    "\t    command [arg...]\n"
375	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
376	    "\t    -p pid\n", g_pname, g_pname);
377
378#if defined(__APPLE__)
379	(void) fprintf(stderr, "\n");
380	(void) fprintf(stderr, "\t-v\t\tprint a message when tracing starts\n");
381	(void) fprintf(stderr, "\t-A\t\ttrace contention and hold events (same as -CH)\n");
382	(void) fprintf(stderr, "\t-C\t\ttrace contention events for mutexes and rwlocks\n");
383	(void) fprintf(stderr, "\t-H\t\ttrace hold events for mutexes and rwlocks\n");
384	(void) fprintf(stderr, "\t-V\t\tprint the dtrace script to run\n");
385	(void) fprintf(stderr, "\t-n count\tdisplay only \'count\' entries for each event type\n");
386	(void) fprintf(stderr, "\t-s depth\tshow stack trace upto \'depth\' entries\n");
387	(void) fprintf(stderr, "\t-e secs\t\texit after specified seconds\n");
388	(void) fprintf(stderr, "\t-x arg[=val]\tenable a DTrace runtime option or a D compiler option\n");
389	(void) fprintf(stderr, "\t-p pid\t\tattach and trace the specified process id\n");
390#endif
391
392	exit(E_USAGE);
393}
394
395static void
396verror(const char *fmt, va_list ap)
397{
398	int error = errno;
399
400	(void) fprintf(stderr, "%s: ", g_pname);
401	(void) vfprintf(stderr, fmt, ap);
402
403	if (fmt[strlen(fmt) - 1] != '\n')
404		(void) fprintf(stderr, ": %s\n", strerror(error));
405}
406
407/*PRINTFLIKE1*/
408static void
409fatal(const char *fmt, ...)
410{
411	va_list ap;
412
413	va_start(ap, fmt);
414	verror(fmt, ap);
415	va_end(ap);
416
417	if (g_pr != NULL && g_dtp != NULL)
418		dtrace_proc_release(g_dtp, g_pr);
419
420	exit(E_ERROR);
421}
422
423/*PRINTFLIKE1*/
424static void
425dfatal(const char *fmt, ...)
426{
427	va_list ap;
428
429	va_start(ap, fmt);
430
431	(void) fprintf(stderr, "%s: ", g_pname);
432	if (fmt != NULL)
433		(void) vfprintf(stderr, fmt, ap);
434
435	va_end(ap);
436
437	if (fmt != NULL && fmt[strlen(fmt) - 1] != '\n') {
438		(void) fprintf(stderr, ": %s\n",
439		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
440	} else if (fmt == NULL) {
441		(void) fprintf(stderr, "%s\n",
442		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
443	}
444
445	if (g_pr != NULL) {
446		dtrace_proc_continue(g_dtp, g_pr);
447		dtrace_proc_release(g_dtp, g_pr);
448	}
449
450	exit(E_ERROR);
451}
452
/*
 * Emit a non-fatal diagnostic: same formatting as fatal(), via verror(),
 * but control returns to the caller.
 */
/*PRINTFLIKE1*/
static void
notice(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror(fmt, args);
	va_end(args);
}
463
464static void
465dprog_add(const char *prog)
466{
467	size_t len = strlen(prog);
468	bcopy(prog, g_prog + g_proglen, len + 1);
469	g_proglen += len;
470	assert(g_proglen < sizeof (g_prog));
471}
472
473static void
474dprog_compile(void)
475{
476	dtrace_prog_t *prog;
477	dtrace_proginfo_t info;
478
479	if (g_opt_V) {
480		(void) fprintf(stderr, "%s: vvvv D program vvvv\n", g_pname);
481		(void) fputs(g_prog, stderr);
482		(void) fprintf(stderr, "%s: ^^^^ D program ^^^^\n", g_pname);
483	}
484
485	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
486	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
487		dfatal("failed to compile program");
488
489	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
490		dfatal("failed to enable probes");
491}
492
/*
 * Print the column headings of the per-lock table to stdout.
 */
void
print_legend(void)
{
	const char *count_hdr = "Count";
	const char *nsec_hdr = "nsec";
	const char *lock_hdr = "Lock";
	const char *caller_hdr = "Caller";

	(void) printf("%5s %8s %-28s %s\n", count_hdr, nsec_hdr, lock_hdr,
	    caller_hdr);
}
498
/*
 * Print a horizontal rule of dashes, terminated by a newline, to stdout.
 */
void
print_bar(void)
{
	/* The text contains no conversions, so fputs prints it verbatim. */
	(void) fputs("---------------------------------------"
	    "----------------------------------------\n", stdout);
}
505
/*
 * Print the heading line that precedes a time-distribution histogram,
 * preceded by a blank line, to stdout.
 */
void
print_histogram_header(void)
{
	const char *value_hdr = "nsec";
	const char *count_hdr = "count";
	const char *stack_hdr = "Stack";

	(void) printf("\n%10s ---- Time Distribution --- %5s %s\n",
	    value_hdr, count_hdr, stack_hdr);
}
512
513/*
514 * Convert an address to a symbolic string or a numeric string. If nolocks
515 * is set, we return an error code if this symbol appears to be a mutex- or
516 * rwlock-related symbol in libc so the caller has a chance to find a more
517 * helpful symbol.
518 */
519static int
520getsym(struct ps_prochandle *P, uintptr_t addr, char *buf, size_t size,
521    int nolocks)
522{
523	char name[256];
524	GElf_Sym sym;
525	prsyminfo_t info;
526	size_t len;
527
528	if (P == NULL || Pxlookup_by_addr(P, addr, name, sizeof (name),
529	    &sym, &info) != 0) {
530		(void) snprintf(buf, size, "%#lx", addr);
531		return (0);
532	}
533	if (info.prs_object == NULL)
534		info.prs_object = "<unknown>";
535
536	if (info.prs_lmid != LM_ID_BASE) {
537		len = snprintf(buf, size, "LM%lu`", info.prs_lmid);
538		buf += len;
539		size -= len;
540	}
541
542	len = snprintf(buf, size, "%s`%s", info.prs_object, info.prs_name);
543	buf += len;
544	size -= len;
545
546	if (sym.st_value != addr)
547		len = snprintf(buf, size, "+%#lx", addr - sym.st_value);
548
549	if (nolocks && strcmp("libc.so.1", info.prs_object) == 0 &&
550	    (strstr("mutex", info.prs_name) == 0 ||
551	    strstr("rw", info.prs_name) == 0))
552		return (-1);
553
554	return (0);
555}
556
557/*ARGSUSED*/
558static int
559process_aggregate(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
560{
561	const dtrace_recdesc_t *rec;
562	uintptr_t lock;
563	uint64_t *stack;
564	caddr_t data;
565	pid_t pid;
566	struct ps_prochandle *P;
567	char buf[256];
568	int i, j;
569	uint64_t sum, count, avg;
570
571	if ((*(uint_t *)arg)++ >= g_nent)
572		return (DTRACE_AGGWALK_NEXT);
573
574	rec = aggsdata[0]->dtada_desc->dtagd_rec;
575	data = aggsdata[0]->dtada_data;
576
577	/*LINTED - alignment*/
578	lock = (uintptr_t)*(uint64_t *)(data + rec[1].dtrd_offset);
579	/*LINTED - alignment*/
580	stack = (uint64_t *)(data + rec[2].dtrd_offset);
581
582	if (!g_opt_s) {
583		/*LINTED - alignment*/
584		sum = *(uint64_t *)(aggsdata[1]->dtada_data +
585		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
586		/*LINTED - alignment*/
587		count = *(uint64_t *)(aggsdata[2]->dtada_data +
588		    aggsdata[2]->dtada_desc->dtagd_rec[3].dtrd_offset);
589	} else {
590		uint64_t *a;
591
592		/*LINTED - alignment*/
593		a = (uint64_t *)(aggsdata[1]->dtada_data +
594		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
595
596		print_bar();
597		print_legend();
598
599		for (count = sum = 0, i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
600		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++) {
601			count += a[i];
602			sum += a[i] << (j - 64);
603		}
604	}
605
606	avg = sum / count;
607	(void) printf("%5llu %8llu ", (u_longlong_t)count, (u_longlong_t)avg);
608
609	pid = stack[0];
610	P = dtrace_proc_grab(g_dtp, pid, PGRAB_RDONLY);
611
612	(void) getsym(P, lock, buf, sizeof (buf), 0);
613	(void) printf("%-28s ", buf);
614
615	for (i = 2; i <= 5; i++) {
616		if (getsym(P, stack[i], buf, sizeof (buf), 1) == 0)
617			break;
618	}
619	(void) printf("%s\n", buf);
620
621	if (g_opt_s) {
622		int stack_done = 0;
623		int quant_done = 0;
624		int first_bin, last_bin;
625		uint64_t bin_size, *a;
626
627		/*LINTED - alignment*/
628		a = (uint64_t *)(aggsdata[1]->dtada_data +
629		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
630
631		print_histogram_header();
632
633		for (first_bin = DTRACE_QUANTIZE_ZEROBUCKET;
634		    a[first_bin] == 0; first_bin++)
635			continue;
636		for (last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 63;
637		    a[last_bin] == 0; last_bin--)
638			continue;
639
640		for (i = 0; !stack_done || !quant_done; i++) {
641			if (!stack_done) {
642				(void) getsym(P, stack[i + 2], buf,
643				    sizeof (buf), 0);
644			} else {
645				buf[0] = '\0';
646			}
647
648			if (!quant_done) {
649				bin_size = a[first_bin];
650
651				(void) printf("%10llu |%-24.*s| %5llu %s\n",
652				    1ULL <<
653				    (first_bin - DTRACE_QUANTIZE_ZEROBUCKET),
654				    (int)(24.0 * bin_size / count),
655				    "@@@@@@@@@@@@@@@@@@@@@@@@@@",
656				    (u_longlong_t)bin_size, buf);
657			} else {
658				(void) printf("%43s %s\n", "", buf);
659			}
660
661			if (i + 1 >= g_nframes || stack[i + 3] == 0)
662				stack_done = 1;
663
664			if (first_bin++ == last_bin)
665				quant_done = 1;
666		}
667	}
668
669	dtrace_proc_release(g_dtp, P);
670
671	return (DTRACE_AGGWALK_NEXT);
672}
673
674/*ARGSUSED*/
675static void
676prochandler(struct ps_prochandle *P, const char *msg, void *arg)
677{
678#if !defined(__APPLE__)
679	const psinfo_t *prp = Ppsinfo(P);
680	int pid = Pstatus(P)->pr_pid;
681	char name[SIG2STR_MAX];
682#else
683#define SIG2STR_MAX 32 /* Not referenced so long as prp just below is NULL. */
684#define proc_signame(x,y,z) "Unknown" /* Not referenced so long as prp just below is NULL. */
685	typedef struct psinfo { int pr_wstat; } psinfo_t;
686	const psinfo_t *prp = NULL;
687	int pid = Pstatus(P)->pr_pid;
688#endif /* __APPLE__ */
689
690	if (msg != NULL) {
691		notice("pid %d: %s\n", pid, msg);
692		return;
693	}
694
695	switch (Pstate(P)) {
696	case PS_UNDEAD:
697		/*
698		 * Ideally we would like to always report pr_wstat here, but it
699		 * isn't possible given current /proc semantics.  If we grabbed
700		 * the process, Ppsinfo() will either fail or return a zeroed
701		 * psinfo_t depending on how far the parent is in reaping it.
702		 * When /proc provides a stable pr_wstat in the status file,
703		 * this code can be improved by examining this new pr_wstat.
704		 */
705		if (prp != NULL && WIFSIGNALED(prp->pr_wstat)) {
706			notice("pid %d terminated by %s\n", pid,
707			    proc_signame(WTERMSIG(prp->pr_wstat),
708			    name, sizeof (name)));
709		} else if (prp != NULL && WEXITSTATUS(prp->pr_wstat) != 0) {
710			notice("pid %d exited with status %d\n",
711			    pid, WEXITSTATUS(prp->pr_wstat));
712		} else {
713			notice("pid %d has exited\n", pid);
714		}
715		g_exited = 1;
716		break;
717
718	case PS_LOST:
719#if !defined(__APPLE__)
720		notice("pid %d exec'd a set-id or unobservable program\n", pid);
721#else
722		notice("pid %d has exited\n", pid);
723#endif
724		g_exited = 1;
725		break;
726	}
727}
728
729/*ARGSUSED*/
730static int
731chewrec(const dtrace_probedata_t *data, const dtrace_recdesc_t *rec, void *arg)
732{
733	dtrace_eprobedesc_t *epd = data->dtpda_edesc;
734	dtrace_aggvarid_t aggvars[2];
735	const void *buf;
736	int i, nagv;
737
738	/*
739	 * A NULL rec indicates that we've processed the last record.
740	 */
741	if (rec == NULL)
742		return (DTRACE_CONSUME_NEXT);
743
744	buf = data->dtpda_data - rec->dtrd_offset;
745
746	switch (rec->dtrd_action) {
747	case DTRACEACT_DIFEXPR:
748		(void) printf("\n%s\n\n", (char *)buf + rec->dtrd_offset);
749		if (!g_opt_s) {
750			print_legend();
751			print_bar();
752		}
753		return (DTRACE_CONSUME_NEXT);
754
755	case DTRACEACT_PRINTA:
756		for (nagv = 0, i = 0; i < epd->dtepd_nrecs - 1; i++) {
757			const dtrace_recdesc_t *nrec = &rec[i];
758
759			if (nrec->dtrd_uarg != rec->dtrd_uarg)
760				break;
761
762			/*LINTED - alignment*/
763			aggvars[nagv++] = *(dtrace_aggvarid_t *)((caddr_t)buf +
764			    nrec->dtrd_offset);
765		}
766
767		if (nagv == (g_opt_s ? 1 : 2)) {
768			uint_t nent = 0;
769			if (dtrace_aggregate_walk_joined(g_dtp, aggvars, nagv,
770			    process_aggregate, &nent) != 0)
771				dfatal("failed to walk aggregate");
772		}
773
774		return (DTRACE_CONSUME_NEXT);
775	}
776
777	return (DTRACE_CONSUME_THIS);
778}
779
780/*ARGSUSED*/
781static void
782intr(int signo)
783{
784	g_intr = 1;
785}
786
787int
788main(int argc, char **argv)
789{
790#if !defined(__APPLE__)
791	ucred_t *ucp;
792#endif
793	int err;
794	int opt_C = 0, opt_H = 0, opt_p = 0, opt_v = 0;
795	char c, *p, *end;
796	struct sigaction act;
797	int done = 0;
798
799	g_pname = basename(argv[0]);
800	argv[0] = g_pname; /* rewrite argv[0] for getopt errors */
801
802	/*
803	 * Make sure we have the required dtrace_proc privilege.
804	 */
805#if !defined(__APPLE__)
806	if ((ucp = ucred_get(getpid())) != NULL) {
807		const priv_set_t *psp;
808		if ((psp = ucred_getprivset(ucp, PRIV_EFFECTIVE)) != NULL &&
809		    !priv_ismember(psp, PRIV_DTRACE_PROC)) {
810			fatal("dtrace_proc privilege required\n");
811		}
812
813		ucred_free(ucp);
814	}
815#endif
816
817	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
818		switch (c) {
819		case 'n':
820			errno = 0;
821			g_nent = strtoul(optarg, &end, 10);
822			if (*end != '\0' || errno != 0) {
823				(void) fprintf(stderr, "%s: invalid count "
824				    "'%s'\n", g_pname, optarg);
825				usage();
826			}
827			break;
828
829		case 'p':
830			opt_p = 1;
831			break;
832
833		case 'v':
834			opt_v = 1;
835			break;
836
837		case 'A':
838			opt_C = opt_H = 1;
839			break;
840
841		case 'C':
842			opt_C = 1;
843			break;
844
845		case 'H':
846			opt_H = 1;
847			break;
848
849		case 'V':
850			g_opt_V = 1;
851			break;
852
853		default:
854			if (strchr(PLOCKSTAT_OPTSTR, c) == NULL)
855				usage();
856		}
857	}
858
859	/*
860	 * We need a command or at least one pid.
861	 */
862	if (argc == optind)
863		usage();
864
865	if (opt_C == 0 && opt_H == 0)
866		opt_C = 1;
867
868	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL)
869		fatal("failed to initialize dtrace: %s\n",
870		    dtrace_errmsg(NULL, err));
871
872	/*
873	 * The longest string we trace is 23 bytes long -- so 32 is plenty.
874	 */
875	if (dtrace_setopt(g_dtp, "strsize", "32") == -1)
876		dfatal("failed to set 'strsize'");
877
878	/*
879	 * 1k should be more than enough for all trace() and printa() actions.
880	 */
881	if (dtrace_setopt(g_dtp, "bufsize", "1k") == -1)
882		dfatal("failed to set 'bufsize'");
883
884	/*
885	 * The table we produce has the hottest locks at the top.
886	 */
887	if (dtrace_setopt(g_dtp, "aggsortrev", NULL) == -1)
888		dfatal("failed to set 'aggsortrev'");
889
890	/*
891	 * These are two reasonable defaults which should suffice.
892	 */
893	if (dtrace_setopt(g_dtp, "aggsize", "256k") == -1)
894		dfatal("failed to set 'aggsize'");
895	if (dtrace_setopt(g_dtp, "aggrate", "1sec") == -1)
896		dfatal("failed to set 'aggrate'");
897
898	/*
899	 * Take a second pass through to look for options that set options now
900	 * that we have an open dtrace handle.
901	 */
902	optind = 1;
903	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
904		switch (c) {
905		case 's':
906			g_opt_s = 1;
907			if (dtrace_setopt(g_dtp, "ustackframes", optarg) == -1)
908				dfatal("failed to set 'ustackframes'");
909			break;
910
911		case 'x':
912			if ((p = strchr(optarg, '=')) != NULL)
913				*p++ = '\0';
914
915			if (dtrace_setopt(g_dtp, optarg, p) != 0)
916				dfatal("failed to set -x %s", optarg);
917			break;
918
919		case 'e':
920			errno = 0;
921			(void) strtoul(optarg, &end, 10);
922			if (*optarg == '-' || *end != '\0' || errno != 0) {
923				(void) fprintf(stderr, "%s: invalid timeout "
924				    "'%s'\n", g_pname, optarg);
925				usage();
926			}
927
928			/*
929			 * Construct a DTrace enabling that will exit after
930			 * the specified number of seconds.
931			 */
932			dprog_add("BEGIN\n{\n\tend = timestamp + ");
933			dprog_add(optarg);
934			dprog_add(" * 1000000000;\n}\n");
935			dprog_add("tick-10hz\n/timestamp >= end/\n");
936			dprog_add("{\n\texit(0);\n}\n");
937			break;
938		}
939	}
940
941	argc -= optind;
942	argv += optind;
943
944	if (opt_H) {
945		dprog_add(g_hold_init);
946		if (g_opt_s == NULL)
947			dprog_add(g_hold_times);
948		else
949			dprog_add(g_hold_histogram);
950	}
951
952	if (opt_C) {
953		dprog_add(g_ctnd_init);
954		if (g_opt_s == NULL)
955			dprog_add(g_ctnd_times);
956		else
957			dprog_add(g_ctnd_histogram);
958	}
959
960	if (opt_p) {
961		ulong_t pid;
962
963		if (argc > 1) {
964			(void) fprintf(stderr, "%s: only one pid is allowed\n",
965			    g_pname);
966			usage();
967		}
968
969		errno = 0;
970		pid = strtoul(argv[0], &end, 10);
971		if (*end != '\0' || errno != 0 || (pid_t)pid != pid) {
972			(void) fprintf(stderr, "%s: invalid pid '%s'\n",
973			    g_pname, argv[0]);
974			usage();
975		}
976
977		if ((g_pr = dtrace_proc_grab(g_dtp, (pid_t)pid, 0)) == NULL)
978			dfatal(NULL);
979	} else {
980		if ((g_pr = dtrace_proc_create(g_dtp, argv[0], argv)) == NULL)
981			dfatal(NULL);
982	}
983
984	dprog_compile();
985
986	if (dtrace_handle_proc(g_dtp, &prochandler, NULL) == -1)
987		dfatal("failed to establish proc handler");
988
989	(void) sigemptyset(&act.sa_mask);
990	act.sa_flags = 0;
991	act.sa_handler = intr;
992	(void) sigaction(SIGINT, &act, NULL);
993	(void) sigaction(SIGTERM, &act, NULL);
994
995	if (dtrace_go(g_dtp) != 0)
996		dfatal("dtrace_go()");
997
998	if (dtrace_getopt(g_dtp, "ustackframes", &g_nframes) != 0)
999		dfatal("failed to get 'ustackframes'");
1000
1001	dtrace_proc_continue(g_dtp, g_pr);
1002
1003	if (opt_v)
1004		(void) printf("%s: tracing enabled for pid %d\n", g_pname,
1005		    (int)Pstatus(g_pr)->pr_pid);
1006
1007	do {
1008		if (!g_intr && !done)
1009			dtrace_sleep(g_dtp);
1010
1011		if (done || g_intr || g_exited) {
1012			done = 1;
1013			if (dtrace_stop(g_dtp) == -1)
1014				dfatal("couldn't stop tracing");
1015		}
1016
1017		switch (dtrace_work(g_dtp, stdout, NULL, chewrec, NULL)) {
1018		case DTRACE_WORKSTATUS_DONE:
1019			done = 1;
1020			break;
1021		case DTRACE_WORKSTATUS_OKAY:
1022			break;
1023		default:
1024			dfatal("processing aborted");
1025		}
1026
1027	} while (!done);
1028
1029	dtrace_close(g_dtp);
1030
1031	return (0);
1032}
1033