dt_consume.c revision 228579
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <stdlib.h>
27#include <strings.h>
28#include <errno.h>
29#include <unistd.h>
30#include <limits.h>
31#include <assert.h>
32#include <ctype.h>
33#if defined(sun)
34#include <alloca.h>
35#endif
36#include <dt_impl.h>
37#if !defined(sun)
38#include <libproc_compat.h>
39#endif
40
41#define	DT_MASK_LO 0x00000000FFFFFFFFULL
42
/*
 * Local absolute-value helper for long doubles.  It is defined here, rather
 * than calling fabsl(3M), so that libdtrace does not pick up a dependency on
 * libm.
 */
static long double
dt_fabsl(long double x)
{
	return (x < 0 ? -x : x);
}
55
56/*
57 * 128-bit arithmetic functions needed to support the stddev() aggregating
58 * action.
59 */
/*
 * Return non-zero if the 128-bit value a is strictly greater than b.  Each
 * operand is a two-element array in which element 1 is the more significant
 * 64-bit word and element 0 the less significant one.
 */
static int
dt_gt_128(uint64_t *a, uint64_t *b)
{
	/* The high words decide unless they are equal. */
	if (a[1] != b[1])
		return (a[1] > b[1]);

	return (a[0] > b[0]);
}
65
/*
 * Return non-zero if the 128-bit value a is greater than or equal to b
 * (element 1 of each array is the high word, element 0 the low word).
 */
static int
dt_ge_128(uint64_t *a, uint64_t *b)
{
	/* a >= b is simply the negation of a < b. */
	return (!(a[1] < b[1] || (a[1] == b[1] && a[0] < b[0])));
}
71
/*
 * Return non-zero if the 128-bit value a is less than or equal to b
 * (element 1 of each array is the high word, element 0 the low word).
 */
static int
dt_le_128(uint64_t *a, uint64_t *b)
{
	if (a[1] != b[1])
		return (a[1] < b[1]);

	return (a[0] <= b[0]);
}
77
/*
 * Shift the 128-bit value in a by b bits, in place.  If b is positive, shift
 * left; if b is negative, shift right.  Both directions are logical shifts
 * (vacated bits are filled with zero).  a[0] is the low 64-bit word and a[1]
 * the high 64-bit word.  Shifting by 128 bits or more in either direction
 * yields zero.
 */
static void
dt_shift_128(uint64_t *a, int b)
{
	unsigned int n;

	if (b == 0)
		return;

	/* Magnitude of the shift, computed without negating a signed int. */
	n = (b < 0) ? 0U - (unsigned int)b : (unsigned int)b;

	if (n >= 128) {
		a[0] = 0;
		a[1] = 0;
		return;
	}

	if (b < 0) {
		if (n >= 64) {
			a[0] = a[1] >> (n - 64);
			a[1] = 0;
		} else {
			/*
			 * The low n bits of the high word become the top n
			 * bits of the low word.  Here 1 <= n <= 63, so both
			 * shift counts are in range; the unsigned left shift
			 * discards the bits that do not cross over.
			 */
			a[0] = (a[0] >> n) | (a[1] << (64 - n));
			a[1] >>= n;
		}
	} else {
		if (n >= 64) {
			a[1] = a[0] << (n - 64);
			a[0] = 0;
		} else {
			/*
			 * The top n bits of the low word become the low n
			 * bits of the high word.
			 */
			a[1] = (a[1] << n) | (a[0] >> (64 - n));
			a[0] <<= n;
		}
	}
}
114
/*
 * Count how many single-bit right shifts, after an initial one, are needed to
 * reduce a copy of the 128-bit value a to zero.  For a non-zero value this is
 * the index of its most significant set bit; both 0 and 1 yield 0.  The
 * caller's value is not modified.
 */
static int
dt_nbits_128(uint64_t *a)
{
	uint64_t zero[2] = { 0, 0 };
	uint64_t work[2];
	int count;

	work[0] = a[0];
	work[1] = a[1];

	/* The first shift is not counted. */
	dt_shift_128(work, -1);

	for (count = 0; dt_gt_128(work, zero); count++)
		dt_shift_128(work, -1);

	return (count);
}
133
/*
 * Compute minuend - subtrahend on 128-bit values (low word in element 0,
 * high word in element 1) and store the result in difference.  The result
 * wraps modulo 2^128.  All inputs are read before anything is written, so
 * difference may alias either operand.
 */
static void
dt_subtract_128(uint64_t *minuend, uint64_t *subtrahend, uint64_t *difference)
{
	/* A borrow out of the low word occurs when it would go negative. */
	uint64_t borrow = (minuend[0] < subtrahend[0]) ? 1 : 0;
	uint64_t lo = minuend[0] - subtrahend[0];
	uint64_t hi = minuend[1] - subtrahend[1] - borrow;

	difference[0] = lo;
	difference[1] = hi;
}
146
/*
 * Compute addend1 + addend2 on 128-bit values (low word in element 0, high
 * word in element 1) and store the result in sum.  The result wraps modulo
 * 2^128.  All inputs are read before anything is written, so sum may alias
 * either operand.
 */
static void
dt_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t lo = addend1[0] + addend2[0];
	/* The low word wrapped if it ended up smaller than an input. */
	uint64_t carry = (lo < addend1[0] || lo < addend2[0]) ? 1 : 0;
	uint64_t hi = addend1[1] + addend2[1] + carry;

	sum[0] = lo;
	sum[1] = hi;
}
159
160/*
161 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
162 * use native multiplication on those, and then re-combine into the
163 * resulting 128-bit value.
164 *
165 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
166 *     hi1 * hi2 << 64 +
167 *     hi1 * lo2 << 32 +
168 *     hi2 * lo1 << 32 +
169 *     lo1 * lo2
170 */
171static void
172dt_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
173{
174	uint64_t hi1, hi2, lo1, lo2;
175	uint64_t tmp[2];
176
177	hi1 = factor1 >> 32;
178	hi2 = factor2 >> 32;
179
180	lo1 = factor1 & DT_MASK_LO;
181	lo2 = factor2 & DT_MASK_LO;
182
183	product[0] = lo1 * lo2;
184	product[1] = hi1 * hi2;
185
186	tmp[0] = hi1 * lo2;
187	tmp[1] = 0;
188	dt_shift_128(tmp, 32);
189	dt_add_128(product, tmp, product);
190
191	tmp[0] = hi2 * lo1;
192	tmp[1] = 0;
193	dt_shift_128(tmp, 32);
194	dt_add_128(product, tmp, product);
195}
196
/*
 * Long-hand (shift-and-subtract) division of a 128-bit dividend by a 64-bit
 * divisor, producing a 128-bit quotient; the remainder is discarded.
 *
 * The subtrahend starts out as the divisor shifted left as far as it will go
 * (its top set bit in bit 127), and a one-bit mask is shifted by the same
 * amount to track the quotient bit it corresponds to.  On each pass, if the
 * subtrahend fits into what remains of the dividend it is subtracted and the
 * matching quotient bit is set; both are then shifted right one bit.  The
 * loop ends once the remainder is smaller than the divisor.
 *
 * The divisor must be non-zero (asserted).  quotient may alias dividend.
 */
static void
dt_divide_128(uint64_t *dividend, uint64_t divisor, uint64_t *quotient)
{
	uint64_t quot[2] = { 0, 0 };
	uint64_t bit[2] = { 1, 0 };
	uint64_t rem[2];
	uint64_t sub[2];
	uint64_t divisor_128[2];
	int width;

	assert(divisor != 0);

	divisor_128[0] = sub[0] = divisor;
	divisor_128[1] = sub[1] = 0;

	rem[0] = dividend[0];
	rem[1] = dividend[1];

	/* Number of significant bits in the divisor. */
	for (width = 0; divisor > 0; divisor >>= 1)
		width++;

	dt_shift_128(sub, 128 - width);
	dt_shift_128(bit, 128 - width);

	while (dt_ge_128(rem, divisor_128)) {
		if (dt_ge_128(rem, sub)) {
			dt_subtract_128(rem, sub, rem);
			quot[0] |= bit[0];
			quot[1] |= bit[1];
		}

		dt_shift_128(sub, -1);
		dt_shift_128(bit, -1);
	}

	quotient[0] = quot[0];
	quotient[1] = quot[1];
}
248
/*
 * This is the long-hand method of calculating a square root.
 * The algorithm is as follows:
 *
 * 1. Group the digits by 2 from the right.
 * 2. Over the leftmost group, find the largest single-digit number
 *    whose square is less than or equal to that group.
 * 3. Subtract the result of the previous step (2 or 4, depending) and
 *    bring down the next two-digit group.
 * 4. For the result R we have so far, find the largest single-digit number
 *    x such that 2 * R * 10 * x + x^2 is less than or equal to the result
 *    from step 3.  (Note that this is doubling R and performing a decimal
 *    left-shift by 1 and searching for the appropriate decimal to fill the
 *    one's place.)  The value x is the next digit in the square root.
 * Repeat steps 3 and 4 until the desired precision is reached.  (We're
 * dealing with integers, so the above is sufficient.)
 *
 * In decimal, the square root of 582,734 would be calculated as so:
 *
 *     __7__6__3
 *    | 58 27 34
 *     -49       (7^2 == 49 => 7 is the first digit in the square root)
 *      --
 *       9 27    (Subtract and bring down the next group.)
 * 146   8 76    (2 * 7 * 10 * 6 + 6^2 == 876 => 6 is the next digit in
 *      -----     the square root)
 *         51 34 (Subtract and bring down the next group.)
 * 1523    45 69 (2 * 76 * 10 * 3 + 3^2 == 4569 => 3 is the next digit in
 *         -----  the square root)
 *          5 65 (remainder)
 *
 * The above algorithm applies similarly in binary, but note that the
 * only possible non-zero value for x in step 4 is 1, so step 4 becomes a
 * simple decision: is 2 * R * 2 * 1 + 1^2 (aka (R << 2) + 1) less than or
 * equal to the preceding difference?
 *
 * In binary, the square root of 11011011 would be calculated as so:
 *
 *     __1__1__1__0
 *    | 11 01 10 11
 *      01          (0 << 2 + 1 == 1 < 11 => this bit is 1)
 *      --
 *      10 01 10 11
 * 101   1 01       (1 << 2 + 1 == 101 < 1001 => next bit is 1)
 *      -----
 *       1 00 10 11
 * 1101    11 01    (11 << 2 + 1 == 1101 < 10010 => next bit is 1)
 *       -------
 *          1 01 11
 * 11101    1 11 01 (111 << 2 + 1 == 11101 > 10111 => last bit is 0)
 *
 */
static uint64_t
dt_sqrt_128(uint64_t *square)
{
	uint64_t result[2] = { 0, 0 };
	uint64_t diff[2] = { 0, 0 };
	uint64_t one[2] = { 1, 0 };
	uint64_t next_pair[2];
	uint64_t next_try[2];
	/*
	 * These are plain ints: pair_shift is negated to request a right
	 * shift from dt_shift_128(), and it drops below zero after the final
	 * pass, neither of which is well-behaved on an unsigned type.  The
	 * values involved never exceed 126.
	 */
	int bit_pairs, pair_shift;
	int i;

	/*
	 * Integer square root of the 128-bit value square (low word in
	 * square[0], high word in square[1]), computed one result bit per
	 * pair of input bits, most significant pair first.  The input is not
	 * modified.  The root is asserted to fit in 64 bits.
	 */
	bit_pairs = dt_nbits_128(square) / 2;
	pair_shift = bit_pairs * 2;

	for (i = 0; i <= bit_pairs; i++, pair_shift -= 2) {
		/*
		 * Bring down the next pair of bits.
		 */
		next_pair[0] = square[0];
		next_pair[1] = square[1];
		dt_shift_128(next_pair, -pair_shift);
		next_pair[0] &= 0x3;
		next_pair[1] = 0;

		dt_shift_128(diff, 2);
		dt_add_128(diff, next_pair, diff);

		/*
		 * next_try = (R << 2) + 1
		 */
		next_try[0] = result[0];
		next_try[1] = result[1];
		dt_shift_128(next_try, 2);
		dt_add_128(next_try, one, next_try);

		/*
		 * The result always gains one bit; that bit is 1 only when
		 * the trial value fits into the running difference.
		 */
		if (dt_le_128(next_try, diff)) {
			dt_subtract_128(diff, next_try, diff);
			dt_shift_128(result, 1);
			dt_add_128(result, one, result);
		} else {
			dt_shift_128(result, 1);
		}
	}

	assert(result[1] == 0);

	return (result[0]);
}
351
/*
 * Compute an integer standard deviation from the aggregation buffer data,
 * applying the normalization factor normal.
 *
 * The code uses data[0] as the divisor for both averages (i.e. as the sample
 * count), data[1] as a signed running total, and data[2]/data[3] as a 128-bit
 * value (low word first) that it treats as the sum of squares.  Both data[0]
 * and normal must be non-zero; dt_divide_128() asserts this.
 */
uint64_t
dt_stddev(uint64_t *data, uint64_t normal)
{
	uint64_t mean_of_squares[2];
	uint64_t square_of_mean[2];
	uint64_t variance[2];
	int64_t mean;

	/*
	 * The standard approximation for standard deviation is
	 * sqrt(average(x**2) - average(x)**2): the square root of the
	 * difference between the mean of the squares and the squared mean.
	 */
	dt_divide_128(data + 2, normal, mean_of_squares);
	dt_divide_128(mean_of_squares, data[0], mean_of_squares);

	mean = (int64_t)data[1] / (int64_t)normal / (int64_t)data[0];

	/* Only the magnitude matters, since the mean is about to be squared. */
	if (mean < 0)
		mean = -mean;

	dt_multiply_128((uint64_t)mean, (uint64_t)mean, square_of_mean);

	dt_subtract_128(mean_of_squares, square_of_mean, variance);

	return (dt_sqrt_128(variance));
}
379
380static int
381dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
382    dtrace_bufdesc_t *buf, size_t offs)
383{
384	dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd;
385	dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd;
386	char *p = pd->dtpd_provider, *n = pd->dtpd_name, *sub;
387	dtrace_flowkind_t flow = DTRACEFLOW_NONE;
388	const char *str = NULL;
389	static const char *e_str[2] = { " -> ", " => " };
390	static const char *r_str[2] = { " <- ", " <= " };
391	static const char *ent = "entry", *ret = "return";
392	static int entlen = 0, retlen = 0;
393	dtrace_epid_t next, id = epd->dtepd_epid;
394	int rval;
395
396	if (entlen == 0) {
397		assert(retlen == 0);
398		entlen = strlen(ent);
399		retlen = strlen(ret);
400	}
401
402	/*
403	 * If the name of the probe is "entry" or ends with "-entry", we
404	 * treat it as an entry; if it is "return" or ends with "-return",
405	 * we treat it as a return.  (This allows application-provided probes
406	 * like "method-entry" or "function-entry" to participate in flow
407	 * indentation -- without accidentally misinterpreting popular probe
408	 * names like "carpentry", "gentry" or "Coventry".)
409	 */
410	if ((sub = strstr(n, ent)) != NULL && sub[entlen] == '\0' &&
411	    (sub == n || sub[-1] == '-')) {
412		flow = DTRACEFLOW_ENTRY;
413		str = e_str[strcmp(p, "syscall") == 0];
414	} else if ((sub = strstr(n, ret)) != NULL && sub[retlen] == '\0' &&
415	    (sub == n || sub[-1] == '-')) {
416		flow = DTRACEFLOW_RETURN;
417		str = r_str[strcmp(p, "syscall") == 0];
418	}
419
420	/*
421	 * If we're going to indent this, we need to check the ID of our last
422	 * call.  If we're looking at the same probe ID but a different EPID,
423	 * we _don't_ want to indent.  (Yes, there are some minor holes in
424	 * this scheme -- it's a heuristic.)
425	 */
426	if (flow == DTRACEFLOW_ENTRY) {
427		if ((last != DTRACE_EPIDNONE && id != last &&
428		    pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id))
429			flow = DTRACEFLOW_NONE;
430	}
431
432	/*
433	 * If we're going to unindent this, it's more difficult to see if
434	 * we don't actually want to unindent it -- we need to look at the
435	 * _next_ EPID.
436	 */
437	if (flow == DTRACEFLOW_RETURN) {
438		offs += epd->dtepd_size;
439
440		do {
441			if (offs >= buf->dtbd_size) {
442				/*
443				 * We're at the end -- maybe.  If the oldest
444				 * record is non-zero, we need to wrap.
445				 */
446				if (buf->dtbd_oldest != 0) {
447					offs = 0;
448				} else {
449					goto out;
450				}
451			}
452
453			next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
454
455			if (next == DTRACE_EPIDNONE)
456				offs += sizeof (id);
457		} while (next == DTRACE_EPIDNONE);
458
459		if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0)
460			return (rval);
461
462		if (next != id && npd->dtpd_id == pd->dtpd_id)
463			flow = DTRACEFLOW_NONE;
464	}
465
466out:
467	if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) {
468		data->dtpda_prefix = str;
469	} else {
470		data->dtpda_prefix = "| ";
471	}
472
473	if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0)
474		data->dtpda_indent -= 2;
475
476	data->dtpda_flow = flow;
477
478	return (0);
479}
480
481static int
482dt_nullprobe()
483{
484	return (DTRACE_CONSUME_THIS);
485}
486
487static int
488dt_nullrec()
489{
490	return (DTRACE_CONSUME_NEXT);
491}
492
493int
494dt_print_quantline(dtrace_hdl_t *dtp, FILE *fp, int64_t val,
495    uint64_t normal, long double total, char positives, char negatives)
496{
497	long double f;
498	uint_t depth, len = 40;
499
500	const char *ats = "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@";
501	const char *spaces = "                                        ";
502
503	assert(strlen(ats) == len && strlen(spaces) == len);
504	assert(!(total == 0 && (positives || negatives)));
505	assert(!(val < 0 && !negatives));
506	assert(!(val > 0 && !positives));
507	assert(!(val != 0 && total == 0));
508
509	if (!negatives) {
510		if (positives) {
511			f = (dt_fabsl((long double)val) * len) / total;
512			depth = (uint_t)(f + 0.5);
513		} else {
514			depth = 0;
515		}
516
517		return (dt_printf(dtp, fp, "|%s%s %-9lld\n", ats + len - depth,
518		    spaces + depth, (long long)val / normal));
519	}
520
521	if (!positives) {
522		f = (dt_fabsl((long double)val) * len) / total;
523		depth = (uint_t)(f + 0.5);
524
525		return (dt_printf(dtp, fp, "%s%s| %-9lld\n", spaces + depth,
526		    ats + len - depth, (long long)val / normal));
527	}
528
529	/*
530	 * If we're here, we have both positive and negative bucket values.
531	 * To express this graphically, we're going to generate both positive
532	 * and negative bars separated by a centerline.  These bars are half
533	 * the size of normal quantize()/lquantize() bars, so we divide the
534	 * length in half before calculating the bar length.
535	 */
536	len /= 2;
537	ats = &ats[len];
538	spaces = &spaces[len];
539
540	f = (dt_fabsl((long double)val) * len) / total;
541	depth = (uint_t)(f + 0.5);
542
543	if (val <= 0) {
544		return (dt_printf(dtp, fp, "%s%s|%*s %-9lld\n", spaces + depth,
545		    ats + len - depth, len, "", (long long)val / normal));
546	} else {
547		return (dt_printf(dtp, fp, "%20s|%s%s %-9lld\n", "",
548		    ats + len - depth, spaces + depth,
549		    (long long)val / normal));
550	}
551}
552
553int
554dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
555    size_t size, uint64_t normal)
556{
557	const int64_t *data = addr;
558	int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
559	long double total = 0;
560	char positives = 0, negatives = 0;
561
562	if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
563		return (dt_set_errno(dtp, EDT_DMISMATCH));
564
565	while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0)
566		first_bin++;
567
568	if (first_bin == DTRACE_QUANTIZE_NBUCKETS - 1) {
569		/*
570		 * There isn't any data.  This is possible if (and only if)
571		 * negative increment values have been used.  In this case,
572		 * we'll print the buckets around 0.
573		 */
574		first_bin = DTRACE_QUANTIZE_ZEROBUCKET - 1;
575		last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 1;
576	} else {
577		if (first_bin > 0)
578			first_bin--;
579
580		while (last_bin > 0 && data[last_bin] == 0)
581			last_bin--;
582
583		if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1)
584			last_bin++;
585	}
586
587	for (i = first_bin; i <= last_bin; i++) {
588		positives |= (data[i] > 0);
589		negatives |= (data[i] < 0);
590		total += dt_fabsl((long double)data[i]);
591	}
592
593	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
594	    "------------- Distribution -------------", "count") < 0)
595		return (-1);
596
597	for (i = first_bin; i <= last_bin; i++) {
598		if (dt_printf(dtp, fp, "%16lld ",
599		    (long long)DTRACE_QUANTIZE_BUCKETVAL(i)) < 0)
600			return (-1);
601
602		if (dt_print_quantline(dtp, fp, data[i], normal, total,
603		    positives, negatives) < 0)
604			return (-1);
605	}
606
607	return (0);
608}
609
610int
611dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
612    size_t size, uint64_t normal)
613{
614	const int64_t *data = addr;
615	int i, first_bin, last_bin, base;
616	uint64_t arg;
617	long double total = 0;
618	uint16_t step, levels;
619	char positives = 0, negatives = 0;
620
621	if (size < sizeof (uint64_t))
622		return (dt_set_errno(dtp, EDT_DMISMATCH));
623
624	arg = *data++;
625	size -= sizeof (uint64_t);
626
627	base = DTRACE_LQUANTIZE_BASE(arg);
628	step = DTRACE_LQUANTIZE_STEP(arg);
629	levels = DTRACE_LQUANTIZE_LEVELS(arg);
630
631	first_bin = 0;
632	last_bin = levels + 1;
633
634	if (size != sizeof (uint64_t) * (levels + 2))
635		return (dt_set_errno(dtp, EDT_DMISMATCH));
636
637	while (first_bin <= levels + 1 && data[first_bin] == 0)
638		first_bin++;
639
640	if (first_bin > levels + 1) {
641		first_bin = 0;
642		last_bin = 2;
643	} else {
644		if (first_bin > 0)
645			first_bin--;
646
647		while (last_bin > 0 && data[last_bin] == 0)
648			last_bin--;
649
650		if (last_bin < levels + 1)
651			last_bin++;
652	}
653
654	for (i = first_bin; i <= last_bin; i++) {
655		positives |= (data[i] > 0);
656		negatives |= (data[i] < 0);
657		total += dt_fabsl((long double)data[i]);
658	}
659
660	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
661	    "------------- Distribution -------------", "count") < 0)
662		return (-1);
663
664	for (i = first_bin; i <= last_bin; i++) {
665		char c[32];
666		int err;
667
668		if (i == 0) {
669			(void) snprintf(c, sizeof (c), "< %d",
670			    base / (uint32_t)normal);
671			err = dt_printf(dtp, fp, "%16s ", c);
672		} else if (i == levels + 1) {
673			(void) snprintf(c, sizeof (c), ">= %d",
674			    base + (levels * step));
675			err = dt_printf(dtp, fp, "%16s ", c);
676		} else {
677			err = dt_printf(dtp, fp, "%16d ",
678			    base + (i - 1) * step);
679		}
680
681		if (err < 0 || dt_print_quantline(dtp, fp, data[i], normal,
682		    total, positives, negatives) < 0)
683			return (-1);
684	}
685
686	return (0);
687}
688
689/*ARGSUSED*/
690static int
691dt_print_average(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
692    size_t size, uint64_t normal)
693{
694	/* LINTED - alignment */
695	int64_t *data = (int64_t *)addr;
696
697	return (dt_printf(dtp, fp, " %16lld", data[0] ?
698	    (long long)(data[1] / (int64_t)normal / data[0]) : 0));
699}
700
701/*ARGSUSED*/
702static int
703dt_print_stddev(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
704    size_t size, uint64_t normal)
705{
706	/* LINTED - alignment */
707	uint64_t *data = (uint64_t *)addr;
708
709	return (dt_printf(dtp, fp, " %16llu", data[0] ?
710	    (unsigned long long) dt_stddev(data, normal) : 0));
711}
712
713/*ARGSUSED*/
714int
715dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
716    size_t nbytes, int width, int quiet, int raw)
717{
718	/*
719	 * If the byte stream is a series of printable characters, followed by
720	 * a terminating byte, we print it out as a string.  Otherwise, we
721	 * assume that it's something else and just print the bytes.
722	 */
723	int i, j, margin = 5;
724	char *c = (char *)addr;
725
726	if (nbytes == 0)
727		return (0);
728
729	if (raw || dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET)
730		goto raw;
731
732	for (i = 0; i < nbytes; i++) {
733		/*
734		 * We define a "printable character" to be one for which
735		 * isprint(3C) returns non-zero, isspace(3C) returns non-zero,
736		 * or a character which is either backspace or the bell.
737		 * Backspace and the bell are regrettably special because
738		 * they fail the first two tests -- and yet they are entirely
739		 * printable.  These are the only two control characters that
740		 * have meaning for the terminal and for which isprint(3C) and
741		 * isspace(3C) return 0.
742		 */
743		if (isprint(c[i]) || isspace(c[i]) ||
744		    c[i] == '\b' || c[i] == '\a')
745			continue;
746
747		if (c[i] == '\0' && i > 0) {
748			/*
749			 * This looks like it might be a string.  Before we
750			 * assume that it is indeed a string, check the
751			 * remainder of the byte range; if it contains
752			 * additional non-nul characters, we'll assume that
753			 * it's a binary stream that just happens to look like
754			 * a string, and we'll print out the individual bytes.
755			 */
756			for (j = i + 1; j < nbytes; j++) {
757				if (c[j] != '\0')
758					break;
759			}
760
761			if (j != nbytes)
762				break;
763
764			if (quiet)
765				return (dt_printf(dtp, fp, "%s", c));
766			else
767				return (dt_printf(dtp, fp, "  %-*s", width, c));
768		}
769
770		break;
771	}
772
773	if (i == nbytes) {
774		/*
775		 * The byte range is all printable characters, but there is
776		 * no trailing nul byte.  We'll assume that it's a string and
777		 * print it as such.
778		 */
779		char *s = alloca(nbytes + 1);
780		bcopy(c, s, nbytes);
781		s[nbytes] = '\0';
782		return (dt_printf(dtp, fp, "  %-*s", width, s));
783	}
784
785raw:
786	if (dt_printf(dtp, fp, "\n%*s      ", margin, "") < 0)
787		return (-1);
788
789	for (i = 0; i < 16; i++)
790		if (dt_printf(dtp, fp, "  %c", "0123456789abcdef"[i]) < 0)
791			return (-1);
792
793	if (dt_printf(dtp, fp, "  0123456789abcdef\n") < 0)
794		return (-1);
795
796
797	for (i = 0; i < nbytes; i += 16) {
798		if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0)
799			return (-1);
800
801		for (j = i; j < i + 16 && j < nbytes; j++) {
802			if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0)
803				return (-1);
804		}
805
806		while (j++ % 16) {
807			if (dt_printf(dtp, fp, "   ") < 0)
808				return (-1);
809		}
810
811		if (dt_printf(dtp, fp, "  ") < 0)
812			return (-1);
813
814		for (j = i; j < i + 16 && j < nbytes; j++) {
815			if (dt_printf(dtp, fp, "%c",
816			    c[j] < ' ' || c[j] > '~' ? '.' : c[j]) < 0)
817				return (-1);
818		}
819
820		if (dt_printf(dtp, fp, "\n") < 0)
821			return (-1);
822	}
823
824	return (0);
825}
826
827int
828dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
829    caddr_t addr, int depth, int size)
830{
831	dtrace_syminfo_t dts;
832	GElf_Sym sym;
833	int i, indent;
834	char c[PATH_MAX * 2];
835	uint64_t pc;
836
837	if (dt_printf(dtp, fp, "\n") < 0)
838		return (-1);
839
840	if (format == NULL)
841		format = "%s";
842
843	if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
844		indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
845	else
846		indent = _dtrace_stkindent;
847
848	for (i = 0; i < depth; i++) {
849		switch (size) {
850		case sizeof (uint32_t):
851			/* LINTED - alignment */
852			pc = *((uint32_t *)addr);
853			break;
854
855		case sizeof (uint64_t):
856			/* LINTED - alignment */
857			pc = *((uint64_t *)addr);
858			break;
859
860		default:
861			return (dt_set_errno(dtp, EDT_BADSTACKPC));
862		}
863
864		if (pc == 0)
865			break;
866
867		addr += size;
868
869		if (dt_printf(dtp, fp, "%*s", indent, "") < 0)
870			return (-1);
871
872		if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
873			if (pc > sym.st_value) {
874				(void) snprintf(c, sizeof (c), "%s`%s+0x%llx",
875				    dts.dts_object, dts.dts_name,
876				    (u_longlong_t)(pc - sym.st_value));
877			} else {
878				(void) snprintf(c, sizeof (c), "%s`%s",
879				    dts.dts_object, dts.dts_name);
880			}
881		} else {
882			/*
883			 * We'll repeat the lookup, but this time we'll specify
884			 * a NULL GElf_Sym -- indicating that we're only
885			 * interested in the containing module.
886			 */
887			if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
888				(void) snprintf(c, sizeof (c), "%s`0x%llx",
889				    dts.dts_object, (u_longlong_t)pc);
890			} else {
891				(void) snprintf(c, sizeof (c), "0x%llx",
892				    (u_longlong_t)pc);
893			}
894		}
895
896		if (dt_printf(dtp, fp, format, c) < 0)
897			return (-1);
898
899		if (dt_printf(dtp, fp, "\n") < 0)
900			return (-1);
901	}
902
903	return (0);
904}
905
906int
907dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
908    caddr_t addr, uint64_t arg)
909{
910	/* LINTED - alignment */
911	uint64_t *pc = (uint64_t *)addr;
912	uint32_t depth = DTRACE_USTACK_NFRAMES(arg);
913	uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);
914	const char *strbase = addr + (depth + 1) * sizeof (uint64_t);
915	const char *str = strsize ? strbase : NULL;
916	int err = 0;
917
918	char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];
919	struct ps_prochandle *P;
920	GElf_Sym sym;
921	int i, indent;
922	pid_t pid;
923
924	if (depth == 0)
925		return (0);
926
927	pid = (pid_t)*pc++;
928
929	if (dt_printf(dtp, fp, "\n") < 0)
930		return (-1);
931
932	if (format == NULL)
933		format = "%s";
934
935	if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
936		indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
937	else
938		indent = _dtrace_stkindent;
939
940	/*
941	 * Ultimately, we need to add an entry point in the library vector for
942	 * determining <symbol, offset> from <pid, address>.  For now, if
943	 * this is a vector open, we just print the raw address or string.
944	 */
945	if (dtp->dt_vector == NULL)
946		P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
947	else
948		P = NULL;
949
950	if (P != NULL)
951		dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
952
953	for (i = 0; i < depth && pc[i] != 0; i++) {
954		const prmap_t *map;
955
956		if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
957			break;
958
959		if (P != NULL && Plookup_by_addr(P, pc[i],
960		    name, sizeof (name), &sym) == 0) {
961			(void) Pobjname(P, pc[i], objname, sizeof (objname));
962
963			if (pc[i] > sym.st_value) {
964				(void) snprintf(c, sizeof (c),
965				    "%s`%s+0x%llx", dt_basename(objname), name,
966				    (u_longlong_t)(pc[i] - sym.st_value));
967			} else {
968				(void) snprintf(c, sizeof (c),
969				    "%s`%s", dt_basename(objname), name);
970			}
971		} else if (str != NULL && str[0] != '\0' && str[0] != '@' &&
972		    (P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||
973		    (map->pr_mflags & MA_WRITE)))) {
974			/*
975			 * If the current string pointer in the string table
976			 * does not point to an empty string _and_ the program
977			 * counter falls in a writable region, we'll use the
978			 * string from the string table instead of the raw
979			 * address.  This last condition is necessary because
980			 * some (broken) ustack helpers will return a string
981			 * even for a program counter that they can't
982			 * identify.  If we have a string for a program
983			 * counter that falls in a segment that isn't
984			 * writable, we assume that we have fallen into this
985			 * case and we refuse to use the string.
986			 */
987			(void) snprintf(c, sizeof (c), "%s", str);
988		} else {
989			if (P != NULL && Pobjname(P, pc[i], objname,
990			    sizeof (objname)) != 0) {
991				(void) snprintf(c, sizeof (c), "%s`0x%llx",
992				    dt_basename(objname), (u_longlong_t)pc[i]);
993			} else {
994				(void) snprintf(c, sizeof (c), "0x%llx",
995				    (u_longlong_t)pc[i]);
996			}
997		}
998
999		if ((err = dt_printf(dtp, fp, format, c)) < 0)
1000			break;
1001
1002		if ((err = dt_printf(dtp, fp, "\n")) < 0)
1003			break;
1004
1005		if (str != NULL && str[0] == '@') {
1006			/*
1007			 * If the first character of the string is an "at" sign,
1008			 * then the string is inferred to be an annotation --
1009			 * and it is printed out beneath the frame and offset
1010			 * with brackets.
1011			 */
1012			if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
1013				break;
1014
1015			(void) snprintf(c, sizeof (c), "  [ %s ]", &str[1]);
1016
1017			if ((err = dt_printf(dtp, fp, format, c)) < 0)
1018				break;
1019
1020			if ((err = dt_printf(dtp, fp, "\n")) < 0)
1021				break;
1022		}
1023
1024		if (str != NULL) {
1025			str += strlen(str) + 1;
1026			if (str - strbase >= strsize)
1027				str = NULL;
1028		}
1029	}
1030
1031	if (P != NULL) {
1032		dt_proc_unlock(dtp, P);
1033		dt_proc_release(dtp, P);
1034	}
1035
1036	return (err);
1037}
1038
1039static int
1040dt_print_usym(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, dtrace_actkind_t act)
1041{
1042	/* LINTED - alignment */
1043	uint64_t pid = ((uint64_t *)addr)[0];
1044	/* LINTED - alignment */
1045	uint64_t pc = ((uint64_t *)addr)[1];
1046	const char *format = "  %-50s";
1047	char *s;
1048	int n, len = 256;
1049
1050	if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {
1051		struct ps_prochandle *P;
1052
1053		if ((P = dt_proc_grab(dtp, pid,
1054		    PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {
1055			GElf_Sym sym;
1056
1057			dt_proc_lock(dtp, P);
1058
1059			if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)
1060				pc = sym.st_value;
1061
1062			dt_proc_unlock(dtp, P);
1063			dt_proc_release(dtp, P);
1064		}
1065	}
1066
1067	do {
1068		n = len;
1069		s = alloca(n);
1070	} while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);
1071
1072	return (dt_printf(dtp, fp, format, s));
1073}
1074
1075int
1076dt_print_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1077{
1078	/* LINTED - alignment */
1079	uint64_t pid = ((uint64_t *)addr)[0];
1080	/* LINTED - alignment */
1081	uint64_t pc = ((uint64_t *)addr)[1];
1082	int err = 0;
1083
1084	char objname[PATH_MAX], c[PATH_MAX * 2];
1085	struct ps_prochandle *P;
1086
1087	if (format == NULL)
1088		format = "  %-50s";
1089
1090	/*
1091	 * See the comment in dt_print_ustack() for the rationale for
1092	 * printing raw addresses in the vectored case.
1093	 */
1094	if (dtp->dt_vector == NULL)
1095		P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
1096	else
1097		P = NULL;
1098
1099	if (P != NULL)
1100		dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
1101
1102	if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != 0) {
1103		(void) snprintf(c, sizeof (c), "%s", dt_basename(objname));
1104	} else {
1105		(void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1106	}
1107
1108	err = dt_printf(dtp, fp, format, c);
1109
1110	if (P != NULL) {
1111		dt_proc_unlock(dtp, P);
1112		dt_proc_release(dtp, P);
1113	}
1114
1115	return (err);
1116}
1117
1118int
1119dt_print_memory(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr)
1120{
1121	int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
1122	size_t nbytes = *((uintptr_t *) addr);
1123
1124	return (dt_print_bytes(dtp, fp, addr + sizeof(uintptr_t),
1125	    nbytes, 50, quiet, 1));
1126}
1127
1128typedef struct dt_type_cbdata {
1129	dtrace_hdl_t		*dtp;
1130	dtrace_typeinfo_t	dtt;
1131	caddr_t			addr;
1132	caddr_t			addrend;
1133	const char		*name;
1134	int			f_type;
1135	int			indent;
1136	int			type_width;
1137	int			name_width;
1138	FILE			*fp;
1139} dt_type_cbdata_t;
1140
1141static int	dt_print_type_data(dt_type_cbdata_t *, ctf_id_t);
1142
1143static int
1144dt_print_type_member(const char *name, ctf_id_t type, ulong_t off, void *arg)
1145{
1146	dt_type_cbdata_t cbdata;
1147	dt_type_cbdata_t *cbdatap = arg;
1148	ssize_t ssz;
1149
1150	if ((ssz = ctf_type_size(cbdatap->dtt.dtt_ctfp, type)) <= 0)
1151		return (0);
1152
1153	off /= 8;
1154
1155	cbdata = *cbdatap;
1156	cbdata.name = name;
1157	cbdata.addr += off;
1158	cbdata.addrend = cbdata.addr + ssz;
1159
1160	return (dt_print_type_data(&cbdata, type));
1161}
1162
1163static int
1164dt_print_type_width(const char *name, ctf_id_t type, ulong_t off, void *arg)
1165{
1166	char buf[DT_TYPE_NAMELEN];
1167	char *p;
1168	dt_type_cbdata_t *cbdatap = arg;
1169	size_t sz = strlen(name);
1170
1171	ctf_type_name(cbdatap->dtt.dtt_ctfp, type, buf, sizeof (buf));
1172
1173	if ((p = strchr(buf, '[')) != NULL)
1174		p[-1] = '\0';
1175	else
1176		p = "";
1177
1178	sz += strlen(p);
1179
1180	if (sz > cbdatap->name_width)
1181		cbdatap->name_width = sz;
1182
1183	sz = strlen(buf);
1184
1185	if (sz > cbdatap->type_width)
1186		cbdatap->type_width = sz;
1187
1188	return (0);
1189}
1190
1191static int
1192dt_print_type_data(dt_type_cbdata_t *cbdatap, ctf_id_t type)
1193{
1194	caddr_t addr = cbdatap->addr;
1195	caddr_t addrend = cbdatap->addrend;
1196	char buf[DT_TYPE_NAMELEN];
1197	char *p;
1198	int cnt = 0;
1199	uint_t kind = ctf_type_kind(cbdatap->dtt.dtt_ctfp, type);
1200	ssize_t ssz = ctf_type_size(cbdatap->dtt.dtt_ctfp, type);
1201
1202	ctf_type_name(cbdatap->dtt.dtt_ctfp, type, buf, sizeof (buf));
1203
1204	if ((p = strchr(buf, '[')) != NULL)
1205		p[-1] = '\0';
1206	else
1207		p = "";
1208
1209	if (cbdatap->f_type) {
1210		int type_width = roundup(cbdatap->type_width + 1, 4);
1211		int name_width = roundup(cbdatap->name_width + 1, 4);
1212
1213		name_width -= strlen(cbdatap->name);
1214
1215		dt_printf(cbdatap->dtp, cbdatap->fp, "%*s%-*s%s%-*s	= ",cbdatap->indent * 4,"",type_width,buf,cbdatap->name,name_width,p);
1216	}
1217
1218	while (addr < addrend) {
1219		dt_type_cbdata_t cbdata;
1220		ctf_arinfo_t arinfo;
1221		ctf_encoding_t cte;
1222		uintptr_t *up;
1223		void *vp = addr;
1224		cbdata = *cbdatap;
1225		cbdata.name = "";
1226		cbdata.addr = addr;
1227		cbdata.addrend = addr + ssz;
1228		cbdata.f_type = 0;
1229		cbdata.indent++;
1230		cbdata.type_width = 0;
1231		cbdata.name_width = 0;
1232
1233		if (cnt > 0)
1234			dt_printf(cbdatap->dtp, cbdatap->fp, "%*s", cbdatap->indent * 4,"");
1235
1236		switch (kind) {
1237		case CTF_K_INTEGER:
1238			if (ctf_type_encoding(cbdatap->dtt.dtt_ctfp, type, &cte) != 0)
1239				return (-1);
1240			if ((cte.cte_format & CTF_INT_SIGNED) != 0)
1241				switch (cte.cte_bits) {
1242				case 8:
1243					if (isprint(*((char *) vp)))
1244						dt_printf(cbdatap->dtp, cbdatap->fp, "'%c', ", *((char *) vp));
1245					dt_printf(cbdatap->dtp, cbdatap->fp, "%d (0x%x);\n", *((char *) vp), *((char *) vp));
1246					break;
1247				case 16:
1248					dt_printf(cbdatap->dtp, cbdatap->fp, "%hd (0x%hx);\n", *((short *) vp), *((u_short *) vp));
1249					break;
1250				case 32:
1251					dt_printf(cbdatap->dtp, cbdatap->fp, "%d (0x%x);\n", *((int *) vp), *((u_int *) vp));
1252					break;
1253				case 64:
1254					dt_printf(cbdatap->dtp, cbdatap->fp, "%jd (0x%jx);\n", *((long long *) vp), *((unsigned long long *) vp));
1255					break;
1256				default:
1257					dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_INTEGER: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1258					break;
1259				}
1260			else
1261				switch (cte.cte_bits) {
1262				case 8:
1263					dt_printf(cbdatap->dtp, cbdatap->fp, "%u (0x%x);\n", *((uint8_t *) vp) & 0xff, *((uint8_t *) vp) & 0xff);
1264					break;
1265				case 16:
1266					dt_printf(cbdatap->dtp, cbdatap->fp, "%hu (0x%hx);\n", *((u_short *) vp), *((u_short *) vp));
1267					break;
1268				case 32:
1269					dt_printf(cbdatap->dtp, cbdatap->fp, "%u (0x%x);\n", *((u_int *) vp), *((u_int *) vp));
1270					break;
1271				case 64:
1272					dt_printf(cbdatap->dtp, cbdatap->fp, "%ju (0x%jx);\n", *((unsigned long long *) vp), *((unsigned long long *) vp));
1273					break;
1274				default:
1275					dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_INTEGER: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1276					break;
1277				}
1278			break;
1279		case CTF_K_FLOAT:
1280			dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_FLOAT: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1281			break;
1282		case CTF_K_POINTER:
1283			dt_printf(cbdatap->dtp, cbdatap->fp, "%p;\n", *((void **) addr));
1284			break;
1285		case CTF_K_ARRAY:
1286			if (ctf_array_info(cbdatap->dtt.dtt_ctfp, type, &arinfo) != 0)
1287				return (-1);
1288			dt_printf(cbdatap->dtp, cbdatap->fp, "{\n%*s",cbdata.indent * 4,"");
1289			dt_print_type_data(&cbdata, arinfo.ctr_contents);
1290			dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1291			break;
1292		case CTF_K_FUNCTION:
1293			dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_FUNCTION:\n");
1294			break;
1295		case CTF_K_STRUCT:
1296			cbdata.f_type = 1;
1297			if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1298			    dt_print_type_width, &cbdata) != 0)
1299				return (-1);
1300			dt_printf(cbdatap->dtp, cbdatap->fp, "{\n");
1301			if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1302			    dt_print_type_member, &cbdata) != 0)
1303				return (-1);
1304			dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1305			break;
1306		case CTF_K_UNION:
1307			cbdata.f_type = 1;
1308			if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1309			    dt_print_type_width, &cbdata) != 0)
1310				return (-1);
1311			dt_printf(cbdatap->dtp, cbdatap->fp, "{\n");
1312			if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1313			    dt_print_type_member, &cbdata) != 0)
1314				return (-1);
1315			dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1316			break;
1317		case CTF_K_ENUM:
1318			dt_printf(cbdatap->dtp, cbdatap->fp, "%s;\n", ctf_enum_name(cbdatap->dtt.dtt_ctfp, type, *((int *) vp)));
1319			break;
1320		case CTF_K_TYPEDEF:
1321			dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1322			break;
1323		case CTF_K_VOLATILE:
1324			if (cbdatap->f_type)
1325				dt_printf(cbdatap->dtp, cbdatap->fp, "volatile ");
1326			dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1327			break;
1328		case CTF_K_CONST:
1329			if (cbdatap->f_type)
1330				dt_printf(cbdatap->dtp, cbdatap->fp, "const ");
1331			dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1332			break;
1333		case CTF_K_RESTRICT:
1334			if (cbdatap->f_type)
1335				dt_printf(cbdatap->dtp, cbdatap->fp, "restrict ");
1336			dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1337			break;
1338		default:
1339			break;
1340		}
1341
1342		addr += ssz;
1343		cnt++;
1344	}
1345
1346	return (0);
1347}
1348
1349static int
1350dt_print_type(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr)
1351{
1352	caddr_t addrend;
1353	char *p;
1354	dtrace_typeinfo_t dtt;
1355	dt_type_cbdata_t cbdata;
1356	int num = 0;
1357	int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
1358	ssize_t ssz;
1359
1360	if (!quiet)
1361		dt_printf(dtp, fp, "\n");
1362
1363	/* Get the total number of bytes of data buffered. */
1364	size_t nbytes = *((uintptr_t *) addr);
1365	addr += sizeof(uintptr_t);
1366
1367	/*
1368	 * Get the size of the type so that we can check that it matches
1369	 * the CTF data we look up and so that we can figure out how many
1370	 * type elements are buffered.
1371	 */
1372	size_t typs = *((uintptr_t *) addr);
1373	addr += sizeof(uintptr_t);
1374
1375	/*
1376	 * Point to the type string in the buffer. Get it's string
1377	 * length and round it up to become the offset to the start
1378	 * of the buffered type data which we would like to be aligned
1379	 * for easy access.
1380	 */
1381	char *strp = (char *) addr;
1382	int offset = roundup(strlen(strp) + 1, sizeof(uintptr_t));
1383
1384	/*
1385	 * The type string might have a format such as 'int [20]'.
1386	 * Check if there is an array dimension present.
1387	 */
1388	if ((p = strchr(strp, '[')) != NULL) {
1389		/* Strip off the array dimension. */
1390		*p++ = '\0';
1391
1392		for (; *p != '\0' && *p != ']'; p++)
1393			num = num * 10 + *p - '0';
1394	} else
1395		/* No array dimension, so default. */
1396		num = 1;
1397
1398	/* Lookup the CTF type from the type string. */
1399	if (dtrace_lookup_by_type(dtp,  DTRACE_OBJ_EVERY, strp, &dtt) < 0)
1400		return (-1);
1401
1402	/* Offset the buffer address to the start of the data... */
1403	addr += offset;
1404
1405	ssz = ctf_type_size(dtt.dtt_ctfp, dtt.dtt_type);
1406
1407	if (typs != ssz) {
1408		printf("Expected type size from buffer (%lu) to match type size looked up now (%ld)\n", (u_long) typs, (long) ssz);
1409		return (-1);
1410	}
1411
1412	cbdata.dtp = dtp;
1413	cbdata.dtt = dtt;
1414	cbdata.name = "";
1415	cbdata.addr = addr;
1416	cbdata.addrend = addr + nbytes;
1417	cbdata.indent = 1;
1418	cbdata.f_type = 1;
1419	cbdata.type_width = 0;
1420	cbdata.name_width = 0;
1421	cbdata.fp = fp;
1422
1423	return (dt_print_type_data(&cbdata, dtt.dtt_type));
1424}
1425
1426static int
1427dt_print_sym(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1428{
1429	/* LINTED - alignment */
1430	uint64_t pc = *((uint64_t *)addr);
1431	dtrace_syminfo_t dts;
1432	GElf_Sym sym;
1433	char c[PATH_MAX * 2];
1434
1435	if (format == NULL)
1436		format = "  %-50s";
1437
1438	if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
1439		(void) snprintf(c, sizeof (c), "%s`%s",
1440		    dts.dts_object, dts.dts_name);
1441	} else {
1442		/*
1443		 * We'll repeat the lookup, but this time we'll specify a
1444		 * NULL GElf_Sym -- indicating that we're only interested in
1445		 * the containing module.
1446		 */
1447		if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1448			(void) snprintf(c, sizeof (c), "%s`0x%llx",
1449			    dts.dts_object, (u_longlong_t)pc);
1450		} else {
1451			(void) snprintf(c, sizeof (c), "0x%llx",
1452			    (u_longlong_t)pc);
1453		}
1454	}
1455
1456	if (dt_printf(dtp, fp, format, c) < 0)
1457		return (-1);
1458
1459	return (0);
1460}
1461
1462int
1463dt_print_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1464{
1465	/* LINTED - alignment */
1466	uint64_t pc = *((uint64_t *)addr);
1467	dtrace_syminfo_t dts;
1468	char c[PATH_MAX * 2];
1469
1470	if (format == NULL)
1471		format = "  %-50s";
1472
1473	if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1474		(void) snprintf(c, sizeof (c), "%s", dts.dts_object);
1475	} else {
1476		(void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1477	}
1478
1479	if (dt_printf(dtp, fp, format, c) < 0)
1480		return (-1);
1481
1482	return (0);
1483}
1484
/*
 * Argument handed to dt_normalize_agg() by dt_normalize(): which
 * aggregation variable to match and the normalization value to store on
 * every matching aggregation.
 */
1485typedef struct dt_normal {
1486	dtrace_aggvarid_t dtnd_id;	/* aggregation variable ID to match */
1487	uint64_t dtnd_normal;		/* value copied into dtada_normal */
1488} dt_normal_t;
1489
1490static int
1491dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1492{
1493	dt_normal_t *normal = arg;
1494	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1495	dtrace_aggvarid_t id = normal->dtnd_id;
1496
1497	if (agg->dtagd_nrecs == 0)
1498		return (DTRACE_AGGWALK_NEXT);
1499
1500	if (agg->dtagd_varid != id)
1501		return (DTRACE_AGGWALK_NEXT);
1502
1503	((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal;
1504	return (DTRACE_AGGWALK_NORMALIZE);
1505}
1506
1507static int
1508dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
1509{
1510	dt_normal_t normal;
1511	caddr_t addr;
1512
1513	/*
1514	 * We (should) have two records:  the aggregation ID followed by the
1515	 * normalization value.
1516	 */
1517	addr = base + rec->dtrd_offset;
1518
1519	if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
1520		return (dt_set_errno(dtp, EDT_BADNORMAL));
1521
1522	/* LINTED - alignment */
1523	normal.dtnd_id = *((dtrace_aggvarid_t *)addr);
1524	rec++;
1525
1526	if (rec->dtrd_action != DTRACEACT_LIBACT)
1527		return (dt_set_errno(dtp, EDT_BADNORMAL));
1528
1529	if (rec->dtrd_arg != DT_ACT_NORMALIZE)
1530		return (dt_set_errno(dtp, EDT_BADNORMAL));
1531
1532	addr = base + rec->dtrd_offset;
1533
1534	switch (rec->dtrd_size) {
1535	case sizeof (uint64_t):
1536		/* LINTED - alignment */
1537		normal.dtnd_normal = *((uint64_t *)addr);
1538		break;
1539	case sizeof (uint32_t):
1540		/* LINTED - alignment */
1541		normal.dtnd_normal = *((uint32_t *)addr);
1542		break;
1543	case sizeof (uint16_t):
1544		/* LINTED - alignment */
1545		normal.dtnd_normal = *((uint16_t *)addr);
1546		break;
1547	case sizeof (uint8_t):
1548		normal.dtnd_normal = *((uint8_t *)addr);
1549		break;
1550	default:
1551		return (dt_set_errno(dtp, EDT_BADNORMAL));
1552	}
1553
1554	(void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);
1555
1556	return (0);
1557}
1558
1559static int
1560dt_denormalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1561{
1562	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1563	dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
1564
1565	if (agg->dtagd_nrecs == 0)
1566		return (DTRACE_AGGWALK_NEXT);
1567
1568	if (agg->dtagd_varid != id)
1569		return (DTRACE_AGGWALK_NEXT);
1570
1571	return (DTRACE_AGGWALK_DENORMALIZE);
1572}
1573
1574static int
1575dt_clear_agg(const dtrace_aggdata_t *aggdata, void *arg)
1576{
1577	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1578	dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
1579
1580	if (agg->dtagd_nrecs == 0)
1581		return (DTRACE_AGGWALK_NEXT);
1582
1583	if (agg->dtagd_varid != id)
1584		return (DTRACE_AGGWALK_NEXT);
1585
1586	return (DTRACE_AGGWALK_CLEAR);
1587}
1588
/*
 * Argument handed to dt_trunc_agg() by dt_trunc(): which aggregation
 * variable to match and how many matching entries may still be kept.
 */
1589typedef struct dt_trunc {
1590	dtrace_aggvarid_t dttd_id;	/* aggregation variable ID to match */
1591	uint64_t dttd_remaining;	/* matching entries still to keep; at 0, entries are removed */
1592} dt_trunc_t;
1593
1594static int
1595dt_trunc_agg(const dtrace_aggdata_t *aggdata, void *arg)
1596{
1597	dt_trunc_t *trunc = arg;
1598	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1599	dtrace_aggvarid_t id = trunc->dttd_id;
1600
1601	if (agg->dtagd_nrecs == 0)
1602		return (DTRACE_AGGWALK_NEXT);
1603
1604	if (agg->dtagd_varid != id)
1605		return (DTRACE_AGGWALK_NEXT);
1606
1607	if (trunc->dttd_remaining == 0)
1608		return (DTRACE_AGGWALK_REMOVE);
1609
1610	trunc->dttd_remaining--;
1611	return (DTRACE_AGGWALK_NEXT);
1612}
1613
1614static int
1615dt_trunc(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
1616{
1617	dt_trunc_t trunc;
1618	caddr_t addr;
1619	int64_t remaining;
1620	int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *);
1621
1622	/*
1623	 * We (should) have two records:  the aggregation ID followed by the
1624	 * number of aggregation entries after which the aggregation is to be
1625	 * truncated.
1626	 */
1627	addr = base + rec->dtrd_offset;
1628
1629	if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
1630		return (dt_set_errno(dtp, EDT_BADTRUNC));
1631
1632	/* LINTED - alignment */
1633	trunc.dttd_id = *((dtrace_aggvarid_t *)addr);
1634	rec++;
1635
1636	if (rec->dtrd_action != DTRACEACT_LIBACT)
1637		return (dt_set_errno(dtp, EDT_BADTRUNC));
1638
1639	if (rec->dtrd_arg != DT_ACT_TRUNC)
1640		return (dt_set_errno(dtp, EDT_BADTRUNC));
1641
1642	addr = base + rec->dtrd_offset;
1643
1644	switch (rec->dtrd_size) {
1645	case sizeof (uint64_t):
1646		/* LINTED - alignment */
1647		remaining = *((int64_t *)addr);
1648		break;
1649	case sizeof (uint32_t):
1650		/* LINTED - alignment */
1651		remaining = *((int32_t *)addr);
1652		break;
1653	case sizeof (uint16_t):
1654		/* LINTED - alignment */
1655		remaining = *((int16_t *)addr);
1656		break;
1657	case sizeof (uint8_t):
1658		remaining = *((int8_t *)addr);
1659		break;
1660	default:
1661		return (dt_set_errno(dtp, EDT_BADNORMAL));
1662	}
1663
1664	if (remaining < 0) {
1665		func = dtrace_aggregate_walk_valsorted;
1666		remaining = -remaining;
1667	} else {
1668		func = dtrace_aggregate_walk_valrevsorted;
1669	}
1670
1671	assert(remaining >= 0);
1672	trunc.dttd_remaining = remaining;
1673
1674	(void) func(dtp, dt_trunc_agg, &trunc);
1675
1676	return (0);
1677}
1678
1679static int
1680dt_print_datum(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
1681    caddr_t addr, size_t size, uint64_t normal)
1682{
1683	int err;
1684	dtrace_actkind_t act = rec->dtrd_action;
1685
1686	switch (act) {
1687	case DTRACEACT_STACK:
1688		return (dt_print_stack(dtp, fp, NULL, addr,
1689		    rec->dtrd_arg, rec->dtrd_size / rec->dtrd_arg));
1690
1691	case DTRACEACT_USTACK:
1692	case DTRACEACT_JSTACK:
1693		return (dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg));
1694
1695	case DTRACEACT_USYM:
1696	case DTRACEACT_UADDR:
1697		return (dt_print_usym(dtp, fp, addr, act));
1698
1699	case DTRACEACT_UMOD:
1700		return (dt_print_umod(dtp, fp, NULL, addr));
1701
1702	case DTRACEACT_SYM:
1703		return (dt_print_sym(dtp, fp, NULL, addr));
1704
1705	case DTRACEACT_MOD:
1706		return (dt_print_mod(dtp, fp, NULL, addr));
1707
1708	case DTRACEAGG_QUANTIZE:
1709		return (dt_print_quantize(dtp, fp, addr, size, normal));
1710
1711	case DTRACEAGG_LQUANTIZE:
1712		return (dt_print_lquantize(dtp, fp, addr, size, normal));
1713
1714	case DTRACEAGG_AVG:
1715		return (dt_print_average(dtp, fp, addr, size, normal));
1716
1717	case DTRACEAGG_STDDEV:
1718		return (dt_print_stddev(dtp, fp, addr, size, normal));
1719
1720	default:
1721		break;
1722	}
1723
1724	switch (size) {
1725	case sizeof (uint64_t):
1726		err = dt_printf(dtp, fp, " %16lld",
1727		    /* LINTED - alignment */
1728		    (long long)*((uint64_t *)addr) / normal);
1729		break;
1730	case sizeof (uint32_t):
1731		/* LINTED - alignment */
1732		err = dt_printf(dtp, fp, " %8d", *((uint32_t *)addr) /
1733		    (uint32_t)normal);
1734		break;
1735	case sizeof (uint16_t):
1736		/* LINTED - alignment */
1737		err = dt_printf(dtp, fp, " %5d", *((uint16_t *)addr) /
1738		    (uint32_t)normal);
1739		break;
1740	case sizeof (uint8_t):
1741		err = dt_printf(dtp, fp, " %3d", *((uint8_t *)addr) /
1742		    (uint32_t)normal);
1743		break;
1744	default:
1745		err = dt_print_bytes(dtp, fp, addr, size, 50, 0, 0);
1746		break;
1747	}
1748
1749	return (err);
1750}
1751
1752int
1753dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
1754{
1755	int i, aggact = 0;
1756	dt_print_aggdata_t *pd = arg;
1757	const dtrace_aggdata_t *aggdata = aggsdata[0];
1758	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1759	FILE *fp = pd->dtpa_fp;
1760	dtrace_hdl_t *dtp = pd->dtpa_dtp;
1761	dtrace_recdesc_t *rec;
1762	dtrace_actkind_t act;
1763	caddr_t addr;
1764	size_t size;
1765
1766	/*
1767	 * Iterate over each record description in the key, printing the traced
1768	 * data, skipping the first datum (the tuple member created by the
1769	 * compiler).
1770	 */
1771	for (i = 1; i < agg->dtagd_nrecs; i++) {
1772		rec = &agg->dtagd_rec[i];
1773		act = rec->dtrd_action;
1774		addr = aggdata->dtada_data + rec->dtrd_offset;
1775		size = rec->dtrd_size;
1776
1777		if (DTRACEACT_ISAGG(act)) {
1778			aggact = i;
1779			break;
1780		}
1781
1782		if (dt_print_datum(dtp, fp, rec, addr, size, 1) < 0)
1783			return (-1);
1784
1785		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
1786		    DTRACE_BUFDATA_AGGKEY) < 0)
1787			return (-1);
1788	}
1789
1790	assert(aggact != 0);
1791
1792	for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {
1793		uint64_t normal;
1794
1795		aggdata = aggsdata[i];
1796		agg = aggdata->dtada_desc;
1797		rec = &agg->dtagd_rec[aggact];
1798		act = rec->dtrd_action;
1799		addr = aggdata->dtada_data + rec->dtrd_offset;
1800		size = rec->dtrd_size;
1801
1802		assert(DTRACEACT_ISAGG(act));
1803		normal = aggdata->dtada_normal;
1804
1805		if (dt_print_datum(dtp, fp, rec, addr, size, normal) < 0)
1806			return (-1);
1807
1808		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
1809		    DTRACE_BUFDATA_AGGVAL) < 0)
1810			return (-1);
1811
1812		if (!pd->dtpa_allunprint)
1813			agg->dtagd_flags |= DTRACE_AGD_PRINTED;
1814	}
1815
1816	if (dt_printf(dtp, fp, "\n") < 0)
1817		return (-1);
1818
1819	if (dt_buffered_flush(dtp, NULL, NULL, aggdata,
1820	    DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0)
1821		return (-1);
1822
1823	return (0);
1824}
1825
1826int
1827dt_print_agg(const dtrace_aggdata_t *aggdata, void *arg)
1828{
1829	dt_print_aggdata_t *pd = arg;
1830	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1831	dtrace_aggvarid_t aggvarid = pd->dtpa_id;
1832
1833	if (pd->dtpa_allunprint) {
1834		if (agg->dtagd_flags & DTRACE_AGD_PRINTED)
1835			return (0);
1836	} else {
1837		/*
1838		 * If we're not printing all unprinted aggregations, then the
1839		 * aggregation variable ID denotes a specific aggregation
1840		 * variable that we should print -- skip any other aggregations
1841		 * that we encounter.
1842		 */
1843		if (agg->dtagd_nrecs == 0)
1844			return (0);
1845
1846		if (aggvarid != agg->dtagd_varid)
1847			return (0);
1848	}
1849
1850	return (dt_print_aggs(&aggdata, 1, arg));
1851}
1852
1853int
1854dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,
1855    const char *option, const char *value)
1856{
1857	int len, rval;
1858	char *msg;
1859	const char *errstr;
1860	dtrace_setoptdata_t optdata;
1861
1862	bzero(&optdata, sizeof (optdata));
1863	(void) dtrace_getopt(dtp, option, &optdata.dtsda_oldval);
1864
1865	if (dtrace_setopt(dtp, option, value) == 0) {
1866		(void) dtrace_getopt(dtp, option, &optdata.dtsda_newval);
1867		optdata.dtsda_probe = data;
1868		optdata.dtsda_option = option;
1869		optdata.dtsda_handle = dtp;
1870
1871		if ((rval = dt_handle_setopt(dtp, &optdata)) != 0)
1872			return (rval);
1873
1874		return (0);
1875	}
1876
1877	errstr = dtrace_errmsg(dtp, dtrace_errno(dtp));
1878	len = strlen(option) + strlen(value) + strlen(errstr) + 80;
1879	msg = alloca(len);
1880
1881	(void) snprintf(msg, len, "couldn't set option \"%s\" to \"%s\": %s\n",
1882	    option, value, errstr);
1883
1884	if ((rval = dt_handle_liberr(dtp, data, msg)) == 0)
1885		return (0);
1886
1887	return (rval);
1888}
1889
1890static int
1891dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dtrace_bufdesc_t *buf,
1892    dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg)
1893{
1894	dtrace_epid_t id;
1895	size_t offs, start = buf->dtbd_oldest, end = buf->dtbd_size;
1896	int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
1897	int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
1898	int rval, i, n;
1899	dtrace_epid_t last = DTRACE_EPIDNONE;
1900	dtrace_probedata_t data;
1901	uint64_t drops;
1902	caddr_t addr;
1903
1904	bzero(&data, sizeof (data));
1905	data.dtpda_handle = dtp;
1906	data.dtpda_cpu = cpu;
1907
1908again:
1909	for (offs = start; offs < end; ) {
1910		dtrace_eprobedesc_t *epd;
1911
1912		/*
1913		 * We're guaranteed to have an ID.
1914		 */
1915		id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
1916
1917		if (id == DTRACE_EPIDNONE) {
1918			/*
1919			 * This is filler to assure proper alignment of the
1920			 * next record; we simply ignore it.
1921			 */
1922			offs += sizeof (id);
1923			continue;
1924		}
1925
1926		if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc,
1927		    &data.dtpda_pdesc)) != 0)
1928			return (rval);
1929
1930		epd = data.dtpda_edesc;
1931		data.dtpda_data = buf->dtbd_data + offs;
1932
1933		if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) {
1934			rval = dt_handle(dtp, &data);
1935
1936			if (rval == DTRACE_CONSUME_NEXT)
1937				goto nextepid;
1938
1939			if (rval == DTRACE_CONSUME_ERROR)
1940				return (-1);
1941		}
1942
1943		if (flow)
1944			(void) dt_flowindent(dtp, &data, last, buf, offs);
1945
1946		rval = (*efunc)(&data, arg);
1947
1948		if (flow) {
1949			if (data.dtpda_flow == DTRACEFLOW_ENTRY)
1950				data.dtpda_indent += 2;
1951		}
1952
1953		if (rval == DTRACE_CONSUME_NEXT)
1954			goto nextepid;
1955
1956		if (rval == DTRACE_CONSUME_ABORT)
1957			return (dt_set_errno(dtp, EDT_DIRABORT));
1958
1959		if (rval != DTRACE_CONSUME_THIS)
1960			return (dt_set_errno(dtp, EDT_BADRVAL));
1961
1962		for (i = 0; i < epd->dtepd_nrecs; i++) {
1963			dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
1964			dtrace_actkind_t act = rec->dtrd_action;
1965
1966			data.dtpda_data = buf->dtbd_data + offs +
1967			    rec->dtrd_offset;
1968			addr = data.dtpda_data;
1969
1970			if (act == DTRACEACT_LIBACT) {
1971				uint64_t arg = rec->dtrd_arg;
1972				dtrace_aggvarid_t id;
1973
1974				switch (arg) {
1975				case DT_ACT_CLEAR:
1976					/* LINTED - alignment */
1977					id = *((dtrace_aggvarid_t *)addr);
1978					(void) dtrace_aggregate_walk(dtp,
1979					    dt_clear_agg, &id);
1980					continue;
1981
1982				case DT_ACT_DENORMALIZE:
1983					/* LINTED - alignment */
1984					id = *((dtrace_aggvarid_t *)addr);
1985					(void) dtrace_aggregate_walk(dtp,
1986					    dt_denormalize_agg, &id);
1987					continue;
1988
1989				case DT_ACT_FTRUNCATE:
1990					if (fp == NULL)
1991						continue;
1992
1993					(void) fflush(fp);
1994					(void) ftruncate(fileno(fp), 0);
1995					(void) fseeko(fp, 0, SEEK_SET);
1996					continue;
1997
1998				case DT_ACT_NORMALIZE:
1999					if (i == epd->dtepd_nrecs - 1)
2000						return (dt_set_errno(dtp,
2001						    EDT_BADNORMAL));
2002
2003					if (dt_normalize(dtp,
2004					    buf->dtbd_data + offs, rec) != 0)
2005						return (-1);
2006
2007					i++;
2008					continue;
2009
2010				case DT_ACT_SETOPT: {
2011					uint64_t *opts = dtp->dt_options;
2012					dtrace_recdesc_t *valrec;
2013					uint32_t valsize;
2014					caddr_t val;
2015					int rv;
2016
2017					if (i == epd->dtepd_nrecs - 1) {
2018						return (dt_set_errno(dtp,
2019						    EDT_BADSETOPT));
2020					}
2021
2022					valrec = &epd->dtepd_rec[++i];
2023					valsize = valrec->dtrd_size;
2024
2025					if (valrec->dtrd_action != act ||
2026					    valrec->dtrd_arg != arg) {
2027						return (dt_set_errno(dtp,
2028						    EDT_BADSETOPT));
2029					}
2030
2031					if (valsize > sizeof (uint64_t)) {
2032						val = buf->dtbd_data + offs +
2033						    valrec->dtrd_offset;
2034					} else {
2035						val = "1";
2036					}
2037
2038					rv = dt_setopt(dtp, &data, addr, val);
2039
2040					if (rv != 0)
2041						return (-1);
2042
2043					flow = (opts[DTRACEOPT_FLOWINDENT] !=
2044					    DTRACEOPT_UNSET);
2045					quiet = (opts[DTRACEOPT_QUIET] !=
2046					    DTRACEOPT_UNSET);
2047
2048					continue;
2049				}
2050
2051				case DT_ACT_TRUNC:
2052					if (i == epd->dtepd_nrecs - 1)
2053						return (dt_set_errno(dtp,
2054						    EDT_BADTRUNC));
2055
2056					if (dt_trunc(dtp,
2057					    buf->dtbd_data + offs, rec) != 0)
2058						return (-1);
2059
2060					i++;
2061					continue;
2062
2063				default:
2064					continue;
2065				}
2066			}
2067
2068			rval = (*rfunc)(&data, rec, arg);
2069
2070			if (rval == DTRACE_CONSUME_NEXT)
2071				continue;
2072
2073			if (rval == DTRACE_CONSUME_ABORT)
2074				return (dt_set_errno(dtp, EDT_DIRABORT));
2075
2076			if (rval != DTRACE_CONSUME_THIS)
2077				return (dt_set_errno(dtp, EDT_BADRVAL));
2078
2079			if (act == DTRACEACT_STACK) {
2080				int depth = rec->dtrd_arg;
2081
2082				if (dt_print_stack(dtp, fp, NULL, addr, depth,
2083				    rec->dtrd_size / depth) < 0)
2084					return (-1);
2085				goto nextrec;
2086			}
2087
2088			if (act == DTRACEACT_USTACK ||
2089			    act == DTRACEACT_JSTACK) {
2090				if (dt_print_ustack(dtp, fp, NULL,
2091				    addr, rec->dtrd_arg) < 0)
2092					return (-1);
2093				goto nextrec;
2094			}
2095
2096			if (act == DTRACEACT_SYM) {
2097				if (dt_print_sym(dtp, fp, NULL, addr) < 0)
2098					return (-1);
2099				goto nextrec;
2100			}
2101
2102			if (act == DTRACEACT_MOD) {
2103				if (dt_print_mod(dtp, fp, NULL, addr) < 0)
2104					return (-1);
2105				goto nextrec;
2106			}
2107
2108			if (act == DTRACEACT_USYM || act == DTRACEACT_UADDR) {
2109				if (dt_print_usym(dtp, fp, addr, act) < 0)
2110					return (-1);
2111				goto nextrec;
2112			}
2113
2114			if (act == DTRACEACT_UMOD) {
2115				if (dt_print_umod(dtp, fp, NULL, addr) < 0)
2116					return (-1);
2117				goto nextrec;
2118			}
2119
2120			if (act == DTRACEACT_PRINTM) {
2121				if (dt_print_memory(dtp, fp, addr) < 0)
2122					return (-1);
2123				goto nextrec;
2124			}
2125
2126			if (act == DTRACEACT_PRINTT) {
2127				if (dt_print_type(dtp, fp, addr) < 0)
2128					return (-1);
2129				goto nextrec;
2130			}
2131
2132			if (DTRACEACT_ISPRINTFLIKE(act)) {
2133				void *fmtdata;
2134				int (*func)(dtrace_hdl_t *, FILE *, void *,
2135				    const dtrace_probedata_t *,
2136				    const dtrace_recdesc_t *, uint_t,
2137				    const void *buf, size_t);
2138
2139				if ((fmtdata = dt_format_lookup(dtp,
2140				    rec->dtrd_format)) == NULL)
2141					goto nofmt;
2142
2143				switch (act) {
2144				case DTRACEACT_PRINTF:
2145					func = dtrace_fprintf;
2146					break;
2147				case DTRACEACT_PRINTA:
2148					func = dtrace_fprinta;
2149					break;
2150				case DTRACEACT_SYSTEM:
2151					func = dtrace_system;
2152					break;
2153				case DTRACEACT_FREOPEN:
2154					func = dtrace_freopen;
2155					break;
2156				}
2157
2158				n = (*func)(dtp, fp, fmtdata, &data,
2159				    rec, epd->dtepd_nrecs - i,
2160				    (uchar_t *)buf->dtbd_data + offs,
2161				    buf->dtbd_size - offs);
2162
2163				if (n < 0)
2164					return (-1); /* errno is set for us */
2165
2166				if (n > 0)
2167					i += n - 1;
2168				goto nextrec;
2169			}
2170
2171nofmt:
2172			if (act == DTRACEACT_PRINTA) {
2173				dt_print_aggdata_t pd;
2174				dtrace_aggvarid_t *aggvars;
2175				int j, naggvars = 0;
2176				size_t size = ((epd->dtepd_nrecs - i) *
2177				    sizeof (dtrace_aggvarid_t));
2178
2179				if ((aggvars = dt_alloc(dtp, size)) == NULL)
2180					return (-1);
2181
2182				/*
2183				 * This might be a printa() with multiple
2184				 * aggregation variables.  We need to scan
2185				 * forward through the records until we find
2186				 * a record from a different statement.
2187				 */
2188				for (j = i; j < epd->dtepd_nrecs; j++) {
2189					dtrace_recdesc_t *nrec;
2190					caddr_t naddr;
2191
2192					nrec = &epd->dtepd_rec[j];
2193
2194					if (nrec->dtrd_uarg != rec->dtrd_uarg)
2195						break;
2196
2197					if (nrec->dtrd_action != act) {
2198						return (dt_set_errno(dtp,
2199						    EDT_BADAGG));
2200					}
2201
2202					naddr = buf->dtbd_data + offs +
2203					    nrec->dtrd_offset;
2204
2205					aggvars[naggvars++] =
2206					    /* LINTED - alignment */
2207					    *((dtrace_aggvarid_t *)naddr);
2208				}
2209
2210				i = j - 1;
2211				bzero(&pd, sizeof (pd));
2212				pd.dtpa_dtp = dtp;
2213				pd.dtpa_fp = fp;
2214
2215				assert(naggvars >= 1);
2216
2217				if (naggvars == 1) {
2218					pd.dtpa_id = aggvars[0];
2219					dt_free(dtp, aggvars);
2220
2221					if (dt_printf(dtp, fp, "\n") < 0 ||
2222					    dtrace_aggregate_walk_sorted(dtp,
2223					    dt_print_agg, &pd) < 0)
2224						return (-1);
2225					goto nextrec;
2226				}
2227
2228				if (dt_printf(dtp, fp, "\n") < 0 ||
2229				    dtrace_aggregate_walk_joined(dtp, aggvars,
2230				    naggvars, dt_print_aggs, &pd) < 0) {
2231					dt_free(dtp, aggvars);
2232					return (-1);
2233				}
2234
2235				dt_free(dtp, aggvars);
2236				goto nextrec;
2237			}
2238
2239			switch (rec->dtrd_size) {
2240			case sizeof (uint64_t):
2241				n = dt_printf(dtp, fp,
2242				    quiet ? "%lld" : " %16lld",
2243				    /* LINTED - alignment */
2244				    *((unsigned long long *)addr));
2245				break;
2246			case sizeof (uint32_t):
2247				n = dt_printf(dtp, fp, quiet ? "%d" : " %8d",
2248				    /* LINTED - alignment */
2249				    *((uint32_t *)addr));
2250				break;
2251			case sizeof (uint16_t):
2252				n = dt_printf(dtp, fp, quiet ? "%d" : " %5d",
2253				    /* LINTED - alignment */
2254				    *((uint16_t *)addr));
2255				break;
2256			case sizeof (uint8_t):
2257				n = dt_printf(dtp, fp, quiet ? "%d" : " %3d",
2258				    *((uint8_t *)addr));
2259				break;
2260			default:
2261				n = dt_print_bytes(dtp, fp, addr,
2262				    rec->dtrd_size, 33, quiet, 0);
2263				break;
2264			}
2265
2266			if (n < 0)
2267				return (-1); /* errno is set for us */
2268
2269nextrec:
2270			if (dt_buffered_flush(dtp, &data, rec, NULL, 0) < 0)
2271				return (-1); /* errno is set for us */
2272		}
2273
2274		/*
2275		 * Call the record callback with a NULL record to indicate
2276		 * that we're done processing this EPID.
2277		 */
2278		rval = (*rfunc)(&data, NULL, arg);
2279nextepid:
2280		offs += epd->dtepd_size;
2281		last = id;
2282	}
2283
2284	if (buf->dtbd_oldest != 0 && start == buf->dtbd_oldest) {
2285		end = buf->dtbd_oldest;
2286		start = 0;
2287		goto again;
2288	}
2289
2290	if ((drops = buf->dtbd_drops) == 0)
2291		return (0);
2292
2293	/*
2294	 * Explicitly zero the drops to prevent us from processing them again.
2295	 */
2296	buf->dtbd_drops = 0;
2297
2298	return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops));
2299}
2300
2301typedef struct dt_begin {
2302	dtrace_consume_probe_f *dtbgn_probefunc;
2303	dtrace_consume_rec_f *dtbgn_recfunc;
2304	void *dtbgn_arg;
2305	dtrace_handle_err_f *dtbgn_errhdlr;
2306	void *dtbgn_errarg;
2307	int dtbgn_beginonly;
2308} dt_begin_t;
2309
2310static int
2311dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg)
2312{
2313	dt_begin_t *begin = (dt_begin_t *)arg;
2314	dtrace_probedesc_t *pd = data->dtpda_pdesc;
2315
2316	int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
2317	int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
2318
2319	if (begin->dtbgn_beginonly) {
2320		if (!(r1 && r2))
2321			return (DTRACE_CONSUME_NEXT);
2322	} else {
2323		if (r1 && r2)
2324			return (DTRACE_CONSUME_NEXT);
2325	}
2326
2327	/*
2328	 * We have a record that we're interested in.  Now call the underlying
2329	 * probe function...
2330	 */
2331	return (begin->dtbgn_probefunc(data, begin->dtbgn_arg));
2332}
2333
2334static int
2335dt_consume_begin_record(const dtrace_probedata_t *data,
2336    const dtrace_recdesc_t *rec, void *arg)
2337{
2338	dt_begin_t *begin = (dt_begin_t *)arg;
2339
2340	return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg));
2341}
2342
2343static int
2344dt_consume_begin_error(const dtrace_errdata_t *data, void *arg)
2345{
2346	dt_begin_t *begin = (dt_begin_t *)arg;
2347	dtrace_probedesc_t *pd = data->dteda_pdesc;
2348
2349	int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
2350	int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
2351
2352	if (begin->dtbgn_beginonly) {
2353		if (!(r1 && r2))
2354			return (DTRACE_HANDLE_OK);
2355	} else {
2356		if (r1 && r2)
2357			return (DTRACE_HANDLE_OK);
2358	}
2359
2360	return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg));
2361}
2362
2363static int
2364dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
2365    dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
2366{
2367	/*
2368	 * There's this idea that the BEGIN probe should be processed before
2369	 * everything else, and that the END probe should be processed after
2370	 * anything else.  In the common case, this is pretty easy to deal
2371	 * with.  However, a situation may arise where the BEGIN enabling and
2372	 * END enabling are on the same CPU, and some enabling in the middle
2373	 * occurred on a different CPU.  To deal with this (blech!) we need to
2374	 * consume the BEGIN buffer up until the end of the BEGIN probe, and
2375	 * then set it aside.  We will then process every other CPU, and then
2376	 * we'll return to the BEGIN CPU and process the rest of the data
2377	 * (which will inevitably include the END probe, if any).  Making this
2378	 * even more complicated (!) is the library's ERROR enabling.  Because
2379	 * this enabling is processed before we even get into the consume call
2380	 * back, any ERROR firing would result in the library's ERROR enabling
2381	 * being processed twice -- once in our first pass (for BEGIN probes),
2382	 * and again in our second pass (for everything but BEGIN probes).  To
2383	 * deal with this, we interpose on the ERROR handler to assure that we
2384	 * only process ERROR enablings induced by BEGIN enablings in the
2385	 * first pass, and that we only process ERROR enablings _not_ induced
2386	 * by BEGIN enablings in the second pass.
2387	 */
2388	dt_begin_t begin;
2389	processorid_t cpu = dtp->dt_beganon;
2390	dtrace_bufdesc_t nbuf;
2391#if !defined(sun)
2392	dtrace_bufdesc_t *pbuf;
2393#endif
2394	int rval, i;
2395	static int max_ncpus;
2396	dtrace_optval_t size;
2397
2398	dtp->dt_beganon = -1;
2399
2400#if defined(sun)
2401	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2402#else
2403	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
2404#endif
2405		/*
2406		 * We really don't expect this to fail, but it is at least
2407		 * technically possible for this to fail with ENOENT.  In this
2408		 * case, we just drive on...
2409		 */
2410		if (errno == ENOENT)
2411			return (0);
2412
2413		return (dt_set_errno(dtp, errno));
2414	}
2415
2416	if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) {
2417		/*
2418		 * This is the simple case.  We're either not stopped, or if
2419		 * we are, we actually processed any END probes on another
2420		 * CPU.  We can simply consume this buffer and return.
2421		 */
2422		return (dt_consume_cpu(dtp, fp, cpu, buf, pf, rf, arg));
2423	}
2424
2425	begin.dtbgn_probefunc = pf;
2426	begin.dtbgn_recfunc = rf;
2427	begin.dtbgn_arg = arg;
2428	begin.dtbgn_beginonly = 1;
2429
2430	/*
2431	 * We need to interpose on the ERROR handler to be sure that we
2432	 * only process ERRORs induced by BEGIN.
2433	 */
2434	begin.dtbgn_errhdlr = dtp->dt_errhdlr;
2435	begin.dtbgn_errarg = dtp->dt_errarg;
2436	dtp->dt_errhdlr = dt_consume_begin_error;
2437	dtp->dt_errarg = &begin;
2438
2439	rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
2440	    dt_consume_begin_record, &begin);
2441
2442	dtp->dt_errhdlr = begin.dtbgn_errhdlr;
2443	dtp->dt_errarg = begin.dtbgn_errarg;
2444
2445	if (rval != 0)
2446		return (rval);
2447
2448	/*
2449	 * Now allocate a new buffer.  We'll use this to deal with every other
2450	 * CPU.
2451	 */
2452	bzero(&nbuf, sizeof (dtrace_bufdesc_t));
2453	(void) dtrace_getopt(dtp, "bufsize", &size);
2454	if ((nbuf.dtbd_data = malloc(size)) == NULL)
2455		return (dt_set_errno(dtp, EDT_NOMEM));
2456
2457	if (max_ncpus == 0)
2458		max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
2459
2460	for (i = 0; i < max_ncpus; i++) {
2461		nbuf.dtbd_cpu = i;
2462
2463		if (i == cpu)
2464			continue;
2465
2466#if defined(sun)
2467		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &nbuf) == -1) {
2468#else
2469		pbuf = &nbuf;
2470		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &pbuf) == -1) {
2471#endif
2472			/*
2473			 * If we failed with ENOENT, it may be because the
2474			 * CPU was unconfigured -- this is okay.  Any other
2475			 * error, however, is unexpected.
2476			 */
2477			if (errno == ENOENT)
2478				continue;
2479
2480			free(nbuf.dtbd_data);
2481
2482			return (dt_set_errno(dtp, errno));
2483		}
2484
2485		if ((rval = dt_consume_cpu(dtp, fp,
2486		    i, &nbuf, pf, rf, arg)) != 0) {
2487			free(nbuf.dtbd_data);
2488			return (rval);
2489		}
2490	}
2491
2492	free(nbuf.dtbd_data);
2493
2494	/*
2495	 * Okay -- we're done with the other buffers.  Now we want to
2496	 * reconsume the first buffer -- but this time we're looking for
2497	 * everything _but_ BEGIN.  And of course, in order to only consume
2498	 * those ERRORs _not_ associated with BEGIN, we need to reinstall our
2499	 * ERROR interposition function...
2500	 */
2501	begin.dtbgn_beginonly = 0;
2502
2503	assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr);
2504	assert(begin.dtbgn_errarg == dtp->dt_errarg);
2505	dtp->dt_errhdlr = dt_consume_begin_error;
2506	dtp->dt_errarg = &begin;
2507
2508	rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
2509	    dt_consume_begin_record, &begin);
2510
2511	dtp->dt_errhdlr = begin.dtbgn_errhdlr;
2512	dtp->dt_errarg = begin.dtbgn_errarg;
2513
2514	return (rval);
2515}
2516
2517int
2518dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
2519    dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
2520{
2521	dtrace_bufdesc_t *buf = &dtp->dt_buf;
2522	dtrace_optval_t size;
2523	static int max_ncpus;
2524	int i, rval;
2525	dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE];
2526	hrtime_t now = gethrtime();
2527
2528	if (dtp->dt_lastswitch != 0) {
2529		if (now - dtp->dt_lastswitch < interval)
2530			return (0);
2531
2532		dtp->dt_lastswitch += interval;
2533	} else {
2534		dtp->dt_lastswitch = now;
2535	}
2536
2537	if (!dtp->dt_active)
2538		return (dt_set_errno(dtp, EINVAL));
2539
2540	if (max_ncpus == 0)
2541		max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
2542
2543	if (pf == NULL)
2544		pf = (dtrace_consume_probe_f *)dt_nullprobe;
2545
2546	if (rf == NULL)
2547		rf = (dtrace_consume_rec_f *)dt_nullrec;
2548
2549	if (buf->dtbd_data == NULL) {
2550		(void) dtrace_getopt(dtp, "bufsize", &size);
2551		if ((buf->dtbd_data = malloc(size)) == NULL)
2552			return (dt_set_errno(dtp, EDT_NOMEM));
2553
2554		buf->dtbd_size = size;
2555	}
2556
2557	/*
2558	 * If we have just begun, we want to first process the CPU that
2559	 * executed the BEGIN probe (if any).
2560	 */
2561	if (dtp->dt_active && dtp->dt_beganon != -1) {
2562		buf->dtbd_cpu = dtp->dt_beganon;
2563		if ((rval = dt_consume_begin(dtp, fp, buf, pf, rf, arg)) != 0)
2564			return (rval);
2565	}
2566
2567	for (i = 0; i < max_ncpus; i++) {
2568		buf->dtbd_cpu = i;
2569
2570		/*
2571		 * If we have stopped, we want to process the CPU on which the
2572		 * END probe was processed only _after_ we have processed
2573		 * everything else.
2574		 */
2575		if (dtp->dt_stopped && (i == dtp->dt_endedon))
2576			continue;
2577
2578#if defined(sun)
2579		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2580#else
2581		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
2582#endif
2583			/*
2584			 * If we failed with ENOENT, it may be because the
2585			 * CPU was unconfigured -- this is okay.  Any other
2586			 * error, however, is unexpected.
2587			 */
2588			if (errno == ENOENT)
2589				continue;
2590
2591			return (dt_set_errno(dtp, errno));
2592		}
2593
2594		if ((rval = dt_consume_cpu(dtp, fp, i, buf, pf, rf, arg)) != 0)
2595			return (rval);
2596	}
2597
2598	if (!dtp->dt_stopped)
2599		return (0);
2600
2601	buf->dtbd_cpu = dtp->dt_endedon;
2602
2603#if defined(sun)
2604	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2605#else
2606	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
2607#endif
2608		/*
2609		 * This _really_ shouldn't fail, but it is strictly speaking
2610		 * possible for this to return ENOENT if the CPU that called
2611		 * the END enabling somehow managed to become unconfigured.
2612		 * It's unclear how the user can possibly expect anything
2613		 * rational to happen in this case -- the state has been thrown
2614		 * out along with the unconfigured CPU -- so we'll just drive
2615		 * on...
2616		 */
2617		if (errno == ENOENT)
2618			return (0);
2619
2620		return (dt_set_errno(dtp, errno));
2621	}
2622
2623	return (dt_consume_cpu(dtp, fp, dtp->dt_endedon, buf, pf, rf, arg));
2624}
2625