1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Copyright (c) 2023, Domagoj Stolfa. All rights reserved.
28 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
29 * Copyright (c) 2012 by Delphix. All rights reserved.
30 */
31
32#include <stdlib.h>
33#include <strings.h>
34#include <errno.h>
35#include <unistd.h>
36#include <limits.h>
37#include <assert.h>
38#include <ctype.h>
39#ifdef illumos
40#include <alloca.h>
41#endif
42#include <dt_impl.h>
43#include <dt_pq.h>
44#include <dt_oformat.h>
45#ifndef illumos
46#include <libproc_compat.h>
47#endif
48
/* Mask that selects the low-order 32 bits of a 64-bit value. */
#define	DT_MASK_LO 0x00000000FFFFFFFFULL

/*
 * Shorthand for dt_print_sym() with NULL passed for its second and third
 * arguments.
 */
#define	dt_format_sym(dtp, addr) dt_print_sym((dtp), NULL, NULL, addr)
52
/*
 * Results handed back by the dt_format_*quantize_prepare() routines:  the
 * range of bins to display, plus the aggregation-specific parameters decoded
 * from the aggregation's argument word.  Only the union arm matching the
 * aggregation kind is meaningful.
 */
typedef struct dt_prepare_args {
	int first_bin;			/* first bin to display */
	int last_bin;			/* last bin to display (inclusive) */
	union {
		struct lquantize_args {
			int base;
			uint16_t step;
			uint16_t levels;
		} lquantize;
		struct llquantize_args {
			int64_t next;
			int64_t step;
			int64_t value;
			int levels;
			int order;
			uint16_t factor;
			uint16_t low;
			uint16_t high;
			uint16_t nsteps;
		} llquantize;
	} u;
} dt_prepare_args_t;

/*
 * Accessors for the lquantize() arm of the union.
 */
#define lquantize_step		u.lquantize.step
#define lquantize_levels	u.lquantize.levels
#define lquantize_base		u.lquantize.base

/*
 * Accessors for the llquantize() arm of the union.
 */
#define	llquantize_next		u.llquantize.next
#define	llquantize_step		u.llquantize.step
#define	llquantize_value	u.llquantize.value
#define	llquantize_levels	u.llquantize.levels
#define	llquantize_order	u.llquantize.order
#define	llquantize_factor	u.llquantize.factor
#define	llquantize_low		u.llquantize.low
#define	llquantize_high		u.llquantize.high
#define	llquantize_nsteps	u.llquantize.nsteps
87
/*
 * Absolute value of a long double.  This is defined locally, rather than
 * using fabsl(), so that libdtrace does not pick up a dependency on libm.
 */
static long double
dt_fabsl(long double x)
{
	return (x < 0 ? -x : x);
}
100
/*
 * Return the number of characters needed to print val in decimal, counting
 * a leading '-' for negative values.  The result is never less than 4, which
 * is the minimum column width used by the callers.
 */
static int
dt_ndigits(long long val)
{
	int rval = 1;
	long long cmp = 10;

	if (val < 0) {
		/*
		 * -INT64_MIN is not representable; INT64_MAX has the same
		 * number of decimal digits, so use it instead.
		 */
		val = val == INT64_MIN ? INT64_MAX : -val;
		rval++;
	}

	/*
	 * The comparison must be inclusive so that exact powers of ten
	 * (e.g. 10000) are credited with their extra digit.
	 */
	while (val >= cmp) {
		rval++;

		/*
		 * Stop before cmp would overflow; no larger power of ten is
		 * representable, so val cannot have any more digits.
		 */
		if (cmp > LLONG_MAX / 10)
			break;

		cmp *= 10;
	}

	return (rval < 4 ? 4 : rval);
}
119
/*
 * 128-bit arithmetic functions needed to support the stddev() aggregating
 * action.  A 128-bit value is held in a two-element uint64_t array, with the
 * low-order word at index 0 and the high-order word at index 1.
 */

/*
 * Return 1 if the 128-bit value a is strictly greater than b, 0 otherwise.
 */
static int
dt_gt_128(uint64_t *a, uint64_t *b)
{
	/* The high-order words decide unless they are equal. */
	if (a[1] != b[1])
		return (a[1] > b[1]);

	return (a[0] > b[0]);
}
129
/*
 * Return 1 if the 128-bit value a (low word at index 0, high word at index
 * 1) is greater than or equal to b, 0 otherwise.
 */
static int
dt_ge_128(uint64_t *a, uint64_t *b)
{
	/* The high-order words decide unless they are equal. */
	if (a[1] != b[1])
		return (a[1] > b[1]);

	return (a[0] >= b[0]);
}
135
/*
 * Return 1 if the 128-bit value a (low word at index 0, high word at index
 * 1) is less than or equal to b, 0 otherwise.
 */
static int
dt_le_128(uint64_t *a, uint64_t *b)
{
	/* The high-order words decide unless they are equal. */
	if (a[1] != b[1])
		return (a[1] < b[1]);

	return (a[0] <= b[0]);
}
141
/*
 * Shift the 128-bit value in a (low word at index 0, high word at index 1)
 * by b bits, in place.  If b is positive, shift left.  If b is negative,
 * shift right; the right shift is logical (zero-filling).  Shifting by 128
 * bits or more in either direction leaves the value zero.
 */
static void
dt_shift_128(uint64_t *a, int b)
{
	if (b == 0)
		return;

	/*
	 * Every bit is shifted out.  Handling this up front keeps all of the
	 * 64-bit shifts below strictly narrower than the word, and avoids
	 * negating INT_MIN.
	 */
	if (b <= -128 || b >= 128) {
		a[0] = 0;
		a[1] = 0;
		return;
	}

	if (b < 0) {
		b = -b;
		if (b >= 64) {
			a[0] = a[1] >> (b - 64);
			a[1] = 0;
		} else {
			/*
			 * The low b bits of the high word become the top b
			 * bits of the low word; the left shift discards the
			 * rest, so no mask is needed.
			 */
			a[0] = (a[0] >> b) | (a[1] << (64 - b));
			a[1] >>= b;
		}
	} else {
		if (b >= 64) {
			a[1] = a[0] << (b - 64);
			a[0] = 0;
		} else {
			/*
			 * The top b bits of the low word carry into the
			 * bottom of the high word.
			 */
			a[1] = (a[1] << b) | (a[0] >> (64 - b));
			a[0] <<= b;
		}
	}
}
178
/*
 * Return the zero-based position of the most significant set bit of the
 * 128-bit value a.  Both 0 and 1 yield 0.  The caller's value is not
 * modified; the shifting is done on a local copy.
 */
static int
dt_nbits_128(uint64_t *a)
{
	uint64_t nil[2] = { 0, 0 };
	uint64_t work[2];
	int count = 0;

	work[0] = a[0];
	work[1] = a[1];

	/*
	 * Discard the lowest bit, then count how many further single-bit
	 * right shifts it takes to reach zero.
	 */
	for (dt_shift_128(work, -1); dt_gt_128(work, nil); count++)
		dt_shift_128(work, -1);

	return (count);
}
197
/*
 * Compute minuend - subtrahend on 128-bit values (low word at index 0, high
 * word at index 1), wrapping modulo 2^128.  The result is assembled in locals
 * before being stored, so difference may alias either operand.
 */
static void
dt_subtract_128(uint64_t *minuend, uint64_t *subtrahend, uint64_t *difference)
{
	/* A borrow is needed when the low-word subtraction wraps. */
	uint64_t borrow = minuend[0] < subtrahend[0] ? 1 : 0;
	uint64_t lo = minuend[0] - subtrahend[0];
	uint64_t hi = minuend[1] - subtrahend[1] - borrow;

	difference[0] = lo;
	difference[1] = hi;
}
210
/*
 * Compute addend1 + addend2 on 128-bit values (low word at index 0, high
 * word at index 1), wrapping modulo 2^128.  The result is assembled in locals
 * before being stored, so sum may alias either operand.
 */
static void
dt_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t lo = addend1[0] + addend2[0];
	/* The low-word addition wrapped if the sum is below an operand. */
	uint64_t carry = (lo < addend1[0] || lo < addend2[0]) ? 1 : 0;
	uint64_t hi = addend1[1] + addend2[1] + carry;

	sum[0] = lo;
	sum[1] = hi;
}
223
224/*
225 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
226 * use native multiplication on those, and then re-combine into the
227 * resulting 128-bit value.
228 *
229 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
230 *     hi1 * hi2 << 64 +
231 *     hi1 * lo2 << 32 +
232 *     hi2 * lo1 << 32 +
233 *     lo1 * lo2
234 */
235static void
236dt_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
237{
238	uint64_t hi1, hi2, lo1, lo2;
239	uint64_t tmp[2];
240
241	hi1 = factor1 >> 32;
242	hi2 = factor2 >> 32;
243
244	lo1 = factor1 & DT_MASK_LO;
245	lo2 = factor2 & DT_MASK_LO;
246
247	product[0] = lo1 * lo2;
248	product[1] = hi1 * hi2;
249
250	tmp[0] = hi1 * lo2;
251	tmp[1] = 0;
252	dt_shift_128(tmp, 32);
253	dt_add_128(product, tmp, product);
254
255	tmp[0] = hi2 * lo1;
256	tmp[1] = 0;
257	dt_shift_128(tmp, 32);
258	dt_add_128(product, tmp, product);
259}
260
/*
 * Divide a 128-bit dividend by a non-zero 64-bit divisor, storing the
 * 128-bit quotient (the remainder is discarded).  The dividend is copied
 * before any result is stored, so quotient may alias dividend.
 *
 * This is long-hand division.  The divisor is first shifted left as far as
 * it will go, along with a single-bit mask marking the quotient bit that the
 * shifted divisor corresponds to.  On each step, if the shifted divisor fits
 * in what is left of the dividend, it is subtracted and the marked bit is set
 * in the quotient; the shifted divisor and the mask then both move right by
 * one bit.  We are done once less than one divisor's worth remains.
 */
static void
dt_divide_128(uint64_t *dividend, uint64_t divisor, uint64_t *quotient)
{
	uint64_t quot[2] = { 0, 0 };
	uint64_t left[2];
	uint64_t shifted[2];
	uint64_t wide_divisor[2];
	uint64_t bit[2] = { 1, 0 };
	uint64_t d;
	int width = 0;

	assert(divisor != 0);

	wide_divisor[0] = shifted[0] = divisor;
	wide_divisor[1] = shifted[1] = 0;

	left[0] = dividend[0];
	left[1] = dividend[1];

	/* Count the significant bits in the divisor. */
	for (d = divisor; d > 0; d >>= 1)
		width++;

	dt_shift_128(shifted, 128 - width);
	dt_shift_128(bit, 128 - width);

	while (dt_ge_128(left, wide_divisor)) {
		if (dt_ge_128(left, shifted)) {
			dt_subtract_128(left, shifted, left);
			quot[0] |= bit[0];
			quot[1] |= bit[1];
		}

		dt_shift_128(shifted, -1);
		dt_shift_128(bit, -1);
	}

	quotient[0] = quot[0];
	quotient[1] = quot[1];
}
312
313/*
314 * This is the long-hand method of calculating a square root.
315 * The algorithm is as follows:
316 *
317 * 1. Group the digits by 2 from the right.
 * 2. Over the leftmost group, find the largest single-digit number
 *    whose square is less than or equal to that group.
320 * 3. Subtract the result of the previous step (2 or 4, depending) and
321 *    bring down the next two-digit group.
 * 4. For the result R we have so far, find the largest single-digit number
 *    x such that 2 * R * 10 * x + x^2 does not exceed the result from step 3.
324 *    (Note that this is doubling R and performing a decimal left-shift by 1
325 *    and searching for the appropriate decimal to fill the one's place.)
326 *    The value x is the next digit in the square root.
327 * Repeat steps 3 and 4 until the desired precision is reached.  (We're
328 * dealing with integers, so the above is sufficient.)
329 *
330 * In decimal, the square root of 582,734 would be calculated as so:
331 *
332 *     __7__6__3
333 *    | 58 27 34
334 *     -49       (7^2 == 49 => 7 is the first digit in the square root)
335 *      --
336 *       9 27    (Subtract and bring down the next group.)
337 * 146   8 76    (2 * 7 * 10 * 6 + 6^2 == 876 => 6 is the next digit in
338 *      -----     the square root)
339 *         51 34 (Subtract and bring down the next group.)
340 * 1523    45 69 (2 * 76 * 10 * 3 + 3^2 == 4569 => 3 is the next digit in
341 *         -----  the square root)
342 *          5 65 (remainder)
343 *
 * The above algorithm applies similarly in binary, but note that the
 * only possible non-zero value for x in step 4 is 1, so step 4 becomes a
 * simple decision: is 2 * R * 2 * 1 + 1^2 (aka R << 2 + 1) less than or
 * equal to the preceding difference?
348 *
349 * In binary, the square root of 11011011 would be calculated as so:
350 *
351 *     __1__1__1__0
352 *    | 11 01 10 11
353 *      01          (0 << 2 + 1 == 1 < 11 => this bit is 1)
354 *      --
355 *      10 01 10 11
356 * 101   1 01       (1 << 2 + 1 == 101 < 1001 => next bit is 1)
357 *      -----
358 *       1 00 10 11
359 * 1101    11 01    (11 << 2 + 1 == 1101 < 10010 => next bit is 1)
360 *       -------
361 *          1 01 11
362 * 11101    1 11 01 (111 << 2 + 1 == 11101 > 10111 => last bit is 0)
363 *
364 */
static uint64_t
dt_sqrt_128(uint64_t *square)
{
	/*
	 * root accumulates the result one bit per iteration; rem is the
	 * running difference that each new pair of bits is appended to.
	 */
	uint64_t root[2] = { 0, 0 };
	uint64_t rem[2] = { 0, 0 };
	uint64_t unit[2] = { 1, 0 };
	uint64_t pair[2];
	uint64_t trial[2];
	uint64_t npairs, shift;
	int i, fits;

	npairs = dt_nbits_128(square) / 2;
	shift = npairs * 2;

	for (i = 0; i <= npairs; i++, shift -= 2) {
		/*
		 * Isolate the next pair of bits of the square, working from
		 * the most significant pair downwards, and append it to the
		 * running difference.
		 */
		pair[0] = square[0];
		pair[1] = square[1];
		dt_shift_128(pair, -shift);
		pair[0] &= 0x3;
		pair[1] = 0;

		dt_shift_128(rem, 2);
		dt_add_128(rem, pair, rem);

		/*
		 * trial = R << 2 + 1
		 */
		trial[0] = root[0];
		trial[1] = root[1];
		dt_shift_128(trial, 2);
		dt_add_128(trial, unit, trial);

		/*
		 * If the trial value fits in the difference, subtract it and
		 * the next bit of the root is 1; otherwise the bit is 0.
		 */
		fits = dt_le_128(trial, rem);

		if (fits)
			dt_subtract_128(rem, trial, rem);

		dt_shift_128(root, 1);

		if (fits)
			dt_add_128(root, unit, root);
	}

	/* The root of a 128-bit value always fits in 64 bits. */
	assert(root[1] == 0);

	return (root[0]);
}
415
/*
 * Compute the (normalized) standard deviation from stddev() aggregation
 * data.  As used here, data[0] is the number of samples, data[1] is the sum
 * of the samples (interpreted as signed), and data[2..3] is the 128-bit sum
 * of the squares of the samples.  Returns 0 if there are no samples.
 */
uint64_t
dt_stddev(uint64_t *data, uint64_t normal)
{
	uint64_t mean_of_squares[2];
	uint64_t square_of_mean[2];
	uint64_t variance[2];
	int64_t mean;

	if (data[0] == 0)
		return (0);

	/*
	 * The standard approximation for standard deviation is
	 * sqrt(average(x**2) - average(x)**2), i.e. the square root
	 * of the average of the squares minus the square of the average.
	 * When normalizing, the sum of x**2 must be divided by normal**2,
	 * which is done here as two successive divisions by normal.
	 */
	dt_divide_128(data + 2, normal, mean_of_squares);
	dt_divide_128(mean_of_squares, normal, mean_of_squares);
	dt_divide_128(mean_of_squares, data[0], mean_of_squares);

	mean = (int64_t)data[1] / (int64_t)normal / (int64_t)data[0];

	/* Only the magnitude matters, as the mean is about to be squared. */
	mean = mean < 0 ? -mean : mean;

	dt_multiply_128((uint64_t)mean, (uint64_t)mean, square_of_mean);
	dt_subtract_128(mean_of_squares, square_of_mean, variance);

	return (dt_sqrt_128(variance));
}
448
449static int
450dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
451    dtrace_bufdesc_t *buf, size_t offs)
452{
453	dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd;
454	dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd;
455	char *p = pd->dtpd_provider, *n = pd->dtpd_name, *sub;
456	dtrace_flowkind_t flow = DTRACEFLOW_NONE;
457	const char *str = NULL;
458	static const char *e_str[2] = { " -> ", " => " };
459	static const char *r_str[2] = { " <- ", " <= " };
460	static const char *ent = "entry", *ret = "return";
461	static int entlen = 0, retlen = 0;
462	dtrace_epid_t next, id = epd->dtepd_epid;
463	int rval;
464
465	if (entlen == 0) {
466		assert(retlen == 0);
467		entlen = strlen(ent);
468		retlen = strlen(ret);
469	}
470
471	/*
472	 * If the name of the probe is "entry" or ends with "-entry", we
473	 * treat it as an entry; if it is "return" or ends with "-return",
474	 * we treat it as a return.  (This allows application-provided probes
475	 * like "method-entry" or "function-entry" to participate in flow
476	 * indentation -- without accidentally misinterpreting popular probe
477	 * names like "carpentry", "gentry" or "Coventry".)
478	 */
479	if ((sub = strstr(n, ent)) != NULL && sub[entlen] == '\0' &&
480	    (sub == n || sub[-1] == '-')) {
481		flow = DTRACEFLOW_ENTRY;
482		str = e_str[strcmp(p, "syscall") == 0];
483	} else if ((sub = strstr(n, ret)) != NULL && sub[retlen] == '\0' &&
484	    (sub == n || sub[-1] == '-')) {
485		flow = DTRACEFLOW_RETURN;
486		str = r_str[strcmp(p, "syscall") == 0];
487	}
488
489	/*
490	 * If we're going to indent this, we need to check the ID of our last
491	 * call.  If we're looking at the same probe ID but a different EPID,
492	 * we _don't_ want to indent.  (Yes, there are some minor holes in
493	 * this scheme -- it's a heuristic.)
494	 */
495	if (flow == DTRACEFLOW_ENTRY) {
496		if ((last != DTRACE_EPIDNONE && id != last &&
497		    pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id))
498			flow = DTRACEFLOW_NONE;
499	}
500
501	/*
502	 * If we're going to unindent this, it's more difficult to see if
503	 * we don't actually want to unindent it -- we need to look at the
504	 * _next_ EPID.
505	 */
506	if (flow == DTRACEFLOW_RETURN) {
507		offs += epd->dtepd_size;
508
509		do {
510			if (offs >= buf->dtbd_size)
511				goto out;
512
513			next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
514
515			if (next == DTRACE_EPIDNONE)
516				offs += sizeof (id);
517		} while (next == DTRACE_EPIDNONE);
518
519		if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0)
520			return (rval);
521
522		if (next != id && npd->dtpd_id == pd->dtpd_id)
523			flow = DTRACEFLOW_NONE;
524	}
525
526out:
527	if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) {
528		data->dtpda_prefix = str;
529	} else {
530		data->dtpda_prefix = "| ";
531	}
532
533	if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0)
534		data->dtpda_indent -= 2;
535
536	data->dtpda_flow = flow;
537
538	return (0);
539}
540
/*
 * Do-nothing callback that unconditionally returns DTRACE_CONSUME_THIS.
 * NOTE(review): presumably installed as the per-probe callback when the
 * consumer supplies none, in which case the unprototyped parameter list is
 * what lets it be assigned to that callback type -- confirm at the use site.
 */
static int
dt_nullprobe()
{
	return (DTRACE_CONSUME_THIS);
}
546
/*
 * Do-nothing callback that unconditionally returns DTRACE_CONSUME_NEXT.
 * NOTE(review): presumably installed as the per-record callback when the
 * consumer supplies none, in which case the unprototyped parameter list is
 * what lets it be assigned to that callback type -- confirm at the use site.
 */
static int
dt_nullrec()
{
	return (DTRACE_CONSUME_NEXT);
}
552
553static void
554dt_quantize_total(dtrace_hdl_t *dtp, int64_t datum, long double *total)
555{
556	long double val = dt_fabsl((long double)datum);
557
558	if (dtp->dt_options[DTRACEOPT_AGGZOOM] == DTRACEOPT_UNSET) {
559		*total += val;
560		return;
561	}
562
563	/*
564	 * If we're zooming in on an aggregation, we want the height of the
565	 * highest value to be approximately 95% of total bar height -- so we
566	 * adjust up by the reciprocal of DTRACE_AGGZOOM_MAX when comparing to
567	 * our highest value.
568	 */
569	val *= 1 / DTRACE_AGGZOOM_MAX;
570
571	if (*total < val)
572		*total = val;
573}
574
575static int
576dt_print_quanthdr(dtrace_hdl_t *dtp, FILE *fp, int width)
577{
578	return (dt_printf(dtp, fp, "\n%*s %41s %-9s\n",
579	    width ? width : 16, width ? "key" : "value",
580	    "------------- Distribution -------------", "count"));
581}
582
583static int
584dt_print_quanthdr_packed(dtrace_hdl_t *dtp, FILE *fp, int width,
585    const dtrace_aggdata_t *aggdata, dtrace_actkind_t action)
586{
587	int min = aggdata->dtada_minbin, max = aggdata->dtada_maxbin;
588	int minwidth, maxwidth, i;
589
590	assert(action == DTRACEAGG_QUANTIZE || action == DTRACEAGG_LQUANTIZE);
591
592	if (action == DTRACEAGG_QUANTIZE) {
593		if (min != 0 && min != DTRACE_QUANTIZE_ZEROBUCKET)
594			min--;
595
596		if (max < DTRACE_QUANTIZE_NBUCKETS - 1)
597			max++;
598
599		minwidth = dt_ndigits(DTRACE_QUANTIZE_BUCKETVAL(min));
600		maxwidth = dt_ndigits(DTRACE_QUANTIZE_BUCKETVAL(max));
601	} else {
602		maxwidth = 8;
603		minwidth = maxwidth - 1;
604		max++;
605	}
606
607	if (dt_printf(dtp, fp, "\n%*s %*s .",
608	    width, width > 0 ? "key" : "", minwidth, "min") < 0)
609		return (-1);
610
611	for (i = min; i <= max; i++) {
612		if (dt_printf(dtp, fp, "-") < 0)
613			return (-1);
614	}
615
616	return (dt_printf(dtp, fp, ". %*s | count\n", -maxwidth, "max"));
617}
618
619/*
620 * We use a subset of the Unicode Block Elements (U+2588 through U+258F,
621 * inclusive) to represent aggregations via UTF-8 -- which are expressed via
622 * 3-byte UTF-8 sequences.
623 */
624#define	DTRACE_AGGUTF8_FULL	0x2588
625#define	DTRACE_AGGUTF8_BASE	0x258f
626#define	DTRACE_AGGUTF8_LEVELS	8
627
628#define	DTRACE_AGGUTF8_BYTE0(val)	(0xe0 | ((val) >> 12))
629#define	DTRACE_AGGUTF8_BYTE1(val)	(0x80 | (((val) >> 6) & 0x3f))
630#define	DTRACE_AGGUTF8_BYTE2(val)	(0x80 | ((val) & 0x3f))
631
632static int
633dt_print_quantline_utf8(dtrace_hdl_t *dtp, FILE *fp, int64_t val,
634    uint64_t normal, long double total)
635{
636	uint_t len = 40, i, whole, partial;
637	long double f = (dt_fabsl((long double)val) * len) / total;
638	const char *spaces = "                                        ";
639
640	whole = (uint_t)f;
641	partial = (uint_t)((f - (long double)(uint_t)f) *
642	    (long double)DTRACE_AGGUTF8_LEVELS);
643
644	if (dt_printf(dtp, fp, "|") < 0)
645		return (-1);
646
647	for (i = 0; i < whole; i++) {
648		if (dt_printf(dtp, fp, "%c%c%c",
649		    DTRACE_AGGUTF8_BYTE0(DTRACE_AGGUTF8_FULL),
650		    DTRACE_AGGUTF8_BYTE1(DTRACE_AGGUTF8_FULL),
651		    DTRACE_AGGUTF8_BYTE2(DTRACE_AGGUTF8_FULL)) < 0)
652			return (-1);
653	}
654
655	if (partial != 0) {
656		partial = DTRACE_AGGUTF8_BASE - (partial - 1);
657
658		if (dt_printf(dtp, fp, "%c%c%c",
659		    DTRACE_AGGUTF8_BYTE0(partial),
660		    DTRACE_AGGUTF8_BYTE1(partial),
661		    DTRACE_AGGUTF8_BYTE2(partial)) < 0)
662			return (-1);
663
664		i++;
665	}
666
667	return (dt_printf(dtp, fp, "%s %-9lld\n", spaces + i,
668	    (long long)val / normal));
669}
670
671static int
672dt_print_quantline(dtrace_hdl_t *dtp, FILE *fp, int64_t val,
673    uint64_t normal, long double total, char positives, char negatives)
674{
675	long double f;
676	uint_t depth, len = 40;
677
678	const char *ats = "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@";
679	const char *spaces = "                                        ";
680
681	assert(strlen(ats) == len && strlen(spaces) == len);
682	assert(!(total == 0 && (positives || negatives)));
683	assert(!(val < 0 && !negatives));
684	assert(!(val > 0 && !positives));
685	assert(!(val != 0 && total == 0));
686
687	if (!negatives) {
688		if (positives) {
689			if (dtp->dt_encoding == DT_ENCODING_UTF8) {
690				return (dt_print_quantline_utf8(dtp, fp, val,
691				    normal, total));
692			}
693
694			f = (dt_fabsl((long double)val) * len) / total;
695			depth = (uint_t)(f + 0.5);
696		} else {
697			depth = 0;
698		}
699
700		return (dt_printf(dtp, fp, "|%s%s %-9lld\n", ats + len - depth,
701		    spaces + depth, (long long)val / normal));
702	}
703
704	if (!positives) {
705		f = (dt_fabsl((long double)val) * len) / total;
706		depth = (uint_t)(f + 0.5);
707
708		return (dt_printf(dtp, fp, "%s%s| %-9lld\n", spaces + depth,
709		    ats + len - depth, (long long)val / normal));
710	}
711
712	/*
713	 * If we're here, we have both positive and negative bucket values.
714	 * To express this graphically, we're going to generate both positive
715	 * and negative bars separated by a centerline.  These bars are half
716	 * the size of normal quantize()/lquantize() bars, so we divide the
717	 * length in half before calculating the bar length.
718	 */
719	len /= 2;
720	ats = &ats[len];
721	spaces = &spaces[len];
722
723	f = (dt_fabsl((long double)val) * len) / total;
724	depth = (uint_t)(f + 0.5);
725
726	if (val <= 0) {
727		return (dt_printf(dtp, fp, "%s%s|%*s %-9lld\n", spaces + depth,
728		    ats + len - depth, len, "", (long long)val / normal));
729	} else {
730		return (dt_printf(dtp, fp, "%20s|%s%s %-9lld\n", "",
731		    ats + len - depth, spaces + depth,
732		    (long long)val / normal));
733	}
734}
735
736/*
737 * As with UTF-8 printing of aggregations, we use a subset of the Unicode
738 * Block Elements (U+2581 through U+2588, inclusive) to represent our packed
739 * aggregation.
740 */
741#define	DTRACE_AGGPACK_BASE	0x2581
742#define	DTRACE_AGGPACK_LEVELS	8
743
744static int
745dt_print_packed(dtrace_hdl_t *dtp, FILE *fp,
746    long double datum, long double total)
747{
748	static boolean_t utf8_checked = B_FALSE;
749	static boolean_t utf8;
750	char *ascii = "__xxxxXX";
751	char *neg = "vvvvVV";
752	unsigned int len;
753	long double val;
754
755	if (!utf8_checked) {
756		char *term;
757
758		/*
759		 * We want to determine if we can reasonably emit UTF-8 for our
760		 * packed aggregation.  To do this, we will check for terminals
761		 * that are known to be primitive to emit UTF-8 on these.
762		 */
763		utf8_checked = B_TRUE;
764
765		if (dtp->dt_encoding == DT_ENCODING_ASCII) {
766			utf8 = B_FALSE;
767		} else if (dtp->dt_encoding == DT_ENCODING_UTF8) {
768			utf8 = B_TRUE;
769		} else if ((term = getenv("TERM")) != NULL &&
770		    (strcmp(term, "sun") == 0 ||
771		    strcmp(term, "sun-color") == 0 ||
772		    strcmp(term, "dumb") == 0)) {
773			utf8 = B_FALSE;
774		} else {
775			utf8 = B_TRUE;
776		}
777	}
778
779	if (datum == 0)
780		return (dt_printf(dtp, fp, " "));
781
782	if (datum < 0) {
783		len = strlen(neg);
784		val = dt_fabsl(datum * (len - 1)) / total;
785		return (dt_printf(dtp, fp, "%c", neg[(uint_t)(val + 0.5)]));
786	}
787
788	if (utf8) {
789		int block = DTRACE_AGGPACK_BASE + (unsigned int)(((datum *
790		    (DTRACE_AGGPACK_LEVELS - 1)) / total) + 0.5);
791
792		return (dt_printf(dtp, fp, "%c%c%c",
793		    DTRACE_AGGUTF8_BYTE0(block),
794		    DTRACE_AGGUTF8_BYTE1(block),
795		    DTRACE_AGGUTF8_BYTE2(block)));
796	}
797
798	len = strlen(ascii);
799	val = (datum * (len - 1)) / total;
800	return (dt_printf(dtp, fp, "%c", ascii[(uint_t)(val + 0.5)]));
801}
802
803static const int64_t *
804dt_format_quantize_prepare(dtrace_hdl_t *dtp, const void *addr, size_t size,
805    dt_prepare_args_t *args)
806{
807	const int64_t *data = addr;
808	int first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
809
810	if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t)) {
811		(void) dt_set_errno(dtp, EDT_DMISMATCH);
812		return (NULL);
813	}
814
815	while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0)
816		first_bin++;
817
818	if (first_bin == DTRACE_QUANTIZE_NBUCKETS - 1) {
819		/*
820		 * There isn't any data.  This is possible if the aggregation
821		 * has been clear()'d or if negative increment values have been
822		 * used.  Regardless, we'll print the buckets around 0.
823		 */
824		first_bin = DTRACE_QUANTIZE_ZEROBUCKET - 1;
825		last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 1;
826	} else {
827		if (first_bin > 0)
828			first_bin--;
829
830		while (last_bin > 0 && data[last_bin] == 0)
831			last_bin--;
832
833		if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1)
834			last_bin++;
835	}
836
837	args->first_bin = first_bin;
838	args->last_bin = last_bin;
839	return (data);
840}
841
842int
843dt_format_quantize(dtrace_hdl_t *dtp, const void *addr, size_t size,
844    uint64_t normal)
845{
846	const int64_t *data;
847	dt_prepare_args_t args = { 0 };
848	int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
849
850	data = dt_format_quantize_prepare(dtp, addr, size, &args);
851	/* dt_errno is set for us */
852	if (data == NULL)
853		return (-1);
854
855	first_bin = args.first_bin;
856	last_bin = args.last_bin;
857
858	xo_open_list("buckets");
859	for (i = first_bin; i <= last_bin; i++) {
860		long long value = (long long)DTRACE_QUANTIZE_BUCKETVAL(i);
861		xo_open_instance("buckets");
862		xo_emit("{:value/%lld} {:count/%lld}", value,
863		    (long long)data[i] / normal);
864		xo_close_instance("buckets");
865	}
866	xo_close_list("buckets");
867
868	return (0);
869}
870
871int
872dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
873    size_t size, uint64_t normal)
874{
875	const int64_t *data;
876	dt_prepare_args_t args = { 0 };
877	int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
878	long double total = 0;
879	char positives = 0, negatives = 0;
880
881	data = dt_format_quantize_prepare(dtp, addr, size, &args);
882	/* dt_errno is set for us */
883	if (data == NULL)
884		return (-1);
885
886	first_bin = args.first_bin;
887	last_bin = args.last_bin;
888
889	for (i = first_bin; i <= last_bin; i++) {
890		positives |= (data[i] > 0);
891		negatives |= (data[i] < 0);
892		dt_quantize_total(dtp, data[i], &total);
893	}
894
895	if (dt_print_quanthdr(dtp, fp, 0) < 0)
896		return (-1);
897
898	for (i = first_bin; i <= last_bin; i++) {
899		if (dt_printf(dtp, fp, "%16lld ",
900		    (long long)DTRACE_QUANTIZE_BUCKETVAL(i)) < 0)
901			return (-1);
902
903		if (dt_print_quantline(dtp, fp, data[i], normal, total,
904		    positives, negatives) < 0)
905			return (-1);
906	}
907
908	return (0);
909}
910
911int
912dt_print_quantize_packed(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
913    size_t size, const dtrace_aggdata_t *aggdata)
914{
915	const int64_t *data = addr;
916	long double total = 0, count = 0;
917	int min = aggdata->dtada_minbin, max = aggdata->dtada_maxbin, i;
918	int64_t minval, maxval;
919
920	if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
921		return (dt_set_errno(dtp, EDT_DMISMATCH));
922
923	if (min != 0 && min != DTRACE_QUANTIZE_ZEROBUCKET)
924		min--;
925
926	if (max < DTRACE_QUANTIZE_NBUCKETS - 1)
927		max++;
928
929	minval = DTRACE_QUANTIZE_BUCKETVAL(min);
930	maxval = DTRACE_QUANTIZE_BUCKETVAL(max);
931
932	if (dt_printf(dtp, fp, " %*lld :", dt_ndigits(minval),
933	    (long long)minval) < 0)
934		return (-1);
935
936	for (i = min; i <= max; i++) {
937		dt_quantize_total(dtp, data[i], &total);
938		count += data[i];
939	}
940
941	for (i = min; i <= max; i++) {
942		if (dt_print_packed(dtp, fp, data[i], total) < 0)
943			return (-1);
944	}
945
946	if (dt_printf(dtp, fp, ": %*lld | %lld\n",
947	    -dt_ndigits(maxval), (long long)maxval, (long long)count) < 0)
948		return (-1);
949
950	return (0);
951}
952
953static const int64_t *
954dt_format_lquantize_prepare(dtrace_hdl_t *dtp, const void *addr, size_t size,
955    dt_prepare_args_t *args)
956{
957	const int64_t *data = addr;
958	int first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1, base;
959	uint64_t arg;
960	uint16_t step, levels;
961
962	if (size < sizeof (uint64_t)) {
963		(void) dt_set_errno(dtp, EDT_DMISMATCH);
964		return (NULL);
965	}
966
967	arg = *data++;
968	size -= sizeof (uint64_t);
969
970	base = DTRACE_LQUANTIZE_BASE(arg);
971	step = DTRACE_LQUANTIZE_STEP(arg);
972	levels = DTRACE_LQUANTIZE_LEVELS(arg);
973
974	first_bin = 0;
975	last_bin = levels + 1;
976
977	if (size != sizeof (uint64_t) * (levels + 2)) {
978		(void) dt_set_errno(dtp, EDT_DMISMATCH);
979		return (NULL);
980	}
981
982	while (first_bin <= levels + 1 && data[first_bin] == 0)
983		first_bin++;
984
985	if (first_bin > levels + 1) {
986		first_bin = 0;
987		last_bin = 2;
988	} else {
989		if (first_bin > 0)
990			first_bin--;
991
992		while (last_bin > 0 && data[last_bin] == 0)
993			last_bin--;
994
995		if (last_bin < levels + 1)
996			last_bin++;
997	}
998
999	args->first_bin = first_bin;
1000	args->last_bin = last_bin;
1001	args->lquantize_base = base;
1002	args->lquantize_step = step;
1003	args->lquantize_levels = levels;
1004	return (data);
1005}
1006
1007int
1008dt_format_lquantize(dtrace_hdl_t *dtp, const void *addr, size_t size,
1009    uint64_t normal)
1010{
1011	const int64_t *data;
1012	dt_prepare_args_t args = { 0 };
1013	int i, first_bin, last_bin, base;
1014	uint16_t step, levels;
1015
1016	data = dt_format_lquantize_prepare(dtp, addr, size, &args);
1017	/* dt_errno is set for us */
1018	if (data == NULL)
1019		return (-1);
1020
1021	first_bin = args.first_bin;
1022	last_bin = args.last_bin;
1023	step = args.lquantize_step;
1024	levels = args.lquantize_levels;
1025	base = args.lquantize_base;
1026
1027	xo_open_list("buckets");
1028	for (i = first_bin; i <= last_bin; i++) {
1029		char c[32];
1030		int err;
1031
1032		xo_open_instance("buckets");
1033		if (i == 0) {
1034			xo_emit("{:value/%d} {:operator/%s}", base, "<");
1035		} else if (i == levels + 1) {
1036			xo_emit("{:value/%d} {:operator/%s}",
1037			    base + (levels * step), ">=");
1038		} else {
1039			xo_emit("{:value/%d}", base + (i - 1) * step);
1040		}
1041
1042		xo_emit("{:count/%lld}", (long long)data[i] / normal);
1043		xo_close_instance("buckets");
1044	}
1045	xo_close_list("buckets");
1046
1047	return (0);
1048}
1049
1050int
1051dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
1052    size_t size, uint64_t normal)
1053{
1054	const int64_t *data;
1055	dt_prepare_args_t args = { 0 };
1056	int i, first_bin, last_bin, base;
1057	uint64_t arg;
1058	long double total = 0;
1059	uint16_t step, levels;
1060	char positives = 0, negatives = 0;
1061
1062	data = dt_format_lquantize_prepare(dtp, addr, size, &args);
1063	/* dt_errno is set for us */
1064	if (data == NULL)
1065		return (-1);
1066
1067	first_bin = args.first_bin;
1068	last_bin = args.last_bin;
1069	step = args.lquantize_step;
1070	levels = args.lquantize_levels;
1071	base = args.lquantize_base;
1072
1073	for (i = first_bin; i <= last_bin; i++) {
1074		positives |= (data[i] > 0);
1075		negatives |= (data[i] < 0);
1076		dt_quantize_total(dtp, data[i], &total);
1077	}
1078
1079	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
1080	    "------------- Distribution -------------", "count") < 0)
1081		return (-1);
1082
1083	for (i = first_bin; i <= last_bin; i++) {
1084		char c[32];
1085		int err;
1086
1087		if (i == 0) {
1088			(void) snprintf(c, sizeof (c), "< %d", base);
1089			err = dt_printf(dtp, fp, "%16s ", c);
1090		} else if (i == levels + 1) {
1091			(void) snprintf(c, sizeof (c), ">= %d",
1092			    base + (levels * step));
1093			err = dt_printf(dtp, fp, "%16s ", c);
1094		} else {
1095			err = dt_printf(dtp, fp, "%16d ",
1096			    base + (i - 1) * step);
1097		}
1098
1099		if (err < 0 || dt_print_quantline(dtp, fp, data[i], normal,
1100		    total, positives, negatives) < 0)
1101			return (-1);
1102	}
1103
1104	return (0);
1105}
1106
1107/*ARGSUSED*/
1108int
1109dt_print_lquantize_packed(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
1110    size_t size, const dtrace_aggdata_t *aggdata)
1111{
1112	const int64_t *data = addr;
1113	long double total = 0, count = 0;
1114	int min, max, base, err;
1115	uint64_t arg;
1116	uint16_t step, levels;
1117	char c[32];
1118	unsigned int i;
1119
1120	if (size < sizeof (uint64_t))
1121		return (dt_set_errno(dtp, EDT_DMISMATCH));
1122
1123	arg = *data++;
1124	size -= sizeof (uint64_t);
1125
1126	base = DTRACE_LQUANTIZE_BASE(arg);
1127	step = DTRACE_LQUANTIZE_STEP(arg);
1128	levels = DTRACE_LQUANTIZE_LEVELS(arg);
1129
1130	if (size != sizeof (uint64_t) * (levels + 2))
1131		return (dt_set_errno(dtp, EDT_DMISMATCH));
1132
1133	min = 0;
1134	max = levels + 1;
1135
1136	if (min == 0) {
1137		(void) snprintf(c, sizeof (c), "< %d", base);
1138		err = dt_printf(dtp, fp, "%8s :", c);
1139	} else {
1140		err = dt_printf(dtp, fp, "%8d :", base + (min - 1) * step);
1141	}
1142
1143	if (err < 0)
1144		return (-1);
1145
1146	for (i = min; i <= max; i++) {
1147		dt_quantize_total(dtp, data[i], &total);
1148		count += data[i];
1149	}
1150
1151	for (i = min; i <= max; i++) {
1152		if (dt_print_packed(dtp, fp, data[i], total) < 0)
1153			return (-1);
1154	}
1155
1156	(void) snprintf(c, sizeof (c), ">= %d", base + (levels * step));
1157	return (dt_printf(dtp, fp, ": %-8s | %lld\n", c, (long long)count));
1158}
1159
1160static const int64_t *
1161dt_format_llquantize_prepare(dtrace_hdl_t *dtp, const void *addr, size_t size,
1162    dt_prepare_args_t *args)
1163{
1164	int i, first_bin, last_bin, bin = 1, order, levels;
1165	uint16_t factor, low, high, nsteps;
1166	const int64_t *data = addr;
1167	int64_t value = 1, next, step;
1168	uint64_t arg;
1169
1170	if (size < sizeof(uint64_t)) {
1171		(void) dt_set_errno(dtp, EDT_DMISMATCH);
1172		return (NULL);
1173	}
1174
1175	arg = *data++;
1176	size -= sizeof (uint64_t);
1177
1178	factor = DTRACE_LLQUANTIZE_FACTOR(arg);
1179	low = DTRACE_LLQUANTIZE_LOW(arg);
1180	high = DTRACE_LLQUANTIZE_HIGH(arg);
1181	nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
1182
1183	/*
1184	 * We don't expect to be handed invalid llquantize() parameters here,
1185	 * but sanity check them (to a degree) nonetheless.
1186	 */
1187	if (size > INT32_MAX || factor < 2 || low >= high ||
1188	    nsteps == 0 || factor > nsteps) {
1189		(void) dt_set_errno(dtp, EDT_DMISMATCH);
1190		return (NULL);
1191	}
1192
1193	levels = (int)size / sizeof (uint64_t);
1194
1195	first_bin = 0;
1196	last_bin = levels - 1;
1197
1198	while (first_bin < levels && data[first_bin] == 0)
1199		first_bin++;
1200
1201	if (first_bin == levels) {
1202		first_bin = 0;
1203		last_bin = 1;
1204	} else {
1205		if (first_bin > 0)
1206			first_bin--;
1207
1208		while (last_bin > 0 && data[last_bin] == 0)
1209			last_bin--;
1210
1211		if (last_bin < levels - 1)
1212			last_bin++;
1213	}
1214
1215	for (order = 0; order < low; order++)
1216		value *= factor;
1217
1218	next = value * factor;
1219	step = next > nsteps ? next / nsteps : 1;
1220
1221	args->first_bin = first_bin;
1222	args->last_bin = last_bin;
1223	args->llquantize_factor = factor;
1224	args->llquantize_low = low;
1225	args->llquantize_high = high;
1226	args->llquantize_nsteps = nsteps;
1227	args->llquantize_levels = levels;
1228	args->llquantize_order = order;
1229	args->llquantize_next = next;
1230	args->llquantize_step = step;
1231	args->llquantize_value = value;
1232
1233	return (data);
1234}
1235
1236int
1237dt_format_llquantize(dtrace_hdl_t *dtp, const void *addr, size_t size,
1238    uint64_t normal)
1239{
1240	int first_bin, last_bin, bin = 1, order, levels;
1241	uint16_t factor, low, high, nsteps;
1242	const int64_t *data;
1243	dt_prepare_args_t args = { 0 };
1244	int64_t value = 1, next, step;
1245	uint64_t arg;
1246	char c[32];
1247
1248	data = dt_format_llquantize_prepare(dtp, addr, size, &args);
1249	/* dt_errno is set for us */
1250	if (data == NULL)
1251		return (-1);
1252
1253	first_bin = args.first_bin;
1254	last_bin = args.last_bin;
1255	factor = args.llquantize_factor;
1256	low = args.llquantize_low;
1257	high = args.llquantize_high;
1258	nsteps = args.llquantize_nsteps;
1259	levels = args.llquantize_levels;
1260	order = args.llquantize_order;
1261	next = args.llquantize_next;
1262	step = args.llquantize_step;
1263	value = args.llquantize_value;
1264
1265	xo_open_list("buckets");
1266	if (first_bin == 0) {
1267		/*
1268		 * We have to represent < value somehow in JSON, so we bundle an
1269		 * optional "operator" in llquantize buckets.
1270		 */
1271		xo_open_instance("buckets");
1272		xo_emit("{:value/%lld} {:count/%lld} {:operator/%s}",
1273		    (long long)value, (long long)data[0] / normal, "<");
1274		xo_close_instance("buckets");
1275	}
1276
1277	while (order <= high) {
1278		if (bin >= first_bin && bin <= last_bin) {
1279			xo_open_instance("buckets");
1280			xo_emit("{:value/%lld} {:count/%lld}", (long long)value,
1281			    (long long)data[bin] / normal);
1282			xo_close_instance("buckets");
1283		}
1284
1285		assert(value < next);
1286		bin++;
1287
1288		if ((value += step) != next)
1289			continue;
1290
1291		next = value * factor;
1292		step = next > nsteps ? next / nsteps : 1;
1293		order++;
1294	}
1295
1296	if (last_bin < bin) {
1297		xo_close_list("buckets");
1298		return (0);
1299	}
1300
1301	assert(last_bin == bin);
1302	xo_open_instance("buckets");
1303	xo_emit("{:value/%lld} {:count/%lld} {:operator/%s}", (long long)value,
1304	    (long long)data[bin] / normal, ">=");
1305	xo_close_instance("buckets");
1306
1307	xo_close_list("buckets");
1308	return (0);
1309}
1310
1311int
1312dt_print_llquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
1313    size_t size, uint64_t normal)
1314{
1315	int i, first_bin, last_bin, bin = 1, order, levels;
1316	uint16_t factor, low, high, nsteps;
1317	const int64_t *data;
1318	dt_prepare_args_t args = { 0 };
1319	int64_t value = 1, next, step;
1320	char positives = 0, negatives = 0;
1321	long double total = 0;
1322	uint64_t arg;
1323	char c[32];
1324
1325	data = dt_format_llquantize_prepare(dtp, addr, size, &args);
1326	/* dt_errno is set for us */
1327	if (data == NULL)
1328		return (-1);
1329
1330	first_bin = args.first_bin;
1331	last_bin = args.last_bin;
1332	factor = args.llquantize_factor;
1333	low = args.llquantize_low;
1334	high = args.llquantize_high;
1335	nsteps = args.llquantize_nsteps;
1336	levels = args.llquantize_levels;
1337	order = args.llquantize_order;
1338	next = args.llquantize_next;
1339	step = args.llquantize_step;
1340	value = args.llquantize_value;
1341
1342	for (i = first_bin; i <= last_bin; i++) {
1343		positives |= (data[i] > 0);
1344		negatives |= (data[i] < 0);
1345		dt_quantize_total(dtp, data[i], &total);
1346	}
1347
1348	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
1349	    "------------- Distribution -------------", "count") < 0)
1350		return (-1);
1351
1352	if (first_bin == 0) {
1353		(void) snprintf(c, sizeof (c), "< %lld", (long long)value);
1354
1355		if (dt_printf(dtp, fp, "%16s ", c) < 0)
1356			return (-1);
1357
1358		if (dt_print_quantline(dtp, fp, data[0], normal,
1359		    total, positives, negatives) < 0)
1360			return (-1);
1361	}
1362
1363	while (order <= high) {
1364		if (bin >= first_bin && bin <= last_bin) {
1365			if (dt_printf(dtp, fp, "%16lld ", (long long)value) < 0)
1366				return (-1);
1367
1368			if (dt_print_quantline(dtp, fp, data[bin],
1369			    normal, total, positives, negatives) < 0)
1370				return (-1);
1371		}
1372
1373		assert(value < next);
1374		bin++;
1375
1376		if ((value += step) != next)
1377			continue;
1378
1379		next = value * factor;
1380		step = next > nsteps ? next / nsteps : 1;
1381		order++;
1382	}
1383
1384	if (last_bin < bin)
1385		return (0);
1386
1387	assert(last_bin == bin);
1388	(void) snprintf(c, sizeof (c), ">= %lld", (long long)value);
1389
1390	if (dt_printf(dtp, fp, "%16s ", c) < 0)
1391		return (-1);
1392
1393	return (dt_print_quantline(dtp, fp, data[bin], normal,
1394	    total, positives, negatives));
1395}
1396
1397static int
1398dt_format_average(dtrace_hdl_t *dtp, caddr_t addr, size_t size, uint64_t normal)
1399{
1400	int64_t *data = (int64_t *)addr;
1401
1402	xo_emit("{:average/%lld}",
1403	    data[0] ? (long long)(data[1] / (int64_t)normal / data[0]) : 0);
1404	return (0);
1405}
1406
1407/*ARGSUSED*/
1408static int
1409dt_print_average(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
1410    size_t size, uint64_t normal)
1411{
1412	/* LINTED - alignment */
1413	int64_t *data = (int64_t *)addr;
1414
1415	return (dt_printf(dtp, fp, " %16lld", data[0] ?
1416	    (long long)(data[1] / (int64_t)normal / data[0]) : 0));
1417}
1418
1419static int
1420dt_format_stddev(dtrace_hdl_t *dtp, caddr_t addr, size_t size, uint64_t normal)
1421{
1422	uint64_t *data = (uint64_t *)addr;
1423
1424	xo_emit("{:stddev/%llu}",
1425	    data[0] ? (unsigned long long)dt_stddev(data, normal) : 0);
1426	return (0);
1427}
1428
1429/*ARGSUSED*/
1430static int
1431dt_print_stddev(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
1432    size_t size, uint64_t normal)
1433{
1434	/* LINTED - alignment */
1435	uint64_t *data = (uint64_t *)addr;
1436
1437	return (dt_printf(dtp, fp, " %16llu", data[0] ?
1438	    (unsigned long long) dt_stddev(data, normal) : 0));
1439}
1440
/*
 * Print nbytes bytes starting at addr, either as a string or as a hex dump.
 *
 * A zero-length range prints nothing and returns 0.  If forceraw is non-zero
 * or the rawbytes option is set, the hex dump is used unconditionally.
 * Otherwise the range is printed as a string when it consists solely of
 * printable characters, optionally followed by nothing but nul bytes;
 * anything else falls through to the hex dump.
 *
 * The hex dump prints a column header and then 16 bytes per row:  the row
 * offset, the bytes in hex, and the same bytes as characters, with anything
 * outside ' '..'~' shown as '.'.
 *
 * Returns -1 as soon as a dt_printf() call in the hex dump fails, the
 * result of dt_printf() on the string paths, and 0 otherwise.
 */
/*ARGSUSED*/
static int
dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
    size_t nbytes, int width, int quiet, int forceraw)
{
	/*
	 * If the byte stream is a series of printable characters, followed by
	 * a terminating byte, we print it out as a string.  Otherwise, we
	 * assume that it's something else and just print the bytes.
	 */
	int i, j, margin = 5;
	char *c = (char *)addr;

	if (nbytes == 0)
		return (0);

	if (forceraw)
		goto raw;

	if (dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET)
		goto raw;

	for (i = 0; i < nbytes; i++) {
		/*
		 * We define a "printable character" to be one for which
		 * isprint(3C) returns non-zero, isspace(3C) returns non-zero,
		 * or a character which is either backspace or the bell.
		 * Backspace and the bell are regrettably special because
		 * they fail the first two tests -- and yet they are entirely
		 * printable.  These are the only two control characters that
		 * have meaning for the terminal and for which isprint(3C) and
		 * isspace(3C) return 0.
		 */
		if (isprint(c[i]) || isspace(c[i]) ||
		    c[i] == '\b' || c[i] == '\a')
			continue;

		if (c[i] == '\0' && i > 0) {
			/*
			 * This looks like it might be a string.  Before we
			 * assume that it is indeed a string, check the
			 * remainder of the byte range; if it contains
			 * additional non-nul characters, we'll assume that
			 * it's a binary stream that just happens to look like
			 * a string, and we'll print out the individual bytes.
			 */
			for (j = i + 1; j < nbytes; j++) {
				if (c[j] != '\0')
					break;
			}

			if (j != nbytes)
				break;

			/* Nul-terminated in place, so c can be printed as is. */
			if (quiet) {
				return (dt_printf(dtp, fp, "%s", c));
			} else {
				return (dt_printf(dtp, fp, " %s%*s",
				    width < 0 ? " " : "", width, c));
			}
		}

		/* A non-printable, non-terminator byte:  not a string. */
		break;
	}

	if (i == nbytes) {
		/*
		 * The byte range is all printable characters, but there is
		 * no trailing nul byte.  We'll assume that it's a string and
		 * print it as such.
		 */
		char *s = alloca(nbytes + 1);
		bcopy(c, s, nbytes);
		s[nbytes] = '\0';
		return (dt_printf(dtp, fp, "  %-*s", width, s));
	}

raw:
	/* Header row:  hex column labels followed by the ASCII column labels. */
	if (dt_printf(dtp, fp, "\n%*s      ", margin, "") < 0)
		return (-1);

	for (i = 0; i < 16; i++)
		if (dt_printf(dtp, fp, "  %c", "0123456789abcdef"[i]) < 0)
			return (-1);

	if (dt_printf(dtp, fp, "  0123456789abcdef\n") < 0)
		return (-1);


	/* One output row per 16 input bytes, prefixed with the row offset. */
	for (i = 0; i < nbytes; i += 16) {
		if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0)
			return (-1);

		for (j = i; j < i + 16 && j < nbytes; j++) {
			if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0)
				return (-1);
		}

		/* Pad a short final row so the ASCII column stays aligned. */
		while (j++ % 16) {
			if (dt_printf(dtp, fp, "   ") < 0)
				return (-1);
		}

		if (dt_printf(dtp, fp, "  ") < 0)
			return (-1);

		for (j = i; j < i + 16 && j < nbytes; j++) {
			if (dt_printf(dtp, fp, "%c",
			    c[j] < ' ' || c[j] > '~' ? '.' : c[j]) < 0)
				return (-1);
		}

		if (dt_printf(dtp, fp, "\n") < 0)
			return (-1);
	}

	return (0);
}
1559
1560int
1561dt_format_stack(dtrace_hdl_t *dtp, caddr_t addr, int depth, int size)
1562{
1563	dtrace_syminfo_t dts;
1564	GElf_Sym sym;
1565	int i;
1566	uint64_t pc;
1567
1568	xo_open_list("stack-frames");
1569	for (i = 0; i < depth; i++) {
1570		switch (size) {
1571		case sizeof (uint32_t):
1572			pc = *((uint32_t *)addr);
1573			break;
1574
1575		case sizeof (uint64_t):
1576			pc = *((uint64_t *)addr);
1577			break;
1578
1579		default:
1580			return (dt_set_errno(dtp, EDT_BADSTACKPC));
1581		}
1582
1583		if (pc == 0)
1584			break;
1585
1586		addr += size;
1587
1588		xo_open_instance("stack-frames");
1589		if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
1590			if (pc > sym.st_value) {
1591				xo_emit("{:symbol/%s`%s+0x%llx} {:module/%s} "
1592					"{:name/%s} {:offset/0x%llx}",
1593				    dts.dts_object, dts.dts_name,
1594				    (u_longlong_t)(pc - sym.st_value),
1595				    dts.dts_object, dts.dts_name,
1596				    (u_longlong_t)(pc - sym.st_value));
1597			} else {
1598				xo_emit("{:symbol/%s`%s} {:module/%s} "
1599					"{:name/%s}",
1600				    dts.dts_object, dts.dts_name,
1601				    dts.dts_object, dts.dts_name);
1602			}
1603		} else {
1604			/*
1605			 * We'll repeat the lookup, but this time we'll specify
1606			 * a NULL GElf_Sym -- indicating that we're only
1607			 * interested in the containing module.
1608			 */
1609			if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1610				xo_emit("{:symbol/%s`0x%llx} {:module/%s} "
1611					"{:offset/0x%llx}",
1612				    dts.dts_object, (u_longlong_t)pc,
1613				    dts.dts_object, (u_longlong_t)pc);
1614			} else {
1615				xo_emit("{:symbol/0x%llx} {:offset/0x%llx}",
1616				    (u_longlong_t)pc, (u_longlong_t)pc);
1617			}
1618		}
1619		xo_close_instance("stack-frames");
1620	}
1621	xo_close_list("stack-frames");
1622
1623	return (0);
1624}
1625
/*
 * Emit a user stack trace as a libxo "ustack-frames" list.
 *
 * The buffer at addr starts with a 64-bit word holding the pid, followed by
 * the program counters; the string table, if arg encodes a non-zero string
 * size, begins (depth + 1) 64-bit words into the buffer.  The frame count
 * and string size are both decoded from arg.  The walk stops at depth frames
 * or at the first zero program counter, whichever comes first.
 *
 * Nothing is emitted when the decoded depth is 0.  The return value is err,
 * which this function never changes from 0.
 */
int
dt_format_ustack(dtrace_hdl_t *dtp, caddr_t addr, uint64_t arg)
{
	uint64_t *pc = (uint64_t *)addr;
	uint32_t depth = DTRACE_USTACK_NFRAMES(arg);
	uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);
	const char *strbase = addr + (depth + 1) * sizeof (uint64_t);
	const char *str = strsize ? strbase : NULL;
	int err = 0;

	char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];
	struct ps_prochandle *P;
	GElf_Sym sym;
	int i, indent;
	pid_t pid;

	if (depth == 0)
		return (0);

	/* The first word is the pid; pc[] then indexes the frames proper. */
	pid = (pid_t)*pc++;

	/*
	 * Ultimately, we need to add an entry point in the library vector for
	 * determining <symbol, offset> from <pid, address>.  For now, if
	 * this is a vector open, we just print the raw address or string.
	 */
	if (dtp->dt_vector == NULL)
		P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
	else
		P = NULL;

	if (P != NULL)
		dt_proc_lock(dtp, P); /* lock handle while we perform lookups */

	xo_open_list("ustack-frames");
	for (i = 0; i < depth && pc[i] != 0; i++) {
		const prmap_t *map;

		xo_open_instance("ustack-frames");
		if (P != NULL && Plookup_by_addr(P, pc[i],
		    name, sizeof (name), &sym) == 0) {
			(void) Pobjname(P, pc[i], objname, sizeof (objname));

			if (pc[i] > sym.st_value) {
				xo_emit("{:symbol/%s`%s+0x%llx} {:module/%s} "
					"{:name/%s} {:offset/0x%llx}",
				    dt_basename(objname), name,
				    (u_longlong_t)(pc[i] - sym.st_value),
				    dt_basename(objname), name,
				    (u_longlong_t)(pc[i] - sym.st_value));
			} else {
				xo_emit("{:symbol/%s`%s} {:module/%s} "
					"{:name/%s}",
				    dt_basename(objname), name,
				    dt_basename(objname), name);
			}
		} else if (str != NULL && str[0] != '\0' && str[0] != '@' &&
		    (P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||
		    (map->pr_mflags & MA_WRITE)))) {
			/*
			 * If the current string pointer in the string table
			 * does not point to an empty string _and_ the program
			 * counter falls in a writable region, we'll use the
			 * string from the string table instead of the raw
			 * address.  This last condition is necessary because
			 * some (broken) ustack helpers will return a string
			 * even for a program counter that they can't
			 * identify.  If we have a string for a program
			 * counter that falls in a segment that isn't
			 * writable, we assume that we have fallen into this
			 * case and we refuse to use the string.
			 */
			xo_emit("{:symbol/%s}", str);
		} else {
			if (P != NULL && Pobjname(P, pc[i], objname,
			    sizeof (objname)) != 0) {
				xo_emit("{:symbol/%s`0x%llx} {:module/%s} "
					"{:offset/0x%llx}",
				    dt_basename(objname), (u_longlong_t)pc[i],
				    dt_basename(objname), (u_longlong_t)pc[i]);
			} else {
				xo_emit("{:symbol/0x%llx} {:offset/0x%llx}",
				    (u_longlong_t)pc[i], (u_longlong_t)pc[i]);
			}
		}

		if (str != NULL && str[0] == '@') {
			/*
			 * If the first character of the string is an "at" sign,
			 * then the string is inferred to be an annotation --
			 * and it is emitted as a separate "annotation" field
			 * with the leading "at" sign stripped.
			 */
			xo_emit("{:annotation/%s}", &str[1]);
		}

		/*
		 * Advance to the next string-table entry; once the cursor
		 * reaches the end of the table, stop consulting it.
		 */
		if (str != NULL) {
			str += strlen(str) + 1;
			if (str - strbase >= strsize)
				str = NULL;
		}
		xo_close_instance("ustack-frames");
	}
	xo_close_list("ustack-frames");

	if (P != NULL) {
		dt_proc_unlock(dtp, P);
		dt_proc_release(dtp, P);
	}

	return (err);
}
1738
1739int
1740dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
1741    caddr_t addr, int depth, int size)
1742{
1743	dtrace_syminfo_t dts;
1744	GElf_Sym sym;
1745	int i, indent;
1746	char c[PATH_MAX * 2];
1747	uint64_t pc;
1748
1749	if (dt_printf(dtp, fp, "\n") < 0)
1750		return (-1);
1751
1752	if (format == NULL)
1753		format = "%s";
1754
1755	if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
1756		indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
1757	else
1758		indent = _dtrace_stkindent;
1759
1760	for (i = 0; i < depth; i++) {
1761		switch (size) {
1762		case sizeof (uint32_t):
1763			/* LINTED - alignment */
1764			pc = *((uint32_t *)addr);
1765			break;
1766
1767		case sizeof (uint64_t):
1768			/* LINTED - alignment */
1769			pc = *((uint64_t *)addr);
1770			break;
1771
1772		default:
1773			return (dt_set_errno(dtp, EDT_BADSTACKPC));
1774		}
1775
1776		if (pc == 0)
1777			break;
1778
1779		addr += size;
1780
1781		if (dt_printf(dtp, fp, "%*s", indent, "") < 0)
1782			return (-1);
1783
1784		if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
1785			if (pc > sym.st_value) {
1786				(void) snprintf(c, sizeof (c), "%s`%s+0x%llx",
1787				    dts.dts_object, dts.dts_name,
1788				    (u_longlong_t)(pc - sym.st_value));
1789			} else {
1790				(void) snprintf(c, sizeof (c), "%s`%s",
1791				    dts.dts_object, dts.dts_name);
1792			}
1793		} else {
1794			/*
1795			 * We'll repeat the lookup, but this time we'll specify
1796			 * a NULL GElf_Sym -- indicating that we're only
1797			 * interested in the containing module.
1798			 */
1799			if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1800				(void) snprintf(c, sizeof (c), "%s`0x%llx",
1801				    dts.dts_object, (u_longlong_t)pc);
1802			} else {
1803				(void) snprintf(c, sizeof (c), "0x%llx",
1804				    (u_longlong_t)pc);
1805			}
1806		}
1807
1808		if (dt_printf(dtp, fp, format, c) < 0)
1809			return (-1);
1810
1811		if (dt_printf(dtp, fp, "\n") < 0)
1812			return (-1);
1813	}
1814
1815	return (0);
1816}
1817
/*
 * Print a user stack trace, one frame per line, after a leading newline.
 *
 * The buffer at addr starts with a 64-bit word holding the pid, followed by
 * the program counters; the string table, if arg encodes a non-zero string
 * size, begins (depth + 1) 64-bit words into the buffer.  The frame count
 * and string size are both decoded from arg.  The walk stops at depth frames
 * or at the first zero program counter, whichever comes first.
 *
 * Each frame is indented by the stackindent option (or _dtrace_stkindent
 * when the option is unset) and rendered through format, which defaults to
 * "%s".
 *
 * Returns 0 when the decoded depth is 0, -1 if the leading newline cannot
 * be printed, and otherwise the result of the last dt_printf() attempted in
 * the loop (0 if no frame was printed).  Print failures inside the loop
 * break out rather than return, so the process handle is always unlocked
 * and released.
 */
int
dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
    caddr_t addr, uint64_t arg)
{
	/* LINTED - alignment */
	uint64_t *pc = (uint64_t *)addr;
	uint32_t depth = DTRACE_USTACK_NFRAMES(arg);
	uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);
	const char *strbase = addr + (depth + 1) * sizeof (uint64_t);
	const char *str = strsize ? strbase : NULL;
	int err = 0;

	char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];
	struct ps_prochandle *P;
	GElf_Sym sym;
	int i, indent;
	pid_t pid;

	if (depth == 0)
		return (0);

	/* The first word is the pid; pc[] then indexes the frames proper. */
	pid = (pid_t)*pc++;

	if (dt_printf(dtp, fp, "\n") < 0)
		return (-1);

	if (format == NULL)
		format = "%s";

	if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
		indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
	else
		indent = _dtrace_stkindent;

	/*
	 * Ultimately, we need to add an entry point in the library vector for
	 * determining <symbol, offset> from <pid, address>.  For now, if
	 * this is a vector open, we just print the raw address or string.
	 */
	if (dtp->dt_vector == NULL)
		P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
	else
		P = NULL;

	if (P != NULL)
		dt_proc_lock(dtp, P); /* lock handle while we perform lookups */

	for (i = 0; i < depth && pc[i] != 0; i++) {
		const prmap_t *map;

		if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
			break;

		if (P != NULL && Plookup_by_addr(P, pc[i],
		    name, sizeof (name), &sym) == 0) {
			(void) Pobjname(P, pc[i], objname, sizeof (objname));

			if (pc[i] > sym.st_value) {
				(void) snprintf(c, sizeof (c),
				    "%s`%s+0x%llx", dt_basename(objname), name,
				    (u_longlong_t)(pc[i] - sym.st_value));
			} else {
				(void) snprintf(c, sizeof (c),
				    "%s`%s", dt_basename(objname), name);
			}
		} else if (str != NULL && str[0] != '\0' && str[0] != '@' &&
		    (P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||
		    (map->pr_mflags & MA_WRITE)))) {
			/*
			 * If the current string pointer in the string table
			 * does not point to an empty string _and_ the program
			 * counter falls in a writable region, we'll use the
			 * string from the string table instead of the raw
			 * address.  This last condition is necessary because
			 * some (broken) ustack helpers will return a string
			 * even for a program counter that they can't
			 * identify.  If we have a string for a program
			 * counter that falls in a segment that isn't
			 * writable, we assume that we have fallen into this
			 * case and we refuse to use the string.
			 */
			(void) snprintf(c, sizeof (c), "%s", str);
		} else {
			if (P != NULL && Pobjname(P, pc[i], objname,
			    sizeof (objname)) != 0) {
				(void) snprintf(c, sizeof (c), "%s`0x%llx",
				    dt_basename(objname), (u_longlong_t)pc[i]);
			} else {
				(void) snprintf(c, sizeof (c), "0x%llx",
				    (u_longlong_t)pc[i]);
			}
		}

		if ((err = dt_printf(dtp, fp, format, c)) < 0)
			break;

		if ((err = dt_printf(dtp, fp, "\n")) < 0)
			break;

		if (str != NULL && str[0] == '@') {
			/*
			 * If the first character of the string is an "at" sign,
			 * then the string is inferred to be an annotation --
			 * and it is printed out beneath the frame and offset
			 * with brackets.
			 */
			if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
				break;

			(void) snprintf(c, sizeof (c), "  [ %s ]", &str[1]);

			if ((err = dt_printf(dtp, fp, format, c)) < 0)
				break;

			if ((err = dt_printf(dtp, fp, "\n")) < 0)
				break;
		}

		/*
		 * Advance to the next string-table entry; once the cursor
		 * reaches the end of the table, stop consulting it.
		 */
		if (str != NULL) {
			str += strlen(str) + 1;
			if (str - strbase >= strsize)
				str = NULL;
		}
	}

	if (P != NULL) {
		dt_proc_unlock(dtp, P);
		dt_proc_release(dtp, P);
	}

	return (err);
}
1950
1951static int
1952dt_format_usym(dtrace_hdl_t *dtp, caddr_t addr, dtrace_actkind_t act)
1953{
1954	uint64_t pid = ((uint64_t *)addr)[0];
1955	uint64_t pc = ((uint64_t *)addr)[1];
1956	char *s;
1957	int n, len = 256;
1958
1959	if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {
1960		struct ps_prochandle *P;
1961
1962		if ((P = dt_proc_grab(dtp, pid,
1963		    PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {
1964			GElf_Sym sym;
1965
1966			dt_proc_lock(dtp, P);
1967
1968			if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)
1969				pc = sym.st_value;
1970
1971			dt_proc_unlock(dtp, P);
1972			dt_proc_release(dtp, P);
1973		}
1974	}
1975
1976	do {
1977		n = len;
1978		s = alloca(n);
1979	} while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);
1980
1981	xo_emit("{:usym/%s}", s);
1982	return (0);
1983}
1984
1985
1986static int
1987dt_print_usym(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, dtrace_actkind_t act)
1988{
1989	/* LINTED - alignment */
1990	uint64_t pid = ((uint64_t *)addr)[0];
1991	/* LINTED - alignment */
1992	uint64_t pc = ((uint64_t *)addr)[1];
1993	const char *format = "  %-50s";
1994	char *s;
1995	int n, len = 256;
1996
1997	if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {
1998		struct ps_prochandle *P;
1999
2000		if ((P = dt_proc_grab(dtp, pid,
2001		    PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {
2002			GElf_Sym sym;
2003
2004			dt_proc_lock(dtp, P);
2005
2006			if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)
2007				pc = sym.st_value;
2008
2009			dt_proc_unlock(dtp, P);
2010			dt_proc_release(dtp, P);
2011		}
2012	}
2013
2014	do {
2015		n = len;
2016		s = alloca(n);
2017	} while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);
2018
2019	return (dt_printf(dtp, fp, format, s));
2020}
2021
2022int
2023dt_format_umod(dtrace_hdl_t *dtp, caddr_t addr)
2024{
2025	uint64_t pid = ((uint64_t *)addr)[0];
2026	uint64_t pc = ((uint64_t *)addr)[1];
2027	int err = 0;
2028
2029	char objname[PATH_MAX];
2030	struct ps_prochandle *P;
2031
2032	/*
2033	 * See the comment in dt_print_ustack() for the rationale for
2034	 * printing raw addresses in the vectored case.
2035	 */
2036	if (dtp->dt_vector == NULL)
2037		P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
2038	else
2039		P = NULL;
2040
2041	if (P != NULL)
2042		dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
2043
2044	if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != 0) {
2045		xo_emit("{:umod/%s}", dt_basename(objname));
2046	} else {
2047		xo_emit("{:umod/0x%llx}", (u_longlong_t)pc);
2048	}
2049
2050	if (P != NULL) {
2051		dt_proc_unlock(dtp, P);
2052		dt_proc_release(dtp, P);
2053	}
2054
2055	return (0);
2056}
2057
2058int
2059dt_print_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
2060{
2061	/* LINTED - alignment */
2062	uint64_t pid = ((uint64_t *)addr)[0];
2063	/* LINTED - alignment */
2064	uint64_t pc = ((uint64_t *)addr)[1];
2065	int err = 0;
2066
2067	char objname[PATH_MAX], c[PATH_MAX * 2];
2068	struct ps_prochandle *P;
2069
2070	if (format == NULL)
2071		format = "  %-50s";
2072
2073	/*
2074	 * See the comment in dt_print_ustack() for the rationale for
2075	 * printing raw addresses in the vectored case.
2076	 */
2077	if (dtp->dt_vector == NULL)
2078		P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
2079	else
2080		P = NULL;
2081
2082	if (P != NULL)
2083		dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
2084
2085	if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != 0) {
2086		(void) snprintf(c, sizeof (c), "%s", dt_basename(objname));
2087	} else {
2088		(void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
2089	}
2090
2091	err = dt_printf(dtp, fp, format, c);
2092
2093	if (P != NULL) {
2094		dt_proc_unlock(dtp, P);
2095		dt_proc_release(dtp, P);
2096	}
2097
2098	return (err);
2099}
2100
2101static int
2102dt_print_sym(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
2103{
2104	/* LINTED - alignment */
2105	uint64_t pc = *((uint64_t *)addr);
2106	dtrace_syminfo_t dts;
2107	GElf_Sym sym;
2108	char c[PATH_MAX * 2];
2109
2110	if (format == NULL)
2111		format = "  %-50s";
2112
2113	if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
2114		if (dtp->dt_oformat)
2115			xo_emit("{:sym/%s`%s} {:object/%s} {:name/%s}",
2116			    dts.dts_object, dts.dts_name, dts.dts_object,
2117			    dts.dts_name);
2118		else
2119			(void) snprintf(c, sizeof (c), "%s`%s",
2120			    dts.dts_object, dts.dts_name);
2121	} else {
2122		/*
2123		 * We'll repeat the lookup, but this time we'll specify a
2124		 * NULL GElf_Sym -- indicating that we're only interested in
2125		 * the containing module.
2126		 */
2127		if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
2128			if (dtp->dt_oformat)
2129				xo_emit("{:sym/%s`0x%llx} {:object/%s} "
2130					"{:offset/0x%llx}",
2131				    dts.dts_object, (u_longlong_t)pc,
2132				    dts.dts_object, (u_longlong_t)pc);
2133			else
2134				(void) snprintf(c, sizeof (c), "%s`0x%llx",
2135				    dts.dts_object, (u_longlong_t)pc);
2136		} else {
2137			if (dtp->dt_oformat)
2138				xo_emit("{:sym/0x%llx} {:offset/0x%llx}",
2139				    (u_longlong_t)pc, (u_longlong_t)pc);
2140			else
2141				(void) snprintf(c, sizeof (c), "0x%llx",
2142				    (u_longlong_t)pc);
2143		}
2144	}
2145
2146	if (dtp->dt_oformat != 0 && dt_printf(dtp, fp, format, c) < 0)
2147		return (-1);
2148
2149	return (0);
2150}
2151
2152int
2153dt_format_mod(dtrace_hdl_t *dtp, caddr_t addr)
2154{
2155	/* LINTED - alignment */
2156	uint64_t pc = *((uint64_t *)addr);
2157	dtrace_syminfo_t dts;
2158
2159	if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
2160		xo_emit("{:mod/%s}", dts.dts_object);
2161	} else {
2162		xo_emit("{:mod/0x%llx}", (u_longlong_t)pc);
2163	}
2164
2165	return (0);
2166}
2167
2168int
2169dt_print_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
2170{
2171	/* LINTED - alignment */
2172	uint64_t pc = *((uint64_t *)addr);
2173	dtrace_syminfo_t dts;
2174	char c[PATH_MAX * 2];
2175
2176	if (format == NULL)
2177		format = "  %-50s";
2178
2179	if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
2180		(void) snprintf(c, sizeof (c), "%s", dts.dts_object);
2181	} else {
2182		(void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
2183	}
2184
2185	if (dt_printf(dtp, fp, format, c) < 0)
2186		return (-1);
2187
2188	return (0);
2189}
2190
2191static char *
2192dt_format_bytes_get(dtrace_hdl_t *dtp, caddr_t addr, size_t nbytes)
2193{
2194	char *s = dt_alloc(dtp, nbytes * 2 + 2 + 1); /* 2 bytes per byte + 0x + '\0' */
2195	char t[6];
2196	char *c = (char *)addr;
2197	size_t i, j;
2198
2199	if (s == NULL)
2200		return (NULL);
2201
2202	/*
2203	 * XXX: Some duplication with dt_print_bytes().
2204	 */
2205	for (i = 0; i < nbytes; i++) {
2206		if (isprint(c[i]) || isspace(c[i]) || c[i] == '\b' || c[i] == '\a')
2207			continue;
2208
2209		if (c[i] == '\0' && i > 0) {
2210			for (j = i + 1; j < nbytes; j++) {
2211				if (c[j] != '\0')
2212					break;
2213			}
2214
2215			if (j != nbytes)
2216				break;
2217
2218			memcpy(s, c, nbytes);
2219			return (s);
2220		}
2221
2222		break;
2223	}
2224
2225	if (i == nbytes) {
2226		memcpy(s, c, nbytes);
2227		s[nbytes] = '\0';
2228		return (s);
2229	}
2230
2231	s[0] = '0';
2232	s[1] = 'x';
2233	for (i = 0; i < nbytes; i++) {
2234		snprintf(t, sizeof(t), "%02x", (uchar_t)c[i]);
2235		memcpy(s + (i * 2) + 2, t, 2);
2236	}
2237
2238	s[nbytes * 2 + 2] = 0;
2239	return (s);
2240}
2241
2242static int
2243dt_format_memory(dtrace_hdl_t *dtp, caddr_t addr)
2244{
2245
2246	size_t nbytes = *((uintptr_t *) addr);
2247	char *s;
2248
2249	s = dt_format_bytes_get(dtp, addr + sizeof(uintptr_t), nbytes);
2250	if (s == NULL)
2251		return (-1);
2252
2253	xo_emit("{:printm/%s}", s);
2254	dt_free(dtp, s);
2255
2256	return (0);
2257}
2258
2259static int
2260dt_print_memory(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr)
2261{
2262	int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
2263	size_t nbytes = *((uintptr_t *) addr);
2264
2265	return (dt_print_bytes(dtp, fp, addr + sizeof(uintptr_t),
2266	    nbytes, 50, quiet, 1));
2267}
2268
/*
 * Argument passed from dt_normalize() to the dt_normalize_agg() walker:
 * which aggregation variable to touch, and the value to normalize it by.
 */
typedef struct dt_normal {
	dtrace_aggvarid_t dtnd_id;	/* aggregation variable ID to match */
	uint64_t dtnd_normal;		/* stored as the match's dtada_normal */
} dt_normal_t;
2273
2274static int
2275dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
2276{
2277	dt_normal_t *normal = arg;
2278	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2279	dtrace_aggvarid_t id = normal->dtnd_id;
2280
2281	if (agg->dtagd_nrecs == 0)
2282		return (DTRACE_AGGWALK_NEXT);
2283
2284	if (agg->dtagd_varid != id)
2285		return (DTRACE_AGGWALK_NEXT);
2286
2287	((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal;
2288	return (DTRACE_AGGWALK_NORMALIZE);
2289}
2290
2291static int
2292dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
2293{
2294	dt_normal_t normal;
2295	caddr_t addr;
2296
2297	/*
2298	 * We (should) have two records:  the aggregation ID followed by the
2299	 * normalization value.
2300	 */
2301	addr = base + rec->dtrd_offset;
2302
2303	if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
2304		return (dt_set_errno(dtp, EDT_BADNORMAL));
2305
2306	/* LINTED - alignment */
2307	normal.dtnd_id = *((dtrace_aggvarid_t *)addr);
2308	rec++;
2309
2310	if (rec->dtrd_action != DTRACEACT_LIBACT)
2311		return (dt_set_errno(dtp, EDT_BADNORMAL));
2312
2313	if (rec->dtrd_arg != DT_ACT_NORMALIZE)
2314		return (dt_set_errno(dtp, EDT_BADNORMAL));
2315
2316	addr = base + rec->dtrd_offset;
2317
2318	switch (rec->dtrd_size) {
2319	case sizeof (uint64_t):
2320		/* LINTED - alignment */
2321		normal.dtnd_normal = *((uint64_t *)addr);
2322		break;
2323	case sizeof (uint32_t):
2324		/* LINTED - alignment */
2325		normal.dtnd_normal = *((uint32_t *)addr);
2326		break;
2327	case sizeof (uint16_t):
2328		/* LINTED - alignment */
2329		normal.dtnd_normal = *((uint16_t *)addr);
2330		break;
2331	case sizeof (uint8_t):
2332		normal.dtnd_normal = *((uint8_t *)addr);
2333		break;
2334	default:
2335		return (dt_set_errno(dtp, EDT_BADNORMAL));
2336	}
2337
2338	(void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);
2339
2340	return (0);
2341}
2342
2343static int
2344dt_denormalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
2345{
2346	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2347	dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
2348
2349	if (agg->dtagd_nrecs == 0)
2350		return (DTRACE_AGGWALK_NEXT);
2351
2352	if (agg->dtagd_varid != id)
2353		return (DTRACE_AGGWALK_NEXT);
2354
2355	return (DTRACE_AGGWALK_DENORMALIZE);
2356}
2357
2358static int
2359dt_clear_agg(const dtrace_aggdata_t *aggdata, void *arg)
2360{
2361	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2362	dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
2363
2364	if (agg->dtagd_nrecs == 0)
2365		return (DTRACE_AGGWALK_NEXT);
2366
2367	if (agg->dtagd_varid != id)
2368		return (DTRACE_AGGWALK_NEXT);
2369
2370	return (DTRACE_AGGWALK_CLEAR);
2371}
2372
/*
 * Argument passed to dt_trunc_agg() while walking the aggregations on
 * behalf of a trunc() action.
 */
typedef struct dt_trunc {
	dtrace_aggvarid_t dttd_id;	/* aggregation variable to truncate */
	uint64_t dttd_remaining;	/* matching entries still to be kept */
} dt_trunc_t;
2377
2378static int
2379dt_trunc_agg(const dtrace_aggdata_t *aggdata, void *arg)
2380{
2381	dt_trunc_t *trunc = arg;
2382	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2383	dtrace_aggvarid_t id = trunc->dttd_id;
2384
2385	if (agg->dtagd_nrecs == 0)
2386		return (DTRACE_AGGWALK_NEXT);
2387
2388	if (agg->dtagd_varid != id)
2389		return (DTRACE_AGGWALK_NEXT);
2390
2391	if (trunc->dttd_remaining == 0)
2392		return (DTRACE_AGGWALK_REMOVE);
2393
2394	trunc->dttd_remaining--;
2395	return (DTRACE_AGGWALK_NEXT);
2396}
2397
2398static int
2399dt_trunc(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
2400{
2401	dt_trunc_t trunc;
2402	caddr_t addr;
2403	int64_t remaining;
2404	int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *);
2405
2406	/*
2407	 * We (should) have two records:  the aggregation ID followed by the
2408	 * number of aggregation entries after which the aggregation is to be
2409	 * truncated.
2410	 */
2411	addr = base + rec->dtrd_offset;
2412
2413	if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
2414		return (dt_set_errno(dtp, EDT_BADTRUNC));
2415
2416	/* LINTED - alignment */
2417	trunc.dttd_id = *((dtrace_aggvarid_t *)addr);
2418	rec++;
2419
2420	if (rec->dtrd_action != DTRACEACT_LIBACT)
2421		return (dt_set_errno(dtp, EDT_BADTRUNC));
2422
2423	if (rec->dtrd_arg != DT_ACT_TRUNC)
2424		return (dt_set_errno(dtp, EDT_BADTRUNC));
2425
2426	addr = base + rec->dtrd_offset;
2427
2428	switch (rec->dtrd_size) {
2429	case sizeof (uint64_t):
2430		/* LINTED - alignment */
2431		remaining = *((int64_t *)addr);
2432		break;
2433	case sizeof (uint32_t):
2434		/* LINTED - alignment */
2435		remaining = *((int32_t *)addr);
2436		break;
2437	case sizeof (uint16_t):
2438		/* LINTED - alignment */
2439		remaining = *((int16_t *)addr);
2440		break;
2441	case sizeof (uint8_t):
2442		remaining = *((int8_t *)addr);
2443		break;
2444	default:
2445		return (dt_set_errno(dtp, EDT_BADNORMAL));
2446	}
2447
2448	if (remaining < 0) {
2449		func = dtrace_aggregate_walk_valsorted;
2450		remaining = -remaining;
2451	} else {
2452		func = dtrace_aggregate_walk_valrevsorted;
2453	}
2454
2455	assert(remaining >= 0);
2456	trunc.dttd_remaining = remaining;
2457
2458	(void) func(dtp, dt_trunc_agg, &trunc);
2459
2460	return (0);
2461}
2462
2463static int
2464dt_format_datum(dtrace_hdl_t *dtp, dtrace_recdesc_t *rec, caddr_t addr,
2465    size_t size, const dtrace_aggdata_t *aggdata, uint64_t normal,
2466    dt_print_aggdata_t *pd)
2467{
2468	dtrace_actkind_t act = rec->dtrd_action;
2469	boolean_t packed = pd->dtpa_agghist || pd->dtpa_aggpack;
2470	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2471	char fmt[512];
2472	char *s;
2473
2474	if (packed && pd->dtpa_agghisthdr != agg->dtagd_varid)
2475		pd->dtpa_agghisthdr = agg->dtagd_varid;
2476
2477	switch (act) {
2478	case DTRACEACT_STACK:
2479		return (dt_format_stack(dtp, addr, rec->dtrd_arg,
2480		    rec->dtrd_size / rec->dtrd_arg));
2481
2482	case DTRACEACT_USTACK:
2483	case DTRACEACT_JSTACK:
2484		return (dt_format_ustack(dtp, addr, rec->dtrd_arg));
2485
2486	case DTRACEACT_USYM:
2487	case DTRACEACT_UADDR:
2488		return (dt_format_usym(dtp, addr, act));
2489
2490	case DTRACEACT_UMOD:
2491		return (dt_format_umod(dtp, addr));
2492
2493	case DTRACEACT_SYM:
2494		return (dt_format_sym(dtp, addr));
2495	case DTRACEACT_MOD:
2496		return (dt_format_mod(dtp, addr));
2497
2498	case DTRACEAGG_QUANTIZE:
2499		return (dt_format_quantize(dtp, addr, size, normal));
2500
2501	case DTRACEAGG_LQUANTIZE:
2502		return (dt_format_lquantize(dtp, addr, size, normal));
2503
2504	case DTRACEAGG_LLQUANTIZE:
2505		return (dt_format_llquantize(dtp, addr, size, normal));
2506
2507	case DTRACEAGG_AVG:
2508		return (dt_format_average(dtp, addr, size, normal));
2509
2510	case DTRACEAGG_STDDEV:
2511		return (dt_format_stddev(dtp, addr, size, normal));
2512
2513	default:
2514		break;
2515	}
2516
2517	switch (size) {
2518	case sizeof (uint64_t):
2519		snprintf(fmt, sizeof(fmt), "{:%s/%%lld}", pd->dtpa_keyname);
2520		xo_emit(fmt, (long long)*((uint64_t *)addr) / normal);
2521		break;
2522	case sizeof (uint32_t):
2523		snprintf(fmt, sizeof(fmt), "{:%s/%%d}", pd->dtpa_keyname);
2524		xo_emit(fmt, *((uint32_t *)addr) / (uint32_t)normal);
2525		break;
2526	case sizeof (uint16_t):
2527		snprintf(fmt, sizeof(fmt), "{:%s/%%d}", pd->dtpa_keyname);
2528		xo_emit(fmt, *((uint16_t *)addr) / (uint32_t)normal);
2529		break;
2530	case sizeof (uint8_t):
2531		snprintf(fmt, sizeof(fmt), "{:%s/%%d}", pd->dtpa_keyname);
2532		xo_emit(fmt, *((uint8_t *)addr) / (uint32_t)normal);
2533		break;
2534	default:
2535		s = dt_format_bytes_get(dtp, addr, size);
2536		if (s == NULL)
2537			return (-1);
2538
2539		xo_emit("{:value/%s}", s);
2540		dt_free(dtp, s);
2541		break;
2542	}
2543
2544	return (0);
2545}
2546
2547static int
2548dt_print_datum(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
2549    caddr_t addr, size_t size, const dtrace_aggdata_t *aggdata,
2550    uint64_t normal, dt_print_aggdata_t *pd)
2551{
2552	int err, width;
2553	dtrace_actkind_t act = rec->dtrd_action;
2554	boolean_t packed = pd->dtpa_agghist || pd->dtpa_aggpack;
2555	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2556
2557	static struct {
2558		size_t size;
2559		int width;
2560		int packedwidth;
2561	} *fmt, fmttab[] = {
2562		{ sizeof (uint8_t),	3,	3 },
2563		{ sizeof (uint16_t),	5,	5 },
2564		{ sizeof (uint32_t),	8,	8 },
2565		{ sizeof (uint64_t),	16,	16 },
2566		{ 0,			-50,	16 }
2567	};
2568
2569	if (packed && pd->dtpa_agghisthdr != agg->dtagd_varid) {
2570		dtrace_recdesc_t *r;
2571
2572		width = 0;
2573
2574		/*
2575		 * To print our quantization header for either an agghist or
2576		 * aggpack aggregation, we need to iterate through all of our
2577		 * of our records to determine their width.
2578		 */
2579		for (r = rec; !DTRACEACT_ISAGG(r->dtrd_action); r++) {
2580			for (fmt = fmttab; fmt->size &&
2581			    fmt->size != r->dtrd_size; fmt++)
2582				continue;
2583
2584			width += fmt->packedwidth + 1;
2585		}
2586
2587		if (pd->dtpa_agghist) {
2588			if (dt_print_quanthdr(dtp, fp, width) < 0)
2589				return (-1);
2590		} else {
2591			if (dt_print_quanthdr_packed(dtp, fp,
2592			    width, aggdata, r->dtrd_action) < 0)
2593				return (-1);
2594		}
2595
2596		pd->dtpa_agghisthdr = agg->dtagd_varid;
2597	}
2598
2599	if (pd->dtpa_agghist && DTRACEACT_ISAGG(act)) {
2600		char positives = aggdata->dtada_flags & DTRACE_A_HASPOSITIVES;
2601		char negatives = aggdata->dtada_flags & DTRACE_A_HASNEGATIVES;
2602		int64_t val;
2603
2604		assert(act == DTRACEAGG_SUM || act == DTRACEAGG_COUNT);
2605		val = (long long)*((uint64_t *)addr);
2606
2607		if (dt_printf(dtp, fp, " ") < 0)
2608			return (-1);
2609
2610		return (dt_print_quantline(dtp, fp, val, normal,
2611		    aggdata->dtada_total, positives, negatives));
2612	}
2613
2614	if (pd->dtpa_aggpack && DTRACEACT_ISAGG(act)) {
2615		switch (act) {
2616		case DTRACEAGG_QUANTIZE:
2617			return (dt_print_quantize_packed(dtp,
2618			    fp, addr, size, aggdata));
2619		case DTRACEAGG_LQUANTIZE:
2620			return (dt_print_lquantize_packed(dtp,
2621			    fp, addr, size, aggdata));
2622		default:
2623			break;
2624		}
2625	}
2626
2627	switch (act) {
2628	case DTRACEACT_STACK:
2629		return (dt_print_stack(dtp, fp, NULL, addr,
2630		    rec->dtrd_arg, rec->dtrd_size / rec->dtrd_arg));
2631
2632	case DTRACEACT_USTACK:
2633	case DTRACEACT_JSTACK:
2634		return (dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg));
2635
2636	case DTRACEACT_USYM:
2637	case DTRACEACT_UADDR:
2638		return (dt_print_usym(dtp, fp, addr, act));
2639
2640	case DTRACEACT_UMOD:
2641		return (dt_print_umod(dtp, fp, NULL, addr));
2642
2643	case DTRACEACT_SYM:
2644		return (dt_print_sym(dtp, fp, NULL, addr));
2645
2646	case DTRACEACT_MOD:
2647		return (dt_print_mod(dtp, fp, NULL, addr));
2648
2649	case DTRACEAGG_QUANTIZE:
2650		return (dt_print_quantize(dtp, fp, addr, size, normal));
2651
2652	case DTRACEAGG_LQUANTIZE:
2653		return (dt_print_lquantize(dtp, fp, addr, size, normal));
2654
2655	case DTRACEAGG_LLQUANTIZE:
2656		return (dt_print_llquantize(dtp, fp, addr, size, normal));
2657
2658	case DTRACEAGG_AVG:
2659		return (dt_print_average(dtp, fp, addr, size, normal));
2660
2661	case DTRACEAGG_STDDEV:
2662		return (dt_print_stddev(dtp, fp, addr, size, normal));
2663
2664	default:
2665		break;
2666	}
2667
2668	for (fmt = fmttab; fmt->size && fmt->size != size; fmt++)
2669		continue;
2670
2671	width = packed ? fmt->packedwidth : fmt->width;
2672
2673	switch (size) {
2674	case sizeof (uint64_t):
2675		err = dt_printf(dtp, fp, " %*lld", width,
2676		    /* LINTED - alignment */
2677		    (long long)*((uint64_t *)addr) / normal);
2678		break;
2679	case sizeof (uint32_t):
2680		/* LINTED - alignment */
2681		err = dt_printf(dtp, fp, " %*d", width, *((uint32_t *)addr) /
2682		    (uint32_t)normal);
2683		break;
2684	case sizeof (uint16_t):
2685		/* LINTED - alignment */
2686		err = dt_printf(dtp, fp, " %*d", width, *((uint16_t *)addr) /
2687		    (uint32_t)normal);
2688		break;
2689	case sizeof (uint8_t):
2690		err = dt_printf(dtp, fp, " %*d", width, *((uint8_t *)addr) /
2691		    (uint32_t)normal);
2692		break;
2693	default:
2694		err = dt_print_bytes(dtp, fp, addr, size, width, 0, 0);
2695		break;
2696	}
2697
2698	return (err);
2699}
2700
2701int
2702dt_format_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
2703{
2704	int i, aggact = 0;
2705	dt_print_aggdata_t *pd = arg;
2706	const dtrace_aggdata_t *aggdata = aggsdata[0];
2707	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2708	dtrace_hdl_t *dtp = pd->dtpa_dtp;
2709	dtrace_recdesc_t *rec;
2710	dtrace_actkind_t act;
2711	caddr_t addr;
2712	size_t size;
2713
2714	if (pd->dtpa_aggname == NULL)
2715		pd->dtpa_aggname = agg->dtagd_name;
2716
2717	xo_open_instance("aggregation-data");
2718	strcpy(pd->dtpa_keyname, "value");
2719	xo_open_list("keys");
2720
2721	/*
2722	 * Iterate over each record description in the key, printing the traced
2723	 * data, skipping the first datum (the tuple member created by the
2724	 * compiler).
2725	 */
2726	for (i = 1; i < agg->dtagd_nrecs; i++) {
2727		rec = &agg->dtagd_rec[i];
2728		act = rec->dtrd_action;
2729		addr = aggdata->dtada_data + rec->dtrd_offset;
2730		size = rec->dtrd_size;
2731
2732		if (DTRACEACT_ISAGG(act)) {
2733			aggact = i;
2734			break;
2735		}
2736
2737		xo_open_instance("keys");
2738		if (dt_format_datum(dtp, rec, addr,
2739		    size, aggdata, 1, pd) < 0) {
2740			xo_close_instance("keys");
2741			xo_close_instance("aggregation-data");
2742			return (-1);
2743		}
2744		xo_close_instance("keys");
2745
2746		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
2747		    DTRACE_BUFDATA_AGGKEY) < 0) {
2748			xo_close_instance("aggregation-data");
2749			return (-1);
2750		}
2751	}
2752	xo_close_list("keys");
2753
2754	assert(aggact != 0);
2755
2756	for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {
2757		uint64_t normal;
2758
2759		aggdata = aggsdata[i];
2760		agg = aggdata->dtada_desc;
2761		rec = &agg->dtagd_rec[aggact];
2762		act = rec->dtrd_action;
2763		addr = aggdata->dtada_data + rec->dtrd_offset;
2764		size = rec->dtrd_size;
2765
2766		assert(DTRACEACT_ISAGG(act));
2767
2768		switch (act) {
2769		case DTRACEAGG_MIN:
2770			strcpy(pd->dtpa_keyname, "min");
2771			break;
2772		case DTRACEAGG_MAX:
2773			strcpy(pd->dtpa_keyname, "max");
2774			break;
2775		case DTRACEAGG_COUNT:
2776			strcpy(pd->dtpa_keyname, "count");
2777			break;
2778		case DTRACEAGG_SUM:
2779			strcpy(pd->dtpa_keyname, "sum");
2780			break;
2781		default:
2782			strcpy(pd->dtpa_keyname, "UNKNOWN");
2783			break;
2784		}
2785
2786		normal = aggdata->dtada_normal;
2787
2788		if (dt_format_datum(dtp, rec, addr, size,
2789		    aggdata, normal, pd) < 0) {
2790			xo_close_instance("aggregation-data");
2791			return (-1);
2792		}
2793
2794		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
2795		    DTRACE_BUFDATA_AGGVAL) < 0) {
2796			xo_close_instance("aggregation-data");
2797			return (-1);
2798		}
2799
2800		if (!pd->dtpa_allunprint)
2801			agg->dtagd_flags |= DTRACE_AGD_PRINTED;
2802	}
2803
2804	if (dt_buffered_flush(dtp, NULL, NULL, aggdata,
2805	    DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0) {
2806		xo_close_instance("aggregation-data");
2807		return (-1);
2808	}
2809
2810	xo_close_instance("aggregation-data");
2811	return (0);
2812}
2813
2814int
2815dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
2816{
2817	int i, aggact = 0;
2818	dt_print_aggdata_t *pd = arg;
2819	const dtrace_aggdata_t *aggdata = aggsdata[0];
2820	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2821	FILE *fp = pd->dtpa_fp;
2822	dtrace_hdl_t *dtp = pd->dtpa_dtp;
2823	dtrace_recdesc_t *rec;
2824	dtrace_actkind_t act;
2825	caddr_t addr;
2826	size_t size;
2827
2828	pd->dtpa_agghist = (aggdata->dtada_flags & DTRACE_A_TOTAL);
2829	pd->dtpa_aggpack = (aggdata->dtada_flags & DTRACE_A_MINMAXBIN);
2830
2831	/*
2832	 * Iterate over each record description in the key, printing the traced
2833	 * data, skipping the first datum (the tuple member created by the
2834	 * compiler).
2835	 */
2836	for (i = 1; i < agg->dtagd_nrecs; i++) {
2837		rec = &agg->dtagd_rec[i];
2838		act = rec->dtrd_action;
2839		addr = aggdata->dtada_data + rec->dtrd_offset;
2840		size = rec->dtrd_size;
2841
2842		if (DTRACEACT_ISAGG(act)) {
2843			aggact = i;
2844			break;
2845		}
2846
2847		if (dt_print_datum(dtp, fp, rec, addr,
2848		    size, aggdata, 1, pd) < 0)
2849			return (-1);
2850
2851		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
2852		    DTRACE_BUFDATA_AGGKEY) < 0)
2853			return (-1);
2854	}
2855
2856	assert(aggact != 0);
2857
2858	for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {
2859		uint64_t normal;
2860
2861		aggdata = aggsdata[i];
2862		agg = aggdata->dtada_desc;
2863		rec = &agg->dtagd_rec[aggact];
2864		act = rec->dtrd_action;
2865		addr = aggdata->dtada_data + rec->dtrd_offset;
2866		size = rec->dtrd_size;
2867
2868		assert(DTRACEACT_ISAGG(act));
2869		normal = aggdata->dtada_normal;
2870
2871		if (dt_print_datum(dtp, fp, rec, addr,
2872		    size, aggdata, normal, pd) < 0)
2873			return (-1);
2874
2875		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
2876		    DTRACE_BUFDATA_AGGVAL) < 0)
2877			return (-1);
2878
2879		if (!pd->dtpa_allunprint)
2880			agg->dtagd_flags |= DTRACE_AGD_PRINTED;
2881	}
2882
2883	if (!pd->dtpa_agghist && !pd->dtpa_aggpack) {
2884		if (dt_printf(dtp, fp, "\n") < 0)
2885			return (-1);
2886	}
2887
2888	if (dt_buffered_flush(dtp, NULL, NULL, aggdata,
2889	    DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0)
2890		return (-1);
2891
2892	return (0);
2893}
2894
2895int
2896dt_format_agg(const dtrace_aggdata_t *aggdata, void *arg)
2897{
2898	dt_print_aggdata_t *pd = arg;
2899	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2900	dtrace_aggvarid_t aggvarid = pd->dtpa_id;
2901
2902	if (pd->dtpa_allunprint) {
2903		if (agg->dtagd_flags & DTRACE_AGD_PRINTED)
2904			return (0);
2905	} else {
2906		/*
2907		 * If we're not printing all unprinted aggregations, then the
2908		 * aggregation variable ID denotes a specific aggregation
2909		 * variable that we should print -- skip any other aggregations
2910		 * that we encounter.
2911		 */
2912		if (agg->dtagd_nrecs == 0)
2913			return (0);
2914
2915		if (aggvarid != agg->dtagd_varid)
2916			return (0);
2917	}
2918
2919	return (dt_format_aggs(&aggdata, 1, arg));
2920}
2921
2922int
2923dt_print_agg(const dtrace_aggdata_t *aggdata, void *arg)
2924{
2925	dt_print_aggdata_t *pd = arg;
2926	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2927	dtrace_aggvarid_t aggvarid = pd->dtpa_id;
2928
2929	if (pd->dtpa_allunprint) {
2930		if (agg->dtagd_flags & DTRACE_AGD_PRINTED)
2931			return (0);
2932	} else {
2933		/*
2934		 * If we're not printing all unprinted aggregations, then the
2935		 * aggregation variable ID denotes a specific aggregation
2936		 * variable that we should print -- skip any other aggregations
2937		 * that we encounter.
2938		 */
2939		if (agg->dtagd_nrecs == 0)
2940			return (0);
2941
2942		if (aggvarid != agg->dtagd_varid)
2943			return (0);
2944	}
2945
2946	return (dt_print_aggs(&aggdata, 1, arg));
2947}
2948
2949int
2950dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,
2951    const char *option, const char *value)
2952{
2953	int len, rval;
2954	char *msg;
2955	const char *errstr;
2956	dtrace_setoptdata_t optdata;
2957
2958	bzero(&optdata, sizeof (optdata));
2959	(void) dtrace_getopt(dtp, option, &optdata.dtsda_oldval);
2960
2961	if (dtrace_setopt(dtp, option, value) == 0) {
2962		(void) dtrace_getopt(dtp, option, &optdata.dtsda_newval);
2963		optdata.dtsda_probe = data;
2964		optdata.dtsda_option = option;
2965		optdata.dtsda_handle = dtp;
2966
2967		if ((rval = dt_handle_setopt(dtp, &optdata)) != 0)
2968			return (rval);
2969
2970		return (0);
2971	}
2972
2973	errstr = dtrace_errmsg(dtp, dtrace_errno(dtp));
2974	len = strlen(option) + strlen(value) + strlen(errstr) + 80;
2975	msg = alloca(len);
2976
2977	(void) snprintf(msg, len, "couldn't set option \"%s\" to \"%s\": %s\n",
2978	    option, value, errstr);
2979
2980	if ((rval = dt_handle_liberr(dtp, data, msg)) == 0)
2981		return (0);
2982
2983	return (rval);
2984}
2985
2986/*
2987 * Helper functions to help maintain style(9) in dt_consume_cpu().
2988 */
2989static int
2990dt_oformat_agg_sorted(dtrace_hdl_t *dtp, dtrace_aggregate_f *func,
2991    dt_print_aggdata_t *pd)
2992{
2993	int r;
2994
2995	r = dtrace_aggregate_walk_sorted(dtp, dt_format_agg, pd);
2996	if (r < 0) {
2997		xo_close_list("aggregation-data");
2998		xo_emit("{:aggregation-name/%s}", pd->dtpa_aggname);
2999		xo_close_instance("output");
3000	}
3001
3002	return (r);
3003}
3004
3005static void
3006dt_oformat_agg_name(dt_print_aggdata_t *pd)
3007{
3008
3009	xo_close_list("aggregation-data");
3010	xo_emit("{:aggregation-name/%s}", pd->dtpa_aggname);
3011}
3012
3013static int
3014dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu,
3015    dtrace_bufdesc_t *buf, boolean_t just_one,
3016    dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg)
3017{
3018	dtrace_epid_t id;
3019	size_t offs;
3020	int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
3021	int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
3022	int rval, i, n;
3023	uint64_t tracememsize = 0;
3024	dtrace_probedata_t data;
3025	uint64_t drops;
3026	size_t skip_format;
3027
3028	bzero(&data, sizeof (data));
3029	data.dtpda_handle = dtp;
3030	data.dtpda_cpu = cpu;
3031	data.dtpda_flow = dtp->dt_flow;
3032	data.dtpda_indent = dtp->dt_indent;
3033	data.dtpda_prefix = dtp->dt_prefix;
3034
3035	for (offs = buf->dtbd_oldest; offs < buf->dtbd_size; ) {
3036		dtrace_eprobedesc_t *epd;
3037
3038		/*
3039		 * We're guaranteed to have an ID.
3040		 */
3041		id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
3042
3043		if (id == DTRACE_EPIDNONE) {
3044			/*
3045			 * This is filler to assure proper alignment of the
3046			 * next record; we simply ignore it.
3047			 */
3048			offs += sizeof (id);
3049			continue;
3050		}
3051
3052		if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc,
3053		    &data.dtpda_pdesc)) != 0)
3054			return (rval);
3055
3056		epd = data.dtpda_edesc;
3057		data.dtpda_data = buf->dtbd_data + offs;
3058		data.dtpda_timestamp = DTRACE_RECORD_LOAD_TIMESTAMP(
3059		    (struct dtrace_rechdr *)data.dtpda_data);
3060
3061		if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) {
3062			rval = dt_handle(dtp, &data);
3063
3064			if (rval == DTRACE_CONSUME_NEXT)
3065				goto nextepid;
3066
3067			if (rval == DTRACE_CONSUME_ERROR)
3068				return (-1);
3069		}
3070
3071		if (flow)
3072			(void) dt_flowindent(dtp, &data, dtp->dt_last_epid,
3073			    buf, offs);
3074
3075		if (dtp->dt_oformat)
3076			xo_open_instance("probes");
3077		rval = (*efunc)(&data, arg);
3078
3079		if (flow) {
3080			if (data.dtpda_flow == DTRACEFLOW_ENTRY)
3081				data.dtpda_indent += 2;
3082		}
3083
3084		if (rval == DTRACE_CONSUME_NEXT)
3085			goto nextepid;
3086
3087		if (rval == DTRACE_CONSUME_ABORT)
3088			return (dt_set_errno(dtp, EDT_DIRABORT));
3089
3090		if (rval != DTRACE_CONSUME_THIS)
3091			return (dt_set_errno(dtp, EDT_BADRVAL));
3092
3093		skip_format = 0;
3094		if (dtp->dt_oformat)
3095			xo_open_list("output");
3096		for (i = 0; i < epd->dtepd_nrecs; i++) {
3097			caddr_t addr;
3098			dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
3099			dtrace_actkind_t act = rec->dtrd_action;
3100
3101			if (skip_format > 0)
3102				skip_format--;
3103
3104			data.dtpda_data = buf->dtbd_data + offs +
3105			    rec->dtrd_offset;
3106			addr = data.dtpda_data;
3107
3108			if (act == DTRACEACT_LIBACT) {
3109				uint64_t arg = rec->dtrd_arg;
3110				dtrace_aggvarid_t id;
3111
3112				switch (arg) {
3113				case DT_ACT_CLEAR:
3114					/* LINTED - alignment */
3115					id = *((dtrace_aggvarid_t *)addr);
3116					(void) dtrace_aggregate_walk(dtp,
3117					    dt_clear_agg, &id);
3118					continue;
3119
3120				case DT_ACT_DENORMALIZE:
3121					/* LINTED - alignment */
3122					id = *((dtrace_aggvarid_t *)addr);
3123					(void) dtrace_aggregate_walk(dtp,
3124					    dt_denormalize_agg, &id);
3125					continue;
3126
3127				case DT_ACT_FTRUNCATE:
3128					if (fp == NULL)
3129						continue;
3130
3131					(void) fflush(fp);
3132					(void) ftruncate(fileno(fp), 0);
3133					(void) fseeko(fp, 0, SEEK_SET);
3134					continue;
3135
3136				case DT_ACT_NORMALIZE:
3137					if (i == epd->dtepd_nrecs - 1)
3138						return (dt_set_errno(dtp,
3139						    EDT_BADNORMAL));
3140
3141					if (dt_normalize(dtp,
3142					    buf->dtbd_data + offs, rec) != 0)
3143						return (-1);
3144
3145					i++;
3146					continue;
3147
3148				case DT_ACT_SETOPT: {
3149					uint64_t *opts = dtp->dt_options;
3150					dtrace_recdesc_t *valrec;
3151					uint32_t valsize;
3152					caddr_t val;
3153					int rv;
3154
3155					if (i == epd->dtepd_nrecs - 1) {
3156						return (dt_set_errno(dtp,
3157						    EDT_BADSETOPT));
3158					}
3159
3160					valrec = &epd->dtepd_rec[++i];
3161					valsize = valrec->dtrd_size;
3162
3163					if (valrec->dtrd_action != act ||
3164					    valrec->dtrd_arg != arg) {
3165						return (dt_set_errno(dtp,
3166						    EDT_BADSETOPT));
3167					}
3168
3169					if (valsize > sizeof (uint64_t)) {
3170						val = buf->dtbd_data + offs +
3171						    valrec->dtrd_offset;
3172					} else {
3173						val = "1";
3174					}
3175
3176					rv = dt_setopt(dtp, &data, addr, val);
3177
3178					if (rv != 0)
3179						return (-1);
3180
3181					flow = (opts[DTRACEOPT_FLOWINDENT] !=
3182					    DTRACEOPT_UNSET);
3183					quiet = (opts[DTRACEOPT_QUIET] !=
3184					    DTRACEOPT_UNSET);
3185
3186					continue;
3187				}
3188
3189				case DT_ACT_TRUNC:
3190					if (i == epd->dtepd_nrecs - 1)
3191						return (dt_set_errno(dtp,
3192						    EDT_BADTRUNC));
3193
3194					if (dt_trunc(dtp,
3195					    buf->dtbd_data + offs, rec) != 0)
3196						return (-1);
3197
3198					i++;
3199					continue;
3200
3201				default:
3202					continue;
3203				}
3204			}
3205
3206			if (act == DTRACEACT_TRACEMEM_DYNSIZE &&
3207			    rec->dtrd_size == sizeof (uint64_t)) {
3208			    	/* LINTED - alignment */
3209				tracememsize = *((unsigned long long *)addr);
3210				continue;
3211			}
3212
3213			rval = (*rfunc)(&data, rec, arg);
3214
3215			if (rval == DTRACE_CONSUME_NEXT)
3216				continue;
3217
3218			if (rval == DTRACE_CONSUME_ABORT)
3219				return (dt_set_errno(dtp, EDT_DIRABORT));
3220
3221			if (rval != DTRACE_CONSUME_THIS)
3222				return (dt_set_errno(dtp, EDT_BADRVAL));
3223
3224			if (dtp->dt_oformat && rec->dtrd_size > 0)
3225				xo_open_instance("output");
3226			if (act == DTRACEACT_STACK) {
3227				int depth = rec->dtrd_arg;
3228
3229				if (dtp->dt_oformat) {
3230					if (dt_format_stack(dtp, addr, depth,
3231					    rec->dtrd_size / depth) < 0) {
3232						xo_close_instance("output");
3233						return (-1);
3234					}
3235				} else {
3236					if (dt_print_stack(dtp,
3237					    fp, NULL, addr, depth,
3238					    rec->dtrd_size / depth) < 0)
3239					return (-1);
3240				}
3241				goto nextrec;
3242			}
3243
3244			if (act == DTRACEACT_USTACK ||
3245			    act == DTRACEACT_JSTACK) {
3246				if (dtp->dt_oformat) {
3247					if (dt_format_ustack(dtp, addr,
3248					    rec->dtrd_arg) < 0) {
3249						xo_close_instance("output");
3250						return (-1);
3251					}
3252				} else {
3253					if (dt_print_ustack(dtp, fp, NULL,
3254					    addr, rec->dtrd_arg) < 0)
3255						return (-1);
3256				}
3257				goto nextrec;
3258			}
3259
3260			if (act == DTRACEACT_SYM) {
3261				if (dtp->dt_oformat) {
3262					if (dt_format_sym(dtp, addr) < 0) {
3263						xo_close_instance("output");
3264						return (-1);
3265					}
3266				} else {
3267					if (dt_print_sym(dtp, fp, NULL, addr) < 0)
3268						return (-1);
3269				}
3270				goto nextrec;
3271			}
3272
3273			if (act == DTRACEACT_MOD) {
3274				if (dtp->dt_oformat) {
3275					if (dt_format_mod(dtp, addr) < 0) {
3276						xo_close_instance("output");
3277						return (-1);
3278					}
3279				} else {
3280					if (dt_print_mod(dtp, fp, NULL, addr) < 0)
3281						return (-1);
3282				}
3283				goto nextrec;
3284			}
3285
3286			if (act == DTRACEACT_USYM || act == DTRACEACT_UADDR) {
3287				if (dtp->dt_oformat) {
3288					if (dt_format_usym(dtp, addr, act) < 0) {
3289						xo_close_instance("output");
3290						return (-1);
3291					}
3292				} else {
3293					if (dt_print_usym(dtp, fp, addr, act) < 0)
3294						return (-1);
3295				}
3296				goto nextrec;
3297			}
3298
3299			if (act == DTRACEACT_UMOD) {
3300				if (dtp->dt_oformat) {
3301					if (dt_format_umod(dtp, addr) < 0) {
3302						xo_close_instance("output");
3303						return (-1);
3304					}
3305				} else {
3306					if (dt_print_umod(dtp, fp, NULL, addr) < 0)
3307						return (-1);
3308				}
3309				goto nextrec;
3310			}
3311
3312			if (act == DTRACEACT_PRINTM) {
3313				if (dtp->dt_oformat) {
3314					if (dt_format_memory(dtp, addr) < 0) {
3315						xo_close_instance("output");
3316						return (-1);
3317					}
3318				} else {
3319					if (dt_print_memory(dtp, fp, addr) < 0)
3320						return (-1);
3321				}
3322				goto nextrec;
3323			}
3324
3325			if (dtp->dt_oformat == DTRACE_OFORMAT_TEXT &&
3326			    DTRACEACT_ISPRINTFLIKE(act)) {
3327				void *fmtdata;
3328				int (*func)(dtrace_hdl_t *, FILE *, void *,
3329				    const dtrace_probedata_t *,
3330				    const dtrace_recdesc_t *, uint_t,
3331				    const void *buf, size_t);
3332
3333				if ((fmtdata = dt_format_lookup(dtp,
3334				    rec->dtrd_format)) == NULL)
3335					goto nofmt;
3336
3337				switch (act) {
3338				case DTRACEACT_PRINTF:
3339					func = dtrace_fprintf;
3340					break;
3341				case DTRACEACT_PRINTA:
3342					func = dtrace_fprinta;
3343					break;
3344				case DTRACEACT_SYSTEM:
3345					func = dtrace_system;
3346					break;
3347				case DTRACEACT_FREOPEN:
3348					func = dtrace_freopen;
3349					break;
3350				}
3351
3352				n = (*func)(dtp, fp, fmtdata, &data,
3353				    rec, epd->dtepd_nrecs - i,
3354				    (uchar_t *)buf->dtbd_data + offs,
3355				    buf->dtbd_size - offs);
3356
3357				if (n < 0)
3358					return (-1); /* errno is set for us */
3359
3360				if (n > 0)
3361					i += n - 1;
3362				goto nextrec;
3363			}
3364
3365			/*
3366			 * We don't care about a formatted printa, system or
3367			 * freopen for oformat.
3368			 */
3369			if (dtp->dt_oformat && act == DTRACEACT_PRINTF &&
3370			    skip_format == 0) {
3371				void *fmtdata;
3372				if ((fmtdata = dt_format_lookup(dtp,
3373				    rec->dtrd_format)) == NULL)
3374					goto nofmt;
3375
3376				n = dtrace_sprintf(dtp, fp, fmtdata, rec,
3377				    epd->dtepd_nrecs - i,
3378				    (uchar_t *)buf->dtbd_data + offs,
3379				    buf->dtbd_size - offs);
3380
3381				if (n < 0) {
3382					xo_close_instance("output");
3383					return (-1); /* errno is set for us */
3384				}
3385
3386				xo_emit("{:message/%s}", dtp->dt_sprintf_buf);
3387				skip_format += n;
3388
3389				/*
3390				 * We want the "message" object to be its own
3391				 * thing, but we still want to process the
3392				 * current DIFEXPR in case there is a value
3393				 * attached to it. If there is, we need to
3394				 * re-open a new output instance, as otherwise
3395				 * the message ends up bundled with the first
3396				 * value.
3397				 *
3398				 * XXX: There is an edge case where a
3399				 * printf("hello"); will produce a DIFO that
3400				 * returns 0 attached to it and we have no good
3401				 * way to determine if this 0 value is because
3402				 * there's no real data attached to the printf
3403				 * as an argument, or it's because the argument
3404				 * actually returns 0.
3405				 */
3406				if (skip_format == 0)
3407					goto nextrec;
3408
3409				xo_close_instance("output");
3410				xo_open_instance("output");
3411			}
3412
3413			/*
3414			 * If this is a DIF expression, and the record has a
3415			 * format set, this indicates we have a CTF type name
3416			 * associated with the data and we should try to print
3417			 * it out by type.
3418			 */
3419			if (act == DTRACEACT_DIFEXPR) {
3420				const char *strdata = dt_strdata_lookup(dtp,
3421				    rec->dtrd_format);
3422				if (strdata != NULL) {
3423					if (dtp->dt_oformat)
3424						n = dtrace_format_print(dtp, fp,
3425						    strdata, addr,
3426						    rec->dtrd_size);
3427					else
3428						n = dtrace_print(dtp, fp,
3429						    strdata, addr,
3430						    rec->dtrd_size);
3431
3432					/*
3433					 * dtrace_print() will return -1 on
3434					 * error, or return the number of bytes
3435					 * consumed.  It will return 0 if the
3436					 * type couldn't be determined, and we
3437					 * should fall through to the normal
3438					 * trace method.
3439					 */
3440					if (n < 0) {
3441						if (dtp->dt_oformat)
3442							xo_close_instance(
3443							    "output");
3444						return (-1);
3445					}
3446
3447					if (n > 0)
3448						goto nextrec;
3449				}
3450			}
3451
3452nofmt:
3453			if (act == DTRACEACT_PRINTA) {
3454				dt_print_aggdata_t pd;
3455				dtrace_aggvarid_t *aggvars;
3456				int j, naggvars = 0;
3457				size_t size = ((epd->dtepd_nrecs - i) *
3458				    sizeof (dtrace_aggvarid_t));
3459
3460				if ((aggvars = dt_alloc(dtp, size)) == NULL) {
3461					if (dtp->dt_oformat)
3462						xo_close_instance("output");
3463					return (-1);
3464				}
3465
3466				/*
3467				 * This might be a printa() with multiple
3468				 * aggregation variables.  We need to scan
3469				 * forward through the records until we find
3470				 * a record from a different statement.
3471				 */
3472				for (j = i; j < epd->dtepd_nrecs; j++) {
3473					dtrace_recdesc_t *nrec;
3474					caddr_t naddr;
3475
3476					nrec = &epd->dtepd_rec[j];
3477
3478					if (nrec->dtrd_uarg != rec->dtrd_uarg)
3479						break;
3480
3481					if (nrec->dtrd_action != act) {
3482						if (dtp->dt_oformat)
3483							xo_close_instance(
3484							    "output");
3485						return (dt_set_errno(dtp,
3486						    EDT_BADAGG));
3487					}
3488
3489					naddr = buf->dtbd_data + offs +
3490					    nrec->dtrd_offset;
3491
3492					aggvars[naggvars++] =
3493					    /* LINTED - alignment */
3494					    *((dtrace_aggvarid_t *)naddr);
3495				}
3496
3497				i = j - 1;
3498				bzero(&pd, sizeof (pd));
3499				pd.dtpa_dtp = dtp;
3500				pd.dtpa_fp = fp;
3501
3502				assert(naggvars >= 1);
3503
3504				if (dtp->dt_oformat)
3505					xo_open_list("aggregation-data");
3506				if (naggvars == 1) {
3507					pd.dtpa_id = aggvars[0];
3508					dt_free(dtp, aggvars);
3509
3510					if (dtp->dt_oformat) {
3511						n = dt_oformat_agg_sorted(dtp,
3512						    dt_format_agg, &pd);
3513						if (n < 0)
3514							return (-1);
3515					} else {
3516						if (dt_printf(dtp, fp, "\n") < 0 ||
3517						    dtrace_aggregate_walk_sorted(dtp,
3518						    dt_print_agg, &pd) < 0)
3519							return (-1);
3520					}
3521
3522					if (dtp->dt_oformat)
3523						dt_oformat_agg_name(&pd);
3524					goto nextrec;
3525				}
3526
3527				if (dtp->dt_oformat) {
3528					if (dtrace_aggregate_walk_joined(dtp,
3529					    aggvars, naggvars,
3530					    dt_format_aggs, &pd) < 0) {
3531						dt_oformat_agg_name(&pd);
3532						xo_close_instance("output");
3533						dt_free(dtp, aggvars);
3534						return (-1);
3535					}
3536				} else {
3537					if (dt_printf(dtp, fp, "\n") < 0 ||
3538					    dtrace_aggregate_walk_joined(dtp,
3539					    aggvars, naggvars,
3540					    dt_print_aggs, &pd) < 0) {
3541						dt_free(dtp, aggvars);
3542						return (-1);
3543					}
3544				}
3545
3546				if (dtp->dt_oformat)
3547					dt_oformat_agg_name(&pd);
3548				dt_free(dtp, aggvars);
3549				goto nextrec;
3550			}
3551
3552			if (act == DTRACEACT_TRACEMEM) {
3553				if (tracememsize == 0 ||
3554				    tracememsize > rec->dtrd_size) {
3555					tracememsize = rec->dtrd_size;
3556				}
3557
3558				if (dtp->dt_oformat) {
3559					char *s;
3560
3561					s = dt_format_bytes_get(dtp, addr,
3562					    tracememsize);
3563					n = xo_emit("{:tracemem/%s}", s);
3564					dt_free(dtp, s);
3565				} else {
3566					n = dt_print_bytes(dtp, fp, addr,
3567					    tracememsize, -33, quiet, 1);
3568				}
3569
3570				tracememsize = 0;
3571
3572				if (n < 0)
3573					return (-1);
3574
3575				goto nextrec;
3576			}
3577
3578			switch (rec->dtrd_size) {
3579			case sizeof (uint64_t):
3580				if (dtp->dt_oformat) {
3581					xo_emit("{:value/%lld}",
3582					    *((unsigned long long *)addr));
3583					n = 0;
3584				} else
3585					n = dt_printf(dtp, fp,
3586					    quiet ? "%lld" : " %16lld",
3587					    /* LINTED - alignment */
3588					    *((unsigned long long *)addr));
3589				break;
3590			case sizeof (uint32_t):
3591				if (dtp->dt_oformat) {
3592					xo_emit("{:value/%d}",
3593					    *((uint32_t *)addr));
3594					n = 0;
3595				} else
3596					n = dt_printf(dtp, fp,
3597					    quiet ? "%d" : " %8d",
3598					    /* LINTED - alignment */
3599					    *((uint32_t *)addr));
3600				break;
3601			case sizeof (uint16_t):
3602				if (dtp->dt_oformat) {
3603					xo_emit("{:value/%d}",
3604					    *((uint16_t *)addr));
3605					n = 0;
3606				} else
3607					n = dt_printf(dtp, fp,
3608					    quiet ? "%d" : " %5d",
3609					    /* LINTED - alignment */
3610					    *((uint16_t *)addr));
3611				break;
3612			case sizeof (uint8_t):
3613				if (dtp->dt_oformat) {
3614					xo_emit("{:value/%d}",
3615					    *((uint8_t *)addr));
3616					n = 0;
3617				} else
3618					n = dt_printf(dtp, fp,
3619					    quiet ? "%d" : " %3d",
3620					    *((uint8_t *)addr));
3621				break;
3622			default:
3623				if (dtp->dt_oformat && rec->dtrd_size > 0) {
3624					char *s;
3625
3626					s = dt_format_bytes_get(dtp, addr,
3627					    rec->dtrd_size);
3628					xo_emit("{:value/%s}", s);
3629					dt_free(dtp, s);
3630					n = 0;
3631				} else {
3632					n = dt_print_bytes(dtp, fp, addr,
3633					    rec->dtrd_size, -33, quiet, 0);
3634				}
3635				break;
3636			}
3637
3638			if (dtp->dt_oformat && rec->dtrd_size > 0)
3639				xo_close_instance("output");
3640
3641			if (n < 0)
3642				return (-1); /* errno is set for us */
3643
3644nextrec:
3645			if (dt_buffered_flush(dtp, &data, rec, NULL, 0) < 0)
3646				return (-1); /* errno is set for us */
3647		}
3648
3649		/*
3650		 * Call the record callback with a NULL record to indicate
3651		 * that we're done processing this EPID.
3652		 */
3653		rval = (*rfunc)(&data, NULL, arg);
3654nextepid:
3655		offs += epd->dtepd_size;
3656		dtp->dt_last_epid = id;
3657
3658		if (dtp->dt_oformat) {
3659			xo_close_list("output");
3660			xo_close_instance("probes");
3661			xo_flush();
3662		}
3663		if (just_one) {
3664			buf->dtbd_oldest = offs;
3665			break;
3666		}
3667	}
3668
3669	dtp->dt_flow = data.dtpda_flow;
3670	dtp->dt_indent = data.dtpda_indent;
3671	dtp->dt_prefix = data.dtpda_prefix;
3672
3673	if ((drops = buf->dtbd_drops) == 0)
3674		return (0);
3675
3676	/*
3677	 * Explicitly zero the drops to prevent us from processing them again.
3678	 */
3679	buf->dtbd_drops = 0;
3680
3681	xo_open_instance("probes");
3682	dt_oformat_drop(dtp, cpu);
3683	rval = dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops);
3684	xo_close_instance("probes");
3685
3686	return (rval);
3687}
3688
3689/*
3690 * Reduce memory usage by shrinking the buffer if it's no more than half full.
3691 * Note, we need to preserve the alignment of the data at dtbd_oldest, which is
3692 * only 4-byte aligned.
3693 */
3694static void
3695dt_realloc_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf, int cursize)
3696{
3697	uint64_t used = buf->dtbd_size - buf->dtbd_oldest;
3698	if (used < cursize / 2) {
3699		int misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1);
3700		char *newdata = dt_alloc(dtp, used + misalign);
3701		if (newdata == NULL)
3702			return;
3703		bzero(newdata, misalign);
3704		bcopy(buf->dtbd_data + buf->dtbd_oldest,
3705		    newdata + misalign, used);
3706		dt_free(dtp, buf->dtbd_data);
3707		buf->dtbd_oldest = misalign;
3708		buf->dtbd_size = used + misalign;
3709		buf->dtbd_data = newdata;
3710	}
3711}
3712
3713/*
3714 * If the ring buffer has wrapped, the data is not in order.  Rearrange it
3715 * so that it is.  Note, we need to preserve the alignment of the data at
3716 * dtbd_oldest, which is only 4-byte aligned.
3717 */
3718static int
3719dt_unring_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf)
3720{
3721	int misalign;
3722	char *newdata, *ndp;
3723
3724	if (buf->dtbd_oldest == 0)
3725		return (0);
3726
3727	misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1);
3728	newdata = ndp = dt_alloc(dtp, buf->dtbd_size + misalign);
3729
3730	if (newdata == NULL)
3731		return (-1);
3732
3733	assert(0 == (buf->dtbd_size & (sizeof (uint64_t) - 1)));
3734
3735	bzero(ndp, misalign);
3736	ndp += misalign;
3737
3738	bcopy(buf->dtbd_data + buf->dtbd_oldest, ndp,
3739	    buf->dtbd_size - buf->dtbd_oldest);
3740	ndp += buf->dtbd_size - buf->dtbd_oldest;
3741
3742	bcopy(buf->dtbd_data, ndp, buf->dtbd_oldest);
3743
3744	dt_free(dtp, buf->dtbd_data);
3745	buf->dtbd_oldest = misalign;
3746	buf->dtbd_data = newdata;
3747	buf->dtbd_size += misalign;
3748
3749	return (0);
3750}
3751
3752static void
3753dt_put_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf)
3754{
3755	dt_free(dtp, buf->dtbd_data);
3756	dt_free(dtp, buf);
3757}
3758
3759/*
3760 * Returns 0 on success, in which case *cbp will be filled in if we retrieved
3761 * data, or NULL if there is no data for this CPU.
3762 * Returns -1 on failure and sets dt_errno.
3763 */
3764static int
3765dt_get_buf(dtrace_hdl_t *dtp, int cpu, dtrace_bufdesc_t **bufp)
3766{
3767	dtrace_optval_t size;
3768	dtrace_bufdesc_t *buf = dt_zalloc(dtp, sizeof (*buf));
3769	int error, rval;
3770
3771	if (buf == NULL)
3772		return (-1);
3773
3774	(void) dtrace_getopt(dtp, "bufsize", &size);
3775	buf->dtbd_data = dt_alloc(dtp, size);
3776	if (buf->dtbd_data == NULL) {
3777		dt_free(dtp, buf);
3778		return (-1);
3779	}
3780	buf->dtbd_size = size;
3781	buf->dtbd_cpu = cpu;
3782
3783#ifdef illumos
3784	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
3785#else
3786	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
3787#endif
3788		/*
3789		 * If we failed with ENOENT, it may be because the
3790		 * CPU was unconfigured -- this is okay.  Any other
3791		 * error, however, is unexpected.
3792		 */
3793		if (errno == ENOENT) {
3794			*bufp = NULL;
3795			rval = 0;
3796		} else
3797			rval = dt_set_errno(dtp, errno);
3798
3799		dt_put_buf(dtp, buf);
3800		return (rval);
3801	}
3802
3803	error = dt_unring_buf(dtp, buf);
3804	if (error != 0) {
3805		dt_put_buf(dtp, buf);
3806		return (error);
3807	}
3808	dt_realloc_buf(dtp, buf, size);
3809
3810	*bufp = buf;
3811	return (0);
3812}
3813
/*
 * State shared by the dt_consume_begin_*() interposition callbacks while
 * dt_consume_begin() makes its two passes over the BEGIN CPU's buffer.
 */
typedef struct dt_begin {
	dtrace_consume_probe_f *dtbgn_probefunc; /* caller's probe callback */
	dtrace_consume_rec_f *dtbgn_recfunc;	/* caller's record callback */
	void *dtbgn_arg;			/* argument for both callbacks */
	dtrace_handle_err_f *dtbgn_errhdlr;	/* saved dtp->dt_errhdlr */
	void *dtbgn_errarg;			/* saved dtp->dt_errarg */
	int dtbgn_beginonly;	/* non-zero: pass only dtrace:::BEGIN; 0: all but */
} dt_begin_t;
3822
3823static int
3824dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg)
3825{
3826	dt_begin_t *begin = arg;
3827	dtrace_probedesc_t *pd = data->dtpda_pdesc;
3828
3829	int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
3830	int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
3831
3832	if (begin->dtbgn_beginonly) {
3833		if (!(r1 && r2))
3834			return (DTRACE_CONSUME_NEXT);
3835	} else {
3836		if (r1 && r2)
3837			return (DTRACE_CONSUME_NEXT);
3838	}
3839
3840	/*
3841	 * We have a record that we're interested in.  Now call the underlying
3842	 * probe function...
3843	 */
3844	return (begin->dtbgn_probefunc(data, begin->dtbgn_arg));
3845}
3846
3847static int
3848dt_consume_begin_record(const dtrace_probedata_t *data,
3849    const dtrace_recdesc_t *rec, void *arg)
3850{
3851	dt_begin_t *begin = arg;
3852
3853	return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg));
3854}
3855
3856static int
3857dt_consume_begin_error(const dtrace_errdata_t *data, void *arg)
3858{
3859	dt_begin_t *begin = (dt_begin_t *)arg;
3860	dtrace_probedesc_t *pd = data->dteda_pdesc;
3861
3862	int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
3863	int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
3864
3865	if (begin->dtbgn_beginonly) {
3866		if (!(r1 && r2))
3867			return (DTRACE_HANDLE_OK);
3868	} else {
3869		if (r1 && r2)
3870			return (DTRACE_HANDLE_OK);
3871	}
3872
3873	return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg));
3874}
3875
3876static int
3877dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp,
3878    dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
3879{
3880	/*
3881	 * There's this idea that the BEGIN probe should be processed before
3882	 * everything else, and that the END probe should be processed after
3883	 * anything else.  In the common case, this is pretty easy to deal
3884	 * with.  However, a situation may arise where the BEGIN enabling and
3885	 * END enabling are on the same CPU, and some enabling in the middle
3886	 * occurred on a different CPU.  To deal with this (blech!) we need to
3887	 * consume the BEGIN buffer up until the end of the BEGIN probe, and
3888	 * then set it aside.  We will then process every other CPU, and then
3889	 * we'll return to the BEGIN CPU and process the rest of the data
3890	 * (which will inevitably include the END probe, if any).  Making this
3891	 * even more complicated (!) is the library's ERROR enabling.  Because
3892	 * this enabling is processed before we even get into the consume call
3893	 * back, any ERROR firing would result in the library's ERROR enabling
3894	 * being processed twice -- once in our first pass (for BEGIN probes),
3895	 * and again in our second pass (for everything but BEGIN probes).  To
3896	 * deal with this, we interpose on the ERROR handler to assure that we
3897	 * only process ERROR enablings induced by BEGIN enablings in the
3898	 * first pass, and that we only process ERROR enablings _not_ induced
3899	 * by BEGIN enablings in the second pass.
3900	 */
3901
3902	dt_begin_t begin;
3903	processorid_t cpu = dtp->dt_beganon;
3904	int rval, i;
3905	static int max_ncpus;
3906	dtrace_bufdesc_t *buf;
3907
3908	dtp->dt_beganon = -1;
3909
3910	if (dt_get_buf(dtp, cpu, &buf) != 0)
3911		return (-1);
3912	if (buf == NULL)
3913		return (0);
3914
3915	if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) {
3916		/*
3917		 * This is the simple case.  We're either not stopped, or if
3918		 * we are, we actually processed any END probes on another
3919		 * CPU.  We can simply consume this buffer and return.
3920		 */
3921		rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,
3922		    pf, rf, arg);
3923		dt_put_buf(dtp, buf);
3924		return (rval);
3925	}
3926
3927	begin.dtbgn_probefunc = pf;
3928	begin.dtbgn_recfunc = rf;
3929	begin.dtbgn_arg = arg;
3930	begin.dtbgn_beginonly = 1;
3931
3932	/*
3933	 * We need to interpose on the ERROR handler to be sure that we
3934	 * only process ERRORs induced by BEGIN.
3935	 */
3936	begin.dtbgn_errhdlr = dtp->dt_errhdlr;
3937	begin.dtbgn_errarg = dtp->dt_errarg;
3938	dtp->dt_errhdlr = dt_consume_begin_error;
3939	dtp->dt_errarg = &begin;
3940
3941	rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,
3942	    dt_consume_begin_probe, dt_consume_begin_record, &begin);
3943
3944	dtp->dt_errhdlr = begin.dtbgn_errhdlr;
3945	dtp->dt_errarg = begin.dtbgn_errarg;
3946
3947	if (rval != 0) {
3948		dt_put_buf(dtp, buf);
3949		return (rval);
3950	}
3951
3952	if (max_ncpus == 0)
3953		max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
3954
3955	for (i = 0; i < max_ncpus; i++) {
3956		dtrace_bufdesc_t *nbuf;
3957		if (i == cpu)
3958			continue;
3959
3960		if (dt_get_buf(dtp, i, &nbuf) != 0) {
3961			dt_put_buf(dtp, buf);
3962			return (-1);
3963		}
3964		if (nbuf == NULL)
3965			continue;
3966
3967		rval = dt_consume_cpu(dtp, fp, i, nbuf, B_FALSE,
3968		    pf, rf, arg);
3969		dt_put_buf(dtp, nbuf);
3970		if (rval != 0) {
3971			dt_put_buf(dtp, buf);
3972			return (rval);
3973		}
3974	}
3975
3976	/*
3977	 * Okay -- we're done with the other buffers.  Now we want to
3978	 * reconsume the first buffer -- but this time we're looking for
3979	 * everything _but_ BEGIN.  And of course, in order to only consume
3980	 * those ERRORs _not_ associated with BEGIN, we need to reinstall our
3981	 * ERROR interposition function...
3982	 */
3983	begin.dtbgn_beginonly = 0;
3984
3985	assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr);
3986	assert(begin.dtbgn_errarg == dtp->dt_errarg);
3987	dtp->dt_errhdlr = dt_consume_begin_error;
3988	dtp->dt_errarg = &begin;
3989
3990	rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,
3991	    dt_consume_begin_probe, dt_consume_begin_record, &begin);
3992
3993	dtp->dt_errhdlr = begin.dtbgn_errhdlr;
3994	dtp->dt_errarg = begin.dtbgn_errarg;
3995
3996	return (rval);
3997}
3998
3999/* ARGSUSED */
4000static uint64_t
4001dt_buf_oldest(void *elem, void *arg)
4002{
4003	dtrace_bufdesc_t *buf = elem;
4004	size_t offs = buf->dtbd_oldest;
4005
4006	while (offs < buf->dtbd_size) {
4007		dtrace_rechdr_t *dtrh =
4008		    /* LINTED - alignment */
4009		    (dtrace_rechdr_t *)(buf->dtbd_data + offs);
4010		if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
4011			offs += sizeof (dtrace_epid_t);
4012		} else {
4013			return (DTRACE_RECORD_LOAD_TIMESTAMP(dtrh));
4014		}
4015	}
4016
4017	/* There are no records left; use the time the buffer was retrieved. */
4018	return (buf->dtbd_timestamp);
4019}
4020
4021int
4022dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
4023    dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
4024{
4025	dtrace_optval_t size;
4026	static int max_ncpus;
4027	int i, rval;
4028	dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE];
4029	hrtime_t now = gethrtime();
4030
4031	if (dtp->dt_lastswitch != 0) {
4032		if (now - dtp->dt_lastswitch < interval)
4033			return (0);
4034
4035		dtp->dt_lastswitch += interval;
4036	} else {
4037		dtp->dt_lastswitch = now;
4038	}
4039
4040	if (!dtp->dt_active)
4041		return (dt_set_errno(dtp, EINVAL));
4042
4043	if (max_ncpus == 0)
4044		max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
4045
4046	if (pf == NULL)
4047		pf = (dtrace_consume_probe_f *)dt_nullprobe;
4048
4049	if (rf == NULL)
4050		rf = (dtrace_consume_rec_f *)dt_nullrec;
4051
4052	if (dtp->dt_options[DTRACEOPT_TEMPORAL] == DTRACEOPT_UNSET) {
4053		/*
4054		 * The output will not be in the order it was traced.  Rather,
4055		 * we will consume all of the data from each CPU's buffer in
4056		 * turn.  We apply special handling for the records from BEGIN
4057		 * and END probes so that they are consumed first and last,
4058		 * respectively.
4059		 *
4060		 * If we have just begun, we want to first process the CPU that
4061		 * executed the BEGIN probe (if any).
4062		 */
4063		if (dtp->dt_active && dtp->dt_beganon != -1 &&
4064		    (rval = dt_consume_begin(dtp, fp, pf, rf, arg)) != 0)
4065			return (rval);
4066
4067		for (i = 0; i < max_ncpus; i++) {
4068			dtrace_bufdesc_t *buf;
4069
4070			/*
4071			 * If we have stopped, we want to process the CPU on
4072			 * which the END probe was processed only _after_ we
4073			 * have processed everything else.
4074			 */
4075			if (dtp->dt_stopped && (i == dtp->dt_endedon))
4076				continue;
4077
4078			if (dt_get_buf(dtp, i, &buf) != 0)
4079				return (-1);
4080			if (buf == NULL)
4081				continue;
4082
4083			dtp->dt_flow = 0;
4084			dtp->dt_indent = 0;
4085			dtp->dt_prefix = NULL;
4086			rval = dt_consume_cpu(dtp, fp, i,
4087			    buf, B_FALSE, pf, rf, arg);
4088			dt_put_buf(dtp, buf);
4089			if (rval != 0)
4090				return (rval);
4091		}
4092		if (dtp->dt_stopped) {
4093			dtrace_bufdesc_t *buf;
4094
4095			if (dt_get_buf(dtp, dtp->dt_endedon, &buf) != 0)
4096				return (-1);
4097			if (buf == NULL)
4098				return (0);
4099
4100			rval = dt_consume_cpu(dtp, fp, dtp->dt_endedon,
4101			    buf, B_FALSE, pf, rf, arg);
4102			dt_put_buf(dtp, buf);
4103			return (rval);
4104		}
4105	} else {
4106		/*
4107		 * The output will be in the order it was traced (or for
4108		 * speculations, when it was committed).  We retrieve a buffer
4109		 * from each CPU and put it into a priority queue, which sorts
4110		 * based on the first entry in the buffer.  This is sufficient
4111		 * because entries within a buffer are already sorted.
4112		 *
4113		 * We then consume records one at a time, always consuming the
4114		 * oldest record, as determined by the priority queue.  When
4115		 * we reach the end of the time covered by these buffers,
4116		 * we need to stop and retrieve more records on the next pass.
4117		 * The kernel tells us the time covered by each buffer, in
4118		 * dtbd_timestamp.  The first buffer's timestamp tells us the
4119		 * time covered by all buffers, as subsequently retrieved
4120		 * buffers will cover to a more recent time.
4121		 */
4122
4123		uint64_t *drops = alloca(max_ncpus * sizeof (uint64_t));
4124		uint64_t first_timestamp = 0;
4125		uint_t cookie = 0;
4126		dtrace_bufdesc_t *buf;
4127
4128		bzero(drops, max_ncpus * sizeof (uint64_t));
4129
4130		if (dtp->dt_bufq == NULL) {
4131			dtp->dt_bufq = dt_pq_init(dtp, max_ncpus * 2,
4132			    dt_buf_oldest, NULL);
4133			if (dtp->dt_bufq == NULL) /* ENOMEM */
4134				return (-1);
4135		}
4136
4137		/* Retrieve data from each CPU. */
4138		(void) dtrace_getopt(dtp, "bufsize", &size);
4139		for (i = 0; i < max_ncpus; i++) {
4140			dtrace_bufdesc_t *buf;
4141
4142			if (dt_get_buf(dtp, i, &buf) != 0)
4143				return (-1);
4144			if (buf != NULL) {
4145				if (first_timestamp == 0)
4146					first_timestamp = buf->dtbd_timestamp;
4147				assert(buf->dtbd_timestamp >= first_timestamp);
4148
4149				dt_pq_insert(dtp->dt_bufq, buf);
4150				drops[i] = buf->dtbd_drops;
4151				buf->dtbd_drops = 0;
4152			}
4153		}
4154
4155		/* Consume records. */
4156		for (;;) {
4157			dtrace_bufdesc_t *buf = dt_pq_pop(dtp->dt_bufq);
4158			uint64_t timestamp;
4159
4160			if (buf == NULL)
4161				break;
4162
4163			timestamp = dt_buf_oldest(buf, dtp);
4164			if (timestamp == buf->dtbd_timestamp) {
4165				/*
4166				 * We've reached the end of the time covered
4167				 * by this buffer.  If this is the oldest
4168				 * buffer, we must do another pass
4169				 * to retrieve more data.
4170				 */
4171				dt_put_buf(dtp, buf);
4172				if (timestamp == first_timestamp &&
4173				    !dtp->dt_stopped)
4174					break;
4175				continue;
4176			}
4177			assert(timestamp >= dtp->dt_last_timestamp);
4178			dtp->dt_last_timestamp = timestamp;
4179
4180			if ((rval = dt_consume_cpu(dtp, fp,
4181			    buf->dtbd_cpu, buf, B_TRUE, pf, rf, arg)) != 0)
4182				return (rval);
4183			dt_pq_insert(dtp->dt_bufq, buf);
4184		}
4185
4186		/* Consume drops. */
4187		for (i = 0; i < max_ncpus; i++) {
4188			if (drops[i] != 0) {
4189				int error;
4190				xo_open_instance("probes");
4191				dt_oformat_drop(dtp, i);
4192				error = dt_handle_cpudrop(dtp, i,
4193				    DTRACEDROP_PRINCIPAL, drops[i]);
4194				xo_close_instance("probes");
4195				if (error != 0)
4196					return (error);
4197			}
4198		}
4199
4200		/*
4201		 * Reduce memory usage by re-allocating smaller buffers
4202		 * for the "remnants".
4203		 */
4204		while (buf = dt_pq_walk(dtp->dt_bufq, &cookie))
4205			dt_realloc_buf(dtp, buf, buf->dtbd_size);
4206	}
4207
4208	return (0);
4209}
4210
4211void
4212dtrace_oformat_probe(dtrace_hdl_t *dtp __unused, const dtrace_probedata_t *data,
4213    processorid_t cpu, dtrace_probedesc_t *pd)
4214{
4215
4216	xo_emit("{:timestamp/%llu} {:cpu/%d} {:id/%d} {:provider/%s} "
4217		"{:module/%s} {:function/%s} {:name/%s}",
4218	    (unsigned long long)data->dtpda_timestamp, cpu, pd->dtpd_id,
4219	    pd->dtpd_provider, pd->dtpd_mod, pd->dtpd_func, pd->dtpd_name);
4220}
4221
4222void
4223dt_oformat_drop(dtrace_hdl_t *dtp, processorid_t cpu)
4224{
4225	xo_emit("{:cpu/%d} {:id/%d} {:provider/%s} "
4226		"{:module/%s} {:function/%s} {:name/%s}",
4227	    cpu, -1, "dtrace", "INTERNAL", "INTERNAL", "DROP");
4228}
4229