1/*-
2 * Copyright (c) 2014-2018 Netflix, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27/*
28 * Author: Lawrence Stewart <lstewart@netflix.com>
29 */
30
31#include <sys/param.h>
32#include <sys/arb.h>
33#include <sys/ctype.h>
34#include <sys/errno.h>
35#include <sys/hash.h>
36#include <sys/limits.h>
37#include <sys/malloc.h>
38#include <sys/qmath.h>
39#include <sys/sbuf.h>
40#if defined(DIAGNOSTIC)
41#include <sys/tree.h>
42#endif
43#include <sys/stats.h> /* Must come after qmath.h and arb.h */
44#include <sys/stddef.h>
45#include <sys/stdint.h>
46#include <sys/time.h>
47
48#ifdef _KERNEL
49#include <sys/kernel.h>
50#include <sys/lock.h>
51#include <sys/rwlock.h>
52#include <sys/sysctl.h>
53#include <sys/systm.h>
54#else /* ! _KERNEL */
55#include <pthread.h>
56#include <stdbool.h>
57#include <stdio.h>
58#include <stdlib.h>
59#include <string.h>
60#endif /* _KERNEL */
61
/*
 * Internal state for a VOI, stored in the blob alongside regular stats as the
 * data of a VS_STYPE_VOISTATE pseudo-stat.
 */
struct voistatdata_voistate {
	/* Previous VOI value for diff calculation. */
	struct voistatdata_numeric prev;
};
66
#define	VS_VSDVALID	0x0001	/* Stat's voistatdata updated at least once. */
/* Per-statistic bookkeeping stored in the blob's voistat array. */
struct voistat {
	int8_t		stype;		/* Type of stat e.g. VS_STYPE_SUM. */
	enum vsd_dtype	dtype : 8;	/* Data type of this stat's data. */
	uint16_t	data_off;	/* Blob offset for this stat's data. */
	uint16_t	dsz;		/* Size of stat's data. */
#define	VS_EBITS 8
	uint16_t	errs : VS_EBITS;/* Non-wrapping error count. */
	uint16_t	flags : 16 - VS_EBITS;	/* e.g. VS_VSDVALID. */
};
/* The voistat error count is capped to avoid wrapping. */
#define	VS_INCERRS(vs) do {						\
	if ((vs)->errs < (1U << VS_EBITS) - 1)				\
		(vs)->errs++;						\
} while (0)
82
83/*
84 * Ideas for flags:
85 *   - Global or entity specific (global would imply use of counter(9)?)
86 *   - Whether to reset stats on read or not
87 *   - Signal an overflow?
88 *   - Compressed voistat array
89 */
#define	VOI_REQSTATE	0x0001	/* VOI requires VS_STYPE_VOISTATE. */
/* Per-VOI bookkeeping stored in the blob's vois[] array. */
struct voi {
	int16_t		id;		/* VOI id. */
	enum vsd_dtype	dtype : 8;	/* Data type of the VOI itself. */
	int8_t		voistatmaxid;	/* Largest allocated voistat index. */
	uint16_t	stats_off;	/* Blob offset for this VOIs stats. */
	uint16_t	flags;		/* e.g. VOI_REQSTATE. */
};
98
99/*
100 * Memory for the entire blob is allocated as a slab and then offsets are
101 * maintained to carve up the slab into sections holding different data types.
102 *
103 * Ideas for flags:
104 * - Compressed voi array (trade off memory usage vs search time)
105 * - Units of offsets (default bytes, flag for e.g. vm_page/KiB/Mib)
106 */
/* Version 1 layout of a stats blob; leading fields must track struct statsblob. */
struct statsblobv1 {
	uint8_t		abi;
	uint8_t		endian;
	uint16_t	flags;
	uint16_t	maxsz;
	uint16_t	cursz;
	/* Fields from here down are opaque to consumers. */
	uint32_t	tplhash;	/* Base template hash ID. */
	uint16_t	stats_off;	/* voistat array blob offset. */
	uint16_t	statsdata_off;	/* voistatdata array blob offset. */
	sbintime_t	created;	/* Blob creation time. */
	sbintime_t	lastrst;	/* Time of last reset. */
	struct voi	vois[];		/* Array indexed by [voi_id]. */
} __aligned(sizeof(void *));
/* The non-opaque prefix must line up exactly with struct statsblob. */
_Static_assert(offsetof(struct statsblobv1, cursz) +
    SIZEOF_MEMBER(struct statsblobv1, cursz) ==
    offsetof(struct statsblob, opaque),
    "statsblobv1 ABI mismatch");
125
/* A v1 template: metadata (names etc.) paired with its prototype blob. */
struct statsblobv1_tpl {
	struct metablob		*mb;
	struct statsblobv1	*sb;
};
130
/* Context passed to iterator callbacks. */
struct sb_iter_ctx {
	void		*usrctx;	/* Caller supplied context. */
	uint32_t	flags;		/* Flags for current iteration. */
	int16_t		vslot;		/* struct voi slot index. */
	int8_t		vsslot;		/* struct voistat slot index. */
};
138
/* Context for blob-to-string rendering callbacks. */
struct sb_tostrcb_ctx {
	struct sbuf		*buf;	/* Output buffer. */
	struct statsblob_tpl	*tpl;	/* Template the blob was built from. */
	enum sb_str_fmt	fmt;		/* Requested output format. */
	uint32_t		flags;
};
145
/* Context wrapping a consumer visit callback and its opaque argument. */
struct sb_visitcb_ctx {
	stats_blob_visitcb_t	cb;
	void			*usrctx;
};
150
/* Stats blob iterator callback. */
typedef int (*stats_v1_blob_itercb_t)(struct statsblobv1 *sb, struct voi *v,
    struct voistat *vs, struct sb_iter_ctx *ctx);
154
#ifdef _KERNEL
/* Kernel build: rwlock(9)-backed template list lock and malloc(9) type. */
static struct rwlock tpllistlock;
RW_SYSINIT(stats_tpl_list, &tpllistlock, "Stat template list lock");
#define	TPL_LIST_RLOCK() rw_rlock(&tpllistlock)
#define	TPL_LIST_RUNLOCK() rw_runlock(&tpllistlock)
#define	TPL_LIST_WLOCK() rw_wlock(&tpllistlock)
#define	TPL_LIST_WUNLOCK() rw_wunlock(&tpllistlock)
#define	TPL_LIST_LOCK_ASSERT() rw_assert(&tpllistlock, RA_LOCKED)
#define	TPL_LIST_RLOCK_ASSERT() rw_assert(&tpllistlock, RA_RLOCKED)
#define	TPL_LIST_WLOCK_ASSERT() rw_assert(&tpllistlock, RA_WLOCKED)
MALLOC_DEFINE(M_STATS, "stats(9) related memory", "stats(9) related memory");
#define	stats_free(ptr) free((ptr), M_STATS)
#else /* ! _KERNEL */
/* Userland build: pthread rwlock; lock assertions compile to no-ops. */
static void stats_constructor(void);
static void stats_destructor(void);
static pthread_rwlock_t tpllistlock;
#define	TPL_LIST_UNLOCK() pthread_rwlock_unlock(&tpllistlock)
#define	TPL_LIST_RLOCK() pthread_rwlock_rdlock(&tpllistlock)
#define	TPL_LIST_RUNLOCK() TPL_LIST_UNLOCK()
#define	TPL_LIST_WLOCK() pthread_rwlock_wrlock(&tpllistlock)
#define	TPL_LIST_WUNLOCK() TPL_LIST_UNLOCK()
#define	TPL_LIST_LOCK_ASSERT() do { } while (0)
#define	TPL_LIST_RLOCK_ASSERT() do { } while (0)
#define	TPL_LIST_WLOCK_ASSERT() do { } while (0)
#ifdef NDEBUG
#define	KASSERT(cond, msg) do {} while (0)
#define	stats_abort() do {} while (0)
#else /* ! NDEBUG */
#define	KASSERT(cond, msg) do { \
	if (!(cond)) { \
		panic msg; \
	} \
} while (0)
#define	stats_abort() abort()
#endif /* NDEBUG */
#define	stats_free(ptr) free(ptr)
#define	panic(fmt, ...) do { \
	fprintf(stderr, (fmt), ##__VA_ARGS__); \
	stats_abort(); \
} while (0)
#endif /* _KERNEL */
196
/* Maximum blob size for the v1 ABI (cursz/maxsz are uint16_t). */
#define	SB_V1_MAXSZ 65535

/* Obtain a blob offset pointer. */
#define	BLOB_OFFSET(sb, off) ((void *)(((uint8_t *)(sb)) + (off)))

/*
 * Number of VOIs in the blob's vois[] array. By virtue of struct voi being a
 * power of 2 size, we can shift instead of divide. The shift amount must be
 * updated if sizeof(struct voi) ever changes, which the assert should catch.
 */
#define	NVOIS(sb) ((int32_t)((((struct statsblobv1 *)(sb))->stats_off - \
    sizeof(struct statsblobv1)) >> 3))
_Static_assert(sizeof(struct voi) == 8, "statsblobv1 voi ABI mismatch");
210
/* Try restrict names to alphanumeric and underscore to simplify JSON compat. */
const char *vs_stype2name[VS_NUM_STYPES] = {
	[VS_STYPE_VOISTATE] = "VOISTATE",
	[VS_STYPE_SUM] = "SUM",
	[VS_STYPE_MAX] = "MAX",
	[VS_STYPE_MIN] = "MIN",
	[VS_STYPE_HIST] = "HIST",
	[VS_STYPE_TDGST] = "TDGST",
};

/* Human readable descriptions, indexed by stat type. */
const char *vs_stype2desc[VS_NUM_STYPES] = {
	[VS_STYPE_VOISTATE] = "VOI related state data (not a real stat)",
	[VS_STYPE_SUM] = "Simple arithmetic accumulator",
	[VS_STYPE_MAX] = "Maximum observed VOI value",
	[VS_STYPE_MIN] = "Minimum observed VOI value",
	[VS_STYPE_HIST] = "Histogram of observed VOI values",
	[VS_STYPE_TDGST] = "t-digest of observed VOI values",
};
229
/* Name of each voistatdata data type, indexed by vsd_dtype. */
const char *vsd_dtype2name[VSD_NUM_DTYPES] = {
	[VSD_DTYPE_VOISTATE] = "VOISTATE",
	[VSD_DTYPE_INT_S32] = "INT_S32",
	[VSD_DTYPE_INT_U32] = "INT_U32",
	[VSD_DTYPE_INT_S64] = "INT_S64",
	[VSD_DTYPE_INT_U64] = "INT_U64",
	[VSD_DTYPE_INT_SLONG] = "INT_SLONG",
	[VSD_DTYPE_INT_ULONG] = "INT_ULONG",
	[VSD_DTYPE_Q_S32] = "Q_S32",
	[VSD_DTYPE_Q_U32] = "Q_U32",
	[VSD_DTYPE_Q_S64] = "Q_S64",
	[VSD_DTYPE_Q_U64] = "Q_U64",
	[VSD_DTYPE_CRHIST32] = "CRHIST32",
	[VSD_DTYPE_DRHIST32] = "DRHIST32",
	[VSD_DTYPE_DVHIST32] = "DVHIST32",
	[VSD_DTYPE_CRHIST64] = "CRHIST64",
	[VSD_DTYPE_DRHIST64] = "DRHIST64",
	[VSD_DTYPE_DVHIST64] = "DVHIST64",
	[VSD_DTYPE_TDGSTCLUST32] = "TDGSTCLUST32",
	[VSD_DTYPE_TDGSTCLUST64] = "TDGSTCLUST64",
};

/* Fixed (header) size in bytes of each data type, indexed by vsd_dtype. */
const size_t vsd_dtype2size[VSD_NUM_DTYPES] = {
	[VSD_DTYPE_VOISTATE] = sizeof(struct voistatdata_voistate),
	[VSD_DTYPE_INT_S32] = sizeof(struct voistatdata_int32),
	[VSD_DTYPE_INT_U32] = sizeof(struct voistatdata_int32),
	[VSD_DTYPE_INT_S64] = sizeof(struct voistatdata_int64),
	[VSD_DTYPE_INT_U64] = sizeof(struct voistatdata_int64),
	[VSD_DTYPE_INT_SLONG] = sizeof(struct voistatdata_intlong),
	[VSD_DTYPE_INT_ULONG] = sizeof(struct voistatdata_intlong),
	[VSD_DTYPE_Q_S32] = sizeof(struct voistatdata_q32),
	[VSD_DTYPE_Q_U32] = sizeof(struct voistatdata_q32),
	[VSD_DTYPE_Q_S64] = sizeof(struct voistatdata_q64),
	[VSD_DTYPE_Q_U64] = sizeof(struct voistatdata_q64),
	[VSD_DTYPE_CRHIST32] = sizeof(struct voistatdata_crhist32),
	[VSD_DTYPE_DRHIST32] = sizeof(struct voistatdata_drhist32),
	[VSD_DTYPE_DVHIST32] = sizeof(struct voistatdata_dvhist32),
	[VSD_DTYPE_CRHIST64] = sizeof(struct voistatdata_crhist64),
	[VSD_DTYPE_DRHIST64] = sizeof(struct voistatdata_drhist64),
	[VSD_DTYPE_DVHIST64] = sizeof(struct voistatdata_dvhist64),
	[VSD_DTYPE_TDGSTCLUST32] = sizeof(struct voistatdata_tdgstclust32),
	[VSD_DTYPE_TDGSTCLUST64] = sizeof(struct voistatdata_tdgstclust64),
};

/* True for data types that are compound (struct-valued), not plain scalars. */
static const bool vsd_compoundtype[VSD_NUM_DTYPES] = {
	[VSD_DTYPE_VOISTATE] = true,
	[VSD_DTYPE_INT_S32] = false,
	[VSD_DTYPE_INT_U32] = false,
	[VSD_DTYPE_INT_S64] = false,
	[VSD_DTYPE_INT_U64] = false,
	[VSD_DTYPE_INT_SLONG] = false,
	[VSD_DTYPE_INT_ULONG] = false,
	[VSD_DTYPE_Q_S32] = false,
	[VSD_DTYPE_Q_U32] = false,
	[VSD_DTYPE_Q_S64] = false,
	[VSD_DTYPE_Q_U64] = false,
	[VSD_DTYPE_CRHIST32] = true,
	[VSD_DTYPE_DRHIST32] = true,
	[VSD_DTYPE_DVHIST32] = true,
	[VSD_DTYPE_CRHIST64] = true,
	[VSD_DTYPE_DRHIST64] = true,
	[VSD_DTYPE_DVHIST64] = true,
	[VSD_DTYPE_TDGSTCLUST32] = true,
	[VSD_DTYPE_TDGSTCLUST64] = true,
};
295
/*
 * Min/max representable values for each numeric data type, indexed by
 * [LIM_MIN|LIM_MAX][vsd_dtype]. Only covers the scalar types (up to Q_U64);
 * Q entries carry limit integer/fractional bits, control bits must be set
 * separately per the consumer's chosen precision.
 */
const struct voistatdata_numeric numeric_limits[2][VSD_DTYPE_Q_U64 + 1] = {
	[LIM_MIN] = {
		[VSD_DTYPE_VOISTATE] = {0},
		[VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MIN}},
		[VSD_DTYPE_INT_U32] = {.int32 = {.u32 = 0}},
		[VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MIN}},
		[VSD_DTYPE_INT_U64] = {.int64 = {.u64 = 0}},
		[VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MIN}},
		[VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = 0}},
		[VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMINVAL(INT32_MIN)}},
		[VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = 0}},
		[VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMINVAL(INT64_MIN)}},
		[VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = 0}},
	},
	[LIM_MAX] = {
		[VSD_DTYPE_VOISTATE] = {0},
		[VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MAX}},
		[VSD_DTYPE_INT_U32] = {.int32 = {.u32 = UINT32_MAX}},
		[VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MAX}},
		[VSD_DTYPE_INT_U64] = {.int64 = {.u64 = UINT64_MAX}},
		[VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MAX}},
		[VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = ULONG_MAX}},
		[VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMAXVAL(INT32_MAX)}},
		[VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = Q_IFMAXVAL(UINT32_MAX)}},
		[VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMAXVAL(INT64_MAX)}},
		[VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = Q_IFMAXVAL(UINT64_MAX)}},
	}
};
324
/* tpllistlock protects tpllist and ntpl */
static uint32_t ntpl;				/* Number of registered templates. */
static struct statsblob_tpl **tpllist;		/* Array of templates, indexed by id. */

/* Forward declarations for file-local helpers defined below. */
static inline void * stats_realloc(void *ptr, size_t oldsz, size_t newsz,
    int flags);
//static void stats_v1_blob_finalise(struct statsblobv1 *sb);
static int stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
    uint32_t flags);
static int stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
    int newvoistatbytes, int newvoistatdatabytes);
static void stats_v1_blob_iter(struct statsblobv1 *sb,
    stats_v1_blob_itercb_t icb, void *usrctx, uint32_t flags);
static inline int stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype,
    struct voistatdata_tdgst *tdgst, s64q_t x, uint64_t weight, int attempt);
340
/*
 * Ordering function for 32-bit t-digest centroids, keyed on mean (mu).
 * Never returns 0 (equal mu compares as "greater") — presumably so
 * equal-mean centroids can coexist as distinct tree nodes; confirm against
 * the ARB insertion semantics before relying on it.
 */
static inline int
ctd32cmp(const struct voistatdata_tdgstctd32 *c1, const struct voistatdata_tdgstctd32 *c2)
{

	/* Both mus must carry the same Q precision for Q_QLTQ to be valid. */
	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
	    Q_RELPREC(c1->mu, c2->mu)));

       return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
}
ARB_GENERATE_STATIC(ctdth32, voistatdata_tdgstctd32, ctdlnk, ctd32cmp);
352
/*
 * Ordering function for 64-bit t-digest centroids, keyed on mean (mu).
 * Same contract as ctd32cmp(): never returns 0.
 */
static inline int
ctd64cmp(const struct voistatdata_tdgstctd64 *c1, const struct voistatdata_tdgstctd64 *c2)
{

	/* Both mus must carry the same Q precision for Q_QLTQ to be valid. */
	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
	    Q_RELPREC(c1->mu, c2->mu)));

       return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
}
ARB_GENERATE_STATIC(ctdth64, voistatdata_tdgstctd64, ctdlnk, ctd64cmp);
364
/* DIAGNOSTIC builds also generate RB trees over the same centroids,
 * presumably to cross-check the ARB trees — matches the conditional
 * inclusion of sys/tree.h above. */
#ifdef DIAGNOSTIC
RB_GENERATE_STATIC(rbctdth32, voistatdata_tdgstctd32, rblnk, ctd32cmp);
RB_GENERATE_STATIC(rbctdth64, voistatdata_tdgstctd64, rblnk, ctd64cmp);
#endif
369
/*
 * Return the monotonic uptime as an sbintime_t. In the kernel this is
 * sbinuptime(); in userland it is derived from CLOCK_MONOTONIC_FAST
 * (FreeBSD-specific coarse monotonic clock).
 */
static inline sbintime_t
stats_sbinuptime(void)
{
	sbintime_t sbt;
#ifdef _KERNEL

	sbt = sbinuptime();
#else /* ! _KERNEL */
	struct timespec tp;

	clock_gettime(CLOCK_MONOTONIC_FAST, &tp);
	sbt = tstosbt(tp);
#endif /* _KERNEL */

	return (sbt);
}
386
/*
 * realloc() wrapper papering over kernel/userland differences. "flags" are
 * malloc(9) M_* flags; the userland path emulates M_ZERO by zeroing either
 * the whole region (fresh allocation) or just the grown tail.
 *
 * Returns the (possibly moved) allocation, or NULL on failure.
 * NOTE(review): on failure the caller's original pointer is not returned
 * and typically goes unreferenced — callers that pass a live ptr would leak
 * it; confirm all call sites pass NULL or handle this.
 */
static inline void *
stats_realloc(void *ptr, size_t oldsz, size_t newsz, int flags)
{

#ifdef _KERNEL
	/* Default to M_NOWAIT if neither M_NOWAIT or M_WAITOK are set. */
	if (!(flags & (M_WAITOK | M_NOWAIT)))
		flags |= M_NOWAIT;
	ptr = realloc(ptr, newsz, M_STATS, flags);
#else /* ! _KERNEL */
	ptr = realloc(ptr, newsz);
	if ((flags & M_ZERO) && ptr != NULL) {
		if (oldsz == 0)
			memset(ptr, '\0', newsz);
		else if (newsz > oldsz)
			memset(BLOB_OFFSET(ptr, oldsz), '\0', newsz - oldsz);
	}
#endif /* _KERNEL */

	return (ptr);
}
408
/*
 * strdup() wrapper papering over kernel/userland differences. The kernel
 * path honors malloc(9) wait flags (defaulting to M_NOWAIT); userland
 * ignores "flags" and defers to libc strdup(). Returns NULL on allocation
 * failure; caller frees with stats_free().
 */
static inline char *
stats_strdup(const char *s,
#ifdef _KERNEL
    int flags)
{
	char *copy;
	size_t len;

	if (!(flags & (M_WAITOK | M_NOWAIT)))
		flags |= M_NOWAIT;

	len = strlen(s) + 1;
	if ((copy = malloc(len, M_STATS, flags)) != NULL)
		bcopy(s, copy, len);

	return (copy);
#else
    int flags __unused)
{
	return (strdup(s));
#endif
}
431
/*
 * Recompute a template's hash ID from its name, each named VOI, and the raw
 * bytes of its prototype blob. Caller must hold the template list write lock
 * since the hash is stored back into the shared template metadata.
 */
static inline void
stats_tpl_update_hash(struct statsblob_tpl *tpl)
{

	TPL_LIST_WLOCK_ASSERT();
	tpl->mb->tplhash = hash32_str(tpl->mb->tplname, 0);
	/* Fold each named VOI into the hash; unnamed slots are skipped. */
	for (int voi_id = 0; voi_id < NVOIS(tpl->sb); voi_id++) {
		if (tpl->mb->voi_meta[voi_id].name != NULL)
			tpl->mb->tplhash = hash32_str(
			    tpl->mb->voi_meta[voi_id].name, tpl->mb->tplhash);
	}
	tpl->mb->tplhash = hash32_buf(tpl->sb, tpl->sb->cursz,
	    tpl->mb->tplhash);
}
446
/*
 * Integer exponentiation by squaring: returns base**exp, wrapping modulo
 * 2^64 on overflow (unsigned arithmetic). exp == 0 yields 1 for any base.
 */
static inline uint64_t
stats_pow_u64(uint64_t base, uint64_t exp)
{
	uint64_t res;

	/* Consume one exponent bit per iteration, squaring as we go. */
	for (res = 1; exp != 0; exp >>= 1) {
		if (exp & 1)
			res *= base;
		base *= base;
	}

	return (res);
}
461
462static inline int
463stats_vss_hist_bkt_hlpr(struct vss_hist_hlpr_info *info, uint32_t curbkt,
464    struct voistatdata_numeric *bkt_lb, struct voistatdata_numeric *bkt_ub)
465{
466	uint64_t step = 0;
467	int error = 0;
468
469	switch (info->scheme) {
470	case BKT_LIN:
471		step = info->lin.stepinc;
472		break;
473	case BKT_EXP:
474		step = stats_pow_u64(info->exp.stepbase,
475		    info->exp.stepexp + curbkt);
476		break;
477	case BKT_LINEXP:
478		{
479		uint64_t curstepexp = 1;
480
481		switch (info->voi_dtype) {
482		case VSD_DTYPE_INT_S32:
483			while ((int32_t)stats_pow_u64(info->linexp.stepbase,
484			    curstepexp) <= bkt_lb->int32.s32)
485				curstepexp++;
486			break;
487		case VSD_DTYPE_INT_U32:
488			while ((uint32_t)stats_pow_u64(info->linexp.stepbase,
489			    curstepexp) <= bkt_lb->int32.u32)
490				curstepexp++;
491			break;
492		case VSD_DTYPE_INT_S64:
493			while ((int64_t)stats_pow_u64(info->linexp.stepbase,
494			    curstepexp) <= bkt_lb->int64.s64)
495				curstepexp++;
496			break;
497		case VSD_DTYPE_INT_U64:
498			while ((uint64_t)stats_pow_u64(info->linexp.stepbase,
499			    curstepexp) <= bkt_lb->int64.u64)
500				curstepexp++;
501			break;
502		case VSD_DTYPE_INT_SLONG:
503			while ((long)stats_pow_u64(info->linexp.stepbase,
504			    curstepexp) <= bkt_lb->intlong.slong)
505				curstepexp++;
506			break;
507		case VSD_DTYPE_INT_ULONG:
508			while ((unsigned long)stats_pow_u64(info->linexp.stepbase,
509			    curstepexp) <= bkt_lb->intlong.ulong)
510				curstepexp++;
511			break;
512		case VSD_DTYPE_Q_S32:
513			while ((s32q_t)stats_pow_u64(info->linexp.stepbase,
514			    curstepexp) <= Q_GIVAL(bkt_lb->q32.sq32))
515			break;
516		case VSD_DTYPE_Q_U32:
517			while ((u32q_t)stats_pow_u64(info->linexp.stepbase,
518			    curstepexp) <= Q_GIVAL(bkt_lb->q32.uq32))
519			break;
520		case VSD_DTYPE_Q_S64:
521			while ((s64q_t)stats_pow_u64(info->linexp.stepbase,
522			    curstepexp) <= Q_GIVAL(bkt_lb->q64.sq64))
523				curstepexp++;
524			break;
525		case VSD_DTYPE_Q_U64:
526			while ((u64q_t)stats_pow_u64(info->linexp.stepbase,
527			    curstepexp) <= Q_GIVAL(bkt_lb->q64.uq64))
528				curstepexp++;
529			break;
530		default:
531			break;
532		}
533
534		step = stats_pow_u64(info->linexp.stepbase, curstepexp) /
535		    info->linexp.linstepdiv;
536		if (step == 0)
537			step = 1;
538		break;
539		}
540	default:
541		break;
542	}
543
544	if (info->scheme == BKT_USR) {
545		*bkt_lb = info->usr.bkts[curbkt].lb;
546		*bkt_ub = info->usr.bkts[curbkt].ub;
547	} else if (step != 0) {
548		switch (info->voi_dtype) {
549		case VSD_DTYPE_INT_S32:
550			bkt_ub->int32.s32 += (int32_t)step;
551			break;
552		case VSD_DTYPE_INT_U32:
553			bkt_ub->int32.u32 += (uint32_t)step;
554			break;
555		case VSD_DTYPE_INT_S64:
556			bkt_ub->int64.s64 += (int64_t)step;
557			break;
558		case VSD_DTYPE_INT_U64:
559			bkt_ub->int64.u64 += (uint64_t)step;
560			break;
561		case VSD_DTYPE_INT_SLONG:
562			bkt_ub->intlong.slong += (long)step;
563			break;
564		case VSD_DTYPE_INT_ULONG:
565			bkt_ub->intlong.ulong += (unsigned long)step;
566			break;
567		case VSD_DTYPE_Q_S32:
568			error = Q_QADDI(&bkt_ub->q32.sq32, step);
569			break;
570		case VSD_DTYPE_Q_U32:
571			error = Q_QADDI(&bkt_ub->q32.uq32, step);
572			break;
573		case VSD_DTYPE_Q_S64:
574			error = Q_QADDI(&bkt_ub->q64.sq64, step);
575			break;
576		case VSD_DTYPE_Q_U64:
577			error = Q_QADDI(&bkt_ub->q64.uq64, step);
578			break;
579		default:
580			break;
581		}
582	} else { /* info->scheme != BKT_USR && step == 0 */
583		return (EINVAL);
584	}
585
586	return (error);
587}
588
/*
 * Walk the bucketing scheme from info->lb to info->ub and count how many
 * buckets the histogram needs, including the optional infinity-bound
 * buckets. Returns 0 on error (bad scheme config or unknown data type).
 */
static uint32_t
stats_vss_hist_nbkts_hlpr(struct vss_hist_hlpr_info *info)
{
	struct voistatdata_numeric bkt_lb, bkt_ub;
	uint32_t nbkts;
	int done;

	if (info->scheme == BKT_USR) {
		/* XXXLAS: Setting info->{lb,ub} from macro is tricky. */
		info->lb = info->usr.bkts[0].lb;
		/* NOTE(review): reads .lb of the last user bucket; confirm
		 * .ub was not intended here. */
		info->ub = info->usr.bkts[info->usr.nbkts - 1].lb;
	}

	nbkts = 0;
	done = 0;
	bkt_ub = info->lb;

	/* Step bucket-by-bucket until the upper bound passes info->ub. */
	do {
		bkt_lb = bkt_ub;
		if (stats_vss_hist_bkt_hlpr(info, nbkts++, &bkt_lb, &bkt_ub))
			return (0);

		if (info->scheme == BKT_USR)
			done = (nbkts == info->usr.nbkts);
		else {
			switch (info->voi_dtype) {
			case VSD_DTYPE_INT_S32:
				done = (bkt_ub.int32.s32 > info->ub.int32.s32);
				break;
			case VSD_DTYPE_INT_U32:
				done = (bkt_ub.int32.u32 > info->ub.int32.u32);
				break;
			case VSD_DTYPE_INT_S64:
				done = (bkt_ub.int64.s64 > info->ub.int64.s64);
				break;
			case VSD_DTYPE_INT_U64:
				done = (bkt_ub.int64.u64 > info->ub.int64.u64);
				break;
			case VSD_DTYPE_INT_SLONG:
				done = (bkt_ub.intlong.slong >
				    info->ub.intlong.slong);
				break;
			case VSD_DTYPE_INT_ULONG:
				done = (bkt_ub.intlong.ulong >
				    info->ub.intlong.ulong);
				break;
			case VSD_DTYPE_Q_S32:
				done = Q_QGTQ(bkt_ub.q32.sq32,
				    info->ub.q32.sq32);
				break;
			case VSD_DTYPE_Q_U32:
				done = Q_QGTQ(bkt_ub.q32.uq32,
				    info->ub.q32.uq32);
				break;
			case VSD_DTYPE_Q_S64:
				done = Q_QGTQ(bkt_ub.q64.sq64,
				    info->ub.q64.sq64);
				break;
			case VSD_DTYPE_Q_U64:
				done = Q_QGTQ(bkt_ub.q64.uq64,
				    info->ub.q64.uq64);
				break;
			default:
				return (0);
			}
		}
	} while (!done);

	/* Account for the optional catch-all buckets at either end. */
	if (info->flags & VSD_HIST_LBOUND_INF)
		nbkts++;
	if (info->flags & VSD_HIST_UBOUND_INF)
		nbkts++;

	return (nbkts);
}
664
/*
 * Histogram spec helper: sizes and allocates the initial histogram data for
 * a voistatspec and populates every bucket's bounds per the scheme in
 * "info", including the optional +/- infinity catch-all buckets.
 *
 * Returns 0 on success, EINVAL on bad/unsupported config, ENOMEM on
 * allocation failure. On the post-allocation EINVAL paths vss->iv is left
 * allocated; stats_vss_hlpr_cleanup() is responsible for freeing it.
 */
int
stats_vss_hist_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
    struct vss_hist_hlpr_info *info)
{
	struct voistatdata_hist *hist;
	struct voistatdata_numeric bkt_lb, bkt_ub, *lbinfbktlb, *lbinfbktub,
	    *ubinfbktlb, *ubinfbktub;
	uint32_t bkt, nbkts, nloop;

	/* Infinity-bound buckets make no sense for discrete-value hists. */
	if (vss == NULL || info == NULL || (info->flags &
	(VSD_HIST_LBOUND_INF|VSD_HIST_UBOUND_INF) && (info->hist_dtype ==
	VSD_DTYPE_DVHIST32 || info->hist_dtype == VSD_DTYPE_DVHIST64)))
		return (EINVAL);

	info->voi_dtype = voi_dtype;

	if ((nbkts = stats_vss_hist_nbkts_hlpr(info)) == 0)
		return (EINVAL);

	/* Size the initial-value buffer for the requested histogram type. */
	switch (info->hist_dtype) {
	case VSD_DTYPE_CRHIST32:
		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist32, nbkts);
		break;
	case VSD_DTYPE_DRHIST32:
		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist32, nbkts);
		break;
	case VSD_DTYPE_DVHIST32:
		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist32, nbkts);
		break;
	case VSD_DTYPE_CRHIST64:
		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist64, nbkts);
		break;
	case VSD_DTYPE_DRHIST64:
		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist64, nbkts);
		break;
	case VSD_DTYPE_DVHIST64:
		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist64, nbkts);
		break;
	default:
		return (EINVAL);
	}

	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
	if (vss->iv == NULL)
		return (ENOMEM);

	hist = (struct voistatdata_hist *)vss->iv;
	bkt_ub = info->lb;

	/*
	 * Fill in per-bucket bounds. If a lower-infinity bucket was counted,
	 * bucket 0 is reserved for it and regular buckets start at index 1,
	 * while "nloop" keeps the scheme's own 0-based bucket numbering.
	 */
	for (bkt = (info->flags & VSD_HIST_LBOUND_INF), nloop = 0;
	    bkt < nbkts;
	    bkt++, nloop++) {
		bkt_lb = bkt_ub;
		if (stats_vss_hist_bkt_hlpr(info, nloop, &bkt_lb, &bkt_ub))
			return (EINVAL);

		switch (info->hist_dtype) {
		case VSD_DTYPE_CRHIST32:
			VSD(crhist32, hist)->bkts[bkt].lb = bkt_lb;
			break;
		case VSD_DTYPE_DRHIST32:
			VSD(drhist32, hist)->bkts[bkt].lb = bkt_lb;
			VSD(drhist32, hist)->bkts[bkt].ub = bkt_ub;
			break;
		case VSD_DTYPE_DVHIST32:
			VSD(dvhist32, hist)->bkts[bkt].val = bkt_lb;
			break;
		case VSD_DTYPE_CRHIST64:
			VSD(crhist64, hist)->bkts[bkt].lb = bkt_lb;
			break;
		case VSD_DTYPE_DRHIST64:
			VSD(drhist64, hist)->bkts[bkt].lb = bkt_lb;
			VSD(drhist64, hist)->bkts[bkt].ub = bkt_ub;
			break;
		case VSD_DTYPE_DVHIST64:
			VSD(dvhist64, hist)->bkts[bkt].val = bkt_lb;
			break;
		default:
			return (EINVAL);
		}
	}

	/* Locate the bound fields of the infinity buckets, if applicable. */
	lbinfbktlb = lbinfbktub = ubinfbktlb = ubinfbktub = NULL;

	switch (info->hist_dtype) {
	case VSD_DTYPE_CRHIST32:
		lbinfbktlb = &VSD(crhist32, hist)->bkts[0].lb;
		ubinfbktlb = &VSD(crhist32, hist)->bkts[nbkts - 1].lb;
		break;
	case VSD_DTYPE_DRHIST32:
		lbinfbktlb = &VSD(drhist32, hist)->bkts[0].lb;
		lbinfbktub = &VSD(drhist32, hist)->bkts[0].ub;
		ubinfbktlb = &VSD(drhist32, hist)->bkts[nbkts - 1].lb;
		ubinfbktub = &VSD(drhist32, hist)->bkts[nbkts - 1].ub;
		break;
	case VSD_DTYPE_CRHIST64:
		lbinfbktlb = &VSD(crhist64, hist)->bkts[0].lb;
		ubinfbktlb = &VSD(crhist64, hist)->bkts[nbkts - 1].lb;
		break;
	case VSD_DTYPE_DRHIST64:
		lbinfbktlb = &VSD(drhist64, hist)->bkts[0].lb;
		lbinfbktub = &VSD(drhist64, hist)->bkts[0].ub;
		ubinfbktlb = &VSD(drhist64, hist)->bkts[nbkts - 1].lb;
		ubinfbktub = &VSD(drhist64, hist)->bkts[nbkts - 1].ub;
		break;
	case VSD_DTYPE_DVHIST32:
	case VSD_DTYPE_DVHIST64:
		/* Discrete-value hists have no infinity buckets. */
		break;
	default:
		return (EINVAL);
	}

	if ((info->flags & VSD_HIST_LBOUND_INF) && lbinfbktlb) {
		*lbinfbktlb = numeric_limits[LIM_MIN][info->voi_dtype];
		/*
		 * Assignment from numeric_limit array for Q types assigns max
		 * possible integral/fractional value for underlying data type,
		 * but we must set control bits for this specific histogram per
		 * the user's choice of fractional bits, which we extract from
		 * info->lb.
		 */
		if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
		    info->voi_dtype == VSD_DTYPE_Q_U32) {
			/* Signedness doesn't matter for setting control bits. */
			Q_SCVAL(lbinfbktlb->q32.sq32,
			    Q_GCVAL(info->lb.q32.sq32));
		} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
		    info->voi_dtype == VSD_DTYPE_Q_U64) {
			/* Signedness doesn't matter for setting control bits. */
			Q_SCVAL(lbinfbktlb->q64.sq64,
			    Q_GCVAL(info->lb.q64.sq64));
		}
		if (lbinfbktub)
			*lbinfbktub = info->lb;
	}
	if ((info->flags & VSD_HIST_UBOUND_INF) && ubinfbktlb) {
		/* Upper catch-all runs from the last regular bound to +inf. */
		*ubinfbktlb = bkt_lb;
		if (ubinfbktub) {
			*ubinfbktub = numeric_limits[LIM_MAX][info->voi_dtype];
			if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
			    info->voi_dtype == VSD_DTYPE_Q_U32) {
				Q_SCVAL(ubinfbktub->q32.sq32,
				    Q_GCVAL(info->lb.q32.sq32));
			} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
			    info->voi_dtype == VSD_DTYPE_Q_U64) {
				Q_SCVAL(ubinfbktub->q64.sq64,
				    Q_GCVAL(info->lb.q64.sq64));
			}
		}
	}

	return (0);
}
818
/*
 * t-digest spec helper: sizes and allocates the initial t-digest data for a
 * voistatspec and initialises each centroid's mean to zero at the requested
 * Q precision (info->prec).
 *
 * Returns 0 on success, EINVAL for an unsupported t-digest data type,
 * ENOMEM on allocation failure. As with the histogram helper, vss->iv is
 * freed later by stats_vss_hlpr_cleanup().
 */
int
stats_vss_tdgst_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
    struct vss_tdgst_hlpr_info *info)
{
	struct voistatdata_tdgst *tdgst;
	struct ctdth32 *ctd32tree;
	struct ctdth64 *ctd64tree;
	struct voistatdata_tdgstctd32 *ctd32;
	struct voistatdata_tdgstctd64 *ctd64;

	info->voi_dtype = voi_dtype;

	switch (info->tdgst_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust32, info->nctds);
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust64, info->nctds);
		break;
	default:
		return (EINVAL);
	}

	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
	if (vss->iv == NULL)
		return (ENOMEM);

	tdgst = (struct voistatdata_tdgst *)vss->iv;

	/* Initialise the centroid tree; ARB_INIT visits each node slot. */
	switch (info->tdgst_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
		ARB_INIT(ctd32, ctdlnk, ctd32tree, info->nctds) {
			Q_INI(&ctd32->mu, 0, 0, info->prec);
		}
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
		ARB_INIT(ctd64, ctdlnk, ctd64tree, info->nctds) {
			Q_INI(&ctd64->mu, 0, 0, info->prec);
		}
		break;
	default:
		return (EINVAL);
	}

	return (0);
}
867
/*
 * Numeric stat spec helper: allocates and sets the initial value for simple
 * SUM/MIN/MAX stats. SUM starts at zero; MIN starts at the type's maximum
 * and MAX at the type's minimum so the first update always captures.
 *
 * Returns 0 on success, EINVAL for other stat types, ENOMEM on allocation
 * failure.
 */
int
stats_vss_numeric_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
    struct vss_numeric_hlpr_info *info)
{
	struct voistatdata_numeric iv;

	switch (vss->stype) {
	case VS_STYPE_SUM:
		iv = stats_ctor_vsd_numeric(0);
		break;
	case VS_STYPE_MIN:
		iv = numeric_limits[LIM_MAX][voi_dtype];
		break;
	case VS_STYPE_MAX:
		iv = numeric_limits[LIM_MIN][voi_dtype];
		break;
	default:
		return (EINVAL);
	}

	vss->iv = stats_realloc(NULL, 0, vsd_dtype2size[voi_dtype], 0);
	if (vss->iv == NULL)
		return (ENOMEM);

	vss->vs_dtype = voi_dtype;
	vss->vsdsz = vsd_dtype2size[voi_dtype];
	/* Store the initial value through the correctly-typed lvalue. */
	switch (voi_dtype) {
	case VSD_DTYPE_INT_S32:
		*((int32_t *)vss->iv) = iv.int32.s32;
		break;
	case VSD_DTYPE_INT_U32:
		*((uint32_t *)vss->iv) = iv.int32.u32;
		break;
	case VSD_DTYPE_INT_S64:
		*((int64_t *)vss->iv) = iv.int64.s64;
		break;
	case VSD_DTYPE_INT_U64:
		*((uint64_t *)vss->iv) = iv.int64.u64;
		break;
	case VSD_DTYPE_INT_SLONG:
		*((long *)vss->iv) = iv.intlong.slong;
		break;
	case VSD_DTYPE_INT_ULONG:
		*((unsigned long *)vss->iv) = iv.intlong.ulong;
		break;
	case VSD_DTYPE_Q_S32:
		/* Q types also need control bits set per info->prec. */
		*((s32q_t *)vss->iv) = Q_SCVAL(iv.q32.sq32,
		    Q_CTRLINI(info->prec));
		break;
	case VSD_DTYPE_Q_U32:
		*((u32q_t *)vss->iv) = Q_SCVAL(iv.q32.uq32,
		    Q_CTRLINI(info->prec));
		break;
	case VSD_DTYPE_Q_S64:
		*((s64q_t *)vss->iv) = Q_SCVAL(iv.q64.sq64,
		    Q_CTRLINI(info->prec));
		break;
	case VSD_DTYPE_Q_U64:
		*((u64q_t *)vss->iv) = Q_SCVAL(iv.q64.uq64,
		    Q_CTRLINI(info->prec));
		break;
	default:
		break;
	}

	return (0);
}
935
936int
937stats_vss_hlpr_init(enum vsd_dtype voi_dtype, uint32_t nvss,
938    struct voistatspec *vss)
939{
940	int i, ret;
941
942	for (i = nvss - 1; i >= 0; i--) {
943		if (vss[i].hlpr && (ret = vss[i].hlpr(voi_dtype, &vss[i],
944		    vss[i].hlprinfo)) != 0)
945			return (ret);
946	}
947
948	return (0);
949}
950
951void
952stats_vss_hlpr_cleanup(uint32_t nvss, struct voistatspec *vss)
953{
954	int i;
955
956	for (i = nvss - 1; i >= 0; i--) {
957		if (vss[i].hlpr) {
958			stats_free((void *)vss[i].iv);
959			vss[i].iv = NULL;
960		}
961	}
962}
963
964int
965stats_tpl_fetch(int tpl_id, struct statsblob_tpl **tpl)
966{
967	int error;
968
969	error = 0;
970
971	TPL_LIST_WLOCK();
972	if (tpl_id < 0 || tpl_id >= (int)ntpl) {
973		error = ENOENT;
974	} else {
975		*tpl = tpllist[tpl_id];
976		/* XXXLAS: Acquire refcount on tpl. */
977	}
978	TPL_LIST_WUNLOCK();
979
980	return (error);
981}
982
983int
984stats_tpl_fetch_allocid(const char *name, uint32_t hash)
985{
986	int i, tpl_id;
987
988	tpl_id = -ESRCH;
989
990	TPL_LIST_RLOCK();
991	for (i = ntpl - 1; i >= 0; i--) {
992		if (name != NULL) {
993			if (strlen(name) == strlen(tpllist[i]->mb->tplname) &&
994			    strncmp(name, tpllist[i]->mb->tplname,
995			    TPL_MAX_NAME_LEN) == 0 && (!hash || hash ==
996			    tpllist[i]->mb->tplhash)) {
997				tpl_id = i;
998				break;
999			}
1000		} else if (hash == tpllist[i]->mb->tplhash) {
1001			tpl_id = i;
1002			break;
1003		}
1004	}
1005	TPL_LIST_RUNLOCK();
1006
1007	return (tpl_id);
1008}
1009
1010int
1011stats_tpl_id2name(uint32_t tpl_id, char *buf, size_t len)
1012{
1013	int error;
1014
1015	error = 0;
1016
1017	TPL_LIST_RLOCK();
1018	if (tpl_id < ntpl) {
1019		if (buf != NULL && len > strlen(tpllist[tpl_id]->mb->tplname))
1020			strlcpy(buf, tpllist[tpl_id]->mb->tplname, len);
1021		else
1022			error = EOVERFLOW;
1023	} else
1024		error = ENOENT;
1025	TPL_LIST_RUNLOCK();
1026
1027	return (error);
1028}
1029
/*
 * Probabilistically pick a template slot from "rates" according to each
 * entry's sampling percentage. Returns the selected tpl_slot_id, or -1 for
 * "no template" (the remainder of the [0,100] interval not covered by the
 * cumulative percentages).
 */
int
stats_tpl_sample_rollthedice(struct stats_tpl_sample_rate *rates, int nrates,
    void *seed_bytes, size_t seed_len)
{
	uint32_t cum_pct, rnd_pct;
	int i;

	cum_pct = 0;

	/*
	 * Choose a pseudorandom or seeded number in range [0,100] and use
	 * it to make a sampling decision and template selection where required.
	 * If no seed is supplied, a PRNG is used to generate a pseudorandom
	 * number so that every selection is independent. If a seed is supplied,
	 * the caller desires random selection across different seeds, but
	 * deterministic selection given the same seed. This is achieved by
	 * hashing the seed and using the hash as the random number source.
	 *
	 * XXXLAS: Characterise hash function output distribution.
	 */
	if (seed_bytes == NULL)
		rnd_pct = random() / (INT32_MAX / 100);
	else
		rnd_pct = hash32_buf(seed_bytes, seed_len, 0) /
		    (UINT32_MAX / 100U);

	/*
	 * We map the randomly selected percentage on to the interval [0,100]
	 * consisting of the cumulatively summed template sampling percentages.
	 * The difference between the cumulative sum of all template sampling
	 * percentages and 100 is treated as a NULL assignment i.e. no stats
	 * template will be assigned, and -1 returned instead.
	 */
	for (i = 0; i < nrates; i++) {
		cum_pct += rates[i].tpl_sample_pct;

		/* Rates must never sum past 100%. */
		KASSERT(cum_pct <= 100, ("%s cum_pct %u > 100", __func__,
		    cum_pct));
		if (rnd_pct > cum_pct || rates[i].tpl_sample_pct == 0)
			continue;

		return (rates[i].tpl_slot_id);
	}

	return (-1);
}
1076
/*
 * Clone the statsblob "src" into "*dst", preserving dst's own maxsz field
 * (the copy deliberately skips over maxsz by splitting the copy at its
 * offset). With SB_CLONE_ALLOCDST, a new dst blob of src->cursz bytes is
 * allocated on the caller's behalf; that flag is mutually exclusive with
 * the SB_CLONE_USRDST* flags, which direct the copy to a userspace dst via
 * copyout(9) variants. If dst is too small to hold all of src, only src's
 * header is copied and EOVERFLOW is returned. Returns 0 on success or an
 * errno value on failure.
 */
int
stats_v1_blob_clone(struct statsblobv1 **dst, size_t dstmaxsz,
    struct statsblobv1 *src, uint32_t flags)
{
	int error, tmperror;

	error = tmperror = 0;

	/* ALLOCDST and USRDST* are mutually exclusive. */
	if (src == NULL || dst == NULL ||
	    src->cursz < sizeof(struct statsblob) ||
	    ((flags & SB_CLONE_ALLOCDST) &&
	    (flags & (SB_CLONE_USRDSTNOFAULT | SB_CLONE_USRDST)))) {
		error = EINVAL;
	} else if (flags & SB_CLONE_ALLOCDST) {
		*dst = stats_realloc(NULL, 0, src->cursz, 0);
		if (*dst)
			(*dst)->maxsz = dstmaxsz = src->cursz;
		else
			error = ENOMEM;
	} else if (*dst == NULL || dstmaxsz < sizeof(struct statsblob)) {
		error = EINVAL;
	}

	if (!error) {
		size_t postcurszlen;

		/*
		 * Clone src into dst except for the maxsz field. If dst is too
		 * small to hold all of src, only copy src's header and return
		 * EOVERFLOW.
		 */
#ifdef _KERNEL
		if (flags & SB_CLONE_USRDSTNOFAULT)
			error = copyout_nofault(src, *dst,
			    offsetof(struct statsblob, maxsz));
		else if (flags & SB_CLONE_USRDST)
			error = copyout(src, *dst,
			    offsetof(struct statsblob, maxsz));
		else
#endif
			memcpy(*dst, src, offsetof(struct statsblob, maxsz));
#ifdef _KERNEL
		if (error != 0)
			goto out;
#endif

		/*
		 * Second copy resumes at cursz i.e. immediately after the
		 * skipped maxsz field, and covers either the remainder of
		 * src (fits) or just the rest of the header (EOVERFLOW).
		 */
		if (dstmaxsz >= src->cursz) {
			postcurszlen = src->cursz -
			    offsetof(struct statsblob, cursz);
		} else {
			error = EOVERFLOW;
			postcurszlen = sizeof(struct statsblob) -
			    offsetof(struct statsblob, cursz);
		}
#ifdef _KERNEL
		if (flags & SB_CLONE_USRDSTNOFAULT)
			tmperror = copyout_nofault(&(src->cursz), &((*dst)->cursz),
			    postcurszlen);
		else if (flags & SB_CLONE_USRDST)
			tmperror = copyout(&(src->cursz), &((*dst)->cursz),
			    postcurszlen);
		else
#endif
			memcpy(&((*dst)->cursz), &(src->cursz), postcurszlen);

		/* A first-copy error (e.g. EOVERFLOW) takes precedence. */
		error = error ? error : tmperror;
	}
#ifdef _KERNEL
out:
#endif

	return (error);
}
1151
/*
 * Allocate a new ABI v1 template with the given name and append it to the
 * global template list. Returns the new template's non-negative list index
 * on success, or a negated errno (-EINVAL on bad name, -EEXIST on duplicate
 * name, -ENOMEM on allocation failure).
 */
int
stats_v1_tpl_alloc(const char *name, uint32_t flags __unused)
{
	struct statsblobv1_tpl *tpl, **newtpllist;
	struct statsblobv1 *tpl_sb;
	struct metablob *tpl_mb;
	int tpl_id;

	if (name != NULL && strlen(name) > TPL_MAX_NAME_LEN)
		return (-EINVAL);

	/* Template names must be unique within the list. */
	if (name != NULL && stats_tpl_fetch_allocid(name, 0) >= 0)
		return (-EEXIST);

	tpl = stats_realloc(NULL, 0, sizeof(struct statsblobv1_tpl), M_ZERO);
	tpl_mb = stats_realloc(NULL, 0, sizeof(struct metablob), M_ZERO);
	tpl_sb = stats_realloc(NULL, 0, sizeof(struct statsblobv1), M_ZERO);

	if (tpl_mb != NULL && name != NULL)
		tpl_mb->tplname = stats_strdup(name, 0);

	/*
	 * All-or-nothing: if any allocation failed (including the name
	 * strdup), unwind the ones that succeeded. NB: the tplname check
	 * relies on short-circuit evaluation of the tpl_mb NULL check.
	 */
	if (tpl == NULL || tpl_sb == NULL || tpl_mb == NULL ||
	    tpl_mb->tplname == NULL) {
		stats_free(tpl);
		stats_free(tpl_sb);
		if (tpl_mb != NULL) {
			stats_free(tpl_mb->tplname);
			stats_free(tpl_mb);
		}
		return (-ENOMEM);
	}

	tpl->mb = tpl_mb;
	tpl->sb = tpl_sb;

	/* Record this build's byte order so consumers can detect mismatch. */
	tpl_sb->abi = STATS_ABI_V1;
	tpl_sb->endian =
#if BYTE_ORDER == LITTLE_ENDIAN
	    SB_LE;
#elif BYTE_ORDER == BIG_ENDIAN
	    SB_BE;
#else
	    SB_UE;
#endif
	/* An empty blob: both regions start immediately after the header. */
	tpl_sb->cursz = tpl_sb->maxsz = sizeof(struct statsblobv1);
	tpl_sb->stats_off = tpl_sb->statsdata_off = sizeof(struct statsblobv1);

	TPL_LIST_WLOCK();
	newtpllist = stats_realloc(tpllist, ntpl * sizeof(void *),
	    (ntpl + 1) * sizeof(void *), 0);
	if (newtpllist != NULL) {
		tpl_id = ntpl++;
		tpllist = (struct statsblob_tpl **)newtpllist;
		tpllist[tpl_id] = (struct statsblob_tpl *)tpl;
		stats_tpl_update_hash(tpllist[tpl_id]);
	} else {
		/* List growth failed; unwind the fully-built template. */
		stats_free(tpl);
		stats_free(tpl_sb);
		if (tpl_mb != NULL) {
			stats_free(tpl_mb->tplname);
			stats_free(tpl_mb);
		}
		tpl_id = -ENOMEM;
	}
	TPL_LIST_WUNLOCK();

	return (tpl_id);
}
1220
/*
 * Add a VOI with the specified stats (described by the nvss-entry vss
 * array) to the template identified by tpl_id. The template blob is
 * expanded in place to make room for any new voi/voistat/voistatdata
 * bytes, the new slots are initialised from vss, and the template's
 * version hash is recomputed. Returns 0 on success or an errno value;
 * adding stats to an already-existing VOI is not yet supported
 * (EOPNOTSUPP).
 */
int
stats_v1_tpl_add_voistats(uint32_t tpl_id, int32_t voi_id, const char *voi_name,
    enum vsd_dtype voi_dtype, uint32_t nvss, struct voistatspec *vss,
    uint32_t flags)
{
	struct voi *voi;
	struct voistat *tmpstat;
	struct statsblobv1 *tpl_sb;
	struct metablob *tpl_mb;
	int error, i, newstatdataidx, newvoibytes, newvoistatbytes,
	    newvoistatdatabytes, newvoistatmaxid;
	uint32_t nbytes;

	if (voi_id < 0 || voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES ||
	    nvss == 0 || vss == NULL)
		return (EINVAL);

	error = nbytes = newvoibytes = newvoistatbytes =
	    newvoistatdatabytes = 0;
	newvoistatmaxid = -1;

	/* Calculate the number of bytes required for the new voistats. */
	for (i = nvss - 1; i >= 0; i--) {
		if (vss[i].stype == 0 || vss[i].stype >= VS_NUM_STYPES ||
		    vss[i].vs_dtype == 0 || vss[i].vs_dtype >= VSD_NUM_DTYPES ||
		    vss[i].iv == NULL || vss[i].vsdsz == 0)
			return (EINVAL);
		if ((int)vss[i].stype > newvoistatmaxid)
			newvoistatmaxid = vss[i].stype;
		newvoistatdatabytes += vss[i].vsdsz;
	}

	if (flags & SB_VOI_RELUPDATE) {
		/* XXXLAS: VOI state bytes may need to vary based on stat types. */
		newvoistatdatabytes += sizeof(struct voistatdata_voistate);
	}
	nbytes += newvoistatdatabytes;

	TPL_LIST_WLOCK();
	if (tpl_id < ntpl) {
		tpl_sb = (struct statsblobv1 *)tpllist[tpl_id]->sb;
		tpl_mb = tpllist[tpl_id]->mb;

		if (voi_id >= NVOIS(tpl_sb) || tpl_sb->vois[voi_id].id == -1) {
			/* Adding a new VOI and associated stats. */
			if (voi_id >= NVOIS(tpl_sb)) {
				/* We need to grow the tpl_sb->vois array. */
				newvoibytes = (voi_id - (NVOIS(tpl_sb) - 1)) *
				    sizeof(struct voi);
				nbytes += newvoibytes;
			}
			/* Slots 0..newvoistatmaxid are allocated per VOI. */
			newvoistatbytes =
			    (newvoistatmaxid + 1) * sizeof(struct voistat);
		} else {
			/* Adding stats to an existing VOI. */
			if (newvoistatmaxid >
			    tpl_sb->vois[voi_id].voistatmaxid) {
				newvoistatbytes = (newvoistatmaxid -
				    tpl_sb->vois[voi_id].voistatmaxid) *
				    sizeof(struct voistat);
			}
			/* XXXLAS: KPI does not yet support expanding VOIs. */
			error = EOPNOTSUPP;
		}
		nbytes += newvoistatbytes;

		if (!error && newvoibytes > 0) {
			/* Grow the per-VOI metadata array alongside the blob. */
			struct voi_meta *voi_meta = tpl_mb->voi_meta;

			voi_meta = stats_realloc(voi_meta, voi_meta == NULL ?
			    0 : NVOIS(tpl_sb) * sizeof(struct voi_meta),
			    (1 + voi_id) * sizeof(struct voi_meta),
			    M_ZERO);

			if (voi_meta == NULL)
				error = ENOMEM;
			else
				tpl_mb->voi_meta = voi_meta;
		}

		if (!error) {
			/* NB: Resizing can change where tpl_sb points. */
			error = stats_v1_blob_expand(&tpl_sb, newvoibytes,
			    newvoistatbytes, newvoistatdatabytes);
		}

		if (!error) {
			tpl_mb->voi_meta[voi_id].name = stats_strdup(voi_name,
			    0);
			if (tpl_mb->voi_meta[voi_id].name == NULL)
				error = ENOMEM;
		}

		if (!error) {
			/* Update the template list with the resized pointer. */
			tpllist[tpl_id]->sb = (struct statsblob *)tpl_sb;

			/* Update the template. */
			voi = &tpl_sb->vois[voi_id];

			if (voi->id < 0) {
				/* VOI is new and needs to be initialised. */
				voi->id = voi_id;
				voi->dtype = voi_dtype;
				voi->stats_off = tpl_sb->stats_off;
				if (flags & SB_VOI_RELUPDATE)
					voi->flags |= VOI_REQSTATE;
			} else {
				/*
				 * XXXLAS: When this else block is written, the
				 * "KPI does not yet support expanding VOIs"
				 * error earlier in this function can be
				 * removed. What is required here is to shuffle
				 * the voistat array such that the new stats for
				 * the voi are contiguous, which will displace
				 * stats for other vois that reside after the
				 * voi being updated. The other vois then need
				 * to have their stats_off adjusted post
				 * shuffle.
				 */
			}

			voi->voistatmaxid = newvoistatmaxid;
			newstatdataidx = 0;

			if (voi->flags & VOI_REQSTATE) {
				/* Initialise the voistate stat in slot 0. */
				tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off);
				tmpstat->stype = VS_STYPE_VOISTATE;
				tmpstat->flags = 0;
				tmpstat->dtype = VSD_DTYPE_VOISTATE;
				newstatdataidx = tmpstat->dsz =
				    sizeof(struct voistatdata_numeric);
				tmpstat->data_off = tpl_sb->statsdata_off;
			}

			/*
			 * Populate one voistat slot per spec, indexed by
			 * stype, and copy each spec's initial value into the
			 * data region at sequentially increasing offsets.
			 */
			for (i = 0; (uint32_t)i < nvss; i++) {
				tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off +
				    (vss[i].stype * sizeof(struct voistat)));
				KASSERT(tmpstat->stype < 0, ("voistat %p "
				    "already initialised", tmpstat));
				tmpstat->stype = vss[i].stype;
				tmpstat->flags = vss[i].flags;
				tmpstat->dtype = vss[i].vs_dtype;
				tmpstat->dsz = vss[i].vsdsz;
				tmpstat->data_off = tpl_sb->statsdata_off +
				    newstatdataidx;
				memcpy(BLOB_OFFSET(tpl_sb, tmpstat->data_off),
				    vss[i].iv, vss[i].vsdsz);
				newstatdataidx += vss[i].vsdsz;
			}

			/* Update the template version hash. */
			stats_tpl_update_hash(tpllist[tpl_id]);
			/* XXXLAS: Confirm tpl name/hash pair remains unique. */
		}
	} else
		error = EINVAL;
	TPL_LIST_WUNLOCK();

	return (error);
}
1383
1384struct statsblobv1 *
1385stats_v1_blob_alloc(uint32_t tpl_id, uint32_t flags __unused)
1386{
1387	struct statsblobv1 *sb;
1388	int error;
1389
1390	sb = NULL;
1391
1392	TPL_LIST_RLOCK();
1393	if (tpl_id < ntpl) {
1394		sb = stats_realloc(NULL, 0, tpllist[tpl_id]->sb->maxsz, 0);
1395		if (sb != NULL) {
1396			sb->maxsz = tpllist[tpl_id]->sb->maxsz;
1397			error = stats_v1_blob_init_locked(sb, tpl_id, 0);
1398		} else
1399			error = ENOMEM;
1400
1401		if (error) {
1402			stats_free(sb);
1403			sb = NULL;
1404		}
1405	}
1406	TPL_LIST_RUNLOCK();
1407
1408	return (sb);
1409}
1410
/*
 * Release a blob previously obtained from stats_v1_blob_alloc(). Passing
 * NULL is a no-op (stats_free() follows free(3)/free(9) semantics).
 */
void
stats_v1_blob_destroy(struct statsblobv1 *sb)
{

	stats_free(sb);
}
1417
1418int
1419stats_v1_voistat_fetch_dptr(struct statsblobv1 *sb, int32_t voi_id,
1420    enum voi_stype stype, enum vsd_dtype *retdtype, struct voistatdata **retvsd,
1421    size_t *retvsdsz)
1422{
1423	struct voi *v;
1424	struct voistat *vs;
1425
1426	if (retvsd == NULL || sb == NULL || sb->abi != STATS_ABI_V1 ||
1427	    voi_id >= NVOIS(sb))
1428		return (EINVAL);
1429
1430	v = &sb->vois[voi_id];
1431	if ((__typeof(v->voistatmaxid))stype > v->voistatmaxid)
1432		return (EINVAL);
1433
1434	vs = BLOB_OFFSET(sb, v->stats_off + (stype * sizeof(struct voistat)));
1435	*retvsd = BLOB_OFFSET(sb, vs->data_off);
1436	if (retdtype != NULL)
1437		*retdtype = vs->dtype;
1438	if (retvsdsz != NULL)
1439		*retvsdsz = vs->dsz;
1440
1441	return (0);
1442}
1443
1444int
1445stats_v1_blob_init(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags)
1446{
1447	int error;
1448
1449	error = 0;
1450
1451	TPL_LIST_RLOCK();
1452	if (sb == NULL || tpl_id >= ntpl) {
1453		error = EINVAL;
1454	} else {
1455		error = stats_v1_blob_init_locked(sb, tpl_id, flags);
1456	}
1457	TPL_LIST_RUNLOCK();
1458
1459	return (error);
1460}
1461
1462static inline int
1463stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
1464    uint32_t flags __unused)
1465{
1466	int error;
1467
1468	TPL_LIST_RLOCK_ASSERT();
1469	error = (sb->maxsz >= tpllist[tpl_id]->sb->cursz) ? 0 : EOVERFLOW;
1470	KASSERT(!error,
1471	    ("sb %d instead of %d bytes", sb->maxsz, tpllist[tpl_id]->sb->cursz));
1472
1473	if (!error) {
1474		memcpy(sb, tpllist[tpl_id]->sb, tpllist[tpl_id]->sb->cursz);
1475		sb->created = sb->lastrst = stats_sbinuptime();
1476		sb->tplhash = tpllist[tpl_id]->mb->tplhash;
1477	}
1478
1479	return (error);
1480}
1481
/*
 * Grow the blob *sbpp to accommodate newvoibytes of new voi structs,
 * newvoistatbytes of new voistat structs and newvoistatdatabytes of new
 * stat data, shifting the existing regions apart to open correctly-placed
 * gaps and initialising the new voi/voistat slots to the "unused" state
 * (id/stype == -1). May reallocate the blob; on success *sbpp is updated
 * to the (possibly moved) blob. Returns 0, EINVAL on byte counts that are
 * not whole struct multiples, EFBIG if the result would exceed
 * SB_V1_MAXSZ, or ENOMEM.
 */
static int
stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
    int newvoistatbytes, int newvoistatdatabytes)
{
	struct statsblobv1 *sb;
	struct voi *tmpvoi;
	struct voistat *tmpvoistat, *voistat_array;
	int error, i, idxnewvois, idxnewvoistats, nbytes, nvoistats;

	KASSERT(newvoibytes % sizeof(struct voi) == 0,
	    ("Bad newvoibytes %d", newvoibytes));
	KASSERT(newvoistatbytes % sizeof(struct voistat) == 0,
	    ("Bad newvoistatbytes %d", newvoistatbytes));

	/* Same checks as the KASSERTs, for non-DIAGNOSTIC builds. */
	error = ((newvoibytes % sizeof(struct voi) == 0) &&
	    (newvoistatbytes % sizeof(struct voistat) == 0)) ? 0 : EINVAL;
	sb = *sbpp;
	nbytes = newvoibytes + newvoistatbytes + newvoistatdatabytes;

	/*
	 * XXXLAS: Required until we gain support for flags which alter the
	 * units of size/offset fields in key structs.
	 */
	if (!error && ((((int)sb->cursz) + nbytes) > SB_V1_MAXSZ))
		error = EFBIG;

	if (!error && (sb->cursz + nbytes > sb->maxsz)) {
		/* Need to expand our blob. */
		sb = stats_realloc(sb, sb->maxsz, sb->cursz + nbytes, M_ZERO);
		if (sb != NULL) {
			sb->maxsz = sb->cursz + nbytes;
			*sbpp = sb;
		} else
		    error = ENOMEM;
	}

	if (!error) {
		/*
		 * Shuffle memory within the expanded blob working from the end
		 * backwards, leaving gaps for the new voistat and voistatdata
		 * structs at the beginning of their respective blob regions,
		 * and for the new voi structs at the end of their blob region.
		 */
		memmove(BLOB_OFFSET(sb, sb->statsdata_off + nbytes),
		    BLOB_OFFSET(sb, sb->statsdata_off),
		    sb->cursz - sb->statsdata_off);
		memmove(BLOB_OFFSET(sb, sb->stats_off + newvoibytes +
		    newvoistatbytes), BLOB_OFFSET(sb, sb->stats_off),
		    sb->statsdata_off - sb->stats_off);

		/* First index of new voi/voistat structs to be initialised. */
		idxnewvois = NVOIS(sb);
		idxnewvoistats = (newvoistatbytes / sizeof(struct voistat)) - 1;

		/* Update housekeeping variables and offsets. */
		sb->cursz += nbytes;
		sb->stats_off += newvoibytes;
		sb->statsdata_off += newvoibytes + newvoistatbytes;

		/* XXXLAS: Zeroing not strictly needed but aids debugging. */
		memset(&sb->vois[idxnewvois], '\0', newvoibytes);
		memset(BLOB_OFFSET(sb, sb->stats_off), '\0',
		    newvoistatbytes);
		memset(BLOB_OFFSET(sb, sb->statsdata_off), '\0',
		    newvoistatdatabytes);

		/* Initialise new voi array members and update offsets. */
		for (i = 0; i < NVOIS(sb); i++) {
			tmpvoi = &sb->vois[i];
			if (i >= idxnewvois) {
				/* New slot: mark unused. */
				tmpvoi->id = tmpvoi->voistatmaxid = -1;
			} else if (tmpvoi->id > -1) {
				/* Existing VOI: its stats moved with the shuffle. */
				tmpvoi->stats_off += newvoibytes +
				    newvoistatbytes;
			}
		}

		/* Initialise new voistat array members and update offsets. */
		nvoistats = (sb->statsdata_off - sb->stats_off) /
		    sizeof(struct voistat);
		voistat_array = BLOB_OFFSET(sb, sb->stats_off);
		for (i = 0; i < nvoistats; i++) {
			tmpvoistat = &voistat_array[i];
			if (i <= idxnewvoistats) {
				/* New slot: mark unused. */
				tmpvoistat->stype = -1;
			} else if (tmpvoistat->stype > -1) {
				/* Existing stat: its data moved with the shuffle. */
				tmpvoistat->data_off += nbytes;
			}
		}
	}

	return (error);
}
1575
/*
 * Perform final processing of a blob before it is consumed/exported.
 * Currently a placeholder with no behavior.
 */
static void
stats_v1_blob_finalise(struct statsblobv1 *sb __unused)
{

	/* XXXLAS: Fill this in. */
}
1582
/*
 * Iterate over every VOI (and each VOI's voistats) in the blob, invoking
 * icb for each. The ctx passed to the callback carries position flags
 * (SB_IT_FIRST/LAST_{CB,VOI,VOISTAT}) and slot indices. NULL (unused) vois
 * and voistats are only visited if the caller passes SB_IT_NULLVOI /
 * SB_IT_NULLVOISTAT respectively. A non-zero callback return terminates
 * the iteration.
 */
static void
stats_v1_blob_iter(struct statsblobv1 *sb, stats_v1_blob_itercb_t icb,
    void *usrctx, uint32_t flags)
{
	struct voi *v;
	struct voistat *vs;
	struct sb_iter_ctx ctx;
	int i, j, firstvoi;

	ctx.usrctx = usrctx;
	ctx.flags = SB_IT_FIRST_CB;
	firstvoi = 1;

	for (i = 0; i < NVOIS(sb); i++) {
		v = &sb->vois[i];
		ctx.vslot = i;
		ctx.vsslot = -1;
		ctx.flags |= SB_IT_FIRST_VOISTAT;

		/*
		 * NB: FIRST_VOI is tracked with the firstvoi variable rather
		 * than i == 0 because NULL vois may be skipped entirely.
		 */
		if (firstvoi)
			ctx.flags |= SB_IT_FIRST_VOI;
		else if (i == (NVOIS(sb) - 1))
			ctx.flags |= SB_IT_LAST_VOI | SB_IT_LAST_CB;

		if (v->id < 0 && (flags & SB_IT_NULLVOI)) {
			/* Per-VOI callback with no voistat (vs == NULL). */
			if (icb(sb, v, NULL, &ctx))
				return;
			firstvoi = 0;
			ctx.flags &= ~SB_IT_FIRST_CB;
		}

		/* If NULL voi, v->voistatmaxid == -1 */
		for (j = 0; j <= v->voistatmaxid; j++) {
			vs = &((struct voistat *)BLOB_OFFSET(sb,
			    v->stats_off))[j];
			if (vs->stype < 0 &&
			    !(flags & SB_IT_NULLVOISTAT))
				continue;

			/*
			 * LAST_CB is only certain on the final voistat of the
			 * final voi; clear it again on inner slots.
			 */
			if (j == v->voistatmaxid) {
				ctx.flags |= SB_IT_LAST_VOISTAT;
				if (i == (NVOIS(sb) - 1))
					ctx.flags |=
					    SB_IT_LAST_CB;
			} else
				ctx.flags &= ~SB_IT_LAST_CB;

			ctx.vsslot = j;
			if (icb(sb, v, vs, &ctx))
				return;

			ctx.flags &= ~(SB_IT_FIRST_CB | SB_IT_FIRST_VOISTAT |
			    SB_IT_LAST_VOISTAT);
		}
		ctx.flags &= ~(SB_IT_FIRST_VOI | SB_IT_LAST_VOI);
	}
}
1640
/*
 * Render t-digest stat data into buf as either free-form text or JSON.
 * Emits the digest's summary fields followed by each centroid's mu
 * (fixed-point, via Q_TOSTR) and count. With objdump set, centroids are
 * walked in raw array-slot order (including their slot index); otherwise
 * they are walked in tree (ascending mu) order via ARB_CMIN/ARB_CNEXT.
 */
static inline void
stats_voistatdata_tdgst_tostr(enum vsd_dtype voi_dtype __unused,
    const struct voistatdata_tdgst *tdgst, enum vsd_dtype tdgst_dtype,
    size_t tdgst_dsz __unused, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
{
	const struct ctdth32 *ctd32tree;
	const struct ctdth64 *ctd64tree;
	const struct voistatdata_tdgstctd32 *ctd32;
	const struct voistatdata_tdgstctd64 *ctd64;
	const char *fmtstr;
	uint64_t smplcnt, compcnt;
	int is32bit, qmaxstrlen;
	uint16_t maxctds, curctds;

	/*
	 * Normalise the 32/64-bit variants into common locals; the unused
	 * width's tree/cursor is NULLed so the loop below can test just one.
	 */
	switch (tdgst_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		smplcnt = CONSTVSD(tdgstclust32, tdgst)->smplcnt;
		compcnt = CONSTVSD(tdgstclust32, tdgst)->compcnt;
		maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
		curctds = ARB_CURNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
		ctd32tree = &CONSTVSD(tdgstclust32, tdgst)->ctdtree;
		ctd32 = (objdump ? ARB_CNODE(ctd32tree, 0) :
		    ARB_CMIN(ctdth32, ctd32tree));
		/* Worst-case decimal string length for a centroid's mu. */
		qmaxstrlen = (ctd32 == NULL) ? 1 : Q_MAXSTRLEN(ctd32->mu, 10);
		is32bit = 1;
		ctd64tree = NULL;
		ctd64 = NULL;
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		smplcnt = CONSTVSD(tdgstclust64, tdgst)->smplcnt;
		compcnt = CONSTVSD(tdgstclust64, tdgst)->compcnt;
		maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
		curctds = ARB_CURNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
		ctd64tree = &CONSTVSD(tdgstclust64, tdgst)->ctdtree;
		ctd64 = (objdump ? ARB_CNODE(ctd64tree, 0) :
		    ARB_CMIN(ctdth64, ctd64tree));
		qmaxstrlen = (ctd64 == NULL) ? 1 : Q_MAXSTRLEN(ctd64->mu, 10);
		is32bit = 0;
		ctd32tree = NULL;
		ctd32 = NULL;
		break;
	default:
		return;
	}

	switch (fmt) {
	case SB_STRFMT_FREEFORM:
		fmtstr = "smplcnt=%ju, compcnt=%ju, maxctds=%hu, nctds=%hu";
		break;
	case SB_STRFMT_JSON:
	default:
		fmtstr =
		    "\"smplcnt\":%ju,\"compcnt\":%ju,\"maxctds\":%hu,"
		    "\"nctds\":%hu,\"ctds\":[";
		break;
	}
	sbuf_printf(buf, fmtstr, (uintmax_t)smplcnt, (uintmax_t)compcnt,
	    maxctds, curctds);

	/* Walk centroids; only the active width's cursor is non-NULL. */
	while ((is32bit ? NULL != ctd32 : NULL != ctd64)) {
		char qstr[qmaxstrlen];

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "\n\t\t\t\t";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "{";
			break;
		}
		sbuf_cat(buf, fmtstr);

		if (objdump) {
			switch (fmt) {
			case SB_STRFMT_FREEFORM:
				fmtstr = "ctd[%hu].";
				break;
			case SB_STRFMT_JSON:
			default:
				fmtstr = "\"ctd\":%hu,";
				break;
			}
			sbuf_printf(buf, fmtstr, is32bit ?
			    ARB_SELFIDX(ctd32tree, ctd32) :
			    ARB_SELFIDX(ctd64tree, ctd64));
		}

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "{mu=";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "\"mu\":";
			break;
		}
		sbuf_cat(buf, fmtstr);
		Q_TOSTR((is32bit ? ctd32->mu : ctd64->mu), -1, 10, qstr,
		    sizeof(qstr));
		sbuf_cat(buf, qstr);

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
			break;
		}
		sbuf_printf(buf, fmtstr,
		    is32bit ? ctd32->cnt : (uintmax_t)ctd64->cnt);

		/* Advance: next array slot (objdump) or tree successor. */
		if (is32bit)
			ctd32 = (objdump ? ARB_CNODE(ctd32tree,
			    ARB_SELFIDX(ctd32tree, ctd32) + 1) :
			    ARB_CNEXT(ctdth32, ctd32tree, ctd32));
		else
			ctd64 = (objdump ? ARB_CNODE(ctd64tree,
			    ARB_SELFIDX(ctd64tree, ctd64) + 1) :
			    ARB_CNEXT(ctdth64, ctd64tree, ctd64));

		if (fmt == SB_STRFMT_JSON &&
		    (is32bit ? NULL != ctd32 : NULL != ctd64))
			sbuf_putc(buf, ',');
	}
	if (fmt == SB_STRFMT_JSON)
		sbuf_cat(buf, "]");
}
1771
/*
 * Render histogram stat data into buf as either free-form text or JSON.
 * Emits the bucket count and out-of-band counter, then each bucket's
 * lower/upper bound (via recursive calls to stats_voistatdata_tostr on
 * the numeric bounds) and its count. Bucket bound layout varies per
 * histogram flavour: CRHIST buckets share bounds with their neighbour
 * (ub is the next bucket's lb), DRHIST buckets carry explicit lb/ub, and
 * DVHIST buckets are single values (lb == ub).
 */
static inline void
stats_voistatdata_hist_tostr(enum vsd_dtype voi_dtype,
    const struct voistatdata_hist *hist, enum vsd_dtype hist_dtype,
    size_t hist_dsz, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
{
	const struct voistatdata_numeric *bkt_lb, *bkt_ub;
	const char *fmtstr;
	int is32bit;
	uint16_t i, nbkts;

	/* Bucket count is derived from the overall data size. */
	switch (hist_dtype) {
	case VSD_DTYPE_CRHIST32:
		nbkts = HIST_VSDSZ2NBKTS(crhist32, hist_dsz);
		is32bit = 1;
		break;
	case VSD_DTYPE_DRHIST32:
		nbkts = HIST_VSDSZ2NBKTS(drhist32, hist_dsz);
		is32bit = 1;
		break;
	case VSD_DTYPE_DVHIST32:
		nbkts = HIST_VSDSZ2NBKTS(dvhist32, hist_dsz);
		is32bit = 1;
		break;
	case VSD_DTYPE_CRHIST64:
		nbkts = HIST_VSDSZ2NBKTS(crhist64, hist_dsz);
		is32bit = 0;
		break;
	case VSD_DTYPE_DRHIST64:
		nbkts = HIST_VSDSZ2NBKTS(drhist64, hist_dsz);
		is32bit = 0;
		break;
	case VSD_DTYPE_DVHIST64:
		nbkts = HIST_VSDSZ2NBKTS(dvhist64, hist_dsz);
		is32bit = 0;
		break;
	default:
		return;
	}

	switch (fmt) {
	case SB_STRFMT_FREEFORM:
		fmtstr = "nbkts=%hu, ";
		break;
	case SB_STRFMT_JSON:
	default:
		fmtstr = "\"nbkts\":%hu,";
		break;
	}
	sbuf_printf(buf, fmtstr, nbkts);

	switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = (is32bit ? "oob=%u" : "oob=%ju");
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = (is32bit ? "\"oob\":%u,\"bkts\":[" :
			    "\"oob\":%ju,\"bkts\":[");
			break;
	}
	sbuf_printf(buf, fmtstr, is32bit ? VSD_CONSTHIST_FIELDVAL(hist,
	    hist_dtype, oob) : (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist,
	    hist_dtype, oob));

	for (i = 0; i < nbkts; i++) {
		/* Locate this bucket's bounds per histogram flavour. */
		switch (hist_dtype) {
		case VSD_DTYPE_CRHIST32:
		case VSD_DTYPE_CRHIST64:
			bkt_lb = VSD_CONSTCRHIST_FIELDPTR(hist, hist_dtype,
			    bkts[i].lb);
			if (i < nbkts - 1)
				bkt_ub = VSD_CONSTCRHIST_FIELDPTR(hist,
				    hist_dtype, bkts[i + 1].lb);
			else
				/* Last bucket is open-ended at the top. */
				bkt_ub = &numeric_limits[LIM_MAX][voi_dtype];
			break;
		case VSD_DTYPE_DRHIST32:
		case VSD_DTYPE_DRHIST64:
			bkt_lb = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
			    bkts[i].lb);
			bkt_ub = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
			    bkts[i].ub);
			break;
		case VSD_DTYPE_DVHIST32:
		case VSD_DTYPE_DVHIST64:
			bkt_lb = bkt_ub = VSD_CONSTDVHIST_FIELDPTR(hist,
			    hist_dtype, bkts[i].val);
			break;
		default:
			/* Unreachable: filtered by the first switch above. */
			break;
		}

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "\n\t\t\t\t";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "{";
			break;
		}
		sbuf_cat(buf, fmtstr);

		if (objdump) {
			switch (fmt) {
			case SB_STRFMT_FREEFORM:
				fmtstr = "bkt[%hu].";
				break;
			case SB_STRFMT_JSON:
			default:
				fmtstr = "\"bkt\":%hu,";
				break;
			}
			sbuf_printf(buf, fmtstr, i);
		}

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "{lb=";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "\"lb\":";
			break;
		}
		sbuf_cat(buf, fmtstr);
		stats_voistatdata_tostr((const struct voistatdata *)bkt_lb,
		    voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
		    fmt, buf, objdump);

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = ",ub=";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = ",\"ub\":";
			break;
		}
		sbuf_cat(buf, fmtstr);
		stats_voistatdata_tostr((const struct voistatdata *)bkt_ub,
		    voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
		    fmt, buf, objdump);

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
			break;
		}
		sbuf_printf(buf, fmtstr, is32bit ?
		    VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, bkts[i].cnt) :
		    (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist, hist_dtype,
		    bkts[i].cnt));

		if (fmt == SB_STRFMT_JSON && i < nbkts - 1)
			sbuf_putc(buf, ',');
	}
	if (fmt == SB_STRFMT_JSON)
		sbuf_cat(buf, "]");
}
1936
/*
 * Render a single voistatdata value of type vsd_dtype into buf using the
 * requested format. Composite types (histograms, t-digests, voistate)
 * dispatch to their dedicated renderers; fixed-point Q types are
 * stringified with Q_TOSTR into a stack buffer sized by Q_MAXSTRLEN.
 * Returns EINVAL on bad arguments, otherwise the sbuf's error state.
 */
int
stats_voistatdata_tostr(const struct voistatdata *vsd, enum vsd_dtype voi_dtype,
    enum vsd_dtype vsd_dtype, size_t vsd_sz, enum sb_str_fmt fmt,
    struct sbuf *buf, int objdump)
{
	const char *fmtstr;

	if (vsd == NULL || buf == NULL || voi_dtype >= VSD_NUM_DTYPES ||
	    vsd_dtype >= VSD_NUM_DTYPES || fmt >= SB_STRFMT_NUM_FMTS)
		return (EINVAL);

	switch (vsd_dtype) {
	case VSD_DTYPE_VOISTATE:
		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "prev=";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "\"prev\":";
			break;
		}
		sbuf_cat(buf, fmtstr);
		/*
		 * Render prev by passing it as *vsd and voi_dtype as vsd_dtype.
		 */
		stats_voistatdata_tostr(
		    (const struct voistatdata *)&CONSTVSD(voistate, vsd)->prev,
		    voi_dtype, voi_dtype, vsd_sz, fmt, buf, objdump);
		break;
	case VSD_DTYPE_INT_S32:
		sbuf_printf(buf, "%d", vsd->int32.s32);
		break;
	case VSD_DTYPE_INT_U32:
		sbuf_printf(buf, "%u", vsd->int32.u32);
		break;
	case VSD_DTYPE_INT_S64:
		sbuf_printf(buf, "%jd", (intmax_t)vsd->int64.s64);
		break;
	case VSD_DTYPE_INT_U64:
		sbuf_printf(buf, "%ju", (uintmax_t)vsd->int64.u64);
		break;
	case VSD_DTYPE_INT_SLONG:
		sbuf_printf(buf, "%ld", vsd->intlong.slong);
		break;
	case VSD_DTYPE_INT_ULONG:
		sbuf_printf(buf, "%lu", vsd->intlong.ulong);
		break;
	case VSD_DTYPE_Q_S32:
		{
		/* VLA sized to the worst-case base-10 rendering of this Q type. */
		char qstr[Q_MAXSTRLEN(vsd->q32.sq32, 10)];
		Q_TOSTR((s32q_t)vsd->q32.sq32, -1, 10, qstr, sizeof(qstr));
		sbuf_cat(buf, qstr);
		}
		break;
	case VSD_DTYPE_Q_U32:
		{
		char qstr[Q_MAXSTRLEN(vsd->q32.uq32, 10)];
		Q_TOSTR((u32q_t)vsd->q32.uq32, -1, 10, qstr, sizeof(qstr));
		sbuf_cat(buf, qstr);
		}
		break;
	case VSD_DTYPE_Q_S64:
		{
		char qstr[Q_MAXSTRLEN(vsd->q64.sq64, 10)];
		Q_TOSTR((s64q_t)vsd->q64.sq64, -1, 10, qstr, sizeof(qstr));
		sbuf_cat(buf, qstr);
		}
		break;
	case VSD_DTYPE_Q_U64:
		{
		char qstr[Q_MAXSTRLEN(vsd->q64.uq64, 10)];
		Q_TOSTR((u64q_t)vsd->q64.uq64, -1, 10, qstr, sizeof(qstr));
		sbuf_cat(buf, qstr);
		}
		break;
	case VSD_DTYPE_CRHIST32:
	case VSD_DTYPE_DRHIST32:
	case VSD_DTYPE_DVHIST32:
	case VSD_DTYPE_CRHIST64:
	case VSD_DTYPE_DRHIST64:
	case VSD_DTYPE_DVHIST64:
		stats_voistatdata_hist_tostr(voi_dtype, CONSTVSD(hist, vsd),
		    vsd_dtype, vsd_sz, fmt, buf, objdump);
		break;
	case VSD_DTYPE_TDGSTCLUST32:
	case VSD_DTYPE_TDGSTCLUST64:
		stats_voistatdata_tdgst_tostr(voi_dtype,
		    CONSTVSD(tdgst, vsd), vsd_dtype, vsd_sz, fmt, buf,
		    objdump);
		break;
	default:
		break;
	}

	return (sbuf_error(buf));
}
2034
/*
 * Blob iterator callback that renders one voi/voistat visit as free-form
 * text into the sbuf carried in ctx->usrctx. The blob header is emitted on
 * the first callback and per-VOI headers on each VOI's first voistat; with
 * SB_TOSTR_OBJDUMP set, internal layout fields and NULL slots are dumped
 * as well.
 */
static void
stats_v1_itercb_tostr_freeform(struct statsblobv1 *sb, struct voi *v,
    struct voistat *vs, struct sb_iter_ctx *ctx)
{
	struct sb_tostrcb_ctx *sctx;
	struct metablob *tpl_mb;
	struct sbuf *buf;
	void *vsd;
	uint8_t dump;

	sctx = ctx->usrctx;
	buf = sctx->buf;
	tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
	dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);

	if (ctx->flags & SB_IT_FIRST_CB) {
		sbuf_printf(buf, "struct statsblobv1@%p", sb);
		if (dump) {
			sbuf_printf(buf, ", abi=%hhu, endian=%hhu, maxsz=%hu, "
			    "cursz=%hu, created=%jd, lastrst=%jd, flags=0x%04hx, "
			    "stats_off=%hu, statsdata_off=%hu",
			    sb->abi, sb->endian, sb->maxsz, sb->cursz,
			    sb->created, sb->lastrst, sb->flags, sb->stats_off,
			    sb->statsdata_off);
		}
		sbuf_printf(buf, ", tplhash=%u", sb->tplhash);
	}

	if (ctx->flags & SB_IT_FIRST_VOISTAT) {
		sbuf_printf(buf, "\n\tvois[%hd]: id=%hd", ctx->vslot, v->id);
		/* NULL VOI: nothing further to print (or to index with). */
		if (v->id < 0)
			return;
		/* VOI names live in the template metablob, when available. */
		sbuf_printf(buf, ", name=\"%s\"", (tpl_mb == NULL) ? "" :
		    tpl_mb->voi_meta[v->id].name);
		if (dump)
		    sbuf_printf(buf, ", flags=0x%04hx, dtype=%s, "
		    "voistatmaxid=%hhd, stats_off=%hu", v->flags,
		    vsd_dtype2name[v->dtype], v->voistatmaxid, v->stats_off);
	}

	/* Skip NULL/voistate slots unless a full object dump was requested. */
	if (!dump && vs->stype <= 0)
		return;

	sbuf_printf(buf, "\n\t\tvois[%hd]stat[%hhd]: stype=", v->id, ctx->vsslot);
	if (vs->stype < 0) {
		sbuf_printf(buf, "%hhd", vs->stype);
		return;
	} else
		sbuf_printf(buf, "%s, errs=%hu", vs_stype2name[vs->stype],
		    vs->errs);
	vsd = BLOB_OFFSET(sb, vs->data_off);
	if (dump)
		sbuf_printf(buf, ", flags=0x%04x, dtype=%s, dsz=%hu, "
		    "data_off=%hu", vs->flags, vsd_dtype2name[vs->dtype],
		    vs->dsz, vs->data_off);

	sbuf_cat(buf, "\n\t\t\tvoistatdata: ");
	stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
	    sctx->fmt, buf, dump);
}
2095
/*
 * Blob iterator callback which renders the blob header, the VOI and the
 * voistat currently being visited as JSON into the caller-supplied sbuf.
 * With SB_TOSTR_OBJDUMP set, internal struct fields and null VOI/voistat
 * slots are also emitted, keyed by slot index. Invoked via
 * stats_v1_blob_iter() from stats_v1_blob_tostr().
 */
static void
stats_v1_itercb_tostr_json(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
    struct sb_iter_ctx *ctx)
{
	struct sb_tostrcb_ctx *sctx;
	struct metablob *tpl_mb;
	struct sbuf *buf;
	const char *fmtstr;
	void *vsd;
	uint8_t dump;

	sctx = ctx->usrctx;
	buf = sctx->buf;
	/* Template metadata (names) is only present with SB_TOSTR_META. */
	tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
	dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);

	/* Very first callback: open the outer object, emit blob metadata. */
	if (ctx->flags & SB_IT_FIRST_CB) {
		sbuf_putc(buf, '{');
		if (dump) {
			sbuf_printf(buf, "\"abi\":%hhu,\"endian\":%hhu,"
			    "\"maxsz\":%hu,\"cursz\":%hu,\"created\":%jd,"
			    "\"lastrst\":%jd,\"flags\":%hu,\"stats_off\":%hu,"
			    "\"statsdata_off\":%hu,", sb->abi,
			    sb->endian, sb->maxsz, sb->cursz, sb->created,
			    sb->lastrst, sb->flags, sb->stats_off,
			    sb->statsdata_off);
		}

		/* Without metadata, tplname renders as the unquoted JSON null. */
		if (tpl_mb == NULL)
			fmtstr = "\"tplname\":%s,\"tplhash\":%u,\"vois\":{";
		else
			fmtstr = "\"tplname\":\"%s\",\"tplhash\":%u,\"vois\":{";

		sbuf_printf(buf, fmtstr, tpl_mb ? tpl_mb->tplname : "null",
		    sb->tplhash);
	}

	/* First stat of a VOI: open this VOI's object. */
	if (ctx->flags & SB_IT_FIRST_VOISTAT) {
		if (dump) {
			/* Object dumps key VOIs by slot index. */
			sbuf_printf(buf, "\"[%d]\":{\"id\":%d", ctx->vslot,
			    v->id);
			/* Negative id: null VOI slot; close object, move on. */
			if (v->id < 0) {
				sbuf_cat(buf, "},");
				return;
			}

			if (tpl_mb == NULL)
				fmtstr = ",\"name\":%s,\"flags\":%hu,"
				    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
				    "\"stats_off\":%hu,";
			else
				fmtstr = ",\"name\":\"%s\",\"flags\":%hu,"
				    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
				    "\"stats_off\":%hu,";

			sbuf_printf(buf, fmtstr, tpl_mb ?
			    tpl_mb->voi_meta[v->id].name : "null", v->flags,
			    vsd_dtype2name[v->dtype], v->voistatmaxid,
			    v->stats_off);
		} else {
			/* Non-dump: key by name if available, else by VOI id. */
			if (tpl_mb == NULL) {
				sbuf_printf(buf, "\"[%hd]\":{", v->id);
			} else {
				sbuf_printf(buf, "\"%s\":{",
				    tpl_mb->voi_meta[v->id].name);
			}
		}
		sbuf_cat(buf, "\"stats\":{");
	}

	vsd = BLOB_OFFSET(sb, vs->data_off);
	if (dump) {
		/* Object dumps key stats by slot index, including null slots. */
		sbuf_printf(buf, "\"[%hhd]\":", ctx->vsslot);
		if (vs->stype < 0) {
			sbuf_cat(buf, "{\"stype\":-1},");
			return;
		}
		sbuf_printf(buf, "{\"stype\":\"%s\",\"errs\":%hu,\"flags\":%hu,"
		    "\"dtype\":\"%s\",\"data_off\":%hu,\"voistatdata\":{",
		    vs_stype2name[vs->stype], vs->errs, vs->flags,
		    vsd_dtype2name[vs->dtype], vs->data_off);
	} else if (vs->stype > 0) {
		if (tpl_mb == NULL)
			sbuf_printf(buf, "\"[%hhd]\":", vs->stype);
		else
			sbuf_printf(buf, "\"%s\":", vs_stype2name[vs->stype]);
	} else
		return;

	/* Render the stat's data; "null" if it holds no valid data yet. */
	if ((vs->flags & VS_VSDVALID) || dump) {
		if (!dump)
			sbuf_printf(buf, "{\"errs\":%hu,", vs->errs);
		/* Simple non-compound VSD types need a key. */
		if (!vsd_compoundtype[vs->dtype])
			sbuf_cat(buf, "\"val\":");
		stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
		    sctx->fmt, buf, dump);
		sbuf_cat(buf, dump ? "}}" : "}");
	} else
		sbuf_cat(buf, dump ? "null}" : "null");

	/* Close out the "stats" and VOI objects / the outer blob object. */
	if (ctx->flags & SB_IT_LAST_VOISTAT)
		sbuf_cat(buf, "}}");

	if (ctx->flags & SB_IT_LAST_CB)
		sbuf_cat(buf, "}}");
	else
		sbuf_putc(buf, ',');
}
2205
2206static int
2207stats_v1_itercb_tostr(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
2208    struct sb_iter_ctx *ctx)
2209{
2210	struct sb_tostrcb_ctx *sctx;
2211
2212	sctx = ctx->usrctx;
2213
2214	switch (sctx->fmt) {
2215	case SB_STRFMT_FREEFORM:
2216		stats_v1_itercb_tostr_freeform(sb, v, vs, ctx);
2217		break;
2218	case SB_STRFMT_JSON:
2219		stats_v1_itercb_tostr_json(sb, v, vs, ctx);
2220		break;
2221	default:
2222		break;
2223	}
2224
2225	return (sbuf_error(sctx->buf));
2226}
2227
2228int
2229stats_v1_blob_tostr(struct statsblobv1 *sb, struct sbuf *buf,
2230    enum sb_str_fmt fmt, uint32_t flags)
2231{
2232	struct sb_tostrcb_ctx sctx;
2233	uint32_t iflags;
2234
2235	if (sb == NULL || sb->abi != STATS_ABI_V1 || buf == NULL ||
2236	    fmt >= SB_STRFMT_NUM_FMTS)
2237		return (EINVAL);
2238
2239	sctx.buf = buf;
2240	sctx.fmt = fmt;
2241	sctx.flags = flags;
2242
2243	if (flags & SB_TOSTR_META) {
2244		if (stats_tpl_fetch(stats_tpl_fetch_allocid(NULL, sb->tplhash),
2245		    &sctx.tpl))
2246			return (EINVAL);
2247	} else
2248		sctx.tpl = NULL;
2249
2250	iflags = 0;
2251	if (flags & SB_TOSTR_OBJDUMP)
2252		iflags |= (SB_IT_NULLVOI | SB_IT_NULLVOISTAT);
2253	stats_v1_blob_iter(sb, stats_v1_itercb_tostr, &sctx, iflags);
2254
2255	return (sbuf_error(buf));
2256}
2257
2258static int
2259stats_v1_itercb_visit(struct statsblobv1 *sb, struct voi *v,
2260    struct voistat *vs, struct sb_iter_ctx *ctx)
2261{
2262	struct sb_visitcb_ctx *vctx;
2263	struct sb_visit sbv;
2264
2265	vctx = ctx->usrctx;
2266
2267	sbv.tplhash = sb->tplhash;
2268	sbv.voi_id = v->id;
2269	sbv.voi_dtype = v->dtype;
2270	sbv.vs_stype = vs->stype;
2271	sbv.vs_dtype = vs->dtype;
2272	sbv.vs_dsz = vs->dsz;
2273	sbv.vs_data = BLOB_OFFSET(sb, vs->data_off);
2274	sbv.vs_errs = vs->errs;
2275	sbv.flags = ctx->flags & (SB_IT_FIRST_CB | SB_IT_LAST_CB |
2276	    SB_IT_FIRST_VOI | SB_IT_LAST_VOI | SB_IT_FIRST_VOISTAT |
2277	    SB_IT_LAST_VOISTAT);
2278
2279	return (vctx->cb(&sbv, vctx->usrctx));
2280}
2281
2282int
2283stats_v1_blob_visit(struct statsblobv1 *sb, stats_blob_visitcb_t func,
2284    void *usrctx)
2285{
2286	struct sb_visitcb_ctx vctx;
2287
2288	if (sb == NULL || sb->abi != STATS_ABI_V1 || func == NULL)
2289		return (EINVAL);
2290
2291	vctx.cb = func;
2292	vctx.usrctx = usrctx;
2293
2294	stats_v1_blob_iter(sb, stats_v1_itercb_visit, &vctx, 0);
2295
2296	return (0);
2297}
2298
/*
 * Blob iterator callback which resets a voistat to its default initial
 * state: SUMs to zero, MAX/MIN to the dtype's min/max numeric limit
 * respectively, histogram bucket and out-of-band counts to zero, and
 * t-digests to an empty, zeroed centroid tree. VOI state stats are left
 * untouched. Also clears the stat's error count and VS_VSDVALID flag.
 * Always returns 0 so iteration continues over the whole blob.
 */
static int
stats_v1_icb_reset_voistat(struct statsblobv1 *sb, struct voi *v __unused,
    struct voistat *vs, struct sb_iter_ctx *ctx __unused)
{
	void *vsd;

	if (vs->stype == VS_STYPE_VOISTATE)
		return (0);

	vsd = BLOB_OFFSET(sb, vs->data_off);

	/* Perform the stat type's default reset action. */
	switch (vs->stype) {
	case VS_STYPE_SUM:
		/* Q types reset the integer value in place, keeping precision. */
		switch (vs->dtype) {
		case VSD_DTYPE_Q_S32:
			Q_SIFVAL(VSD(q32, vsd)->sq32, 0);
			break;
		case VSD_DTYPE_Q_U32:
			Q_SIFVAL(VSD(q32, vsd)->uq32, 0);
			break;
		case VSD_DTYPE_Q_S64:
			Q_SIFVAL(VSD(q64, vsd)->sq64, 0);
			break;
		case VSD_DTYPE_Q_U64:
			Q_SIFVAL(VSD(q64, vsd)->uq64, 0);
			break;
		default:
			bzero(vsd, vs->dsz);
			break;
		}
		break;
	case VS_STYPE_MAX:
		/* Reset to the dtype's minimum so any sample becomes the max. */
		switch (vs->dtype) {
		case VSD_DTYPE_Q_S32:
			Q_SIFVAL(VSD(q32, vsd)->sq32,
			    Q_IFMINVAL(VSD(q32, vsd)->sq32));
			break;
		case VSD_DTYPE_Q_U32:
			Q_SIFVAL(VSD(q32, vsd)->uq32,
			    Q_IFMINVAL(VSD(q32, vsd)->uq32));
			break;
		case VSD_DTYPE_Q_S64:
			Q_SIFVAL(VSD(q64, vsd)->sq64,
			    Q_IFMINVAL(VSD(q64, vsd)->sq64));
			break;
		case VSD_DTYPE_Q_U64:
			Q_SIFVAL(VSD(q64, vsd)->uq64,
			    Q_IFMINVAL(VSD(q64, vsd)->uq64));
			break;
		default:
			memcpy(vsd, &numeric_limits[LIM_MIN][vs->dtype],
			    vs->dsz);
			break;
		}
		break;
	case VS_STYPE_MIN:
		/* Reset to the dtype's maximum so any sample becomes the min. */
		switch (vs->dtype) {
		case VSD_DTYPE_Q_S32:
			Q_SIFVAL(VSD(q32, vsd)->sq32,
			    Q_IFMAXVAL(VSD(q32, vsd)->sq32));
			break;
		case VSD_DTYPE_Q_U32:
			Q_SIFVAL(VSD(q32, vsd)->uq32,
			    Q_IFMAXVAL(VSD(q32, vsd)->uq32));
			break;
		case VSD_DTYPE_Q_S64:
			Q_SIFVAL(VSD(q64, vsd)->sq64,
			    Q_IFMAXVAL(VSD(q64, vsd)->sq64));
			break;
		case VSD_DTYPE_Q_U64:
			Q_SIFVAL(VSD(q64, vsd)->uq64,
			    Q_IFMAXVAL(VSD(q64, vsd)->uq64));
			break;
		default:
			memcpy(vsd, &numeric_limits[LIM_MAX][vs->dtype],
			    vs->dsz);
			break;
		}
		break;
	case VS_STYPE_HIST:
		{
		/* Reset bucket counts. */
		struct voistatdata_hist *hist;
		int i, is32bit;
		uint16_t nbkts;

		hist = VSD(hist, vsd);
		/* Derive bucket count and counter width from dtype/dsz. */
		switch (vs->dtype) {
		case VSD_DTYPE_CRHIST32:
			nbkts = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
			is32bit = 1;
			break;
		case VSD_DTYPE_DRHIST32:
			nbkts = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
			is32bit = 1;
			break;
		case VSD_DTYPE_DVHIST32:
			nbkts = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
			is32bit = 1;
			break;
		case VSD_DTYPE_CRHIST64:
			nbkts = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
			is32bit = 0;
			break;
		case VSD_DTYPE_DRHIST64:
			nbkts = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
			is32bit = 0;
			break;
		case VSD_DTYPE_DVHIST64:
			nbkts = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
			is32bit = 0;
			break;
		default:
			return (0);
		}

		/* Zero the out-of-band count, then every bucket count;
		 * bucket bounds/values are configuration and are kept. */
		bzero(VSD_HIST_FIELDPTR(hist, vs->dtype, oob),
		    is32bit ? sizeof(uint32_t) : sizeof(uint64_t));
		for (i = nbkts - 1; i >= 0; i--) {
			bzero(VSD_HIST_FIELDPTR(hist, vs->dtype,
			    bkts[i].cnt), is32bit ? sizeof(uint32_t) :
			    sizeof(uint64_t));
		}
		break;
		}
	case VS_STYPE_TDGST:
		{
		/* Reset sample count centroids array/tree. */
		struct voistatdata_tdgst *tdgst;
		struct ctdth32 *ctd32tree;
		struct ctdth64 *ctd64tree;
		struct voistatdata_tdgstctd32 *ctd32;
		struct voistatdata_tdgstctd64 *ctd64;

		tdgst = VSD(tdgst, vsd);
		switch (vs->dtype) {
		case VSD_DTYPE_TDGSTCLUST32:
			VSD(tdgstclust32, tdgst)->smplcnt = 0;
			VSD(tdgstclust32, tdgst)->compcnt = 0;
			ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
			/* Re-init the ARB, zeroing each centroid's cnt/mu. */
			ARB_INIT(ctd32, ctdlnk, ctd32tree,
			    ARB_MAXNODES(ctd32tree)) {
				ctd32->cnt = 0;
				Q_SIFVAL(ctd32->mu, 0);
			}
#ifdef DIAGNOSTIC
			RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
#endif
		break;
		case VSD_DTYPE_TDGSTCLUST64:
			VSD(tdgstclust64, tdgst)->smplcnt = 0;
			VSD(tdgstclust64, tdgst)->compcnt = 0;
			ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
			/* Re-init the ARB, zeroing each centroid's cnt/mu. */
			ARB_INIT(ctd64, ctdlnk, ctd64tree,
			    ARB_MAXNODES(ctd64tree)) {
				ctd64->cnt = 0;
				Q_SIFVAL(ctd64->mu, 0);
			}
#ifdef DIAGNOSTIC
			RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
#endif
		break;
		default:
			return (0);
		}
		break;
		}
	default:
		KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
		break;
	}

	vs->errs = 0;
	vs->flags &= ~VS_VSDVALID;

	return (0);
}
2477
2478int
2479stats_v1_blob_snapshot(struct statsblobv1 **dst, size_t dstmaxsz,
2480    struct statsblobv1 *src, uint32_t flags)
2481{
2482	int error;
2483
2484	if (src != NULL && src->abi == STATS_ABI_V1) {
2485		error = stats_v1_blob_clone(dst, dstmaxsz, src, flags);
2486		if (!error) {
2487			if (flags & SB_CLONE_RSTSRC) {
2488				stats_v1_blob_iter(src,
2489				    stats_v1_icb_reset_voistat, NULL, 0);
2490				src->lastrst = stats_sbinuptime();
2491			}
2492			stats_v1_blob_finalise(*dst);
2493		}
2494	} else
2495		error = EINVAL;
2496
2497	return (error);
2498}
2499
/*
 * Update a MAX voistat with a new VOI sample: the tracked maximum is
 * replaced iff the sample is strictly greater, and VS_VSDVALID is set
 * when the stored value changes. For Q types the copy itself can fail,
 * in which case the error is returned and the stored value is left
 * unchanged. Returns 0 on success, EINVAL for unknown dtypes.
 */
static inline int
stats_v1_voi_update_max(enum vsd_dtype voi_dtype __unused,
    struct voistatdata *voival, struct voistat *vs, void *vsd)
{
	int error;

	KASSERT(vs->dtype < VSD_NUM_DTYPES,
	    ("Unknown VSD dtype %d", vs->dtype));

	error = 0;

	switch (vs->dtype) {
	case VSD_DTYPE_INT_S32:
		if (VSD(int32, vsd)->s32 < voival->int32.s32) {
			VSD(int32, vsd)->s32 = voival->int32.s32;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_U32:
		if (VSD(int32, vsd)->u32 < voival->int32.u32) {
			VSD(int32, vsd)->u32 = voival->int32.u32;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_S64:
		if (VSD(int64, vsd)->s64 < voival->int64.s64) {
			VSD(int64, vsd)->s64 = voival->int64.s64;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_U64:
		if (VSD(int64, vsd)->u64 < voival->int64.u64) {
			VSD(int64, vsd)->u64 = voival->int64.u64;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_SLONG:
		if (VSD(intlong, vsd)->slong < voival->intlong.slong) {
			VSD(intlong, vsd)->slong = voival->intlong.slong;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_ULONG:
		if (VSD(intlong, vsd)->ulong < voival->intlong.ulong) {
			VSD(intlong, vsd)->ulong = voival->intlong.ulong;
			vs->flags |= VS_VSDVALID;
		}
		break;
	/*
	 * Q types: compare, then copy; the copy is only attempted when the
	 * sample is greater (short-circuit &&), and its error is latched.
	 */
	case VSD_DTYPE_Q_S32:
		if (Q_QLTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
		    voival->q32.sq32)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_U32:
		if (Q_QLTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
		    voival->q32.uq32)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_S64:
		if (Q_QLTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
		    voival->q64.sq64)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_U64:
		if (Q_QLTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
		    voival->q64.uq64)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}
2583
/*
 * Update a MIN voistat with a new VOI sample: the tracked minimum is
 * replaced iff the sample is strictly smaller, and VS_VSDVALID is set
 * when the stored value changes. For Q types the copy itself can fail,
 * in which case the error is returned and the stored value is left
 * unchanged. Returns 0 on success, EINVAL for unknown dtypes.
 */
static inline int
stats_v1_voi_update_min(enum vsd_dtype voi_dtype __unused,
    struct voistatdata *voival, struct voistat *vs, void *vsd)
{
	int error;

	KASSERT(vs->dtype < VSD_NUM_DTYPES,
	    ("Unknown VSD dtype %d", vs->dtype));

	error = 0;

	switch (vs->dtype) {
	case VSD_DTYPE_INT_S32:
		if (VSD(int32, vsd)->s32 > voival->int32.s32) {
			VSD(int32, vsd)->s32 = voival->int32.s32;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_U32:
		if (VSD(int32, vsd)->u32 > voival->int32.u32) {
			VSD(int32, vsd)->u32 = voival->int32.u32;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_S64:
		if (VSD(int64, vsd)->s64 > voival->int64.s64) {
			VSD(int64, vsd)->s64 = voival->int64.s64;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_U64:
		if (VSD(int64, vsd)->u64 > voival->int64.u64) {
			VSD(int64, vsd)->u64 = voival->int64.u64;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_SLONG:
		if (VSD(intlong, vsd)->slong > voival->intlong.slong) {
			VSD(intlong, vsd)->slong = voival->intlong.slong;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_ULONG:
		if (VSD(intlong, vsd)->ulong > voival->intlong.ulong) {
			VSD(intlong, vsd)->ulong = voival->intlong.ulong;
			vs->flags |= VS_VSDVALID;
		}
		break;
	/*
	 * Q types: compare, then copy; the copy is only attempted when the
	 * sample is smaller (short-circuit &&), and its error is latched.
	 */
	case VSD_DTYPE_Q_S32:
		if (Q_QGTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
		    voival->q32.sq32)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_U32:
		if (Q_QGTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
		    voival->q32.uq32)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_S64:
		if (Q_QGTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
		    voival->q64.sq64)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_U64:
		if (Q_QGTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
		    voival->q64.uq64)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}
2667
/*
 * Update a SUM voistat by adding the new VOI sample to the running
 * total. Plain integer types add directly (unsigned types wrap per C
 * semantics); Q-type additions may return an error, in which case it is
 * returned and VS_VSDVALID is not set. Returns 0 on success, EINVAL for
 * unknown dtypes.
 */
static inline int
stats_v1_voi_update_sum(enum vsd_dtype voi_dtype __unused,
    struct voistatdata *voival, struct voistat *vs, void *vsd)
{
	int error;

	KASSERT(vs->dtype < VSD_NUM_DTYPES,
	    ("Unknown VSD dtype %d", vs->dtype));

	error = 0;

	switch (vs->dtype) {
	case VSD_DTYPE_INT_S32:
		VSD(int32, vsd)->s32 += voival->int32.s32;
		break;
	case VSD_DTYPE_INT_U32:
		VSD(int32, vsd)->u32 += voival->int32.u32;
		break;
	case VSD_DTYPE_INT_S64:
		VSD(int64, vsd)->s64 += voival->int64.s64;
		break;
	case VSD_DTYPE_INT_U64:
		VSD(int64, vsd)->u64 += voival->int64.u64;
		break;
	case VSD_DTYPE_INT_SLONG:
		VSD(intlong, vsd)->slong += voival->intlong.slong;
		break;
	case VSD_DTYPE_INT_ULONG:
		VSD(intlong, vsd)->ulong += voival->intlong.ulong;
		break;
	case VSD_DTYPE_Q_S32:
		error = Q_QADDQ(&VSD(q32, vsd)->sq32, voival->q32.sq32);
		break;
	case VSD_DTYPE_Q_U32:
		error = Q_QADDQ(&VSD(q32, vsd)->uq32, voival->q32.uq32);
		break;
	case VSD_DTYPE_Q_S64:
		error = Q_QADDQ(&VSD(q64, vsd)->sq64, voival->q64.sq64);
		break;
	case VSD_DTYPE_Q_U64:
		error = Q_QADDQ(&VSD(q64, vsd)->uq64, voival->q64.uq64);
		break;
	default:
		error = EINVAL;
		break;
	}

	/* The stat's data is only valid once an update has succeeded. */
	if (!error)
		vs->flags |= VS_VSDVALID;

	return (error);
}
2720
/*
 * Update a histogram voistat with a new VOI sample: linearly search the
 * buckets for one whose range (or, for discrete-value histograms, exact
 * value) matches the sample and bump its count, or bump the out-of-band
 * count when no bucket matches. Always marks the stat's data valid.
 * Returns 0 on success, EINVAL for unknown histogram/VOI dtypes.
 */
static inline int
stats_v1_voi_update_hist(enum vsd_dtype voi_dtype, struct voistatdata *voival,
    struct voistat *vs, struct voistatdata_hist *hist)
{
	struct voistatdata_numeric *bkt_lb, *bkt_ub;
	uint64_t *oob64, *cnt64;
	uint32_t *oob32, *cnt32;
	int error, i, found, is32bit, has_ub, eq_only;

	error = 0;

	/*
	 * Characterise the histogram variant: counter width (is32bit),
	 * whether buckets carry an explicit upper bound (has_ub), and
	 * whether matching is exact-value only (eq_only). 'i' is set to
	 * the bucket count derivable from the stat's data size.
	 */
	switch (vs->dtype) {
	case VSD_DTYPE_CRHIST32:
		i = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
		is32bit = 1;
		has_ub = eq_only = 0;
		oob32 = &VSD(crhist32, hist)->oob;
		break;
	case VSD_DTYPE_DRHIST32:
		i = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
		is32bit = has_ub = 1;
		eq_only = 0;
		oob32 = &VSD(drhist32, hist)->oob;
		break;
	case VSD_DTYPE_DVHIST32:
		i = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
		is32bit = eq_only = 1;
		has_ub = 0;
		oob32 = &VSD(dvhist32, hist)->oob;
		break;
	case VSD_DTYPE_CRHIST64:
		i = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
		is32bit = has_ub = eq_only = 0;
		oob64 = &VSD(crhist64, hist)->oob;
		break;
	case VSD_DTYPE_DRHIST64:
		i = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
		is32bit = eq_only = 0;
		has_ub = 1;
		oob64 = &VSD(drhist64, hist)->oob;
		break;
	case VSD_DTYPE_DVHIST64:
		i = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
		is32bit = has_ub = 0;
		eq_only = 1;
		oob64 = &VSD(dvhist64, hist)->oob;
		break;
	default:
		return (EINVAL);
	}
	i--; /* Adjust for 0-based array index. */

	/* XXXLAS: Should probably use a better bucket search algorithm. ARB? */
	/* Walk buckets from highest index down until one matches the sample. */
	for (found = 0; i >= 0 && !found; i--) {
		/* Latch pointers to this bucket's bounds and count. */
		switch (vs->dtype) {
		case VSD_DTYPE_CRHIST32:
			bkt_lb = &VSD(crhist32, hist)->bkts[i].lb;
			cnt32 = &VSD(crhist32, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DRHIST32:
			bkt_lb = &VSD(drhist32, hist)->bkts[i].lb;
			bkt_ub = &VSD(drhist32, hist)->bkts[i].ub;
			cnt32 = &VSD(drhist32, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DVHIST32:
			bkt_lb = &VSD(dvhist32, hist)->bkts[i].val;
			cnt32 = &VSD(dvhist32, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_CRHIST64:
			bkt_lb = &VSD(crhist64, hist)->bkts[i].lb;
			cnt64 = &VSD(crhist64, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DRHIST64:
			bkt_lb = &VSD(drhist64, hist)->bkts[i].lb;
			bkt_ub = &VSD(drhist64, hist)->bkts[i].ub;
			cnt64 = &VSD(drhist64, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DVHIST64:
			bkt_lb = &VSD(dvhist64, hist)->bkts[i].val;
			cnt64 = &VSD(dvhist64, hist)->bkts[i].cnt;
			break;
		default:
			return (EINVAL);
		}

		/*
		 * Match per the VOI's dtype: exact equality for eq_only
		 * histograms, otherwise lb <= sample (< ub when has_ub).
		 */
		switch (voi_dtype) {
		case VSD_DTYPE_INT_S32:
			if (voival->int32.s32 >= bkt_lb->int32.s32) {
				if ((eq_only && voival->int32.s32 ==
				    bkt_lb->int32.s32) ||
				    (!eq_only && (!has_ub ||
				    voival->int32.s32 < bkt_ub->int32.s32)))
					found = 1;
			}
			break;
		case VSD_DTYPE_INT_U32:
			if (voival->int32.u32 >= bkt_lb->int32.u32) {
				if ((eq_only && voival->int32.u32 ==
				    bkt_lb->int32.u32) ||
				    (!eq_only && (!has_ub ||
				    voival->int32.u32 < bkt_ub->int32.u32)))
					found = 1;
			}
			break;
		case VSD_DTYPE_INT_S64:
			if (voival->int64.s64 >= bkt_lb->int64.s64)
				if ((eq_only && voival->int64.s64 ==
				    bkt_lb->int64.s64) ||
				    (!eq_only && (!has_ub ||
				    voival->int64.s64 < bkt_ub->int64.s64)))
					found = 1;
			break;
		case VSD_DTYPE_INT_U64:
			if (voival->int64.u64 >= bkt_lb->int64.u64)
				if ((eq_only && voival->int64.u64 ==
				    bkt_lb->int64.u64) ||
				    (!eq_only && (!has_ub ||
				    voival->int64.u64 < bkt_ub->int64.u64)))
					found = 1;
			break;
		case VSD_DTYPE_INT_SLONG:
			if (voival->intlong.slong >= bkt_lb->intlong.slong)
				if ((eq_only && voival->intlong.slong ==
				    bkt_lb->intlong.slong) ||
				    (!eq_only && (!has_ub ||
				    voival->intlong.slong <
				    bkt_ub->intlong.slong)))
					found = 1;
			break;
		case VSD_DTYPE_INT_ULONG:
			if (voival->intlong.ulong >= bkt_lb->intlong.ulong)
				if ((eq_only && voival->intlong.ulong ==
				    bkt_lb->intlong.ulong) ||
				    (!eq_only && (!has_ub ||
				    voival->intlong.ulong <
				    bkt_ub->intlong.ulong)))
					found = 1;
			break;
		case VSD_DTYPE_Q_S32:
			if (Q_QGEQ(voival->q32.sq32, bkt_lb->q32.sq32))
				if ((eq_only && Q_QEQ(voival->q32.sq32,
				    bkt_lb->q32.sq32)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q32.sq32,
				    bkt_ub->q32.sq32))))
					found = 1;
			break;
		case VSD_DTYPE_Q_U32:
			if (Q_QGEQ(voival->q32.uq32, bkt_lb->q32.uq32))
				if ((eq_only && Q_QEQ(voival->q32.uq32,
				    bkt_lb->q32.uq32)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q32.uq32,
				    bkt_ub->q32.uq32))))
					found = 1;
			break;
		case VSD_DTYPE_Q_S64:
			if (Q_QGEQ(voival->q64.sq64, bkt_lb->q64.sq64))
				if ((eq_only && Q_QEQ(voival->q64.sq64,
				    bkt_lb->q64.sq64)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q64.sq64,
				    bkt_ub->q64.sq64))))
					found = 1;
			break;
		case VSD_DTYPE_Q_U64:
			if (Q_QGEQ(voival->q64.uq64, bkt_lb->q64.uq64))
				if ((eq_only && Q_QEQ(voival->q64.uq64,
				    bkt_lb->q64.uq64)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q64.uq64,
				    bkt_ub->q64.uq64))))
					found = 1;
			break;
		default:
			break;
		}
	}

	/* Bump the matching bucket's count, or the out-of-band count. */
	if (found) {
		if (is32bit)
			*cnt32 += 1;
		else
			*cnt64 += 1;
	} else {
		if (is32bit)
			*oob32 += 1;
		else
			*oob64 += 1;
	}

	vs->flags |= VS_VSDVALID;
	return (error);
}
2915
/*
 * Compress a full t-digest by rebuilding its centroid ARB: if the tree
 * is full, reset it, return all centroids to the free list in a
 * pseudorandom order, then walk the free list re-inserting each
 * centroid's mu/cnt via stats_v1_vsd_tdgst_add() so nearby centroids
 * merge. A no-op (returns 0) when the tree is not yet full. Returns
 * EINVAL for unknown dtypes, otherwise any error latched during
 * re-insertion. 'attempt' is passed through to the add routine.
 */
static inline int
stats_v1_vsd_tdgst_compress(enum vsd_dtype vs_dtype,
    struct voistatdata_tdgst *tdgst, int attempt)
{
	struct ctdth32 *ctd32tree;
	struct ctdth64 *ctd64tree;
	struct voistatdata_tdgstctd32 *ctd32;
	struct voistatdata_tdgstctd64 *ctd64;
	uint64_t ebits, idxmask;
	uint32_t bitsperidx, nebits;
	int error, idx, is32bit, maxctds, remctds, tmperr;

	error = 0;

	/* Reset the tree and bump the compression count for this width. */
	switch (vs_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
		if (!ARB_FULL(ctd32tree))
			return (0);
		VSD(tdgstclust32, tdgst)->compcnt++;
		maxctds = remctds = ARB_MAXNODES(ctd32tree);
		ARB_RESET_TREE(ctd32tree, ctdth32, maxctds);
		VSD(tdgstclust32, tdgst)->smplcnt = 0;
		is32bit = 1;
		ctd64tree = NULL;
		ctd64 = NULL;
#ifdef DIAGNOSTIC
		RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
#endif
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
		if (!ARB_FULL(ctd64tree))
			return (0);
		VSD(tdgstclust64, tdgst)->compcnt++;
		maxctds = remctds = ARB_MAXNODES(ctd64tree);
		ARB_RESET_TREE(ctd64tree, ctdth64, maxctds);
		VSD(tdgstclust64, tdgst)->smplcnt = 0;
		is32bit = 0;
		ctd32tree = NULL;
		ctd32 = NULL;
#ifdef DIAGNOSTIC
		RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
#endif
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Rebuild the t-digest ARB by pseudorandomly selecting centroids and
	 * re-inserting the mu/cnt of each as a value and corresponding weight.
	 */

	/*
	 * XXXCEM: random(9) is currently rand(3), not random(3).  rand(3)
	 * RAND_MAX happens to be approximately 31 bits (range [0,
	 * 0x7ffffffd]), so the math kinda works out.  When/if this portion of
	 * the code is compiled in userspace, it gets the random(3) behavior,
	 * which has expected range [0, 0x7fffffff].
	 */
#define	bitsperrand 31
	ebits = 0;
	nebits = 0;
	bitsperidx = fls(maxctds);
	KASSERT(bitsperidx <= sizeof(ebits) << 3,
	    ("%s: bitsperidx=%d, ebits=%d",
	    __func__, bitsperidx, (int)(sizeof(ebits) << 3)));
	idxmask = (UINT64_C(1) << bitsperidx) - 1;

	/* Initialise the free list with randomised centroid indices. */
	for (; remctds > 0; remctds--) {
		/* Refill the entropy pool 'ebits' bitsperidx bits at a time. */
		while (nebits < bitsperidx) {
			ebits |= ((uint64_t)random()) << nebits;
			nebits += bitsperrand;
			if (nebits > (sizeof(ebits) << 3))
				nebits = sizeof(ebits) << 3;
		}
		idx = ebits & idxmask;
		nebits -= bitsperidx;
		ebits >>= bitsperidx;

		/*
		 * Select the next centroid to put on the ARB free list. We
		 * start with the centroid at our randomly selected array index,
		 * and work our way forwards until finding one (the latter
		 * aspect reduces re-insertion randomness, but is good enough).
		 */
		do {
			if (idx >= maxctds)
				idx %= maxctds;

			if (is32bit)
				ctd32 = ARB_NODE(ctd32tree, idx);
			else
				ctd64 = ARB_NODE(ctd64tree, idx);
		} while ((is32bit ? ARB_ISFREE(ctd32, ctdlnk) :
		    ARB_ISFREE(ctd64, ctdlnk)) && ++idx);

		/* Put the centroid on the ARB free list. */
		if (is32bit)
			ARB_RETURNFREE(ctd32tree, ctd32, ctdlnk);
		else
			ARB_RETURNFREE(ctd64tree, ctd64, ctdlnk);
	}

	/*
	 * The free list now contains the randomised indices of every centroid.
	 * Walk the free list from start to end, re-inserting each centroid's
	 * mu/cnt. The tdgst_add() call may or may not consume the free centroid
	 * we re-insert values from during each loop iteration, so we must latch
	 * the index of the next free list centroid before the re-insertion
	 * call. The previous loop above should have left the centroid pointer
	 * pointing to the element at the head of the free list.
	 */
	KASSERT((is32bit ?
	    ARB_FREEIDX(ctd32tree) == ARB_SELFIDX(ctd32tree, ctd32) :
	    ARB_FREEIDX(ctd64tree) == ARB_SELFIDX(ctd64tree, ctd64)),
	    ("%s: t-digest ARB@%p free list bug", __func__,
	    (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
	remctds = maxctds;
	while ((is32bit ? ctd32 != NULL : ctd64 != NULL)) {
		tmperr = 0;
		if (is32bit) {
			s64q_t x;

			idx = ARB_NEXTFREEIDX(ctd32, ctdlnk);
			/* Cloning a s32q_t into a s64q_t should never fail. */
			tmperr = Q_QCLONEQ(&x, ctd32->mu);
			tmperr = tmperr ? tmperr : stats_v1_vsd_tdgst_add(
			    vs_dtype, tdgst, x, ctd32->cnt, attempt);
			ctd32 = ARB_NODE(ctd32tree, idx);
			KASSERT(ctd32 == NULL || ARB_ISFREE(ctd32, ctdlnk),
			    ("%s: t-digest ARB@%p free list bug", __func__,
			    ctd32tree));
		} else {
			idx = ARB_NEXTFREEIDX(ctd64, ctdlnk);
			tmperr = stats_v1_vsd_tdgst_add(vs_dtype, tdgst,
			    ctd64->mu, ctd64->cnt, attempt);
			ctd64 = ARB_NODE(ctd64tree, idx);
			KASSERT(ctd64 == NULL || ARB_ISFREE(ctd64, ctdlnk),
			    ("%s: t-digest ARB@%p free list bug", __func__,
			    ctd64tree));
		}
		/*
		 * This process should not produce errors, bugs notwithstanding.
		 * Just in case, latch any errors and attempt all re-insertions.
		 */
		error = tmperr ? tmperr : error;
		remctds--;
	}

	KASSERT(remctds == 0, ("%s: t-digest ARB@%p free list bug", __func__,
	    (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));

	return (error);
}
3073
3074static inline int
3075stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst,
3076    s64q_t x, uint64_t weight, int attempt)
3077{
3078#ifdef DIAGNOSTIC
3079	char qstr[Q_MAXSTRLEN(x, 10)];
3080#endif
3081	struct ctdth32 *ctd32tree;
3082	struct ctdth64 *ctd64tree;
3083	void *closest, *cur, *lb, *ub;
3084	struct voistatdata_tdgstctd32 *ctd32;
3085	struct voistatdata_tdgstctd64 *ctd64;
3086	uint64_t cnt, smplcnt, sum, tmpsum;
3087	s64q_t k, minz, q, z;
3088	int error, is32bit, n;
3089
3090	error = 0;
3091	minz = Q_INI(&z, 0, 0, Q_NFBITS(x));
3092
3093	switch (vs_dtype) {
3094	case VSD_DTYPE_TDGSTCLUST32:
3095		if ((UINT32_MAX - weight) < VSD(tdgstclust32, tdgst)->smplcnt)
3096			error = EOVERFLOW;
3097		smplcnt = VSD(tdgstclust32, tdgst)->smplcnt;
3098		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
3099		is32bit = 1;
3100		ctd64tree = NULL;
3101		ctd64 = NULL;
3102		break;
3103	case VSD_DTYPE_TDGSTCLUST64:
3104		if ((UINT64_MAX - weight) < VSD(tdgstclust64, tdgst)->smplcnt)
3105			error = EOVERFLOW;
3106		smplcnt = VSD(tdgstclust64, tdgst)->smplcnt;
3107		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
3108		is32bit = 0;
3109		ctd32tree = NULL;
3110		ctd32 = NULL;
3111		break;
3112	default:
3113		error = EINVAL;
3114		break;
3115	}
3116
3117	if (error)
3118		return (error);
3119
3120	/*
3121	 * Inspired by Ted Dunning's AVLTreeDigest.java
3122	 */
3123	do {
3124#if defined(DIAGNOSTIC)
3125		KASSERT(attempt < 5,
3126		    ("%s: Too many attempts", __func__));
3127#endif
3128		if (attempt >= 5)
3129			return (EAGAIN);
3130
3131		Q_SIFVAL(minz, Q_IFMAXVAL(minz));
3132		closest = ub = NULL;
3133		sum = tmpsum = 0;
3134
3135		if (is32bit)
3136			lb = cur = (void *)(ctd32 = ARB_MIN(ctdth32, ctd32tree));
3137		else
3138			lb = cur = (void *)(ctd64 = ARB_MIN(ctdth64, ctd64tree));
3139
3140		if (lb == NULL) /* Empty tree. */
3141			lb = (is32bit ? (void *)ARB_ROOT(ctd32tree) :
3142			    (void *)ARB_ROOT(ctd64tree));
3143
3144		/*
3145		 * Find the set of centroids with minimum distance to x and
3146		 * compute the sum of counts for all centroids with mean less
3147		 * than the first centroid in the set.
3148		 */
3149		for (; cur != NULL;
3150		    cur = (is32bit ?
3151		    (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
3152		    (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
3153			if (is32bit) {
3154				cnt = ctd32->cnt;
3155				KASSERT(Q_PRECEQ(ctd32->mu, x),
3156				    ("%s: Q_RELPREC(mu,x)=%d", __func__,
3157				    Q_RELPREC(ctd32->mu, x)));
3158				/* Ok to assign as both have same precision. */
3159				z = ctd32->mu;
3160			} else {
3161				cnt = ctd64->cnt;
3162				KASSERT(Q_PRECEQ(ctd64->mu, x),
3163				    ("%s: Q_RELPREC(mu,x)=%d", __func__,
3164				    Q_RELPREC(ctd64->mu, x)));
3165				/* Ok to assign as both have same precision. */
3166				z = ctd64->mu;
3167			}
3168
3169			error = Q_QSUBQ(&z, x);
3170#if defined(DIAGNOSTIC)
3171			KASSERT(!error, ("%s: unexpected error %d", __func__,
3172			    error));
3173#endif
3174			if (error)
3175				return (error);
3176
3177			z = Q_QABS(z);
3178			if (Q_QLTQ(z, minz)) {
3179				minz = z;
3180				lb = cur;
3181				sum = tmpsum;
3182				tmpsum += cnt;
3183			} else if (Q_QGTQ(z, minz)) {
3184				ub = cur;
3185				break;
3186			}
3187		}
3188
3189		cur = (is32bit ?
3190		    (void *)(ctd32 = (struct voistatdata_tdgstctd32 *)lb) :
3191		    (void *)(ctd64 = (struct voistatdata_tdgstctd64 *)lb));
3192
3193		for (n = 0; cur != ub; cur = (is32bit ?
3194		    (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
3195		    (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
3196			if (is32bit)
3197				cnt = ctd32->cnt;
3198			else
3199				cnt = ctd64->cnt;
3200
3201			q = Q_CTRLINI(16);
3202			if (smplcnt == 1)
3203				error = Q_QFRACI(&q, 1, 2);
3204			else
3205				/* [ sum + ((cnt - 1) / 2) ] / (smplcnt - 1) */
3206				error = Q_QFRACI(&q, (sum << 1) + cnt - 1,
3207				    (smplcnt - 1) << 1);
3208			k = q;
3209			/* k = q x 4 x samplcnt x attempt */
3210			error |= Q_QMULI(&k, 4 * smplcnt * attempt);
3211			/* k = k x (1 - q) */
3212			error |= Q_QSUBI(&q, 1);
3213			q = Q_QABS(q);
3214			error |= Q_QMULQ(&k, q);
3215#if defined(DIAGNOSTIC)
3216#if !defined(_KERNEL)
3217			double q_dbl, k_dbl, q2d, k2d;
3218			q2d = Q_Q2D(q);
3219			k2d = Q_Q2D(k);
3220			q_dbl = smplcnt == 1 ? 0.5 :
3221			    (sum + ((cnt - 1)  / 2.0)) / (double)(smplcnt - 1);
3222			k_dbl = 4 * smplcnt * q_dbl * (1.0 - q_dbl) * attempt;
3223			/*
3224			 * If the difference between q and q_dbl is greater than
3225			 * the fractional precision of q, something is off.
3226			 * NB: q is holding the value of 1 - q
3227			 */
3228			q_dbl = 1.0 - q_dbl;
3229			KASSERT((q_dbl > q2d ? q_dbl - q2d : q2d - q_dbl) <
3230			    (1.05 * ((double)1 / (double)(1ULL << Q_NFBITS(q)))),
3231			    ("Q-type q bad precision"));
3232			KASSERT((k_dbl > k2d ? k_dbl - k2d : k2d - k_dbl) <
3233			    1.0 + (0.01 * smplcnt),
3234			    ("Q-type k bad precision"));
3235#endif /* !_KERNEL */
3236			KASSERT(!error, ("%s: unexpected error %d", __func__,
3237			    error));
3238#endif /* DIAGNOSTIC */
3239			if (error)
3240				return (error);
3241			if ((is32bit && ((ctd32->cnt + weight) <=
3242			    (uint64_t)Q_GIVAL(k))) ||
3243			    (!is32bit && ((ctd64->cnt + weight) <=
3244			    (uint64_t)Q_GIVAL(k)))) {
3245				n++;
3246				/* random() produces 31 bits. */
3247				if (random() < (INT32_MAX / n))
3248					closest = cur;
3249			}
3250			sum += cnt;
3251		}
3252	} while (closest == NULL &&
3253	    (is32bit ? ARB_FULL(ctd32tree) : ARB_FULL(ctd64tree)) &&
3254	    (error = stats_v1_vsd_tdgst_compress(vs_dtype, tdgst,
3255	    attempt++)) == 0);
3256
3257	if (error)
3258		return (error);
3259
3260	if (closest != NULL) {
3261		/* Merge with an existing centroid. */
3262		if (is32bit) {
3263			ctd32 = (struct voistatdata_tdgstctd32 *)closest;
3264			error = Q_QSUBQ(&x, ctd32->mu);
3265			/*
3266			 * The following calculation "x / (cnt + weight)"
3267			 * computes the amount by which to adjust the centroid's
3268			 * mu value in order to merge in the VOI sample.
3269			 *
3270			 * It can underflow (Q_QDIVI() returns ERANGE) when the
3271			 * user centroids' fractional precision (which is
3272			 * inherited by 'x') is too low to represent the result.
3273			 *
3274			 * A sophisticated approach to dealing with this issue
3275			 * would minimise accumulation of error by tracking
3276			 * underflow per centroid and making an adjustment when
3277			 * a LSB's worth of underflow has accumulated.
3278			 *
3279			 * A simpler approach is to let the result underflow
3280			 * i.e. merge the VOI sample into the centroid without
3281			 * adjusting the centroid's mu, and rely on the user to
3282			 * specify their t-digest with sufficient centroid
3283			 * fractional precision such that the accumulation of
3284			 * error from multiple underflows is of no material
3285			 * consequence to the centroid's final value of mu.
3286			 *
3287			 * For the moment, the latter approach is employed by
3288			 * simply ignoring ERANGE here.
3289			 *
3290			 * XXXLAS: Per-centroid underflow tracking is likely too
3291			 * onerous, but it probably makes sense to accumulate a
3292			 * single underflow error variable across all centroids
3293			 * and report it as part of the digest to provide
3294			 * additional visibility into the digest's fidelity.
3295			 */
3296			error = error ? error :
3297			    Q_QDIVI(&x, ctd32->cnt + weight);
3298			if ((error && error != ERANGE)
3299			    || (error = Q_QADDQ(&ctd32->mu, x))) {
3300#ifdef DIAGNOSTIC
3301				KASSERT(!error, ("%s: unexpected error %d",
3302				    __func__, error));
3303#endif
3304				return (error);
3305			}
3306			ctd32->cnt += weight;
3307			error = ARB_REINSERT(ctdth32, ctd32tree, ctd32) ==
3308			    NULL ? 0 : EALREADY;
3309#ifdef DIAGNOSTIC
3310			RB_REINSERT(rbctdth32,
3311			    &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
3312#endif
3313		} else {
3314			ctd64 = (struct voistatdata_tdgstctd64 *)closest;
3315			error = Q_QSUBQ(&x, ctd64->mu);
3316			error = error ? error :
3317			    Q_QDIVI(&x, ctd64->cnt + weight);
3318			/* Refer to is32bit ERANGE discussion above. */
3319			if ((error && error != ERANGE)
3320			    || (error = Q_QADDQ(&ctd64->mu, x))) {
3321				KASSERT(!error, ("%s: unexpected error %d",
3322				    __func__, error));
3323				return (error);
3324			}
3325			ctd64->cnt += weight;
3326			error = ARB_REINSERT(ctdth64, ctd64tree, ctd64) ==
3327			    NULL ? 0 : EALREADY;
3328#ifdef DIAGNOSTIC
3329			RB_REINSERT(rbctdth64,
3330			    &VSD(tdgstclust64, tdgst)->rbctdtree, ctd64);
3331#endif
3332		}
3333	} else {
3334		/*
3335		 * Add a new centroid. If digest compression is working
3336		 * correctly, there should always be at least one free.
3337		 */
3338		if (is32bit) {
3339			ctd32 = ARB_GETFREE(ctd32tree, ctdlnk);
3340#ifdef DIAGNOSTIC
3341			KASSERT(ctd32 != NULL,
3342			    ("%s: t-digest@%p has no free centroids",
3343			    __func__, tdgst));
3344#endif
3345			if (ctd32 == NULL)
3346				return (EAGAIN);
3347			if ((error = Q_QCPYVALQ(&ctd32->mu, x)))
3348				return (error);
3349			ctd32->cnt = weight;
3350			error = ARB_INSERT(ctdth32, ctd32tree, ctd32) == NULL ?
3351			    0 : EALREADY;
3352#ifdef DIAGNOSTIC
3353			RB_INSERT(rbctdth32,
3354			    &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
3355#endif
3356		} else {
3357			ctd64 = ARB_GETFREE(ctd64tree, ctdlnk);
3358#ifdef DIAGNOSTIC
3359			KASSERT(ctd64 != NULL,
3360			    ("%s: t-digest@%p has no free centroids",
3361			    __func__, tdgst));
3362#endif
3363			if (ctd64 == NULL) /* Should not happen. */
3364				return (EAGAIN);
3365			/* Direct assignment ok as both have same type/prec. */
3366			ctd64->mu = x;
3367			ctd64->cnt = weight;
3368			error = ARB_INSERT(ctdth64, ctd64tree, ctd64) == NULL ?
3369			    0 : EALREADY;
3370#ifdef DIAGNOSTIC
3371			RB_INSERT(rbctdth64, &VSD(tdgstclust64,
3372			    tdgst)->rbctdtree, ctd64);
3373#endif
3374		}
3375	}
3376
3377	if (is32bit)
3378		VSD(tdgstclust32, tdgst)->smplcnt += weight;
3379	else {
3380		VSD(tdgstclust64, tdgst)->smplcnt += weight;
3381
3382#ifdef DIAGNOSTIC
3383		struct rbctdth64 *rbctdtree =
3384		    &VSD(tdgstclust64, tdgst)->rbctdtree;
3385		struct voistatdata_tdgstctd64 *rbctd64;
3386		int i = 0;
3387		ARB_FOREACH(ctd64, ctdth64, ctd64tree) {
3388			rbctd64 = (i == 0 ? RB_MIN(rbctdth64, rbctdtree) :
3389			    RB_NEXT(rbctdth64, rbctdtree, rbctd64));
3390
3391			if (i >= ARB_CURNODES(ctd64tree)
3392			    || ctd64 != rbctd64
3393			    || ARB_MIN(ctdth64, ctd64tree) !=
3394			       RB_MIN(rbctdth64, rbctdtree)
3395			    || ARB_MAX(ctdth64, ctd64tree) !=
3396			       RB_MAX(rbctdth64, rbctdtree)
3397			    || ARB_LEFTIDX(ctd64, ctdlnk) !=
3398			       ARB_SELFIDX(ctd64tree, RB_LEFT(rbctd64, rblnk))
3399			    || ARB_RIGHTIDX(ctd64, ctdlnk) !=
3400			       ARB_SELFIDX(ctd64tree, RB_RIGHT(rbctd64, rblnk))
3401			    || ARB_PARENTIDX(ctd64, ctdlnk) !=
3402			       ARB_SELFIDX(ctd64tree,
3403			       RB_PARENT(rbctd64, rblnk))) {
3404				Q_TOSTR(ctd64->mu, -1, 10, qstr, sizeof(qstr));
3405				printf("ARB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
3406				    "mu=%s\n",
3407				    (int)ARB_SELFIDX(ctd64tree, ctd64),
3408				    ARB_PARENTIDX(ctd64, ctdlnk),
3409				    ARB_LEFTIDX(ctd64, ctdlnk),
3410				    ARB_RIGHTIDX(ctd64, ctdlnk),
3411				    ARB_COLOR(ctd64, ctdlnk),
3412				    qstr);
3413
3414				Q_TOSTR(rbctd64->mu, -1, 10, qstr,
3415				    sizeof(qstr));
3416				struct voistatdata_tdgstctd64 *parent;
3417				parent = RB_PARENT(rbctd64, rblnk);
3418				int rb_color =
3419					parent == NULL ? 0 :
3420					RB_LEFT(parent, rblnk) == rbctd64 ?
3421					(_RB_BITSUP(parent, rblnk) & _RB_L) != 0 :
3422 					(_RB_BITSUP(parent, rblnk) & _RB_R) != 0;
3423				printf(" RB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
3424				    "mu=%s\n",
3425				    (int)ARB_SELFIDX(ctd64tree, rbctd64),
3426				    (int)ARB_SELFIDX(ctd64tree,
3427				      RB_PARENT(rbctd64, rblnk)),
3428				    (int)ARB_SELFIDX(ctd64tree,
3429				      RB_LEFT(rbctd64, rblnk)),
3430				    (int)ARB_SELFIDX(ctd64tree,
3431				      RB_RIGHT(rbctd64, rblnk)),
3432				    rb_color,
3433				    qstr);
3434
3435				panic("RB@%p and ARB@%p trees differ\n",
3436				    rbctdtree, ctd64tree);
3437			}
3438			i++;
3439		}
3440#endif /* DIAGNOSTIC */
3441	}
3442
3443	return (error);
3444}
3445
/*
 * Merge a new VOI sample into a t-digest statistic.
 *
 * The sample in 'voival' (interpreted per 'voi_dtype') is first copied into
 * the Q-number 'x', which is initialised with the same fractional precision
 * as the digest's centroids so no precision mismatch occurs on insert, and is
 * then added to 'tdgst' with a weight of 1 via stats_v1_vsd_tdgst_add().
 *
 * Returns 0 on success and sets VS_VSDVALID on the stat; returns EINVAL for
 * an unsupported stat/VOI data type, or propagates any error from the Q-math
 * conversion or the digest insertion.
 */
static inline int
stats_v1_voi_update_tdgst(enum vsd_dtype voi_dtype, struct voistatdata *voival,
    struct voistat *vs, struct voistatdata_tdgst *tdgst)
{
	s64q_t x;
	int error;

	error = 0;

	/* Initialise 'x' to match the precision of the digest's centroids. */
	switch (vs->dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		/* Use same precision as the user's centroids. */
		Q_INI(&x, 0, 0, Q_NFBITS(
		    ARB_CNODE(&VSD(tdgstclust32, tdgst)->ctdtree, 0)->mu));
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		/* Use same precision as the user's centroids. */
		Q_INI(&x, 0, 0, Q_NFBITS(
		    ARB_CNODE(&VSD(tdgstclust64, tdgst)->ctdtree, 0)->mu));
		break;
	default:
		KASSERT(vs->dtype == VSD_DTYPE_TDGSTCLUST32 ||
		    vs->dtype == VSD_DTYPE_TDGSTCLUST64,
		    ("%s: vs->dtype(%d) != VSD_DTYPE_TDGSTCLUST<32|64>",
		    __func__, vs->dtype));
		return (EINVAL);
	}

	/*
	 * XXXLAS: Should have both a signed and unsigned 'x' variable to avoid
	 * returning EOVERFLOW if the voival would have fit in a u64q_t.
	 */
	/* Copy the sample value into 'x', converting per the VOI data type. */
	switch (voi_dtype) {
	case VSD_DTYPE_INT_S32:
		error = Q_QCPYVALI(&x, voival->int32.s32);
		break;
	case VSD_DTYPE_INT_U32:
		error = Q_QCPYVALI(&x, voival->int32.u32);
		break;
	case VSD_DTYPE_INT_S64:
		error = Q_QCPYVALI(&x, voival->int64.s64);
		break;
	case VSD_DTYPE_INT_U64:
		error = Q_QCPYVALI(&x, voival->int64.u64);
		break;
	case VSD_DTYPE_INT_SLONG:
		error = Q_QCPYVALI(&x, voival->intlong.slong);
		break;
	case VSD_DTYPE_INT_ULONG:
		error = Q_QCPYVALI(&x, voival->intlong.ulong);
		break;
	case VSD_DTYPE_Q_S32:
		error = Q_QCPYVALQ(&x, voival->q32.sq32);
		break;
	case VSD_DTYPE_Q_U32:
		error = Q_QCPYVALQ(&x, voival->q32.uq32);
		break;
	case VSD_DTYPE_Q_S64:
		error = Q_QCPYVALQ(&x, voival->q64.sq64);
		break;
	case VSD_DTYPE_Q_U64:
		error = Q_QCPYVALQ(&x, voival->q64.uq64);
		break;
	default:
		error = EINVAL;
		break;
	}

	/* Insert the converted sample into the digest with weight 1. */
	if (error ||
	    (error = stats_v1_vsd_tdgst_add(vs->dtype, tdgst, x, 1, 1)))
		return (error);

	vs->flags |= VS_VSDVALID;
	return (0);
}
3521
3522int
3523stats_v1_voi_update(struct statsblobv1 *sb, int32_t voi_id,
3524    enum vsd_dtype voi_dtype, struct voistatdata *voival, uint32_t flags)
3525{
3526	struct voi *v;
3527	struct voistat *vs;
3528	void *statevsd, *vsd;
3529	int error, i, tmperr;
3530
3531	error = 0;
3532
3533	if (sb == NULL || sb->abi != STATS_ABI_V1 || voi_id >= NVOIS(sb) ||
3534	    voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES || voival == NULL)
3535		return (EINVAL);
3536	v = &sb->vois[voi_id];
3537	if (voi_dtype != v->dtype || v->id < 0 ||
3538	    ((flags & SB_VOI_RELUPDATE) && !(v->flags & VOI_REQSTATE)))
3539		return (EINVAL);
3540
3541	vs = BLOB_OFFSET(sb, v->stats_off);
3542	if (v->flags & VOI_REQSTATE)
3543		statevsd = BLOB_OFFSET(sb, vs->data_off);
3544	else
3545		statevsd = NULL;
3546
3547	if (flags & SB_VOI_RELUPDATE) {
3548		switch (voi_dtype) {
3549		case VSD_DTYPE_INT_S32:
3550			voival->int32.s32 +=
3551			    VSD(voistate, statevsd)->prev.int32.s32;
3552			break;
3553		case VSD_DTYPE_INT_U32:
3554			voival->int32.u32 +=
3555			    VSD(voistate, statevsd)->prev.int32.u32;
3556			break;
3557		case VSD_DTYPE_INT_S64:
3558			voival->int64.s64 +=
3559			    VSD(voistate, statevsd)->prev.int64.s64;
3560			break;
3561		case VSD_DTYPE_INT_U64:
3562			voival->int64.u64 +=
3563			    VSD(voistate, statevsd)->prev.int64.u64;
3564			break;
3565		case VSD_DTYPE_INT_SLONG:
3566			voival->intlong.slong +=
3567			    VSD(voistate, statevsd)->prev.intlong.slong;
3568			break;
3569		case VSD_DTYPE_INT_ULONG:
3570			voival->intlong.ulong +=
3571			    VSD(voistate, statevsd)->prev.intlong.ulong;
3572			break;
3573		case VSD_DTYPE_Q_S32:
3574			error = Q_QADDQ(&voival->q32.sq32,
3575			    VSD(voistate, statevsd)->prev.q32.sq32);
3576			break;
3577		case VSD_DTYPE_Q_U32:
3578			error = Q_QADDQ(&voival->q32.uq32,
3579			    VSD(voistate, statevsd)->prev.q32.uq32);
3580			break;
3581		case VSD_DTYPE_Q_S64:
3582			error = Q_QADDQ(&voival->q64.sq64,
3583			    VSD(voistate, statevsd)->prev.q64.sq64);
3584			break;
3585		case VSD_DTYPE_Q_U64:
3586			error = Q_QADDQ(&voival->q64.uq64,
3587			    VSD(voistate, statevsd)->prev.q64.uq64);
3588			break;
3589		default:
3590			KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3591			break;
3592		}
3593	}
3594
3595	if (error)
3596		return (error);
3597
3598	for (i = v->voistatmaxid; i > 0; i--) {
3599		vs = &((struct voistat *)BLOB_OFFSET(sb, v->stats_off))[i];
3600		if (vs->stype < 0)
3601			continue;
3602
3603		vsd = BLOB_OFFSET(sb, vs->data_off);
3604
3605		switch (vs->stype) {
3606		case VS_STYPE_MAX:
3607			tmperr = stats_v1_voi_update_max(voi_dtype, voival,
3608			    vs, vsd);
3609			break;
3610		case VS_STYPE_MIN:
3611			tmperr = stats_v1_voi_update_min(voi_dtype, voival,
3612			    vs, vsd);
3613			break;
3614		case VS_STYPE_SUM:
3615			tmperr = stats_v1_voi_update_sum(voi_dtype, voival,
3616			    vs, vsd);
3617			break;
3618		case VS_STYPE_HIST:
3619			tmperr = stats_v1_voi_update_hist(voi_dtype, voival,
3620			    vs, vsd);
3621			break;
3622		case VS_STYPE_TDGST:
3623			tmperr = stats_v1_voi_update_tdgst(voi_dtype, voival,
3624			    vs, vsd);
3625			break;
3626		default:
3627			KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
3628			break;
3629		}
3630
3631		if (tmperr) {
3632			error = tmperr;
3633			VS_INCERRS(vs);
3634		}
3635	}
3636
3637	if (statevsd) {
3638		switch (voi_dtype) {
3639		case VSD_DTYPE_INT_S32:
3640			VSD(voistate, statevsd)->prev.int32.s32 =
3641			    voival->int32.s32;
3642			break;
3643		case VSD_DTYPE_INT_U32:
3644			VSD(voistate, statevsd)->prev.int32.u32 =
3645			    voival->int32.u32;
3646			break;
3647		case VSD_DTYPE_INT_S64:
3648			VSD(voistate, statevsd)->prev.int64.s64 =
3649			    voival->int64.s64;
3650			break;
3651		case VSD_DTYPE_INT_U64:
3652			VSD(voistate, statevsd)->prev.int64.u64 =
3653			    voival->int64.u64;
3654			break;
3655		case VSD_DTYPE_INT_SLONG:
3656			VSD(voistate, statevsd)->prev.intlong.slong =
3657			    voival->intlong.slong;
3658			break;
3659		case VSD_DTYPE_INT_ULONG:
3660			VSD(voistate, statevsd)->prev.intlong.ulong =
3661			    voival->intlong.ulong;
3662			break;
3663		case VSD_DTYPE_Q_S32:
3664			error = Q_QCPYVALQ(
3665			    &VSD(voistate, statevsd)->prev.q32.sq32,
3666			    voival->q32.sq32);
3667			break;
3668		case VSD_DTYPE_Q_U32:
3669			error = Q_QCPYVALQ(
3670			    &VSD(voistate, statevsd)->prev.q32.uq32,
3671			    voival->q32.uq32);
3672			break;
3673		case VSD_DTYPE_Q_S64:
3674			error = Q_QCPYVALQ(
3675			    &VSD(voistate, statevsd)->prev.q64.sq64,
3676			    voival->q64.sq64);
3677			break;
3678		case VSD_DTYPE_Q_U64:
3679			error = Q_QCPYVALQ(
3680			    &VSD(voistate, statevsd)->prev.q64.uq64,
3681			    voival->q64.uq64);
3682			break;
3683		default:
3684			KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3685			break;
3686		}
3687	}
3688
3689	return (error);
3690}
3691
3692#ifdef _KERNEL
3693
/*
 * Kernel subsystem initialisation hook. Currently a no-op, but registered
 * via SYSINIT so that early-boot ordering relative to SI_SUB_KDTRACE is
 * already in place should setup work be required in future.
 */
static void
stats_init(void *arg)
{

}
SYSINIT(stats, SI_SUB_KDTRACE, SI_ORDER_FIRST, stats_init, NULL);
3700
3701/*
3702 * Sysctl handler to display the list of available stats templates.
3703 */
3704static int
3705stats_tpl_list_available(SYSCTL_HANDLER_ARGS)
3706{
3707	struct sbuf *s;
3708	int err, i;
3709
3710	err = 0;
3711
3712	/* We can tolerate ntpl being stale, so do not take the lock. */
3713	s = sbuf_new(NULL, NULL, /* +1 per tpl for , */
3714	    ntpl * (STATS_TPL_MAX_STR_SPEC_LEN + 1), SBUF_FIXEDLEN);
3715	if (s == NULL)
3716		return (ENOMEM);
3717
3718	TPL_LIST_RLOCK();
3719	for (i = 0; i < ntpl; i++) {
3720		err = sbuf_printf(s, "%s\"%s\":%u", i ? "," : "",
3721		    tpllist[i]->mb->tplname, tpllist[i]->mb->tplhash);
3722		if (err) {
3723			/* Sbuf overflow condition. */
3724			err = EOVERFLOW;
3725			break;
3726		}
3727	}
3728	TPL_LIST_RUNLOCK();
3729
3730	if (!err) {
3731		sbuf_finish(s);
3732		err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
3733	}
3734
3735	sbuf_delete(s);
3736	return (err);
3737}
3738
3739/*
3740 * Called by subsystem-specific sysctls to report and/or parse the list of
3741 * templates being sampled and their sampling rates. A stats_tpl_sr_cb_t
3742 * conformant function pointer must be passed in as arg1, which is used to
3743 * interact with the subsystem's stats template sample rates list. If arg2 > 0,
3744 * a zero-initialised allocation of arg2-sized contextual memory is
3745 * heap-allocated and passed in to all subsystem callbacks made during the
3746 * operation of stats_tpl_sample_rates().
3747 *
3748 * XXXLAS: Assumes templates are never removed, which is currently true but may
3749 * need to be reworked in future if dynamic template management becomes a
3750 * requirement e.g. to support kernel module based templates.
3751 */
3752int
3753stats_tpl_sample_rates(SYSCTL_HANDLER_ARGS)
3754{
3755	char kvpair_fmt[16], tplspec_fmt[16];
3756	char tpl_spec[STATS_TPL_MAX_STR_SPEC_LEN];
3757	char tpl_name[TPL_MAX_NAME_LEN + 2]; /* +2 for "" */
3758	stats_tpl_sr_cb_t subsys_cb;
3759	void *subsys_ctx;
3760	char *buf, *new_rates_usr_str, *tpl_name_p;
3761	struct stats_tpl_sample_rate *rates;
3762	struct sbuf *s, _s;
3763	uint32_t cum_pct, pct, tpl_hash;
3764	int err, i, off, len, newlen, nrates;
3765
3766	buf = NULL;
3767	rates = NULL;
3768	err = nrates = 0;
3769	subsys_cb = (stats_tpl_sr_cb_t)arg1;
3770	KASSERT(subsys_cb != NULL, ("%s: subsys_cb == arg1 == NULL", __func__));
3771	if (arg2 > 0)
3772		subsys_ctx = malloc(arg2, M_TEMP, M_WAITOK | M_ZERO);
3773	else
3774		subsys_ctx = NULL;
3775
3776	/* Grab current count of subsystem rates. */
3777	err = subsys_cb(TPL_SR_UNLOCKED_GET, NULL, &nrates, subsys_ctx);
3778	if (err)
3779		goto done;
3780
3781	/* +1 to ensure we can append '\0' post copyin, +5 per rate for =nnn, */
3782	len = max(req->newlen + 1, nrates * (STATS_TPL_MAX_STR_SPEC_LEN + 5));
3783
3784	if (req->oldptr != NULL || req->newptr != NULL)
3785		buf = malloc(len, M_TEMP, M_WAITOK);
3786
3787	if (req->oldptr != NULL) {
3788		if (nrates == 0) {
3789			/* No rates, so return an empty string via oldptr. */
3790			err = SYSCTL_OUT(req, "", 1);
3791			if (err)
3792				goto done;
3793			goto process_new;
3794		}
3795
3796		s = sbuf_new(&_s, buf, len, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
3797
3798		/* Grab locked count of, and ptr to, subsystem rates. */
3799		err = subsys_cb(TPL_SR_RLOCKED_GET, &rates, &nrates,
3800		    subsys_ctx);
3801		if (err)
3802			goto done;
3803		TPL_LIST_RLOCK();
3804		for (i = 0; i < nrates && !err; i++) {
3805			err = sbuf_printf(s, "%s\"%s\":%u=%u", i ? "," : "",
3806			    tpllist[rates[i].tpl_slot_id]->mb->tplname,
3807			    tpllist[rates[i].tpl_slot_id]->mb->tplhash,
3808			    rates[i].tpl_sample_pct);
3809		}
3810		TPL_LIST_RUNLOCK();
3811		/* Tell subsystem that we're done with its rates list. */
3812		err = subsys_cb(TPL_SR_RUNLOCK, &rates, &nrates, subsys_ctx);
3813		if (err)
3814			goto done;
3815
3816		err = sbuf_finish(s);
3817		if (err)
3818			goto done; /* We lost a race for buf to be too small. */
3819
3820		/* Return the rendered string data via oldptr. */
3821		err = SYSCTL_OUT(req, sbuf_data(s), sbuf_len(s));
3822	} else {
3823		/* Return the upper bound size for buffer sizing requests. */
3824		err = SYSCTL_OUT(req, NULL, len);
3825	}
3826
3827process_new:
3828	if (err || req->newptr == NULL)
3829		goto done;
3830
3831	newlen = req->newlen - req->newidx;
3832	err = SYSCTL_IN(req, buf, newlen);
3833	if (err)
3834		goto done;
3835
3836	/*
3837	 * Initialise format strings at run time.
3838	 *
3839	 * Write the max template spec string length into the
3840	 * template_spec=percent key-value pair parsing format string as:
3841	 *     " %<width>[^=]=%u %n"
3842	 *
3843	 * Write the max template name string length into the tplname:tplhash
3844	 * parsing format string as:
3845	 *     "%<width>[^:]:%u"
3846	 *
3847	 * Subtract 1 for \0 appended by sscanf().
3848	 */
3849	sprintf(kvpair_fmt, " %%%zu[^=]=%%u %%n", sizeof(tpl_spec) - 1);
3850	sprintf(tplspec_fmt, "%%%zu[^:]:%%u", sizeof(tpl_name) - 1);
3851
3852	/*
3853	 * Parse each CSV key-value pair specifying a template and its sample
3854	 * percentage. Whitespace either side of a key-value pair is ignored.
3855	 * Templates can be specified by name, hash, or name and hash per the
3856	 * following formats (chars in [] are optional):
3857	 *    ["]<tplname>["]=<percent>
3858	 *    :hash=pct
3859	 *    ["]<tplname>["]:hash=<percent>
3860	 */
3861	cum_pct = nrates = 0;
3862	rates = NULL;
3863	buf[newlen] = '\0'; /* buf is at least newlen+1 in size. */
3864	new_rates_usr_str = buf;
3865	while (isspace(*new_rates_usr_str))
3866		new_rates_usr_str++; /* Skip leading whitespace. */
3867	while (*new_rates_usr_str != '\0') {
3868		tpl_name_p = tpl_name;
3869		tpl_name[0] = '\0';
3870		tpl_hash = 0;
3871		off = 0;
3872
3873		/*
3874		 * Parse key-value pair which must perform 2 conversions, then
3875		 * parse the template spec to extract either name, hash, or name
3876		 * and hash depending on the three possible spec formats. The
3877		 * tplspec_fmt format specifier parses name or name and hash
3878		 * template specs, while the ":%u" format specifier parses
3879		 * hash-only template specs. If parsing is successfull, ensure
3880		 * the cumulative sampling percentage does not exceed 100.
3881		 */
3882		err = EINVAL;
3883		if (2 != sscanf(new_rates_usr_str, kvpair_fmt, tpl_spec, &pct,
3884		    &off))
3885			break;
3886		if ((1 > sscanf(tpl_spec, tplspec_fmt, tpl_name, &tpl_hash)) &&
3887		    (1 != sscanf(tpl_spec, ":%u", &tpl_hash)))
3888			break;
3889		if ((cum_pct += pct) > 100)
3890			break;
3891		err = 0;
3892
3893		/* Strip surrounding "" from template name if present. */
3894		len = strlen(tpl_name);
3895		if (len > 0) {
3896			if (tpl_name[len - 1] == '"')
3897				tpl_name[--len] = '\0';
3898			if (tpl_name[0] == '"') {
3899				tpl_name_p++;
3900				len--;
3901			}
3902		}
3903
3904		rates = stats_realloc(rates, 0, /* oldsz is unused in kernel. */
3905		    (nrates + 1) * sizeof(*rates), M_WAITOK);
3906		rates[nrates].tpl_slot_id =
3907		    stats_tpl_fetch_allocid(len ? tpl_name_p : NULL, tpl_hash);
3908		if (rates[nrates].tpl_slot_id < 0) {
3909			err = -rates[nrates].tpl_slot_id;
3910			break;
3911		}
3912		rates[nrates].tpl_sample_pct = pct;
3913		nrates++;
3914		new_rates_usr_str += off;
3915		if (*new_rates_usr_str != ',')
3916			break; /* End-of-input or malformed. */
3917		new_rates_usr_str++; /* Move past comma to next pair. */
3918	}
3919
3920	if (!err) {
3921		if ((new_rates_usr_str - buf) < newlen) {
3922			/* Entire input has not been consumed. */
3923			err = EINVAL;
3924		} else {
3925			/*
3926			 * Give subsystem the new rates. They'll return the
3927			 * appropriate rates pointer for us to garbage collect.
3928			 */
3929			err = subsys_cb(TPL_SR_PUT, &rates, &nrates,
3930			    subsys_ctx);
3931		}
3932	}
3933	stats_free(rates);
3934
3935done:
3936	free(buf, M_TEMP);
3937	free(subsys_ctx, M_TEMP);
3938	return (err);
3939}
3940
/* Root node of the stats(9) sysctl tree: kern.stats. */
SYSCTL_NODE(_kern, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "stats(9) MIB");

/* kern.stats.templates: read-only list rendered by stats_tpl_list_available. */
SYSCTL_PROC(_kern_stats, OID_AUTO, templates,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    stats_tpl_list_available, "A",
    "list the name/hash of all available stats(9) templates");
3948
3949#else /* ! _KERNEL */
3950
3951static void __attribute__ ((constructor))
3952stats_constructor(void)
3953{
3954
3955	pthread_rwlock_init(&tpllistlock, NULL);
3956}
3957
3958static void __attribute__ ((destructor))
3959stats_destructor(void)
3960{
3961
3962	pthread_rwlock_destroy(&tpllistlock);
3963}
3964
3965#endif /* _KERNEL */
3966