devstat.c revision 112288
1/*
2 * Copyright (c) 1997, 1998 Kenneth D. Merry.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote products
14 *    derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/lib/libdevstat/devstat.c 112288 2003-03-15 21:59:06Z phk $");
31
32#include <sys/types.h>
33#include <sys/sysctl.h>
34#include <sys/errno.h>
35#include <sys/resource.h>
36#include <sys/queue.h>
37
38#include <ctype.h>
39#include <err.h>
40#include <fcntl.h>
41#include <limits.h>
42#include <stdio.h>
43#include <stdlib.h>
44#include <string.h>
45#include <stdarg.h>
46#include <kvm.h>
47
48#include "devstat.h"
49
/*
 * Tag naming the kind of argument that goes with a metric; each entry of
 * devstat_arg_list pairs a devstat_metric with one of these values.
 */
typedef enum {
	DEVSTAT_ARG_NOTYPE = 0,	/* no argument (paired with DSM_NONE) */
	DEVSTAT_ARG_UINT64 = 1,	/* paired with the DSM_TOTAL_* metrics */
	DEVSTAT_ARG_LD     = 2,	/* paired with the per-transfer/per-second metrics */
	DEVSTAT_ARG_SKIP   = 3	/* paired with DSM_SKIP */
} devstat_arg_type;
56
/*
 * Error message buffer.  The functions in this file format a description
 * of a failure into it (always bounded by DEVSTAT_ERRBUF_SIZE) before
 * returning an error to the caller.
 */
char devstat_errbuf[DEVSTAT_ERRBUF_SIZE];
58
/*
 * Table to match descriptive strings with device types.  These are in
 * order from most common to least common to speed search time.
 *
 * Each entry holds, in order: the descriptive string, the DEVSTAT_TYPE_*
 * value that string stands for, and the DEVSTAT_MATCH_* category (device
 * type, interface or passthrough) the entry belongs to.  The table is
 * terminated by an entry whose string is NULL.
 */
struct devstat_match_table match_table[] = {
	{"da",		DEVSTAT_TYPE_DIRECT,	DEVSTAT_MATCH_TYPE},
	{"cd",		DEVSTAT_TYPE_CDROM,	DEVSTAT_MATCH_TYPE},
	{"scsi",	DEVSTAT_TYPE_IF_SCSI,	DEVSTAT_MATCH_IF},
	{"ide",		DEVSTAT_TYPE_IF_IDE,	DEVSTAT_MATCH_IF},
	{"other",	DEVSTAT_TYPE_IF_OTHER,	DEVSTAT_MATCH_IF},
	{"worm",	DEVSTAT_TYPE_WORM,	DEVSTAT_MATCH_TYPE},
	{"sa",		DEVSTAT_TYPE_SEQUENTIAL,DEVSTAT_MATCH_TYPE},
	{"pass",	DEVSTAT_TYPE_PASS,	DEVSTAT_MATCH_PASS},
	{"optical",	DEVSTAT_TYPE_OPTICAL,	DEVSTAT_MATCH_TYPE},
	{"array",	DEVSTAT_TYPE_STORARRAY,	DEVSTAT_MATCH_TYPE},
	{"changer",	DEVSTAT_TYPE_CHANGER,	DEVSTAT_MATCH_TYPE},
	{"scanner",	DEVSTAT_TYPE_SCANNER,	DEVSTAT_MATCH_TYPE},
	{"printer",	DEVSTAT_TYPE_PRINTER,	DEVSTAT_MATCH_TYPE},
	{"floppy",	DEVSTAT_TYPE_FLOPPY,	DEVSTAT_MATCH_TYPE},
	{"proc",	DEVSTAT_TYPE_PROCESSOR,	DEVSTAT_MATCH_TYPE},
	{"comm",	DEVSTAT_TYPE_COMM,	DEVSTAT_MATCH_TYPE},
	{"enclosure",	DEVSTAT_TYPE_ENCLOSURE,	DEVSTAT_MATCH_TYPE},
	{NULL,		0,			0}	/* end-of-table sentinel */
};
83
/*
 * Pairs every devstat_metric with the devstat_arg_type of the argument
 * that accompanies it: totals use DEVSTAT_ARG_UINT64, the per-transfer,
 * per-second and per-transaction metrics use DEVSTAT_ARG_LD.
 *
 * NOTE(review): the entries appear to be listed in the same order as the
 * devstat_metric enumerators so the table can be indexed by metric --
 * confirm against devstat.h before reordering anything here.
 */
struct devstat_args {
	devstat_metric 		metric;
	devstat_arg_type	argtype;
} devstat_arg_list[] = {
	{ DSM_NONE, DEVSTAT_ARG_NOTYPE },
	{ DSM_TOTAL_BYTES, DEVSTAT_ARG_UINT64 },
	{ DSM_TOTAL_BYTES_READ, DEVSTAT_ARG_UINT64 },
	{ DSM_TOTAL_BYTES_WRITE, DEVSTAT_ARG_UINT64 },
	{ DSM_TOTAL_TRANSFERS, DEVSTAT_ARG_UINT64 },
	{ DSM_TOTAL_TRANSFERS_READ, DEVSTAT_ARG_UINT64 },
	{ DSM_TOTAL_TRANSFERS_WRITE, DEVSTAT_ARG_UINT64 },
	{ DSM_TOTAL_TRANSFERS_OTHER, DEVSTAT_ARG_UINT64 },
	{ DSM_TOTAL_BLOCKS, DEVSTAT_ARG_UINT64 },
	{ DSM_TOTAL_BLOCKS_READ, DEVSTAT_ARG_UINT64 },
	{ DSM_TOTAL_BLOCKS_WRITE, DEVSTAT_ARG_UINT64 },
	{ DSM_KB_PER_TRANSFER, DEVSTAT_ARG_LD },
	{ DSM_KB_PER_TRANSFER_READ, DEVSTAT_ARG_LD },
	{ DSM_KB_PER_TRANSFER_WRITE, DEVSTAT_ARG_LD },
	{ DSM_TRANSFERS_PER_SECOND, DEVSTAT_ARG_LD },
	{ DSM_TRANSFERS_PER_SECOND_READ, DEVSTAT_ARG_LD },
	{ DSM_TRANSFERS_PER_SECOND_WRITE, DEVSTAT_ARG_LD },
	{ DSM_TRANSFERS_PER_SECOND_OTHER, DEVSTAT_ARG_LD },
	{ DSM_MB_PER_SECOND, DEVSTAT_ARG_LD },
	{ DSM_MB_PER_SECOND_READ, DEVSTAT_ARG_LD },
	{ DSM_MB_PER_SECOND_WRITE, DEVSTAT_ARG_LD },
	{ DSM_BLOCKS_PER_SECOND, DEVSTAT_ARG_LD },
	{ DSM_BLOCKS_PER_SECOND_READ, DEVSTAT_ARG_LD },
	{ DSM_BLOCKS_PER_SECOND_WRITE, DEVSTAT_ARG_LD },
	{ DSM_MS_PER_TRANSACTION, DEVSTAT_ARG_LD },
	{ DSM_MS_PER_TRANSACTION_READ, DEVSTAT_ARG_LD },
	{ DSM_MS_PER_TRANSACTION_WRITE, DEVSTAT_ARG_LD },
	{ DSM_SKIP, DEVSTAT_ARG_SKIP }
};
117
/*
 * Indices into namelist[] below.  X_END is one past the last valid index,
 * i.e. the number of names in the array.
 */
#define X_NUMDEVS	0
#define X_GENERATION	1
#define X_VERSION	2
#define X_DEVICE_STATQ	3
#define X_END		4

/*
 * Symbol names handed to readkmem_nl() (see KREADNL) when values are read
 * through a kvm handle instead of through sysctl.
 */
static const char *namelist[] = {
	"_devstat_num_devs",	/* X_NUMDEVS */
	"_devstat_generation",	/* X_GENERATION */
	"_devstat_version",	/* X_VERSION */
	"_device_statq",	/* X_DEVICE_STATQ */
};
129
130/*
131 * Local function declarations.
132 */
133static int compare_select(const void *arg1, const void *arg2);
134static int readkmem(kvm_t *kd, unsigned long addr, void *buf, size_t nbytes);
135static int readkmem_nl(kvm_t *kd, const char *name, void *buf, size_t nbytes);
136static char *get_devstat_kvm(kvm_t *kd);
137
138#define KREADNL(kd, var, val) \
139	readkmem_nl(kd, namelist[var], &val, sizeof(val))
140
141int
142devstat_getnumdevs(kvm_t *kd)
143{
144	size_t numdevsize;
145	int numdevs;
146	const char *func_name = "devstat_getnumdevs";
147
148	numdevsize = sizeof(int);
149
150	/*
151	 * Find out how many devices we have in the system.
152	 */
153	if (kd == NULL) {
154		if (sysctlbyname("kern.devstat.numdevs", &numdevs,
155				 &numdevsize, NULL, 0) == -1) {
156			snprintf(devstat_errbuf, sizeof(devstat_errbuf),
157				 "%s: error getting number of devices\n"
158				 "%s: %s", func_name, func_name,
159				 strerror(errno));
160			return(-1);
161		} else
162			return(numdevs);
163	} else {
164
165		if (KREADNL(kd, X_NUMDEVS, numdevs) == -1)
166			return(-1);
167		else
168			return(numdevs);
169	}
170}
171
172/*
173 * This is an easy way to get the generation number, but the generation is
174 * supplied in a more atmoic manner by the kern.devstat.all sysctl.
175 * Because this generation sysctl is separate from the statistics sysctl,
176 * the device list and the generation could change between the time that
177 * this function is called and the device list is retreived.
178 */
179long
180devstat_getgeneration(kvm_t *kd)
181{
182	size_t gensize;
183	long generation;
184	const char *func_name = "devstat_getgeneration";
185
186	gensize = sizeof(long);
187
188	/*
189	 * Get the current generation number.
190	 */
191	if (kd == NULL) {
192		if (sysctlbyname("kern.devstat.generation", &generation,
193				 &gensize, NULL, 0) == -1) {
194			snprintf(devstat_errbuf, sizeof(devstat_errbuf),
195				 "%s: error getting devstat generation\n%s: %s",
196				 func_name, func_name, strerror(errno));
197			return(-1);
198		} else
199			return(generation);
200	} else {
201		if (KREADNL(kd, X_GENERATION, generation) == -1)
202			return(-1);
203		else
204			return(generation);
205	}
206}
207
208/*
209 * Get the current devstat version.  The return value of this function
210 * should be compared with DEVSTAT_VERSION, which is defined in
211 * sys/devicestat.h.  This will enable userland programs to determine
212 * whether they are out of sync with the kernel.
213 */
214int
215devstat_getversion(kvm_t *kd)
216{
217	size_t versize;
218	int version;
219	const char *func_name = "devstat_getversion";
220
221	versize = sizeof(int);
222
223	/*
224	 * Get the current devstat version.
225	 */
226	if (kd == NULL) {
227		if (sysctlbyname("kern.devstat.version", &version, &versize,
228				 NULL, 0) == -1) {
229			snprintf(devstat_errbuf, sizeof(devstat_errbuf),
230				 "%s: error getting devstat version\n%s: %s",
231				 func_name, func_name, strerror(errno));
232			return(-1);
233		} else
234			return(version);
235	} else {
236		if (KREADNL(kd, X_VERSION, version) == -1)
237			return(-1);
238		else
239			return(version);
240	}
241}
242
243/*
244 * Check the devstat version we know about against the devstat version the
245 * kernel knows about.  If they don't match, print an error into the
246 * devstat error buffer, and return -1.  If they match, return 0.
247 */
248int
249devstat_checkversion(kvm_t *kd)
250{
251	const char *func_name = "devstat_checkversion";
252	int buflen, res, retval = 0, version;
253
254	version = devstat_getversion(kd);
255
256	if (version != DEVSTAT_VERSION) {
257		/*
258		 * If getversion() returns an error (i.e. -1), then it
259		 * has printed an error message in the buffer.  Therefore,
260		 * we need to add a \n to the end of that message before we
261		 * print our own message in the buffer.
262		 */
263		if (version == -1)
264			buflen = strlen(devstat_errbuf);
265		else
266			buflen = 0;
267
268		res = snprintf(devstat_errbuf + buflen,
269			       DEVSTAT_ERRBUF_SIZE - buflen,
270			       "%s%s: userland devstat version %d is not "
271			       "the same as the kernel\n%s: devstat "
272			       "version %d\n", version == -1 ? "\n" : "",
273			       func_name, DEVSTAT_VERSION, func_name, version);
274
275		if (res < 0)
276			devstat_errbuf[buflen] = '\0';
277
278		buflen = strlen(devstat_errbuf);
279		if (version < DEVSTAT_VERSION)
280			res = snprintf(devstat_errbuf + buflen,
281				       DEVSTAT_ERRBUF_SIZE - buflen,
282				       "%s: libdevstat newer than kernel\n",
283				       func_name);
284		else
285			res = snprintf(devstat_errbuf + buflen,
286				       DEVSTAT_ERRBUF_SIZE - buflen,
287				       "%s: kernel newer than libdevstat\n",
288				       func_name);
289
290		if (res < 0)
291			devstat_errbuf[buflen] = '\0';
292
293		retval = -1;
294	}
295
296	return(retval);
297}
298
299/*
300 * Get the current list of devices and statistics, and the current
301 * generation number.
302 *
303 * Return values:
304 * -1  -- error
305 *  0  -- device list is unchanged
306 *  1  -- device list has changed
307 */
308int
309devstat_getdevs(kvm_t *kd, struct statinfo *stats)
310{
311	int error;
312	size_t dssize;
313	int oldnumdevs;
314	long oldgeneration;
315	int retval = 0;
316	struct devinfo *dinfo;
317	const char *func_name = "devstat_getdevs";
318	struct timespec ts;
319
320	dinfo = stats->dinfo;
321
322	if (dinfo == NULL) {
323		snprintf(devstat_errbuf, sizeof(devstat_errbuf),
324			 "%s: stats->dinfo was NULL", func_name);
325		return(-1);
326	}
327
328	oldnumdevs = dinfo->numdevs;
329	oldgeneration = dinfo->generation;
330
331	clock_gettime(CLOCK_MONOTONIC, &ts);
332	stats->snap_time = ts.tv_sec + ts.tv_nsec * 1e-9;
333
334	if (kd == NULL) {
335		/* If this is our first time through, mem_ptr will be null. */
336		if (dinfo->mem_ptr == NULL) {
337			/*
338			 * Get the number of devices.  If it's negative, it's an
339			 * error.  Don't bother setting the error string, since
340			 * getnumdevs() has already done that for us.
341			 */
342			if ((dinfo->numdevs = devstat_getnumdevs(NULL)) < 0)
343				return(-1);
344
345			/*
346			 * The kern.devstat.all sysctl returns the current
347			 * generation number, as well as all the devices.
348			 * So we need four bytes more.
349			 */
350			dssize = (dinfo->numdevs * sizeof(struct devstat)) +
351				 sizeof(long);
352			dinfo->mem_ptr = (u_int8_t *)malloc(dssize);
353		} else
354			dssize = (dinfo->numdevs * sizeof(struct devstat)) +
355				 sizeof(long);
356
357		/*
358		 * Request all of the devices.  We only really allow for one
359		 * ENOMEM failure.  It would, of course, be possible to just go
360		 * in a loop and keep reallocing the device structure until we
361		 * don't get ENOMEM back.  I'm not sure it's worth it, though.
362		 * If devices are being added to the system that quickly, maybe
363		 * the user can just wait until all devices are added.
364		 */
365		if ((error = sysctlbyname("kern.devstat.all", dinfo->mem_ptr,
366					  &dssize, NULL, 0)) == -1) {
367			/*
368			 * If we get ENOMEM back, that means that there are
369			 * more devices now, so we need to allocate more
370			 * space for the device array.
371			 */
372			if (errno == ENOMEM) {
373				/*
374				 * No need to set the error string here,
375				 * devstat_getnumdevs() will do that if it fails.
376				 */
377				if ((dinfo->numdevs = devstat_getnumdevs(NULL)) < 0)
378					return(-1);
379
380				dssize = (dinfo->numdevs *
381					sizeof(struct devstat)) + sizeof(long);
382				dinfo->mem_ptr = (u_int8_t *)
383					realloc(dinfo->mem_ptr, dssize);
384				if ((error = sysctlbyname("kern.devstat.all",
385				    dinfo->mem_ptr, &dssize, NULL, 0)) == -1) {
386					snprintf(devstat_errbuf,
387						 sizeof(devstat_errbuf),
388					    	 "%s: error getting device "
389					    	 "stats\n%s: %s", func_name,
390					    	 func_name, strerror(errno));
391					return(-1);
392				}
393			} else {
394				snprintf(devstat_errbuf, sizeof(devstat_errbuf),
395					 "%s: error getting device stats\n"
396					 "%s: %s", func_name, func_name,
397					 strerror(errno));
398				return(-1);
399			}
400		}
401
402	} else {
403		/*
404		 * This is of course non-atomic, but since we are working
405		 * on a core dump, the generation is unlikely to change
406		 */
407		if ((dinfo->numdevs = devstat_getnumdevs(NULL)) == -1)
408			return(-1);
409		if ((dinfo->mem_ptr = get_devstat_kvm(kd)) == NULL)
410			return(-1);
411	}
412	/*
413	 * The sysctl spits out the generation as the first four bytes,
414	 * then all of the device statistics structures.
415	 */
416	dinfo->generation = *(long *)dinfo->mem_ptr;
417
418	/*
419	 * If the generation has changed, and if the current number of
420	 * devices is not the same as the number of devices recorded in the
421	 * devinfo structure, it is likely that the device list has shrunk.
422	 * The reason that it is likely that the device list has shrunk in
423	 * this case is that if the device list has grown, the sysctl above
424	 * will return an ENOMEM error, and we will reset the number of
425	 * devices and reallocate the device array.  If the second sysctl
426	 * fails, we will return an error and therefore never get to this
427	 * point.  If the device list has shrunk, the sysctl will not
428	 * return an error since we have more space allocated than is
429	 * necessary.  So, in the shrinkage case, we catch it here and
430	 * reallocate the array so that we don't use any more space than is
431	 * necessary.
432	 */
433	if (oldgeneration != dinfo->generation) {
434		if (devstat_getnumdevs(NULL) != dinfo->numdevs) {
435			if ((dinfo->numdevs = devstat_getnumdevs(NULL)) < 0)
436				return(-1);
437			dssize = (dinfo->numdevs * sizeof(struct devstat)) +
438				sizeof(long);
439			dinfo->mem_ptr = (u_int8_t *)realloc(dinfo->mem_ptr,
440							     dssize);
441		}
442		retval = 1;
443	}
444
445	dinfo->devices = (struct devstat *)(dinfo->mem_ptr + sizeof(long));
446
447	return(retval);
448}
449
450/*
451 * selectdevs():
452 *
453 * Devices are selected/deselected based upon the following criteria:
454 * - devices specified by the user on the command line
455 * - devices matching any device type expressions given on the command line
456 * - devices with the highest I/O, if 'top' mode is enabled
457 * - the first n unselected devices in the device list, if maxshowdevs
458 *   devices haven't already been selected and if the user has not
459 *   specified any devices on the command line and if we're in "add" mode.
460 *
461 * Input parameters:
462 * - device selection list (dev_select)
463 * - current number of devices selected (num_selected)
464 * - total number of devices in the selection list (num_selections)
465 * - devstat generation as of the last time selectdevs() was called
466 *   (select_generation)
467 * - current devstat generation (current_generation)
468 * - current list of devices and statistics (devices)
469 * - number of devices in the current device list (numdevs)
470 * - compiled version of the command line device type arguments (matches)
471 *   - This is optional.  If the number of devices is 0, this will be ignored.
472 *   - The matching code pays attention to the current selection mode.  So
473 *     if you pass in a matching expression, it will be evaluated based
474 *     upon the selection mode that is passed in.  See below for details.
475 * - number of device type matching expressions (num_matches)
476 *   - Set to 0 to disable the matching code.
477 * - list of devices specified on the command line by the user (dev_selections)
478 * - number of devices selected on the command line by the user
479 *   (num_dev_selections)
480 * - Our selection mode.  There are four different selection modes:
481 *      - add mode.  (DS_SELECT_ADD) Any devices matching devices explicitly
482 *        selected by the user or devices matching a pattern given by the
483 *        user will be selected in addition to devices that are already
484 *        selected.  Additional devices will be selected, up to maxshowdevs
485 *        number of devices.
486 *      - only mode. (DS_SELECT_ONLY)  Only devices matching devices
487 *        explicitly given by the user or devices matching a pattern
488 *        given by the user will be selected.  No other devices will be
489 *        selected.
490 *      - addonly mode.  (DS_SELECT_ADDONLY)  This is similar to add and
491 *        only.  Basically, this will not de-select any devices that are
 *        currently selected, as only mode would, but it will also not
493 *        gratuitously select up to maxshowdevs devices as add mode would.
494 *      - remove mode.  (DS_SELECT_REMOVE)  Any devices matching devices
495 *        explicitly selected by the user or devices matching a pattern
496 *        given by the user will be de-selected.
497 * - maximum number of devices we can select (maxshowdevs)
498 * - flag indicating whether or not we're in 'top' mode (perf_select)
499 *
500 * Output data:
501 * - the device selection list may be modified and passed back out
502 * - the number of devices selected and the total number of items in the
503 *   device selection list may be changed
504 * - the selection generation may be changed to match the current generation
505 *
506 * Return values:
507 * -1  -- error
508 *  0  -- selected devices are unchanged
509 *  1  -- selected devices changed
510 */
511int
512devstat_selectdevs(struct device_selection **dev_select, int *num_selected,
513		   int *num_selections, long *select_generation,
514		   long current_generation, struct devstat *devices,
515		   int numdevs, struct devstat_match *matches, int num_matches,
516		   char **dev_selections, int num_dev_selections,
517		   devstat_select_mode select_mode, int maxshowdevs,
518		   int perf_select)
519{
520	int i, j, k;
521	int init_selections = 0, init_selected_var = 0;
522	struct device_selection *old_dev_select = NULL;
523	int old_num_selections = 0, old_num_selected;
524	int selection_number = 0;
525	int changed = 0, found = 0;
526
527	if ((dev_select == NULL) || (devices == NULL) || (numdevs <= 0))
528		return(-1);
529
530	/*
531	 * We always want to make sure that we have as many dev_select
532	 * entries as there are devices.
533	 */
534	/*
535	 * In this case, we haven't selected devices before.
536	 */
537	if (*dev_select == NULL) {
538		*dev_select = (struct device_selection *)malloc(numdevs *
539			sizeof(struct device_selection));
540		*select_generation = current_generation;
541		init_selections = 1;
542		changed = 1;
543	/*
544	 * In this case, we have selected devices before, but the device
545	 * list has changed since we last selected devices, so we need to
546	 * either enlarge or reduce the size of the device selection list.
547	 */
548	} else if (*num_selections != numdevs) {
549		*dev_select = (struct device_selection *)realloc(*dev_select,
550			numdevs * sizeof(struct device_selection));
551		*select_generation = current_generation;
552		init_selections = 1;
553	/*
554	 * In this case, we've selected devices before, and the selection
555	 * list is the same size as it was the last time, but the device
556	 * list has changed.
557	 */
558	} else if (*select_generation < current_generation) {
559		*select_generation = current_generation;
560		init_selections = 1;
561	}
562
563	/*
564	 * If we're in "only" mode, we want to clear out the selected
565	 * variable since we're going to select exactly what the user wants
566	 * this time through.
567	 */
568	if (select_mode == DS_SELECT_ONLY)
569		init_selected_var = 1;
570
571	/*
572	 * In all cases, we want to back up the number of selected devices.
573	 * It is a quick and accurate way to determine whether the selected
574	 * devices have changed.
575	 */
576	old_num_selected = *num_selected;
577
578	/*
579	 * We want to make a backup of the current selection list if
580	 * the list of devices has changed, or if we're in performance
581	 * selection mode.  In both cases, we don't want to make a backup
582	 * if we already know for sure that the list will be different.
583	 * This is certainly the case if this is our first time through the
584	 * selection code.
585	 */
586	if (((init_selected_var != 0) || (init_selections != 0)
587	 || (perf_select != 0)) && (changed == 0)){
588		old_dev_select = (struct device_selection *)malloc(
589		    *num_selections * sizeof(struct device_selection));
590		old_num_selections = *num_selections;
591		bcopy(*dev_select, old_dev_select,
592		    sizeof(struct device_selection) * *num_selections);
593	}
594
595	if (init_selections != 0) {
596		bzero(*dev_select, sizeof(struct device_selection) * numdevs);
597
598		for (i = 0; i < numdevs; i++) {
599			(*dev_select)[i].device_number =
600				devices[i].device_number;
601			strncpy((*dev_select)[i].device_name,
602				devices[i].device_name,
603				DEVSTAT_NAME_LEN);
604			(*dev_select)[i].device_name[DEVSTAT_NAME_LEN - 1]='\0';
605			(*dev_select)[i].unit_number = devices[i].unit_number;
606			(*dev_select)[i].position = i;
607		}
608		*num_selections = numdevs;
609	} else if (init_selected_var != 0) {
610		for (i = 0; i < numdevs; i++)
611			(*dev_select)[i].selected = 0;
612	}
613
614	/* we haven't gotten around to selecting anything yet.. */
615	if ((select_mode == DS_SELECT_ONLY) || (init_selections != 0)
616	 || (init_selected_var != 0))
617		*num_selected = 0;
618
619	/*
620	 * Look through any devices the user specified on the command line
621	 * and see if they match known devices.  If so, select them.
622	 */
623	for (i = 0; (i < *num_selections) && (num_dev_selections > 0); i++) {
624		char tmpstr[80];
625
626		snprintf(tmpstr, sizeof(tmpstr), "%s%d",
627			 (*dev_select)[i].device_name,
628			 (*dev_select)[i].unit_number);
629		for (j = 0; j < num_dev_selections; j++) {
630			if (strcmp(tmpstr, dev_selections[j]) == 0) {
631				/*
632				 * Here we do different things based on the
633				 * mode we're in.  If we're in add or
634				 * addonly mode, we only select this device
635				 * if it hasn't already been selected.
636				 * Otherwise, we would be unnecessarily
637				 * changing the selection order and
638				 * incrementing the selection count.  If
639				 * we're in only mode, we unconditionally
640				 * select this device, since in only mode
641				 * any previous selections are erased and
642				 * manually specified devices are the first
643				 * ones to be selected.  If we're in remove
644				 * mode, we de-select the specified device and
645				 * decrement the selection count.
646				 */
647				switch(select_mode) {
648				case DS_SELECT_ADD:
649				case DS_SELECT_ADDONLY:
650					if ((*dev_select)[i].selected)
651						break;
652					/* FALLTHROUGH */
653				case DS_SELECT_ONLY:
654					(*dev_select)[i].selected =
655						++selection_number;
656					(*num_selected)++;
657					break;
658				case DS_SELECT_REMOVE:
659					(*dev_select)[i].selected = 0;
660					(*num_selected)--;
661					/*
662					 * This isn't passed back out, we
663					 * just use it to keep track of
664					 * how many devices we've removed.
665					 */
666					num_dev_selections--;
667					break;
668				}
669				break;
670			}
671		}
672	}
673
674	/*
675	 * Go through the user's device type expressions and select devices
676	 * accordingly.  We only do this if the number of devices already
677	 * selected is less than the maximum number we can show.
678	 */
679	for (i = 0; (i < num_matches) && (*num_selected < maxshowdevs); i++) {
680		/* We should probably indicate some error here */
681		if ((matches[i].match_fields == DEVSTAT_MATCH_NONE)
682		 || (matches[i].num_match_categories <= 0))
683			continue;
684
685		for (j = 0; j < numdevs; j++) {
686			int num_match_categories;
687
688			num_match_categories = matches[i].num_match_categories;
689
690			/*
691			 * Determine whether or not the current device
692			 * matches the given matching expression.  This if
693			 * statement consists of three components:
694			 *   - the device type check
695			 *   - the device interface check
696			 *   - the passthrough check
697			 * If a the matching test is successful, it
698			 * decrements the number of matching categories,
699			 * and if we've reached the last element that
700			 * needed to be matched, the if statement succeeds.
701			 *
702			 */
703			if ((((matches[i].match_fields & DEVSTAT_MATCH_TYPE)!=0)
704			  && ((devices[j].device_type & DEVSTAT_TYPE_MASK) ==
705			        (matches[i].device_type & DEVSTAT_TYPE_MASK))
706			  &&(((matches[i].match_fields & DEVSTAT_MATCH_PASS)!=0)
707			   || (((matches[i].match_fields &
708				DEVSTAT_MATCH_PASS) == 0)
709			    && ((devices[j].device_type &
710			        DEVSTAT_TYPE_PASS) == 0)))
711			  && (--num_match_categories == 0))
712			 || (((matches[i].match_fields & DEVSTAT_MATCH_IF) != 0)
713			  && ((devices[j].device_type & DEVSTAT_TYPE_IF_MASK) ==
714			        (matches[i].device_type & DEVSTAT_TYPE_IF_MASK))
715			  &&(((matches[i].match_fields & DEVSTAT_MATCH_PASS)!=0)
716			   || (((matches[i].match_fields &
717				DEVSTAT_MATCH_PASS) == 0)
718			    && ((devices[j].device_type &
719				DEVSTAT_TYPE_PASS) == 0)))
720			  && (--num_match_categories == 0))
721			 || (((matches[i].match_fields & DEVSTAT_MATCH_PASS)!=0)
722			  && ((devices[j].device_type & DEVSTAT_TYPE_PASS) != 0)
723			  && (--num_match_categories == 0))) {
724
725				/*
726				 * This is probably a non-optimal solution
727				 * to the problem that the devices in the
728				 * device list will not be in the same
729				 * order as the devices in the selection
730				 * array.
731				 */
732				for (k = 0; k < numdevs; k++) {
733					if ((*dev_select)[k].position == j) {
734						found = 1;
735						break;
736					}
737				}
738
739				/*
740				 * There shouldn't be a case where a device
741				 * in the device list is not in the
742				 * selection list...but it could happen.
743				 */
744				if (found != 1) {
745					fprintf(stderr, "selectdevs: couldn't"
746						" find %s%d in selection "
747						"list\n",
748						devices[j].device_name,
749						devices[j].unit_number);
750					break;
751				}
752
753				/*
754				 * We do different things based upon the
755				 * mode we're in.  If we're in add or only
756				 * mode, we go ahead and select this device
757				 * if it hasn't already been selected.  If
758				 * it has already been selected, we leave
759				 * it alone so we don't mess up the
760				 * selection ordering.  Manually specified
761				 * devices have already been selected, and
762				 * they have higher priority than pattern
763				 * matched devices.  If we're in remove
764				 * mode, we de-select the given device and
765				 * decrement the selected count.
766				 */
767				switch(select_mode) {
768				case DS_SELECT_ADD:
769				case DS_SELECT_ADDONLY:
770				case DS_SELECT_ONLY:
771					if ((*dev_select)[k].selected != 0)
772						break;
773					(*dev_select)[k].selected =
774						++selection_number;
775					(*num_selected)++;
776					break;
777				case DS_SELECT_REMOVE:
778					(*dev_select)[k].selected = 0;
779					(*num_selected)--;
780					break;
781				}
782			}
783		}
784	}
785
786	/*
787	 * Here we implement "top" mode.  Devices are sorted in the
788	 * selection array based on two criteria:  whether or not they are
789	 * selected (not selection number, just the fact that they are
790	 * selected!) and the number of bytes in the "bytes" field of the
791	 * selection structure.  The bytes field generally must be kept up
792	 * by the user.  In the future, it may be maintained by library
793	 * functions, but for now the user has to do the work.
794	 *
795	 * At first glance, it may seem wrong that we don't go through and
796	 * select every device in the case where the user hasn't specified
797	 * any devices or patterns.  In fact, though, it won't make any
798	 * difference in the device sorting.  In that particular case (i.e.
799	 * when we're in "add" or "only" mode, and the user hasn't
800	 * specified anything) the first time through no devices will be
801	 * selected, so the only criterion used to sort them will be their
802	 * performance.  The second time through, and every time thereafter,
803	 * all devices will be selected, so again selection won't matter.
804	 */
805	if (perf_select != 0) {
806
807		/* Sort the device array by throughput  */
808		qsort(*dev_select, *num_selections,
809		      sizeof(struct device_selection),
810		      compare_select);
811
812		if (*num_selected == 0) {
813			/*
814			 * Here we select every device in the array, if it
815			 * isn't already selected.  Because the 'selected'
816			 * variable in the selection array entries contains
817			 * the selection order, the devstats routine can show
818			 * the devices that were selected first.
819			 */
820			for (i = 0; i < *num_selections; i++) {
821				if ((*dev_select)[i].selected == 0) {
822					(*dev_select)[i].selected =
823						++selection_number;
824					(*num_selected)++;
825				}
826			}
827		} else {
828			selection_number = 0;
829			for (i = 0; i < *num_selections; i++) {
830				if ((*dev_select)[i].selected != 0) {
831					(*dev_select)[i].selected =
832						++selection_number;
833				}
834			}
835		}
836	}
837
838	/*
839	 * If we're in the "add" selection mode and if we haven't already
840	 * selected maxshowdevs number of devices, go through the array and
841	 * select any unselected devices.  If we're in "only" mode, we
842	 * obviously don't want to select anything other than what the user
843	 * specifies.  If we're in "remove" mode, it probably isn't a good
844	 * idea to go through and select any more devices, since we might
845	 * end up selecting something that the user wants removed.  Through
846	 * more complicated logic, we could actually figure this out, but
847	 * that would probably require combining this loop with the various
848	 * selections loops above.
849	 */
850	if ((select_mode == DS_SELECT_ADD) && (*num_selected < maxshowdevs)) {
851		for (i = 0; i < *num_selections; i++)
852			if ((*dev_select)[i].selected == 0) {
853				(*dev_select)[i].selected = ++selection_number;
854				(*num_selected)++;
855			}
856	}
857
858	/*
859	 * Look at the number of devices that have been selected.  If it
860	 * has changed, set the changed variable.  Otherwise, if we've
861	 * made a backup of the selection list, compare it to the current
862	 * selection list to see if the selected devices have changed.
863	 */
864	if ((changed == 0) && (old_num_selected != *num_selected))
865		changed = 1;
866	else if ((changed == 0) && (old_dev_select != NULL)) {
867		/*
868		 * Now we go through the selection list and we look at
869		 * it three different ways.
870		 */
871		for (i = 0; (i < *num_selections) && (changed == 0) &&
872		     (i < old_num_selections); i++) {
873			/*
874			 * If the device at index i in both the new and old
875			 * selection arrays has the same device number and
876			 * selection status, it hasn't changed.  We
877			 * continue on to the next index.
878			 */
879			if (((*dev_select)[i].device_number ==
880			     old_dev_select[i].device_number)
881			 && ((*dev_select)[i].selected ==
882			     old_dev_select[i].selected))
883				continue;
884
885			/*
886			 * Now, if we're still going through the if
887			 * statement, the above test wasn't true.  So we
888			 * check here to see if the device at index i in
889			 * the current array is the same as the device at
890			 * index i in the old array.  If it is, that means
891			 * that its selection number has changed.  Set
892			 * changed to 1 and exit the loop.
893			 */
894			else if ((*dev_select)[i].device_number ==
895			          old_dev_select[i].device_number) {
896				changed = 1;
897				break;
898			}
899			/*
900			 * If we get here, then the device at index i in
901			 * the current array isn't the same device as the
902			 * device at index i in the old array.
903			 */
904			else {
905				found = 0;
906
907				/*
908				 * Search through the old selection array
909				 * looking for a device with the same
910				 * device number as the device at index i
911				 * in the current array.  If the selection
912				 * status is the same, then we mark it as
913				 * found.  If the selection status isn't
914				 * the same, we break out of the loop.
915				 * Since found isn't set, changed will be
916				 * set to 1 below.
917				 */
918				for (j = 0; j < old_num_selections; j++) {
919					if (((*dev_select)[i].device_number ==
920					      old_dev_select[j].device_number)
921					 && ((*dev_select)[i].selected ==
922					      old_dev_select[j].selected)){
923						found = 1;
924						break;
925					}
926					else if ((*dev_select)[i].device_number
927					    == old_dev_select[j].device_number)
928						break;
929				}
930				if (found == 0)
931					changed = 1;
932			}
933		}
934	}
935	if (old_dev_select != NULL)
936		free(old_dev_select);
937
938	return(changed);
939}
940
941/*
942 * Comparison routine for qsort() above.  Note that the comparison here is
943 * backwards -- generally, it should return a value to indicate whether
944 * arg1 is <, =, or > arg2.  Instead, it returns the opposite.  The reason
945 * it returns the opposite is so that the selection array will be sorted in
946 * order of decreasing performance.  We sort on two parameters.  The first
947 * sort key is whether or not one or the other of the devices in question
948 * has been selected.  If one of them has, and the other one has not, the
949 * selected device is automatically more important than the unselected
950 * device.  If neither device is selected, we judge the devices based upon
951 * performance.
952 */
953static int
954compare_select(const void *arg1, const void *arg2)
955{
956	if ((((const struct device_selection *)arg1)->selected)
957	 && (((const struct device_selection *)arg2)->selected == 0))
958		return(-1);
959	else if ((((const struct device_selection *)arg1)->selected == 0)
960	      && (((const struct device_selection *)arg2)->selected))
961		return(1);
962	else if (((const struct device_selection *)arg2)->bytes <
963	         ((const struct device_selection *)arg1)->bytes)
964		return(-1);
965	else if (((const struct device_selection *)arg2)->bytes >
966		 ((const struct device_selection *)arg1)->bytes)
967		return(1);
968	else
969		return(0);
970}
971
972/*
973 * Take a string with the general format "arg1,arg2,arg3", and build a
974 * device matching expression from it.
975 */
976int
977devstat_buildmatch(char *match_str, struct devstat_match **matches,
978		   int *num_matches)
979{
980	char *tstr[5];
981	char **tempstr;
982	int num_args;
983	int i, j;
984	const char *func_name = "devstat_buildmatch";
985
986	/* We can't do much without a string to parse */
987	if (match_str == NULL) {
988		snprintf(devstat_errbuf, sizeof(devstat_errbuf),
989			 "%s: no match expression", func_name);
990		return(-1);
991	}
992
993	/*
994	 * Break the (comma delimited) input string out into separate strings.
995	 */
996	for (tempstr = tstr, num_args  = 0;
997	     (*tempstr = strsep(&match_str, ",")) != NULL && (num_args < 5);
998	     num_args++)
999		if (**tempstr != '\0')
1000			if (++tempstr >= &tstr[5])
1001				break;
1002
1003	/* The user gave us too many type arguments */
1004	if (num_args > 3) {
1005		snprintf(devstat_errbuf, sizeof(devstat_errbuf),
1006			 "%s: too many type arguments", func_name);
1007		return(-1);
1008	}
1009
1010	/*
1011	 * Since you can't realloc a pointer that hasn't been malloced
1012	 * first, we malloc first and then realloc.
1013	 */
1014	if (*num_matches == 0)
1015		*matches = (struct devstat_match *)malloc(
1016			   sizeof(struct devstat_match));
1017	else
1018		*matches = (struct devstat_match *)realloc(*matches,
1019			  sizeof(struct devstat_match) * (*num_matches + 1));
1020
1021	/* Make sure the current entry is clear */
1022	bzero(&matches[0][*num_matches], sizeof(struct devstat_match));
1023
1024	/*
1025	 * Step through the arguments the user gave us and build a device
1026	 * matching expression from them.
1027	 */
1028	for (i = 0; i < num_args; i++) {
1029		char *tempstr2, *tempstr3;
1030
1031		/*
1032		 * Get rid of leading white space.
1033		 */
1034		tempstr2 = tstr[i];
1035		while (isspace(*tempstr2) && (*tempstr2 != '\0'))
1036			tempstr2++;
1037
1038		/*
1039		 * Get rid of trailing white space.
1040		 */
1041		tempstr3 = &tempstr2[strlen(tempstr2) - 1];
1042
1043		while ((*tempstr3 != '\0') && (tempstr3 > tempstr2)
1044		    && (isspace(*tempstr3))) {
1045			*tempstr3 = '\0';
1046			tempstr3--;
1047		}
1048
1049		/*
1050		 * Go through the match table comparing the user's
1051		 * arguments to known device types, interfaces, etc.
1052		 */
1053		for (j = 0; match_table[j].match_str != NULL; j++) {
1054			/*
1055			 * We do case-insensitive matching, in case someone
1056			 * wants to enter "SCSI" instead of "scsi" or
1057			 * something like that.  Only compare as many
1058			 * characters as are in the string in the match
1059			 * table.  This should help if someone tries to use
1060			 * a super-long match expression.
1061			 */
1062			if (strncasecmp(tempstr2, match_table[j].match_str,
1063			    strlen(match_table[j].match_str)) == 0) {
1064				/*
1065				 * Make sure the user hasn't specified two
1066				 * items of the same type, like "da" and
1067				 * "cd".  One device cannot be both.
1068				 */
1069				if (((*matches)[*num_matches].match_fields &
1070				    match_table[j].match_field) != 0) {
1071					snprintf(devstat_errbuf,
1072						 sizeof(devstat_errbuf),
1073						 "%s: cannot have more than "
1074						 "one match item in a single "
1075						 "category", func_name);
1076					return(-1);
1077				}
1078				/*
1079				 * If we've gotten this far, we have a
1080				 * winner.  Set the appropriate fields in
1081				 * the match entry.
1082				 */
1083				(*matches)[*num_matches].match_fields |=
1084					match_table[j].match_field;
1085				(*matches)[*num_matches].device_type |=
1086					match_table[j].type;
1087				(*matches)[*num_matches].num_match_categories++;
1088				break;
1089			}
1090		}
1091		/*
1092		 * We should have found a match in the above for loop.  If
1093		 * not, that means the user entered an invalid device type
1094		 * or interface.
1095		 */
1096		if ((*matches)[*num_matches].num_match_categories != (i + 1)) {
1097			snprintf(devstat_errbuf, sizeof(devstat_errbuf),
1098				 "%s: unknown match item \"%s\"", func_name,
1099				 tstr[i]);
1100			return(-1);
1101		}
1102	}
1103
1104	(*num_matches)++;
1105
1106	return(0);
1107}
1108
1109/*
1110 * Compute a number of device statistics.  Only one field is mandatory, and
1111 * that is "current".  Everything else is optional.  The caller passes in
1112 * pointers to variables to hold the various statistics he desires.  If he
1113 * doesn't want a particular staistic, he should pass in a NULL pointer.
1114 * Return values:
1115 * 0   -- success
1116 * -1  -- failure
1117 */
1118int
1119compute_stats(struct devstat *current, struct devstat *previous,
1120	      long double etime, u_int64_t *total_bytes,
1121	      u_int64_t *total_transfers, u_int64_t *total_blocks,
1122	      long double *kb_per_transfer, long double *transfers_per_second,
1123	      long double *mb_per_second, long double *blocks_per_second,
1124	      long double *ms_per_transaction)
1125{
1126	return(devstat_compute_statistics(current, previous, etime,
1127	       total_bytes ? DSM_TOTAL_BYTES : DSM_SKIP,
1128	       total_bytes,
1129	       total_transfers ? DSM_TOTAL_TRANSFERS : DSM_SKIP,
1130	       total_transfers,
1131	       total_blocks ? DSM_TOTAL_BLOCKS : DSM_SKIP,
1132	       total_blocks,
1133	       kb_per_transfer ? DSM_KB_PER_TRANSFER : DSM_SKIP,
1134	       kb_per_transfer,
1135	       transfers_per_second ? DSM_TRANSFERS_PER_SECOND : DSM_SKIP,
1136	       transfers_per_second,
1137	       mb_per_second ? DSM_MB_PER_SECOND : DSM_SKIP,
1138	       mb_per_second,
1139	       blocks_per_second ? DSM_BLOCKS_PER_SECOND : DSM_SKIP,
1140	       blocks_per_second,
1141	       ms_per_transaction ? DSM_MS_PER_TRANSACTION : DSM_SKIP,
1142	       ms_per_transaction,
1143	       DSM_NONE));
1144}
1145
1146
1147/* This is 1/2^64 */
1148#define BINTIME_SCALE 5.42101086242752217003726400434970855712890625e-20
1149
1150long double
1151devstat_compute_etime(struct bintime *cur_time, struct bintime *prev_time)
1152{
1153	long double etime;
1154
1155	etime = cur_time->sec;
1156	etime += cur_time->frac * BINTIME_SCALE;
1157	if (prev_time != NULL) {
1158		etime -= prev_time->sec;
1159		etime -= prev_time->frac * BINTIME_SCALE;
1160	}
1161	return(etime);
1162}
1163
1164int
1165devstat_compute_statistics(struct devstat *current, struct devstat *previous,
1166			   long double etime, ...)
1167{
1168	const char *func_name = "devstat_compute_statistics";
1169	u_int64_t totalbytes, totalbytesread, totalbyteswrite;
1170	u_int64_t totaltransfers, totaltransfersread, totaltransferswrite;
1171	u_int64_t totaltransfersother, totalblocks, totalblocksread;
1172	u_int64_t totalblockswrite;
1173	va_list ap;
1174	devstat_metric metric;
1175	u_int64_t *destu64;
1176	long double *destld;
1177	int retval;
1178
1179	retval = 0;
1180
1181	/*
1182	 * current is the only mandatory field.
1183	 */
1184	if (current == NULL) {
1185		snprintf(devstat_errbuf, sizeof(devstat_errbuf),
1186			 "%s: current stats structure was NULL", func_name);
1187		return(-1);
1188	}
1189
1190	totalbytesread = current->bytes[DEVSTAT_READ] -
1191			 ((previous) ? previous->bytes[DEVSTAT_READ] : 0);
1192	totalbyteswrite = current->bytes[DEVSTAT_WRITE] -
1193			    ((previous) ? previous->bytes[DEVSTAT_WRITE] : 0);
1194
1195	totalbytes = totalbytesread + totalbyteswrite;
1196
1197	totaltransfersread = current->operations[DEVSTAT_READ] -
1198			     ((previous) ? previous->operations[DEVSTAT_READ] : 0);
1199
1200	totaltransferswrite = current->operations[DEVSTAT_WRITE] -
1201			      ((previous) ? previous->operations[DEVSTAT_WRITE] : 0);
1202
1203	totaltransfersother = current->operations[DEVSTAT_NO_DATA] -
1204			      ((previous) ? previous->operations[DEVSTAT_NO_DATA] : 0);
1205
1206	totaltransfers = totaltransfersread + totaltransferswrite +
1207			 totaltransfersother;
1208
1209	totalblocks = totalbytes;
1210	totalblocksread = totalbytesread;
1211	totalblockswrite = totalbyteswrite;
1212
1213	if (current->block_size > 0) {
1214		totalblocks /= current->block_size;
1215		totalblocksread /= current->block_size;
1216		totalblockswrite /= current->block_size;
1217	} else {
1218		totalblocks /= 512;
1219		totalblocksread /= 512;
1220		totalblockswrite /= 512;
1221	}
1222
1223	va_start(ap, etime);
1224
1225	while ((metric = (devstat_metric)va_arg(ap, devstat_metric)) != 0) {
1226
1227		if (metric == DSM_NONE)
1228			break;
1229
1230		if (metric >= DSM_MAX) {
1231			snprintf(devstat_errbuf, sizeof(devstat_errbuf),
1232				 "%s: metric %d is out of range", func_name,
1233				 metric);
1234			retval = -1;
1235			goto bailout;
1236		}
1237
1238		switch (devstat_arg_list[metric].argtype) {
1239		case DEVSTAT_ARG_UINT64:
1240			destu64 = (u_int64_t *)va_arg(ap, u_int64_t *);
1241			break;
1242		case DEVSTAT_ARG_LD:
1243			destld = (long double *)va_arg(ap, long double *);
1244			break;
1245		case DEVSTAT_ARG_SKIP:
1246			destld = (long double *)va_arg(ap, long double *);
1247			break;
1248		default:
1249			retval = -1;
1250			goto bailout;
1251			break; /* NOTREACHED */
1252		}
1253
1254		if (devstat_arg_list[metric].argtype == DEVSTAT_ARG_SKIP)
1255			continue;
1256
1257		switch (metric) {
1258		case DSM_TOTAL_BYTES:
1259			*destu64 = totalbytes;
1260			break;
1261		case DSM_TOTAL_BYTES_READ:
1262			*destu64 = totalbytesread;
1263			break;
1264		case DSM_TOTAL_BYTES_WRITE:
1265			*destu64 = totalbyteswrite;
1266			break;
1267		case DSM_TOTAL_TRANSFERS:
1268			*destu64 = totaltransfers;
1269			break;
1270		case DSM_TOTAL_TRANSFERS_READ:
1271			*destu64 = totaltransfersread;
1272			break;
1273		case DSM_TOTAL_TRANSFERS_WRITE:
1274			*destu64 = totaltransferswrite;
1275			break;
1276		case DSM_TOTAL_TRANSFERS_OTHER:
1277			*destu64 = totaltransfersother;
1278			break;
1279		case DSM_TOTAL_BLOCKS:
1280			*destu64 = totalblocks;
1281			break;
1282		case DSM_TOTAL_BLOCKS_READ:
1283			*destu64 = totalblocksread;
1284			break;
1285		case DSM_TOTAL_BLOCKS_WRITE:
1286			*destu64 = totalblockswrite;
1287			break;
1288		case DSM_KB_PER_TRANSFER:
1289			*destld = totalbytes;
1290			*destld /= 1024;
1291			if (totaltransfers > 0)
1292				*destld /= totaltransfers;
1293			else
1294				*destld = 0.0;
1295			break;
1296		case DSM_KB_PER_TRANSFER_READ:
1297			*destld = totalbytesread;
1298			*destld /= 1024;
1299			if (totaltransfersread > 0)
1300				*destld /= totaltransfersread;
1301			else
1302				*destld = 0.0;
1303			break;
1304		case DSM_KB_PER_TRANSFER_WRITE:
1305			*destld = totalbyteswrite;
1306			*destld /= 1024;
1307			if (totaltransferswrite > 0)
1308				*destld /= totaltransferswrite;
1309			else
1310				*destld = 0.0;
1311			break;
1312		case DSM_TRANSFERS_PER_SECOND:
1313			if (etime > 0.0) {
1314				*destld = totaltransfers;
1315				*destld /= etime;
1316			} else
1317				*destld = 0.0;
1318			break;
1319		case DSM_TRANSFERS_PER_SECOND_READ:
1320			if (etime > 0.0) {
1321				*destld = totaltransfersread;
1322				*destld /= etime;
1323			} else
1324				*destld = 0.0;
1325			break;
1326		case DSM_TRANSFERS_PER_SECOND_WRITE:
1327			if (etime > 0.0) {
1328				*destld = totaltransferswrite;
1329				*destld /= etime;
1330			} else
1331				*destld = 0.0;
1332			break;
1333		case DSM_TRANSFERS_PER_SECOND_OTHER:
1334			if (etime > 0.0) {
1335				*destld = totaltransfersother;
1336				*destld /= etime;
1337			} else
1338				*destld = 0.0;
1339			break;
1340		case DSM_MB_PER_SECOND:
1341			*destld = totalbytes;
1342			*destld /= 1024 * 1024;
1343			if (etime > 0.0)
1344				*destld /= etime;
1345			else
1346				*destld = 0.0;
1347			break;
1348		case DSM_MB_PER_SECOND_READ:
1349			*destld = totalbytesread;
1350			*destld /= 1024 * 1024;
1351			if (etime > 0.0)
1352				*destld /= etime;
1353			else
1354				*destld = 0.0;
1355			break;
1356		case DSM_MB_PER_SECOND_WRITE:
1357			*destld = totalbyteswrite;
1358			*destld /= 1024 * 1024;
1359			if (etime > 0.0)
1360				*destld /= etime;
1361			else
1362				*destld = 0.0;
1363			break;
1364		case DSM_BLOCKS_PER_SECOND:
1365			*destld = totalblocks;
1366			if (etime > 0.0)
1367				*destld /= etime;
1368			else
1369				*destld = 0.0;
1370			break;
1371		case DSM_BLOCKS_PER_SECOND_READ:
1372			*destld = totalblocksread;
1373			if (etime > 0.0)
1374				*destld /= etime;
1375			else
1376				*destld = 0.0;
1377			break;
1378		case DSM_BLOCKS_PER_SECOND_WRITE:
1379			*destld = totalblockswrite;
1380			if (etime > 0.0)
1381				*destld /= etime;
1382			else
1383				*destld = 0.0;
1384			break;
1385		/*
1386		 * This calculation is somewhat bogus.  It simply divides
1387		 * the elapsed time by the total number of transactions
1388		 * completed.  While that does give the caller a good
1389		 * picture of the average rate of transaction completion,
1390		 * it doesn't necessarily give the caller a good view of
1391		 * how long transactions took to complete on average.
1392		 * Those two numbers will be different for a device that
1393		 * can handle more than one transaction at a time.  e.g.
1394		 * SCSI disks doing tagged queueing.
1395		 *
1396		 * The only way to accurately determine the real average
1397		 * time per transaction would be to compute and store the
1398		 * time on a per-transaction basis.  That currently isn't
1399		 * done in the kernel, and would only be desireable if it
1400		 * could be implemented in a somewhat non-intrusive and high
1401		 * performance way.
1402		 */
1403		case DSM_MS_PER_TRANSACTION:
1404			if (totaltransfers > 0) {
1405				*destld = etime;
1406				*destld /= totaltransfers;
1407				*destld *= 1000;
1408			} else
1409				*destld = 0.0;
1410			break;
1411		/*
1412		 * As above, these next two really only give the average
1413		 * rate of completion for read and write transactions, not
1414		 * the average time the transaction took to complete.
1415		 */
1416		case DSM_MS_PER_TRANSACTION_READ:
1417			if (totaltransfersread > 0) {
1418				*destld = etime;
1419				*destld /= totaltransfersread;
1420				*destld *= 1000;
1421			} else
1422				*destld = 0.0;
1423			break;
1424		case DSM_MS_PER_TRANSACTION_WRITE:
1425			if (totaltransferswrite > 0) {
1426				*destld = etime;
1427				*destld /= totaltransferswrite;
1428				*destld *= 1000;
1429			} else
1430				*destld = 0.0;
1431			break;
1432		default:
1433			/*
1434			 * This shouldn't happen, since we should have
1435			 * caught any out of range metrics at the top of
1436			 * the loop.
1437			 */
1438			snprintf(devstat_errbuf, sizeof(devstat_errbuf),
1439				 "%s: unknown metric %d", func_name, metric);
1440			retval = -1;
1441			goto bailout;
1442			break; /* NOTREACHED */
1443		}
1444	}
1445
1446bailout:
1447
1448	va_end(ap);
1449	return(retval);
1450}
1451
1452static int
1453readkmem(kvm_t *kd, unsigned long addr, void *buf, size_t nbytes)
1454{
1455	const char *func_name = "readkmem";
1456
1457	if (kvm_read(kd, addr, buf, nbytes) == -1) {
1458		snprintf(devstat_errbuf, sizeof(devstat_errbuf),
1459			 "%s: error reading value (kvm_read): %s", func_name,
1460			 kvm_geterr(kd));
1461		return(-1);
1462	}
1463	return(0);
1464}
1465
1466static int
1467readkmem_nl(kvm_t *kd, const char *name, void *buf, size_t nbytes)
1468{
1469	const char *func_name = "readkmem_nl";
1470	struct nlist nl[2];
1471
1472	(const char *)nl[0].n_name = name;
1473	nl[1].n_name = NULL;
1474
1475	if (kvm_nlist(kd, nl) == -1) {
1476		snprintf(devstat_errbuf, sizeof(devstat_errbuf),
1477			 "%s: error getting name list (kvm_nlist): %s",
1478			 func_name, kvm_geterr(kd));
1479		return(-1);
1480	}
1481	return(readkmem(kd, nl[0].n_value, buf, nbytes));
1482}
1483
1484/*
1485 * This duplicates the functionality of the kernel sysctl handler for poking
1486 * through crash dumps.
1487 */
1488static char *
1489get_devstat_kvm(kvm_t *kd)
1490{
1491	int error, i, wp;
1492	long gen;
1493	struct devstat *nds;
1494	struct devstat ds;
1495	struct devstatlist dhead;
1496	int num_devs;
1497	char *rv = NULL;
1498	const char *func_name = "get_devstat_kvm";
1499
1500	if ((num_devs = devstat_getnumdevs(kd)) <= 0)
1501		return(NULL);
1502	error = 0;
1503	if (KREADNL(kd, X_DEVICE_STATQ, dhead) == -1)
1504		return(NULL);
1505
1506	nds = STAILQ_FIRST(&dhead);
1507
1508	if ((rv = malloc(sizeof(gen))) == NULL) {
1509		snprintf(devstat_errbuf, sizeof(devstat_errbuf),
1510			 "%s: out of memory (initial malloc failed)",
1511			 func_name);
1512		return(NULL);
1513	}
1514	gen = devstat_getgeneration(kd);
1515	memcpy(rv, &gen, sizeof(gen));
1516	wp = sizeof(gen);
1517	/*
1518	 * Now push out all the devices.
1519	 */
1520	for (i = 0; (nds != NULL) && (i < num_devs);
1521	     nds = STAILQ_NEXT(nds, dev_links), i++) {
1522		if (readkmem(kd, (long)nds, &ds, sizeof(ds)) == -1) {
1523			free(rv);
1524			return(NULL);
1525		}
1526		nds = &ds;
1527		rv = (char *)reallocf(rv, sizeof(gen) +
1528				      sizeof(ds) * (i + 1));
1529		if (rv == NULL) {
1530			snprintf(devstat_errbuf, sizeof(devstat_errbuf),
1531				 "%s: out of memory (malloc failed)",
1532				 func_name);
1533			return(NULL);
1534		}
1535		memcpy(rv + wp, &ds, sizeof(ds));
1536		wp += sizeof(ds);
1537	}
1538	return(rv);
1539}
1540