1#!/usr/bin/sh
2#
3# iosnoop - A program to print disk I/O events as they happen, with useful
4#           details such as UID, PID, filename, command, etc. 
5#           Written using DTrace (Solaris 10 3/05).
6#
7# This is measuring disk events that have made it past system caches.
8#
9# $Id: iosnoop 8 2007-08-06 05:55:26Z brendan $
10#
11# USAGE: 	iosnoop [-a|-A|-DeghiNostv] [-d device] [-f filename] 
12#			[-m mount_point] [-n name] [-p PID]
13#
14#		iosnoop		# default output
15#
16#		-a		# print all data (mostly)
17#		-A		# dump all data, space delimited
18#		-D		# print time delta, us (elapsed)
19#		-e		# print device name
20#		-g		# print command arguments
21#		-i		# print device instance
22#		-N		# print major and minor numbers
23#		-o		# print disk delta time, us 
24#		-s		# print start time, us
25#		-t 		# print completion time, us
26#		-v		# print completion time, string
27#		-d device	# instance name to snoop (eg, dad0)
28#		-f filename	# full pathname of file to snoop
29#		-m mount_point	# this FS only (will skip raw events)
30#		-n name		# this process name only
31#		-p PID		# this PID only
32#  eg,
33#		iosnoop -v	# human readable timestamps
34#		iosnoop -N	# print major and minor numbers
35#		iosnoop -m /	# snoop events on the root filesystem only
36# 	
37# FIELDS:
38#		UID		user ID
39#		PID		process ID
40#		PPID		parennt process ID
41#		COMM		command name for the process
42#		ARGS		argument listing for the process
43#		SIZE		size of operation, bytes
44#		BLOCK		disk block for the operation (location)
45#		STIME	 	timestamp for the disk request, us
46#		TIME		timestamp for the disk completion, us
47#		DELTA		elapsed time from request to completion, us
48#		DTIME		time for disk to complete request, us
49#		STRTIME		timestamp for the disk completion, string
50#		DEVICE  	device name
51#		INS     	device instance number
52#		D		direction, Read or Write
53#		MOUNT		mount point
54#		FILE		filename (basename) for io operation
55# 
56# NOTE:
57# - There are two different delta times reported. -D prints the
58#   elapsed time from the disk request (strategy) to the disk completion
59#   (iodone); -o prints the time for the disk to complete that event 
60#   since it's last event (time between iodones), or, the time to the 
61#   strategy if the disk had been idle. 
62# - When filtering on PID or process name, be aware that poor disk event
63#   times may be due to events that have been filtered away, for example
64#   another process that may be seeking the disk heads elsewhere.
65#
66# SEE ALSO: BigAdmin: DTrace, http://www.sun.com/bigadmin/content/dtrace
67#	    Solaris Dynamic Tracing Guide, http://docs.sun.com
68#	    DTrace Tools, http://www.brendangregg.com/dtrace.html
69#
70# COPYRIGHT: Copyright (c) 2005 Brendan Gregg.
71#
72# CDDL HEADER START
73#
74#  The contents of this file are subject to the terms of the
75#  Common Development and Distribution License, Version 1.0 only
76#  (the "License").  You may not use this file except in compliance
77#  with the License.
78#
79#  You can obtain a copy of the license at Docs/cddl1.txt
80#  or http://www.opensolaris.org/os/licensing.
81#  See the License for the specific language governing permissions
82#  and limitations under the License.
83#
84# CDDL HEADER END
85#
86# Author: Brendan Gregg  [Sydney, Australia]
87#
88# 12-Mar-2004	Brendan Gregg	Created this, build 51.
89# 23-May-2004	   "	  "	Fixed mntpt bug.
90# 10-Oct-2004	   "      "	Rewritten to use the io provider, build 63.
91# 04-Jan-2005	   "	  "	Wrapped in sh to provide options.
92# 08-May-2005	   "      "	Rewritten for perfromance.
93# 15-Jul-2005	   "      "	Improved DTIME calculation.
94# 25-Jul-2005	   "      "	Added -p, -n. Improved code.
95# 17-Sep-2005	   "      "	Increased switchrate.
96# 17-Sep-2005	   "      "	Last update.
97#
98
99
100##############################
101# --- Process Arguments ---
102#
103
104### default variables
105opt_dump=0; opt_device=0; opt_delta=0; opt_devname=0; opt_file=0; opt_args=0; 
106opt_mount=0; opt_start=0 opt_end=0; opt_endstr=0; opt_ins=0; opt_nums=0
107opt_dtime=0; filter=0; device=.; filename=.; mount=.; pname=.; pid=0
108opt_name=0; opt_pid=0
109
110### process options
111while getopts aAd:Def:ghim:Nn:op:stv name
112do
113	case $name in
114	a)	opt_devname=1; opt_args=1; opt_endstr=1; opt_nums=1 ;;
115	A)	opt_dump=1 ;;
116	d)	opt_device=1; device=$OPTARG ;;
117	D)	opt_delta=1 ;;
118	e)	opt_devname=1 ;;
119	f)	opt_file=1; filename=$OPTARG ;;
120	g)	opt_args=1 ;;
121	i)	opt_ins=1 ;;
122	N)	opt_nums=1 ;;
123	n)	opt_name=1; pname=$OPTARG ;;
124	o)	opt_dtime=1 ;;
125	p)	opt_pid=1; pid=$OPTARG ;;
126	m)	opt_mount=1; mount=$OPTARG ;;
127	s)	opt_start=1 ;;
128	t)	opt_end=1 ;;
129	v)	opt_endstr=1 ;;
130	h|?)	cat <<-END >&2
131		USAGE: iosnoop [-a|-A|-DeghiNostv] [-d device] [-f filename]
132		               [-m mount_point] [-n name] [-p PID]
133		       iosnoop          # default output
134		                -a      # print all data (mostly)
135		                -A      # dump all data, space delimited
136		                -D      # print time delta, us (elapsed)
137		                -e      # print device name
138		                -g      # print command arguments
139		                -i      # print device instance
140		                -N      # print major and minor numbers
141		                -o      # print disk delta time, us
142		                -s      # print start time, us
143		                -t      # print completion time, us
144		                -v      # print completion time, string
145		                -d device       # instance name to snoop 
146		                -f filename     # snoop this file only
147		                -m mount_point  # this FS only 
148		                -n name         # this process name only 
149		                -p PID          # this PID only 
150		   eg,
151		        iosnoop -v      # human readable timestamps
152		        iosnoop -N      # print major and minor numbers
153		        iosnoop -m /    # snoop events on filesystem / only
154		END
155		exit 1
156	esac
157done
158
159### option logic
160if [ $opt_dump -eq 1 ]; then
161	opt_delta=0; opt_devname=0; opt_args=2; opt_start=0; 
162	opt_end=0; opt_endstr=0; opt_nums=0; opt_ins=0; opt_dtime=0
163fi
164if [ $opt_device -eq 1 -o $opt_file -eq 1 -o $opt_mount -eq 1 -o \
165    $opt_name -eq 1 -o $opt_pid -eq 1 ]; then
166	filter=1
167fi
168
169
170#################################
171# --- Main Program, DTrace ---
172#
173/usr/sbin/dtrace -n '
174 /*
175  * Command line arguments
176  */
177 inline int OPT_dump 	= '$opt_dump';
178 inline int OPT_device 	= '$opt_device';
179 inline int OPT_delta 	= '$opt_delta';
180 inline int OPT_devname = '$opt_devname';
181 inline int OPT_file 	= '$opt_file';
182 inline int OPT_args 	= '$opt_args';
183 inline int OPT_ins  	= '$opt_ins';
184 inline int OPT_nums  	= '$opt_nums';
185 inline int OPT_dtime 	= '$opt_dtime';
186 inline int OPT_mount 	= '$opt_mount';
187 inline int OPT_start 	= '$opt_start';
188 inline int OPT_pid 	= '$opt_pid';
189 inline int OPT_name 	= '$opt_name';
190 inline int OPT_end 	= '$opt_end';
191 inline int OPT_endstr 	= '$opt_endstr';
192 inline int FILTER 	= '$filter';
193 inline int PID 	= '$pid';
194 inline string DEVICE 	= "'$device'";
195 inline string FILENAME = "'$filename'";
196 inline string MOUNT 	= "'$mount'";
197 inline string NAME 	= "'$pname'";
198 
199 #pragma D option quiet
200 #pragma D option switchrate=10hz
201
202 /*
203  * Print header
204  */
205 dtrace:::BEGIN 
206 {
207	last_event[""] = 0;
208
209	/* print optional headers */
210 	OPT_start   ? printf("%-14s ","STIME")   : 1;
211 	OPT_end     ? printf("%-14s ","TIME")    : 1;
212 	OPT_endstr  ? printf("%-20s ","STRTIME") : 1;
213 	OPT_devname ? printf("%-7s ","DEVICE")   : 1;
214 	OPT_ins     ? printf("%-3s ","INS")      : 1;
215 	OPT_nums    ? printf("%-3s %-3s ","MAJ","MIN") : 1;
216 	OPT_delta   ? printf("%-10s ","DELTA")   : 1;
217 	OPT_dtime   ? printf("%-10s ","DTIME")   : 1;
218
219	/* print main headers */
220	OPT_dump ? 
221	    printf("%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n",
222 	    "TIME", "STIME", "DELTA", "DEVICE", "INS", "MAJ", "MIN", "UID",
223	    "PID", "PPID", "D", "BLOCK", "SIZE", "MOUNT", "FILE", "PATH",
224	    "COMM","ARGS") :
225 	    printf("%5s %5s %1s %8s %6s ", "UID", "PID", "D", "BLOCK", "SIZE");
226	OPT_args == 0 ? printf("%10s %s\n", "COMM", "PATHNAME") : 1;
227 	OPT_args == 1 ? printf("%28s %s\n", "PATHNAME", "ARGS") : 1;
228 }
229
230 /*
231  * Check event is being traced
232  */
233 io:genunix::start
234 { 
235	/* default is to trace unless filtering, */
236	self->ok = FILTER ? 0 : 1;
237
238	/* check each filter, */
239	(OPT_device == 1 && DEVICE == args[1]->dev_statname)? self->ok = 1 : 1;
240	(OPT_file == 1 && FILENAME == args[2]->fi_pathname) ? self->ok = 1 : 1;
241	(OPT_mount == 1 && MOUNT == args[2]->fi_mount) ? self->ok = 1 : 1;
242	(OPT_name == 1 && NAME == execname) ? self->ok = 1 : 1;
243	(OPT_pid == 1 && PID == pid) ? self->ok = 1 : 1;
244 }
245
246 /*
247  * Reset last_event for disk idle -> start
248  * this prevents idle time being counted as disk time.
249  */
250 io:genunix::start
251 /! pending[args[1]->dev_statname]/
252 {
253	/* save last disk event */
254	last_event[args[1]->dev_statname] = timestamp;
255 }
256
257 /*
258  * Store entry details
259  */
260 io:genunix::start
261 /self->ok/
262 {
263	/* these are used as a unique disk event key, */
264 	this->dev = args[0]->b_edev;
265 	this->blk = args[0]->b_blkno;
266
267	/* save disk event details, */
268 	start_uid[this->dev, this->blk] = uid;
269 	start_pid[this->dev, this->blk] = pid;
270 	start_ppid[this->dev, this->blk] = ppid;
271 	start_args[this->dev, this->blk] = (char *)curpsinfo->pr_psargs;
272 	start_comm[this->dev, this->blk] = execname;
273 	start_time[this->dev, this->blk] = timestamp;
274
275	/* increase disk event pending count */
276	pending[args[1]->dev_statname]++;
277
278	self->ok = 0;
279 }
280
281 /*
282  * Process and Print completion
283  */
284 io:genunix::done
285 /start_time[args[0]->b_edev, args[0]->b_blkno]/
286 {
287	/* decrease disk event pending count */
288	pending[args[1]->dev_statname]--;
289
290	/*
291	 * Process details
292	 */
293
294 	/* fetch entry values */
295 	this->dev = args[0]->b_edev;
296 	this->blk = args[0]->b_blkno;
297 	this->suid = start_uid[this->dev, this->blk];
298 	this->spid = start_pid[this->dev, this->blk];
299 	this->sppid = start_ppid[this->dev, this->blk];
300 	self->sargs = (int)start_args[this->dev, this->blk] == 0 ? 
301 	    "" : start_args[this->dev, this->blk];
302 	self->scomm = start_comm[this->dev, this->blk];
303 	this->stime = start_time[this->dev, this->blk];
304	this->etime = timestamp; /* endtime */
305	this->delta = this->etime - this->stime;
306	this->dtime = last_event[args[1]->dev_statname] == 0 ? 0 :
307	    timestamp - last_event[args[1]->dev_statname];
308
309 	/* memory cleanup */
310 	start_uid[this->dev, this->blk]  = 0;
311 	start_pid[this->dev, this->blk]  = 0;
312 	start_ppid[this->dev, this->blk] = 0;
313 	start_args[this->dev, this->blk] = 0;
314 	start_time[this->dev, this->blk] = 0;
315 	start_comm[this->dev, this->blk] = 0;
316 	start_rw[this->dev, this->blk]   = 0;
317
318	/*
319	 * Print details
320	 */
321
322	/* print optional fields */
323 	OPT_start   ? printf("%-14d ", this->stime/1000) : 1;
324 	OPT_end     ? printf("%-14d ", this->etime/1000) : 1;
325 	OPT_endstr  ? printf("%-20Y ", walltimestamp) : 1;
326 	OPT_devname ? printf("%-7s ", args[1]->dev_statname) : 1;
327 	OPT_ins     ? printf("%3d ", args[1]->dev_instance) : 1;
328 	OPT_nums    ? printf("%3d %3d ",
329	    args[1]->dev_major, args[1]->dev_minor) : 1;
330 	OPT_delta   ? printf("%-10d ", this->delta/1000) : 1;
331 	OPT_dtime   ? printf("%-10d ", this->dtime/1000) : 1;
332
333	/* print main fields */
334	OPT_dump ? 
335 	    printf("%d %d %d %s %d %d %d %d %d %d %s %d %d %s %s %s %s %S\n",
336 	    this->etime/1000, this->stime/1000, this->delta/1000,
337 	    args[1]->dev_statname, args[1]->dev_instance, args[1]->dev_major,
338	    args[1]->dev_minor, this->suid, this->spid, this->sppid, 
339	    args[0]->b_flags & B_READ ? "R" : "W", 
340	    args[0]->b_blkno, args[0]->b_bcount, args[2]->fi_mount,
341	    args[2]->fi_name, args[2]->fi_pathname, self->scomm, self->sargs) :
342 	    printf("%5d %5d %1s %8d %6d ",
343 	    this->suid, this->spid, args[0]->b_flags & B_READ ? "R" : "W",
344	    args[0]->b_blkno, args[0]->b_bcount);
345	OPT_args == 0 ? printf("%10s %s\n", self->scomm, args[2]->fi_pathname)
346	    : 1;
347 	OPT_args == 1 ? printf("%28s %S\n",
348	    args[2]->fi_pathname, self->sargs) : 1;
349
350	/* save last disk event */
351	last_event[args[1]->dev_statname] = timestamp;
352
353	/* cleanup */
354	self->scomm = 0;
355	self->sargs = 0;
356 }
357
358 /*
359  * Prevent pending from underflowing
360  * this can happen if this program is started during disk events.
361  */
362 io:genunix::done
363 /pending[args[1]->dev_statname] < 0/
364 {
365	pending[args[1]->dev_statname] = 0;
366 }
367'
368