#!/usr/bin/ksh
#
# iotop - display top disk I/O events by process.
#         Written using DTrace (Solaris 10 3/05).
#
# This is measuring disk events that have made it past system caches.
#
# $Id: iotop 8 2007-08-06 05:55:26Z brendan $
#
# USAGE:        iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
#                     [-m mount_point] [-t top] [interval [count]]
#
#               iotop           # default output, 5 second intervals
#
#               -C              # don't clear the screen
#               -D              # print delta times, elapsed, us
#               -j              # print project ID
#               -o              # print disk delta times, us
#               -P              # print %I/O (disk delta times)
#               -Z              # print zone ID
#               -d device       # instance name to snoop (eg, dad0)
#               -f filename     # full pathname of file to snoop
#               -m mount_point  # this FS only (will skip raw events)
#               -t top          # print top number only
#       eg,
#               iotop 1         # 1 second samples
#               iotop -C        # don't clear the screen
#               iotop -P        # print %I/O (time based)
#               iotop -j        # print project IDs
#               iotop -Z        # print zone IDs
#               iotop -t 20     # print top 20 lines only
#               iotop -C 5 12   # print 12 x 5 second samples
#
# FIELDS:
#               UID             user ID
#               PID             process ID
#               PPID            parent process ID
#               PROJ            project ID
#               ZONE            zone ID
#               CMD             process command name
#               DEVICE          device name
#               MAJ             device major number
#               MIN             device minor number
#               D               direction, Read or Write
#               BYTES           total size of operations, bytes
#               ELAPSED         total elapsed from request to completion, us
#               DISKTIME        total time for disk to complete request, us
#               %I/O            percent disk I/O, based on time (DISKTIME)
#               load            1 min load average
#               disk_r          total disk read Kbytes for sample
#               disk_w          total disk write Kbytes for sample
#
# NOTE:
# * There are two different delta times reported. -D prints the
#   elapsed time from the disk request (strategy) to the disk completion
#   (iodone); -o prints the time for the disk to complete that event
#   since its last event (time between iodones), or, the time to the
#   strategy if the disk had been idle.
# * The %I/O value can exceed 100%. It represents how busy a process is
#   making the disks, in terms of a single disk. A value of 200% could
#   mean 2 disks are busy at 100%, or 4 disks at 50%...
#
# SEE ALSO: iosnoop
#           BigAdmin: DTrace, http://www.sun.com/bigadmin/content/dtrace
#           Solaris Dynamic Tracing Guide, http://docs.sun.com
#           DTrace Tools, http://www.brendangregg.com/dtrace.html
#
# INSPIRATION:  top(1) by William LeFebvre
#
# COPYRIGHT: Copyright (c) 2005, 2006 Brendan Gregg.
#
# CDDL HEADER START
#
#  The contents of this file are subject to the terms of the
#  Common Development and Distribution License, Version 1.0 only
#  (the "License").  You may not use this file except in compliance
#  with the License.
#
#  You can obtain a copy of the license at Docs/cddl1.txt
#  or http://www.opensolaris.org/os/licensing.
#  See the License for the specific language governing permissions
#  and limitations under the License.
#
# CDDL HEADER END
#
# KNOWN BUGS:
# - This can print errors while running on servers with Veritas volumes.
#
# Author: Brendan Gregg  [Sydney, Australia]
#
# 15-Jul-2005   Brendan Gregg   Created this.
# 20-Apr-2006      "      "     Last update.
#


##############################
# --- Process Arguments ---
#

### default variables
# opt_* are 0/1 switches; device, filename and mount default to "." as a
# placeholder because they are always spliced into the D program below.
opt_device=0; opt_file=0; opt_mount=0; opt_clear=1; opt_proj=0; opt_zone=0
opt_percent=0; opt_def=1; opt_bytes=1; filter=0; device=.; filename=.; mount=.
opt_top=0; opt_elapsed=0; opt_dtime=0; interval=5; count=-1; top=0

### helpers
#
# require_number label value
#	Exit with an error on stderr unless value consists only of digits.
#	The checked values are pasted into "inline int" declarations in the
#	D program, where anything else fails with an obscure compile error.
#
function require_number {
	if [[ "$2" != +([0-9]) ]]; then
		print -u2 "ERROR: $1 must be a whole number, not \"$2\"."
		exit 1
	fi
}

### process options
while getopts CDd:f:hjm:oPt:Z name
do
	case $name in
	C)	opt_clear=0 ;;
	D)	opt_elapsed=1; opt_bytes=0 ;;
	d)	opt_device=1; device=$OPTARG ;;
	f)	opt_file=1; filename=$OPTARG ;;
	j)	opt_proj=1; opt_def=0 ;;
	m)	opt_mount=1; mount=$OPTARG ;;
	o)	opt_dtime=1; opt_bytes=0 ;;
	P)	opt_percent=1; opt_dtime=1; opt_bytes=0 ;;
	t)	opt_top=1; top=$OPTARG ;;
	Z)	opt_zone=1; opt_def=0 ;;
	h|?)	# The usage text and its END marker sit at column 0 so the
		# here-document terminates whether or not tabs survive editing.
		cat <<-END >&2
USAGE: iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
             [-m mount_point] [-t top] [interval [count]]

                -C      # don't clear the screen
                -D      # print delta times, elapsed, us
                -j      # print project ID
                -o      # print disk delta times, us
                -P      # print %I/O (disk delta times)
                -Z      # print zone ID
                -d device       # instance name to snoop
                -f filename     # snoop this file only
                -m mount_point  # this FS only
                -t top          # print top number only
   eg,
        iotop         # default output, 5 second samples
        iotop 1       # 1 second samples
        iotop -P      # print %I/O (time based)
        iotop -m /    # snoop events on filesystem / only
        iotop -t 20   # print top 20 lines only
        iotop -C 5 12 # print 12 x 5 second samples
END
		exit 1
	esac
done

shift $(( $OPTIND - 1 ))

### option logic
if (( opt_top )); then
	require_number "-t top" "$top"
fi
# [[ a > b ]] is a string comparison: it is false for "" and "0", which
# leaves the defaults in place. Anything that passes must still be numeric.
if [[ "$1" > 0 ]]; then
	require_number "interval" "$1"
	interval=$1; shift
fi
if [[ "$1" > 0 ]]; then
	require_number "count" "$1"
	count=$1; shift
fi
# -Z wins over -j, and the disk time modes (-o, -P) win over -D
if (( opt_proj && opt_zone )); then
	opt_proj=0
fi
if (( opt_elapsed && opt_dtime )); then
	opt_elapsed=0
fi
if (( opt_device || opt_mount || opt_file )); then
	filter=1
fi
# capture the terminal clear sequence once; DTrace prints it before each report
if (( opt_clear )); then
	clearstr=$(clear)
else
	clearstr=.
fi



#################################
# --- Main Program, DTrace ---
#
# The D program is one single-quoted argument. Each shell value is spliced in
# by closing the single quote, expanding the variable inside double quotes
# (so whitespace or glob characters in a device, file or mount name cannot
# split the argument), and reopening the single quote.
# NOTE: a value containing a double quote or a backslash is not escaped and
# would still alter the D string literal it is placed in.
#
/usr/sbin/dtrace -n '
 /*
  * Command line arguments
  */
 inline int OPT_def	= '"$opt_def"';
 inline int OPT_proj	= '"$opt_proj"';
 inline int OPT_zone	= '"$opt_zone"';
 inline int OPT_clear	= '"$opt_clear"';
 inline int OPT_bytes	= '"$opt_bytes"';
 inline int OPT_elapsed	= '"$opt_elapsed"';
 inline int OPT_dtime	= '"$opt_dtime"';
 inline int OPT_percent	= '"$opt_percent"';
 inline int OPT_device	= '"$opt_device"';
 inline int OPT_mount	= '"$opt_mount"';
 inline int OPT_file	= '"$opt_file"';
 inline int OPT_top	= '"$opt_top"';
 inline int INTERVAL	= '"$interval"';
 inline int COUNTER	= '"$count"';
 inline int FILTER	= '"$filter"';
 inline int TOP		= '"$top"';
 inline string DEVICE	= "'"$device"'";
 inline string FILENAME	= "'"$filename"'";
 inline string MOUNT	= "'"$mount"'";
 inline string CLEAR	= "'"$clearstr"'";

 #pragma D option quiet

 /* boost the following if you get "dynamic variable drops" */
 #pragma D option dynvarsize=8m

 /*
  * Print header
  */
 dtrace:::BEGIN
 {
	last_event[""] = 0;

	/* starting values */
	counts = COUNTER;
	secs = INTERVAL;
	disk_r = 0;
	disk_w = 0;

	printf("Tracing... Please wait.\n");
 }

 /*
  * Check event is being traced
  * (the start and done clauses below test this->ok in their predicates)
  */
 io:genunix::start,
 io:genunix::done
 {
	/* default is to trace unless filtering, */
	this->ok = FILTER ? 0 : 1;

	/* check each filter, */
	(OPT_device == 1 && DEVICE == args[1]->dev_statname)? this->ok = 1 : 1;
	(OPT_file == 1 && FILENAME == args[2]->fi_pathname) ? this->ok = 1 : 1;
	(OPT_mount == 1 && MOUNT == args[2]->fi_mount) ? this->ok = 1 : 1;
 }

 /*
  * Reset last_event for disk idle -> start
  * this prevents idle time being counted as disk time.
  */
 io:genunix::start
 /! pending[args[1]->dev_statname]/
 {
	/* save last disk event */
	last_event[args[1]->dev_statname] = timestamp;
 }

 /*
  * Store entry details
  */
 io:genunix::start
 /this->ok/
 {
	/* these are used as a unique disk event key, */
	this->dev = args[0]->b_edev;
	this->blk = args[0]->b_blkno;

	/* save disk event details, */
	start_uid[this->dev, this->blk] = uid;
	start_pid[this->dev, this->blk] = pid;
	start_ppid[this->dev, this->blk] = ppid;
	start_comm[this->dev, this->blk] = execname;
	start_time[this->dev, this->blk] = timestamp;
	start_proj[this->dev, this->blk] = curpsinfo->pr_projid;
	start_zone[this->dev, this->blk] = curpsinfo->pr_zoneid;
	start_rw[this->dev, this->blk] = args[0]->b_flags & B_READ ? "R" : "W";

	/* per-interval byte totals for the status line */
	disk_r += args[0]->b_flags & B_READ ? args[0]->b_bcount : 0;
	disk_w += args[0]->b_flags & B_READ ? 0 : args[0]->b_bcount;

	/* increase disk event pending count */
	pending[args[1]->dev_statname]++;
 }

 /*
  * Process and Print completion
  */
 io:genunix::done
 /this->ok/
 {
	/* decrease disk event pending count */
	pending[args[1]->dev_statname]--;

	/*
	 * Process details
	 */

	/* fetch entry values */
	this->dev = args[0]->b_edev;
	this->blk = args[0]->b_blkno;
	this->suid = start_uid[this->dev, this->blk];
	this->spid = start_pid[this->dev, this->blk];
	this->sppid = start_ppid[this->dev, this->blk];
	this->sproj = start_proj[this->dev, this->blk];
	this->szone = start_zone[this->dev, this->blk];
	self->scomm = start_comm[this->dev, this->blk];
	this->stime = start_time[this->dev, this->blk];
	this->etime = timestamp; /* endtime */
	this->elapsed = this->etime - this->stime;
	self->rw = start_rw[this->dev, this->blk];
	this->dtime = last_event[args[1]->dev_statname] == 0 ? 0 :
	    timestamp - last_event[args[1]->dev_statname];

	/* memory cleanup */
	start_uid[this->dev, this->blk] = 0;
	start_pid[this->dev, this->blk] = 0;
	start_ppid[this->dev, this->blk] = 0;
	start_time[this->dev, this->blk] = 0;
	start_comm[this->dev, this->blk] = 0;
	start_zone[this->dev, this->blk] = 0;
	start_proj[this->dev, this->blk] = 0;
	start_rw[this->dev, this->blk] = 0;

	/*
	 * Choose statistic to track
	 * (the two times are divided by 1000 and reported as us)
	 */
	OPT_bytes ? this->value = args[0]->b_bcount : 1;
	OPT_elapsed ? this->value = this->elapsed / 1000 : 1;
	OPT_dtime ? this->value = this->dtime / 1000 : 1;

	/*
	 * Save details
	 * (the first key is the UID, project ID or zone ID, by option)
	 */
	OPT_def ? @out[this->suid, this->spid, this->sppid, self->scomm,
	    args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
	    self->rw] = sum(this->value) : 1;
	OPT_proj ? @out[this->sproj, this->spid, this->sppid, self->scomm,
	    args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
	    self->rw] = sum(this->value) : 1;
	OPT_zone ? @out[this->szone, this->spid, this->sppid, self->scomm,
	    args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
	    self->rw] = sum(this->value) : 1;

	/* save last disk event */
	last_event[args[1]->dev_statname] = timestamp;

	self->scomm = 0;
	self->rw = 0;
 }

 /*
  * Prevent pending from underflowing
  * this can happen if this program is started during disk events.
  */
 io:genunix::done
 /pending[args[1]->dev_statname] < 0/
 {
	pending[args[1]->dev_statname] = 0;
 }

 /*
  * Timer
  */
 profile:::tick-1sec
 {
	secs--;
 }

 /*
  * Print Report
  */
 profile:::tick-1sec
 /secs == 0/
 {
	/* fetch 1 min load average */
	this->load1a  = `hp_avenrun[0] / 65536;
	this->load1b  = ((`hp_avenrun[0] % 65536) * 100) / 65536;

	/* convert counters to Kbytes */
	disk_r /= 1024;
	disk_w /= 1024;

	/* print status */
	OPT_clear ? printf("%s", CLEAR) : 1;
	printf("%Y, load: %d.%02d, disk_r: %6d KB, disk_w: %6d KB\n\n",
	    walltimestamp, this->load1a, this->load1b, disk_r, disk_w);

	/* print headers; each label is 6 wide to sit over the "%5d " column */
	OPT_def  ? printf("  UID ") : 1;
	OPT_proj ? printf(" PROJ ") : 1;
	OPT_zone ? printf(" ZONE ") : 1;
	printf("%6s %6s %-16s %-7s %3s %3s %1s",
	    "PID", "PPID", "CMD", "DEVICE", "MAJ", "MIN", "D");
	OPT_bytes   ? printf(" %16s\n", "BYTES") : 1;
	OPT_elapsed ? printf(" %16s\n", "ELAPSED") : 1;
	OPT_dtime && ! OPT_percent ? printf(" %16s\n", "DISKTIME") : 1;
	OPT_dtime &&   OPT_percent ? printf(" %6s\n", "%I/O") : 1;

	/* truncate to top lines if needed */
	OPT_top ? trunc(@out, TOP) : 1;

	/*
	 * normalise to percentage if needed
	 * (summed us / (INTERVAL * 10000) is percent of the interval)
	 */
	OPT_percent ? normalize(@out, INTERVAL * 10000) : 1;

	/* print data */
	! OPT_percent ?
	    printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %16@d\n", @out) :
	    printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %6@d\n", @out);
	printf("\n");

	/* clear data */
	trunc(@out);
	disk_r = 0;
	disk_w = 0;
	secs = INTERVAL;
	counts--;
 }

 /*
  * End of program
  */
 profile:::tick-1sec
 /counts == 0/
 {
	exit(0);
 }

 /*
  * Cleanup for Ctrl-C
  */
 dtrace:::END
 {
	trunc(@out);
 }
'