#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>

#include <libkern/OSAtomic.h>
17
/* I/O access pattern (option -m) */
#define IO_MODE_SEQ		0
#define IO_MODE_RANDOM		1

/* Workload type (option -f) */
#define WORKLOAD_TYPE_RO	0
#define WORKLOAD_TYPE_WO	1
#define WORKLOAD_TYPE_RW	2

/* Fixed limits and buffer sizes */
#define MAX_THREADS		1000	/* upper bound for the thread count option */
#define MAX_FILENAME		64	/* size of file name and label buffers */
#define MAX_ITERATIONS		10000	/* capacity of throughput_histogram[] */

/* Latency histogram geometry; bin sizes are in usecs */
#define LATENCY_BIN_SIZE	500
#define LATENCY_BINS		11
#define LOW_LATENCY_BIN_SIZE	50
#define LOW_LATENCY_BINS	11

/* Throughput sampling period in msecs */
#define THROUGHPUT_INTERVAL	5000

/* Default test file size, in 4096-byte pages */
#define DEFAULT_FILE_SIZE	(262144)

/* NOTE(review): BLOCKSIZE is not referenced in the visible source */
#define BLOCKSIZE		1024

/* Size of the buffer holding the dd command line */
#define MAX_CMD_SIZE		256

/* Clears the low 12 bits of an offset (rounds down to a 4096-byte boundary) */
#define PG_MASK			(~0xFFF)
37
38int burst_count = 10;                	/* Unit: Number ; Desc.: I/O Burst Count */
39int inter_burst_duration = 0; 	 	/* Unit: msecs  ; Desc.: I/O Inter-Burst Duration (-1: Random value [0,100]) */
40int inter_io_delay_ms = 0; 		/* Unit: msecs  ; Desc.: Inter I/O Delay */
41int thread_count = 1;                	/* Unit: Number ; Desc.: Thread Count */
42int workload_type = WORKLOAD_TYPE_RO;	/* Unit: 0/1/2  ; Desc.: Workload Type */
43int io_size = 4096;  	                /* Unit: Bytes  ; Desc.: I/O Unit Size */
44int sync_frequency_ms = 0; 		/* Unit: msecs  ; Desc.: Sync thread frequency (0: Indicates no sync) */
45int io_mode = 0;                     	/* Unit: 0/1	; Desc.: I/O Mode (Seq./Rand.) */
46int test_duration = 0;                  /* Unit: secs   ; Desc.: Total Test Duration (0 indicates wait for Ctrl+C signal) */
47int io_tier = 0; 			/* Unit: 0/1/2/3; Desc.: I/O Tier */
48int file_size = DEFAULT_FILE_SIZE; 	/* Unit: pages  ; Desc.: File Size in 4096 byte blocks */
49int cached_io_flag = 0; 		/* Unit: 0/1 	; Desc.: I/O Caching behavior (no-cached/cached) */
50char *user_fname;
51int user_specified_file = 0;
52
53int64_t total_io_count;
54int64_t total_io_size;
55int64_t total_io_time;
56int64_t total_burst_count;
57int64_t latency_histogram[LATENCY_BINS];
58int64_t burst_latency_histogram[LATENCY_BINS];
59int64_t low_latency_histogram[LOW_LATENCY_BINS];
60int64_t throughput_histogram[MAX_ITERATIONS];
61int64_t throughput_index;
62
/* Command-line handling and test setup */
void print_usage(void);
void validate_option(int value, int min, int max, char *option, char *units);
void print_test_setup(int value, char *option, char *units, char *comment);
void setup_process_io_policy(int io_tier);

/* Statistics collection and reporting */
unsigned int find_io_bin(int64_t latency, int latency_bin_size, int latency_bins);
void print_data_percentage(int percent);
void print_latency_histogram(int64_t *data, int latency_bins, int latency_bin_size);
void print_stats(void);
void signalHandler(int sig);

/* I/O helper and thread entry points */
void perform_io(int fd, char *buf, int size, int type);
void *io_routine(void *arg);
void *sync_routine(void *arg);
void *calculate_throughput(void *arg);
76
/*
 * Write the command-line help text, one option per line, to stdout.
 */
void print_usage()
{
	static const char *const usage_lines[] = {
		"Usage: ./iosim [options]\n",
		"Options:\n",
		"-c: (number)  Burst Count. No. of I/Os performed in an I/O burst\n",
		"-i: (msecs)   Inter Burst Duration. Amount of time the thread sleeps between bursts (-1 indicates random durations between 0-100 msecs)\n",
		"-d: (msecs)   Inter I/O delay. Amount of time between issuing I/Os\n",
		"-t: (number)  Thread count\n",
		"-f: (0/1/2 :  Read-Only/Write-Only/Mixed RW) Workload Type\n",
		"-m: (0/1   :  Sequential/Random) I/O pattern\n",
		"-j: (number)  Size of I/O in bytes\n",
		"-s: (msecs)   Frequency of sync() calls\n",
		"-x: (secs)    Test duration (0 indicates that the tool would wait for a Ctrl-C)\n",
		"-l: (0/1/2/3) I/O Tier\n",
		"-z: (number)  File Size in pages (1 page = 4096 bytes) \n",
		"-n: (string)  File name used for tests (the tool would create files if this option is not specified)\n",
		"-a: (0/1   :  Non-cached/Cached) I/O Caching behavior\n",
	};
	size_t line;

	/* None of the lines contains a conversion, so fputs() emits them verbatim */
	for (line = 0; line < sizeof(usage_lines) / sizeof(usage_lines[0]); line++)
		fputs(usage_lines[line], stdout);
}
95
/*
 * Draw one histogram bar on stdout: "| ", then a 20-column field holding
 * one '*' per 5 percent (rounded to nearest) padded with spaces, then "|".
 *
 * percent: value to draw. Values outside [0, 100] are clamped so the bar
 *          always occupies exactly 20 columns.
 */
void print_data_percentage(int percent)
{
	enum { BAR_WIDTH = 20, PERCENT_PER_STAR = 5 };
	int count;
	int col;

	if (percent < 0)
		percent = 0;
	else if (percent > 100)
		percent = 100;

	/*
	 * Integer round-to-nearest. percent / 5 never lands exactly on .5,
	 * so this matches rounding the floating-point quotient.
	 */
	count = (percent + PERCENT_PER_STAR / 2) / PERCENT_PER_STAR;

	printf("| ");
	for (col = 0; col < BAR_WIDTH; col++)
		putchar(col < count ? '*' : ' ');
	printf("|");
}
107
108void print_latency_histogram(int64_t *data, int latency_bins, int latency_bin_size)
109{
110	double percentage;
111        char label[MAX_FILENAME];
112	int i;
113
114        for (i = 0; i < latency_bins; i++) {
115                if (i == (latency_bins - 1))
116                        snprintf(label, MAX_FILENAME, "> %d usecs", i * latency_bin_size);
117                else
118                        snprintf(label, MAX_FILENAME, "%d - %d usecs", i * latency_bin_size, (i+1) * latency_bin_size);
119                printf("%25s ", label);
120                percentage = ((double)data[i] * 100.0) / (double)total_io_count;
121                print_data_percentage((int)percentage);
122                printf(" %.2lf%%\n", percentage);
123        }
124	printf("\n");
125}
126
127void print_stats()
128{
129	int i;
130	double percentage;
131        char label[MAX_FILENAME];
132
133	printf("I/O Statistics:\n");
134
135	printf("Total I/Os      : %lld\n", total_io_count);
136	printf("Avg. Latency    : %.2lf usecs\n", ((double)total_io_time) / ((double)total_io_count));
137
138	printf("Low Latency Histogram: \n");
139	print_latency_histogram(low_latency_histogram, LOW_LATENCY_BINS, LOW_LATENCY_BIN_SIZE);
140	printf("Latency Histogram: \n");
141	print_latency_histogram(latency_histogram, LATENCY_BINS, LATENCY_BIN_SIZE);
142	printf("Burst Avg. Latency Histogram: \n");
143	print_latency_histogram(burst_latency_histogram, LATENCY_BINS, LATENCY_BIN_SIZE);
144
145	printf("Throughput Timeline: \n");
146
147	int64_t max_throughput = 0;
148	for (i = 0; i < throughput_index; i++) {
149		if (max_throughput < throughput_histogram[i])
150			max_throughput = throughput_histogram[i];
151	}
152
153	for (i = 0; i < throughput_index; i++) {
154		snprintf(label, MAX_FILENAME, "T=%d msecs", (i+1) * THROUGHPUT_INTERVAL);
155		printf("%25s ", label);
156		percentage = ((double)throughput_histogram[i] * 100) / (double)max_throughput;
157		print_data_percentage((int)percentage);
158		printf("%.2lf MBps\n", ((double)throughput_histogram[i] / 1048576.0) / ((double)THROUGHPUT_INTERVAL / 1000.0));
159	}
160	printf("\n");
161
162}
163
/*
 * Map a latency to its histogram bin index.
 *
 * latency:          measured latency, in the same unit as latency_bin_size
 * latency_bin_size: width of one bin
 * latency_bins:     number of bins in the histogram
 *
 * Returns latency / latency_bin_size, limited to [0, latency_bins - 1].
 * Negative latencies (the callers time I/Os with gettimeofday(), which
 * can step backwards) land in bin 0 instead of producing a huge unsigned
 * index, and the quotient is kept in 64 bits so very large latencies
 * reach the last bin rather than wrapping around. Non-positive bin
 * sizes or bin counts also yield 0.
 */
unsigned int find_io_bin(int64_t latency, int latency_bin_size, int latency_bins)
{
	int64_t bin;

	if (latency <= 0 || latency_bin_size <= 0 || latency_bins <= 0)
		return 0;

	bin = latency / latency_bin_size;
	if (bin >= latency_bins)
		bin = latency_bins - 1;
	return (unsigned int)bin;
}
171
/*
 * Signal handler: print a newline and the statistics, then terminate
 * the process with exit status 0.
 *
 * sig: delivered signal number (unused)
 *
 * NOTE(review): printing and exit() are not async-signal-safe; confirm
 * this is acceptable for the tool.
 */
void signalHandler(int sig)
{
	(void)sig;

	fputs("\n", stdout);
	print_stats();
	exit(0);
}
178
179
180void perform_io(int fd, char *buf, int size, int type)
181{
182	long ret;
183
184	if (type == WORKLOAD_TYPE_RW)
185		type = (rand() % 2) ? WORKLOAD_TYPE_WO : WORKLOAD_TYPE_RO;
186
187	while(size > 0) {
188
189		if (type == WORKLOAD_TYPE_RO)
190			ret = read(fd, buf, size);
191		else
192			ret = write(fd, buf, size);
193
194		if (ret == 0) {
195			if (lseek(fd, 0, SEEK_SET) < 0) {
196				perror("lseek() to reset file offset to zero failed!\n");
197				goto error;
198			}
199		}
200
201		if (ret < 0) {
202			perror("read/write syscall failed!\n");
203			goto error;
204		}
205		size -= ret;
206	}
207
208	return;
209
210error:
211	print_stats();
212	exit(1);
213}
214
215void *sync_routine(void *arg)
216{
217	while(1) {
218		usleep(sync_frequency_ms * 1000);
219		sync();
220	}
221	pthread_exit(NULL);
222}
223
224void *calculate_throughput(void *arg)
225{
226	int64_t prev_total_io_size = 0;
227	int64_t size;
228
229	while(1) {
230		usleep(THROUGHPUT_INTERVAL * 1000);
231		size = total_io_size - prev_total_io_size;
232		throughput_histogram[throughput_index] = size;
233		prev_total_io_size = total_io_size;
234		throughput_index++;
235	}
236	pthread_exit(NULL);
237}
238
239void *io_routine(void *arg)
240{
241	struct timeval start_tv;
242	struct timeval end_tv;
243	int64_t elapsed;
244	int64_t burst_elapsed;
245	char *data;
246	char test_filename[MAX_FILENAME];
247	struct stat filestat;
248	int i, fd, io_thread_id;
249
250	io_thread_id = (int)arg;
251	if (user_specified_file)
252		strncpy(test_filename, user_fname, MAX_FILENAME);
253	else
254		snprintf(test_filename, MAX_FILENAME, "iosim-%d-%d", (int)getpid(), io_thread_id);
255
256	if (0 > (fd = open(test_filename, O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))) {
257		printf("Error opening file %s!\n", test_filename);
258		exit(1);
259	}
260
261	if (fstat(fd, &filestat) < 0) {
262		printf("Error stat()ing file %s!\n", test_filename);
263		exit(1);
264	}
265
266	if (filestat.st_size < io_size) {
267		printf("%s: File size (%lld) smaller than I/O size (%d)!\n", test_filename, filestat.st_size, io_size);
268		exit(1);
269	}
270
271	if (!cached_io_flag)
272		fcntl(fd, F_NOCACHE, 1);
273
274	fcntl(fd, F_RDAHEAD, 0);
275
276	if(!(data = (char *)calloc(io_size, 1))) {
277		perror("Error allocating buffers for I/O!\n");
278		exit(1);
279	}
280	memset(data, '\0', io_size);
281
282	while(1) {
283
284		burst_elapsed = 0;
285
286		for(i = 0; i < burst_count; i++) {
287			if (io_mode == IO_MODE_RANDOM) {
288				if (lseek(fd, (rand() % (filestat.st_size - io_size)) & PG_MASK, SEEK_SET) < 0) {
289					perror("Error lseek()ing to random location in file!\n");
290					exit(1);
291				}
292			}
293
294
295			gettimeofday(&start_tv, NULL);
296			perform_io(fd, data, io_size, workload_type);
297			gettimeofday(&end_tv, NULL);
298
299			OSAtomicIncrement64(&total_io_count);
300			OSAtomicAdd64(io_size, &total_io_size);
301			elapsed = ((end_tv.tv_sec - start_tv.tv_sec) * 1000000)  + (end_tv.tv_usec - start_tv.tv_usec);
302			OSAtomicAdd64(elapsed, &total_io_time);
303			OSAtomicIncrement64(&(latency_histogram[find_io_bin(elapsed, LATENCY_BIN_SIZE, LATENCY_BINS)]));
304			OSAtomicIncrement64(&(low_latency_histogram[find_io_bin(elapsed, LOW_LATENCY_BIN_SIZE, LOW_LATENCY_BINS)]));
305			burst_elapsed += elapsed;
306
307			if (inter_io_delay_ms)
308				usleep(inter_io_delay_ms * 1000);
309		}
310
311		burst_elapsed /= burst_count;
312		OSAtomicIncrement64(&(burst_latency_histogram[find_io_bin(burst_elapsed, LATENCY_BIN_SIZE, LATENCY_BINS)]));
313		OSAtomicIncrement64(&total_burst_count);
314
315		if(inter_burst_duration == -1)
316			usleep((rand() % 100) * 1000);
317		else
318			usleep(inter_burst_duration * 1000);
319	}
320
321	free(data);
322	close(fd);
323	pthread_exit(NULL);
324}
325
/*
 * Check that an option value lies within [min, max].
 *
 * value:  value supplied by the user
 * min:    smallest accepted value
 * max:    largest accepted value
 * option: option name shown in the error message
 * units:  unit string printed after the limits
 *
 * Returns normally when the value is in range; otherwise prints a
 * diagnostic to stdout and exits the process with status 1.
 */
void validate_option(int value, int min, int max, char *option, char *units)
{
	const int in_range = (min <= value) && (value <= max);

	if (in_range)
		return;

	printf("Illegal option value %d for %s (Min value: %d %s, Max value: %d %s).\n", value, option, min, units, max, units);
	exit(1);
}
333
/*
 * Print one aligned row of the test-setup table to stdout.
 *
 * value:   numeric setting
 * option:  setting name (right-aligned in 32 columns)
 * units:   unit string (left-aligned in 16 columns)
 * comment: optional note appended in parentheses; NULL for none
 */
void print_test_setup(int value, char *option, char *units, char *comment)
{
	if (comment != NULL) {
		printf("%32s: %16d %-16s (%s)\n", option, value, units, comment);
		return;
	}

	printf("%32s: %16d %-16s\n", option, value, units);
}
341
342void setup_process_io_policy(int io_tier)
343{
344	switch(io_tier)
345	{
346		case 0:
347			if (setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS, IOPOL_IMPORTANT))
348				goto iopol_error;
349			break;
350		case 1:
351			if (setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS, IOPOL_STANDARD))
352                                goto iopol_error;
353                        break;
354		case 2:
355			if (setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS, IOPOL_UTILITY))
356                                goto iopol_error;
357                        break;
358		case 3:
359			if (setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS, IOPOL_THROTTLE))
360                                goto iopol_error;
361                        break;
362	}
363	return;
364
365iopol_error:
366	printf("Error setting process-wide I/O policy to %d\n", io_tier);
367        exit(1);
368}
369
370int main(int argc, char *argv[])
371{
372	int i, option = 0;
373	pthread_t thread_list[MAX_THREADS];
374	pthread_t sync_thread;
375	pthread_t throughput_thread;
376	char fname[MAX_FILENAME];
377
378	while((option = getopt(argc, argv,"hc:i:d:t:f:m:j:s:x:l:z:n:a:")) != -1) {
379		switch(option) {
380			case 'c':
381				burst_count = atoi(optarg);
382				validate_option(burst_count, 0, INT_MAX, "Burst Count", "I/Os");
383				break;
384			case 'i':
385				inter_burst_duration = atoi(optarg);
386				validate_option(inter_burst_duration, -1, INT_MAX, "Inter Burst duration", "msecs");
387				break;
388			case 'd':
389				inter_io_delay_ms = atoi(optarg);
390				validate_option(inter_io_delay_ms, 0, INT_MAX, "Inter I/O Delay", "msecs");
391				break;
392			case 't':
393				thread_count = atoi(optarg);
394				validate_option(thread_count, 0, MAX_THREADS, "Thread Count", "Threads");
395				break;
396			case 'f':
397				workload_type = atoi(optarg);
398				validate_option(workload_type, 0, 2, "Workload Type", "");
399				break;
400			case 'm':
401				io_mode = atoi(optarg);
402				validate_option(io_mode, 0, 1, "I/O Mode", "");
403				break;
404			case 'j':
405				io_size = atoi(optarg);
406				validate_option(io_size, 0, INT_MAX, "I/O Size", "Bytes");
407				break;
408			case 'h':
409				print_usage();
410				exit(1);
411			case 's':
412				sync_frequency_ms = atoi(optarg);
413				validate_option(sync_frequency_ms, 0, INT_MAX, "Sync. Frequency", "msecs");
414				break;
415			case 'x':
416				test_duration = atoi(optarg);
417				validate_option(test_duration, 0, INT_MAX, "Test duration", "secs");
418				break;
419			case 'l':
420				io_tier = atoi(optarg);
421				validate_option(io_tier, 0, 3, "I/O Tier", "");
422				break;
423			case 'z':
424				file_size = atoi(optarg);
425				validate_option(file_size, 0, INT_MAX, "File Size", "bytes");
426				break;
427			case 'n':
428				user_fname = optarg;
429				user_specified_file = 1;
430				break;
431			case 'a':
432				cached_io_flag = atoi(optarg);
433				validate_option(cached_io_flag, 0, 1, "I/Os cached/no-cached", "");
434				break;
435			default:
436				printf("Unknown option %c\n", option);
437				print_usage();
438				exit(1);
439		}
440	}
441
442	printf("***********************TEST SETUP*************************\n");
443
444	print_test_setup(burst_count, "Burst Count", "I/Os", 0);
445	print_test_setup(inter_burst_duration, "Inter Burst duration", "msecs", "-1 indicates random burst duration");
446	print_test_setup(inter_io_delay_ms, "Inter I/O Delay", "msecs", 0);
447	print_test_setup(thread_count, "Thread Count", "Threads", 0);
448	print_test_setup(workload_type, "Workload Type", "", "0:R 1:W 2:RW");
449	print_test_setup(io_mode, "I/O Mode", "", "0:Seq. 1:Rnd");
450	print_test_setup(io_size, "I/O Size", "Bytes", 0);
451	print_test_setup(sync_frequency_ms, "Sync. Frequency", "msecs", "0 indicates no sync. thread");
452	print_test_setup(test_duration, "Test duration", "secs", "0 indicates tool waits for Ctrl+C");
453	print_test_setup(io_tier, "I/O Tier", "", 0);
454	print_test_setup(cached_io_flag, "I/O Caching", "", "0 indicates non-cached I/Os");
455	print_test_setup(0, "File read-aheads", "", "0 indicates read-aheads disabled");
456
457	printf("**********************************************************\n");
458
459	if (user_specified_file == 0) {
460		char dd_command[MAX_CMD_SIZE];
461		for (i=0; i < thread_count; i++) {
462			snprintf(fname, MAX_FILENAME, "iosim-%d-%d", (int)getpid(), i);
463			snprintf(dd_command, MAX_CMD_SIZE, "dd if=/dev/urandom of=%s bs=4096 count=%d", fname, file_size);
464			printf("Creating file %s of size %lld...\n", fname, ((int64_t)file_size * 4096));
465			system(dd_command);
466		}
467	} else {
468		printf("Using user specified file %s for all threads...\n", user_fname);
469	}
470	system("purge");
471	setup_process_io_policy(io_tier);
472
473	printf("**********************************************************\n");
474	printf("Creating threads and generating workload...\n");
475
476	signal(SIGINT, signalHandler);
477	signal(SIGALRM, signalHandler);
478
479	for(i=0; i < thread_count; i++) {
480		if (pthread_create(&thread_list[i], NULL, io_routine, i) < 0) {
481			perror("Could not create I/O thread!\n");
482			exit(1);
483		}
484	}
485
486	if (sync_frequency_ms) {
487		if (pthread_create(&sync_thread, NULL, sync_routine, NULL) < 0) {
488			perror("Could not create sync thread!\n");
489			exit(1);
490		}
491	}
492
493	if (pthread_create(&throughput_thread, NULL, calculate_throughput, NULL) < 0) {
494		perror("Could not throughput calculation thread!\n");
495		exit(1);
496	}
497
498	/* All threads are now initialized */
499	if (test_duration)
500		alarm(test_duration);
501
502	for(i=0; i < thread_count; i++)
503		pthread_join(thread_list[i], NULL);
504
505	if (sync_frequency_ms)
506		pthread_join(sync_thread, NULL);
507
508	pthread_join(throughput_thread, NULL);
509
510	pthread_exit(0);
511
512}
513