1/*
2 * Copyright (c) 2011, Linaro Limited
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *     * Redistributions of source code must retain the above copyright
8 *       notice, this list of conditions and the following disclaimer.
9 *     * Redistributions in binary form must reproduce the above copyright
10 *       notice, this list of conditions and the following disclaimer in the
11 *       documentation and/or other materials provided with the distribution.
12 *     * Neither the name of the Linaro nor the
13 *       names of its contributors may be used to endorse or promote products
14 *       derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/** A simple harness that times how long a string function takes to
29 * run.
30 */
31
32/* PENDING: Add EPL */
33
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <unistd.h>
43
/** Number of elements in an array.  Only meaningful for true arrays,
 * not for pointers.
 */
#define NUM_ELEMS(arr) (sizeof(arr) / sizeof((arr)[0]))

/** Version string shown in the usage text.  A definition supplied
 * before this point (for example on the compiler command line) wins.
 */
#if !defined(VERSION)
# define VERSION "(unknown version)"
#endif
49
/** Make sure a function is called by using the return value.
 *
 * The argument is evaluated exactly once and its value, converted to
 * long, is stored into a volatile local so the compiler cannot discard
 * the call.  The expansion is a single statement (do/while(0)), so the
 * macro is safe inside an unbraced if/else, and the local lives in its
 * own scope so it cannot clash with the caller's variables.
 *
 * Use as a statement: SPOIL(expr);
 */
#define SPOIL(_x) \
  do \
    { \
      volatile long spoil_sink_ = (long)(_x); \
      (void)spoil_sink_; \
    } \
  while (0)
52
/** Signature shared by every stub that can be benchmarked: a
 * destination buffer, a source buffer and a byte count.
 */
typedef void (*stub_t)(void *dest, void *src, size_t n);

/** Describes one runnable test */
struct test {
  const char *name;  /**< Name the test is looked up by */
  stub_t stub;       /**< Stub that exercises the function under test */
};
64
/** Flush the cache by reading a chunk of memory.
 *
 * Performs one volatile read every 64 bytes across a 48 KiB window that
 * starts 768 KiB into the buffer (768 reads in total).  The sizes are
 * chosen for a 16 KiB cache with 64 byte lines, i.e. 256 lines, so the
 * sweep covers three times the cache.
 *
 * The buffer must therefore be readable up to at least 816 KiB from
 * `against`.
 */
static void empty(volatile char *against)
{
  enum
  {
    LINE_BYTES = 64,
    SWEEP_BYTES = 16*1024*3,
    SWEEP_START = (1024 - 256)*1024
  };

  volatile char *cursor = against + SWEEP_START;
  volatile char *const stop = cursor + SWEEP_BYTES;

  while (cursor < stop)
    {
      /* The volatile qualifier forces the load to really happen */
      (void)*cursor;
      cursor += LINE_BYTES;
    }
}
78
/** Calibration stub: touches none of its arguments and only performs
 * the same volatile store the other stubs use for their results.
 */
static void xbounce(void *dest, void *src, size_t n)
{
  (void)dest;
  (void)src;
  (void)n;

  volatile long sink = (long)(0);
  (void)sink;
}
84
/** Benchmark stub for memcpy: copies n bytes from src to dest and
 * stores the returned pointer in a volatile so the call is kept.
 */
static void xmemcpy(void *dest, void *src, size_t n)
{
  void *const copied = memcpy(dest, src, n);
  volatile long sink = (long)copied;
  (void)sink;
}
90
/** Benchmark stub for memset: zeroes the first n bytes of dest.  The
 * source buffer is not used.
 */
static void xmemset(void *dest, void *src, size_t n)
{
  (void)src;

  void *const filled = memset(dest, 0, n);
  volatile long sink = (long)filled;
  (void)sink;
}
96
/** Benchmark stub for memcmp: compares the first n bytes of dest and
 * src and stores the result in a volatile so the call is kept.
 */
static void xmemcmp(void *dest, void *src, size_t n)
{
  const int outcome = memcmp(dest, src, n);
  volatile long sink = (long)outcome;
  (void)sink;
}
102
/** Benchmark stub for strcpy: copies the NUL-terminated string in src
 * to dest.  The byte count is not used; the caller must terminate src
 * and make dest large enough.
 */
static void xstrcpy(void *dest, void *src, size_t n)
{
  (void)n;

  char *const copied = strcpy(dest, src);
  volatile long sink = (long)copied;
  (void)sink;
}
108
/** Benchmark stub for strlen: measures the NUL-terminated string in
 * dest.  Neither src nor the byte count is used.
 */
static void xstrlen(void *dest, void *src, size_t n)
{
  (void)src;
  (void)n;

  const size_t length = strlen(dest);
  volatile long sink = (long)length;
  (void)sink;
}
114
/** Benchmark stub for strcmp: compares the NUL-terminated strings in
 * dest and src.  The byte count is not used.
 */
static void xstrcmp(void *dest, void *src, size_t n)
{
  (void)n;

  const int outcome = strcmp(dest, src);
  volatile long sink = (long)outcome;
  (void)sink;
}
120
/** Benchmark stub for strchr.
 *
 * Plants the searched-for character (32, a space) in the last byte
 * before the terminator, src[n-1], so the search walks the whole
 * string.  main() fills the buffers with odd-valued bytes, so the
 * planted byte is the only match.
 *
 * When n is 0 there is no byte before the terminator, so nothing is
 * planted; indexing src[n-1] in that case would write outside the
 * buffer.  The destination buffer is not used.
 */
static void xstrchr(void *dest, void *src, size_t n)
{
  (void)dest;

  if (n > 0)
    {
      /* Put the character at the end of the string and before the null */
      ((char *)src)[n - 1] = 32;
    }

  volatile long sink = (long)strchr(src, 32);
  (void)sink;
}
128
/** Benchmark stub for memchr.
 *
 * Plants the searched-for byte (32) in the last position of the block,
 * src[n-1], so the search has to scan all n bytes.  main() fills the
 * buffers with odd-valued bytes, so the planted byte is the only match.
 *
 * When n is 0 the block is empty and nothing is planted; indexing
 * src[n-1] in that case would write outside the buffer.  The
 * destination buffer is not used.
 */
static void xmemchr(void *dest, void *src, size_t n)
{
  (void)dest;

  if (n > 0)
    {
      /* Put the character at the end of the block */
      ((char *)src)[n - 1] = 32;
    }

  volatile long sink = (long)memchr(src, 32, n);
  (void)sink;
}
136
137/** All functions that can be tested */
138static const struct test tests[] =
139  {
140    { "bounce", xbounce },
141    { "memchr", xmemchr },
142    { "memcpy", xmemcpy },
143    { "memset", xmemset },
144    { "memcmp", xmemcmp },
145    { "strchr", xstrchr },
146    { "strcmp", xstrcmp },
147    { "strcpy", xstrcpy },
148    { "strlen", xstrlen },
149    { NULL }
150  };
151
152/** Show basic usage */
153static void usage(const char* name)
154{
155  printf("%s %s: run a string related benchmark.\n"
156         "usage: %s [-c block-size] [-l loop-count] [-a alignment|src_alignment:dst_alignment] [-f] [-t test-name] [-r run-id]\n"
157         , name, VERSION, name);
158
159  printf("Tests:");
160
161  for (const struct test *ptest = tests; ptest->name != NULL; ptest++)
162    {
163      printf(" %s", ptest->name);
164    }
165
166  printf("\n");
167
168  exit(-1);
169}
170
171/** Find the test by name */
172static const struct test *find_test(const char *name)
173{
174  if (name == NULL)
175    {
176      return tests + 0;
177    }
178  else
179    {
180      for (const struct test *p = tests; p->name != NULL; p++)
181	{
182          if (strcmp(p->name, name) == 0)
183	    {
184              return p;
185	    }
186	}
187    }
188
189  return NULL;
190}
191
/** Smallest size, in bytes, of each benchmark buffer (1 MiB).
 * Parenthesised so the product stays intact when the macro is used
 * inside a larger expression such as a division.
 */
#define MIN_BUFFER_SIZE (1024 * 1024)
/** Boundary, in bytes, that realign() rounds buffer addresses up to */
#define MAX_ALIGNMENT	256
194
195/** Take a pointer and ensure that the lower bits == alignment */
196static char *realign(char *p, int alignment)
197{
198  uintptr_t pp = (uintptr_t)p;
199  pp = (pp + (MAX_ALIGNMENT - 1)) & ~(MAX_ALIGNMENT - 1);
200  pp += alignment;
201
202  return (char *)pp;
203}
204
/** Parse a command line argument as an int.
 *
 * The number may be decimal, octal (leading 0) or hexadecimal (leading
 * 0x), as accepted by strtol() with base 0.  The whole argument must be
 * a number: an empty string, a string with no digits, trailing
 * characters after the digits, or a value that does not fit in an int
 * all count as errors.
 *
 * On error the usage text is printed through usage(exe_name), which
 * terminates the program.  Otherwise the parsed value is returned.
 */
static int parse_int_arg(const char *arg, const char *exe_name)
{
  char *end = NULL;

  errno = 0;
  long int value = strtol(arg, &end, 0);

  /* errno catches overflow of long; the end pointer catches missing
     digits and trailing junk; the range test keeps the narrowing to
     int below from changing the value. */
  if (errno != 0 || end == arg || *end != '\0'
      || value < INT_MIN || value > INT_MAX)
    {
      usage(exe_name);
    }

  return (int)value;
}
219
/** Parse a single alignment value starting at `text`.
 *
 * The value must lie in [1, 256]; 256 is returned as 0 so that it
 * means "no offset from the alignment boundary".  `*end` is set to the
 * first character after the number.  Invalid input prints a message
 * and/or the usage text through usage(exe_name), which terminates the
 * program.
 */
static int parse_alignment_value(const char *text, const char *exe_name,
                                 char **end)
{
  errno = 0;
  long int value = strtol(text, end, 0);

  if (errno != 0)
    {
      usage(exe_name);
    }

  /* A string without digits parses as 0 and is rejected here too */
  if (value > 256 || value < 1)
    {
      printf("Alignment should be in the range [1, 256].\n");
      usage(exe_name);
    }

  return (value == 256) ? 0 : (int)value;
}

/** Parse the argument of the -a option.
 *
 * Accepts either "N", which sets both alignments to N, or "S:D", which
 * sets the source alignment to S and the destination alignment to D.
 * Each value must be in [1, 256] and 256 is stored as 0, for the source
 * as well as for the destination.  Anything left over after the last
 * number is treated as an error.
 *
 * Errors are reported through usage(exe_name), which terminates the
 * program; on success both output parameters are written.
 */
static void parse_alignment_arg(const char *arg, const char *exe_name,
				int *src_alignment, int *dst_alignment)
{
  char *end = NULL;
  int src = parse_alignment_value(arg, exe_name, &end);
  int dst = src;

  if (*end == ':')
    {
      dst = parse_alignment_value(end + 1, exe_name, &end);
    }

  /* Reject trailing junk such as "8x" or "8:16:32" */
  if (*end != '\0')
    {
      usage(exe_name);
    }

  *src_alignment = src;
  *dst_alignment = dst;
}
267
268/** Setup and run a test */
269int main(int argc, char **argv)
270{
271  /* Size of src and dest buffers */
272  size_t buffer_size = MIN_BUFFER_SIZE;
273
274  /* Number of bytes per call */
275  int count = 31;
276  /* Number of times to run */
277  int loops = 10000000;
278  /* True to flush the cache each time */
279  int flush = 0;
280  /* Name of the test */
281  const char *name = NULL;
282  /* Alignment of buffers */
283  int src_alignment = 8;
284  int dst_alignment = 8;
285  /* Name of the run */
286  const char *run_id = "0";
287
288  int opt;
289
290  while ((opt = getopt(argc, argv, "c:l:ft:r:hva:")) > 0)
291    {
292      switch (opt)
293	{
294	case 'c':
295          count = parse_int_arg(optarg, argv[0]);
296          break;
297	case 'l':
298          loops = parse_int_arg(optarg, argv[0]);
299          break;
300	case 'a':
301          parse_alignment_arg(optarg, argv[0], &src_alignment, &dst_alignment);
302          break;
303	case 'f':
304          flush = 1;
305          break;
306	case 't':
307          name = strdup(optarg);
308          break;
309	case 'r':
310          run_id = strdup(optarg);
311          break;
312	case 'h':
313          usage(argv[0]);
314          break;
315	default:
316          usage(argv[0]);
317          break;
318	}
319    }
320
321  /* Find the test by name */
322  const struct test *ptest = find_test(name);
323
324  if (ptest == NULL)
325    {
326      usage(argv[0]);
327    }
328
329  if (count + MAX_ALIGNMENT * 2 > MIN_BUFFER_SIZE)
330    {
331      buffer_size = count + MAX_ALIGNMENT * 2;
332    }
333
334  /* Buffers to read and write from */
335  char *src = malloc(buffer_size);
336  char *dest = malloc(buffer_size);
337
338  assert(src != NULL && dest != NULL);
339
340  src = realign(src, src_alignment);
341  dest = realign(dest, dst_alignment);
342
343  /* Fill the buffer with non-zero, reproducable random data */
344  srandom(1539);
345
346  for (int i = 0; i < buffer_size; i++)
347    {
348      src[i] = (char)random() | 1;
349      dest[i] = src[i];
350    }
351
352  /* Make sure the buffers are null terminated for any string tests */
353  src[count] = 0;
354  dest[count] = 0;
355
356  struct timespec start, end;
357  int err = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
358  assert(err == 0);
359
360  /* Preload */
361  stub_t stub = ptest->stub;
362
363  /* Run two variants to reduce the cost of testing for the flush */
364  if (flush == 0)
365    {
366      for (int i = 0; i < loops; i++)
367	{
368	  (*stub)(dest, src, count);
369	}
370    }
371  else
372    {
373      for (int i = 0; i < loops; i++)
374	{
375	  (*stub)(dest, src, count);
376	  empty(dest);
377	}
378    }
379
380  err = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);
381  assert(err == 0);
382
383  /* Drop any leading path and pull the variant name out of the executable */
384  char *variant = strrchr(argv[0], '/');
385
386  if (variant == NULL)
387    {
388      variant = argv[0];
389    }
390
391  variant = strstr(variant, "try-");
392  assert(variant != NULL);
393
394  double elapsed = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) * 1e-9;
395  /* Estimate the bounce time.  Measured on a Panda. */
396  double bounced = 0.448730 * loops / 50000000;
397
398  /* Dump both machine and human readable versions */
399  printf("%s:%s:%u:%u:%d:%d:%s:%.6f: took %.6f s for %u calls to %s of %u bytes.  ~%.3f MB/s corrected.\n",
400         variant + 4, ptest->name,
401	 count, loops, src_alignment, dst_alignment, run_id,
402	 elapsed,
403         elapsed, loops, ptest->name, count,
404         (double)loops*count/(elapsed - bounced)/(1024*1024));
405
406  return 0;
407}
408