1/* 2 * Copyright (c) 2014 ETH Zurich. 3 * All rights reserved. 4 * 5 * This file is distributed under the terms in the attached LICENSE file. 6 * If you do not find this file, copies can be found by writing to: 7 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group. 8 */ 9#include <limits.h> 10#include <string.h> 11#include <stdio.h> 12 13#include <barrelfish/barrelfish.h> 14#include <barrelfish/dispatch.h> 15#include <barrelfish/sys_debug.h> 16#include <bench/bench.h> 17#include <xeon_phi/xeon_phi.h> 18 19#include <dma_internal.h> 20#include <dma/dma_bench.h> 21#include <dma_device_internal.h> 22#include <dma_channel_internal.h> 23#include <dma_request_internal.h> 24 25#include <debug.h> 26 27#define DMA_BENCH_PRINT(x...) debug_printf(x) 28//#define DMA_BENCH_PRINT(x...) 29 30static volatile int dma_done_flag = 0; 31 32uint64_t request_counter = 0; 33 34static void dma_done_cb(errval_t err, 35 dma_req_id_t id, 36 void *arg) 37{ 38 assert(err_is_ok(err)); 39 dma_done_flag = 1; 40} 41 42static inline cycles_t calculate_time(cycles_t tsc_start, 43 cycles_t tsc_end) 44{ 45 cycles_t result; 46 if (tsc_end < tsc_start) { 47 result = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead(); 48 } else { 49 result = (tsc_end - tsc_start - bench_tscoverhead()); 50 } 51 return result; 52} 53 54 55 56/* 57 * ============================================================================ 58 * 59 * ============================================================================ 60 */ 61 62errval_t dma_bench_run_default_xphi(struct dma_device *dev) 63{ 64 debug_printf("dma_bench_run_default_xphi\n"); 65#ifdef __k1om__ 66 67 if (disp_xeon_phi_id() == 0) { 68 return SYS_ERR_OK; 69 } 70#else 71 return SYS_ERR_OK; 72 if (disp_get_core_id() >= 20) { 73 return SYS_ERR_OK; 74 } 75#endif 76 77 debug_printf("DMA BENCHMARK started\n"); 78 debug_printf("================================\n"); 79 debug_printf("\n"); 80 debug_printf("DMA-BENCH: xphi[0] -> xphi[0]\n"); 81 debug_printf("\n"); 82 debug_printf("--------------------------------\n"); 83 debug_printf("\n"); 84 85 dma_bench_run(dev, DMA_BENCH_XPHI_BASE_OFFSET, 86 DMA_BENCH_XPHI_BASE_OFFSET + DMA_BENCH_BUFFER_SIZE); 87 debug_printf("\n"); 88 debug_printf("--------------------------------\n"); 89 debug_printf("\n"); 90 91 debug_printf("DMA-BENCH: xphi[0] -> host[0]\n"); 92 debug_printf("\n"); 93 debug_printf("--------------------------------\n"); 94 debug_printf("\n"); 95 dma_bench_run(dev, DMA_BENCH_XPHI_BASE_OFFSET, 96 XEON_PHI_SYSMEM_BASE + DMA_BENCH_HOST_BASE); 97 debug_printf("\n"); 98 debug_printf("--------------------------------\n"); 99 debug_printf("\n"); 100 101 debug_printf("DMA-BENCH: host[0] -> xphi[0]\n"); 102 debug_printf("\n"); 103 debug_printf("--------------------------------\n"); 104 debug_printf("\n"); 105 dma_bench_run(dev, XEON_PHI_SYSMEM_BASE + DMA_BENCH_HOST_BASE, 106 DMA_BENCH_XPHI_BASE_OFFSET); 107 debug_printf("\n"); 108 debug_printf("--------------------------------\n"); 109 debug_printf("\n"); 110 111 debug_printf("DMA-BENCH: xphi[0] -> xphi[1]\n"); 112 debug_printf("\n"); 113 debug_printf("--------------------------------\n"); 114 debug_printf("\n"); 115 dma_bench_run(dev, DMA_BENCH_XPHI_BASE_OFFSET, 116 XEON_PHI_SYSMEM_BASE + 31 * XEON_PHI_SYSMEM_PAGE_SIZE); 117 debug_printf("\n"); 118 debug_printf("--------------------------------\n"); 119 debug_printf("\n"); 120 121 debug_printf("DMA-BENCH: xphi[1] -> xphi[0]\n"); 122 debug_printf("\n"); 123 debug_printf("--------------------------------\n"); 124 debug_printf("\n"); 125 dma_bench_run(dev,XEON_PHI_SYSMEM_BASE + 31 * XEON_PHI_SYSMEM_PAGE_SIZE, 126 DMA_BENCH_XPHI_BASE_OFFSET); 127 debug_printf("\n"); 128 debug_printf("--------------------------------\n"); 129 debug_printf("\n"); 130 131 return SYS_ERR_OK; 132} 133 134errval_t dma_bench_run_default(struct dma_device *dev) 135{ 136 debug_printf("DMA BENCHMARK started\n"); 137 debug_printf("================================\n"); 138 debug_printf("\n"); 139 debug_printf("DMA-BENCH: host[0] -> host[0]\n"); 140 debug_printf("\n"); 141 debug_printf("--------------------------------\n"); 142 debug_printf("\n"); 143 //dma_bench_run(dev, DMA_BENCH_HOST_BASE, 144 // DMA_BENCH_HOST_BASE + DMA_BENCH_BUFFER_SIZE); 145 return SYS_ERR_OK; 146 debug_printf("\n"); 147 debug_printf("--------------------------------\n"); 148 debug_printf("\n"); 149 debug_printf("DMA-BENCH: host[0] -> host[1]\n"); 150 debug_printf("\n"); 151 debug_printf("--------------------------------\n"); 152 debug_printf("\n"); 153 dma_bench_run(dev, DMA_BENCH_HOST_BASE, DMA_BENCH_HOST_BASE2); 154 debug_printf("\n"); 155 debug_printf("--------------------------------\n"); 156 debug_printf("\n"); 157 debug_printf("DMA-BENCH: host[1] -> host[0]\n"); 158 debug_printf("\n"); 159 debug_printf("--------------------------------\n"); 160 debug_printf("\n"); 161 dma_bench_run(dev, DMA_BENCH_HOST_BASE2, DMA_BENCH_HOST_BASE); 162 debug_printf("\n"); 163 debug_printf("--------------------------------\n"); 164 debug_printf("\n"); 165 debug_printf("DMA-BENCH: host[1] -> host[1]\n"); 166 debug_printf("\n"); 167 debug_printf("--------------------------------\n"); 168 debug_printf("\n"); 169 dma_bench_run(dev, DMA_BENCH_HOST_BASE2,DMA_BENCH_HOST_BASE2+ DMA_BENCH_BUFFER_SIZE); 170 debug_printf("\n"); 171 debug_printf("--------------------------------\n"); 172 debug_printf("\n"); 173 debug_printf("DMA-BENCH: host->card\n"); 174 debug_printf("\n"); 175 debug_printf("--------------------------------\n"); 176 debug_printf("\n"); 177 dma_bench_run(dev, DMA_BENCH_HOST_BASE, DMA_BENCH_HOST_XEON_PHI_BASE); 178 debug_printf("\n"); 179 debug_printf("--------------------------------\n"); 180 debug_printf("\n"); 181 debug_printf("DMA-BENCH: card->card\n"); 182 debug_printf("\n"); 183 debug_printf("--------------------------------\n"); 184 debug_printf("\n"); 185 // dma_bench_run(dev, DMA_BENCH_HOST_XEON_PHI_BASE,DMA_BENCH_HOST_XEON_PHI_BASE + DMA_BENCH_BUFFER_SIZE); 186 debug_printf("\n"); 187 debug_printf("--------------------------------\n"); 188 debug_printf("\n"); 189 debug_printf("DONE.\n"); 190 debug_printf("========================================================="); 191 return SYS_ERR_OK; 192} 193 194errval_t dma_bench_run(struct dma_device *dev, lpaddr_t src, lpaddr_t dst) 195{ 196 errval_t err; 197 cycles_t tsc_start, tsc_end; 198 uint64_t tscperus; 199 bench_ctl_t *ctl; 200 201 cycles_t result; 202 203 bench_init(); 204 205 struct dma_channel *chan = dma_device_get_channel((struct dma_device *)dev); 206 207 err = sys_debug_get_tsc_per_ms(&tscperus); 208 assert(err_is_ok(err)); 209 tscperus /= 1000; 210 211 for (uint8_t i = DMA_BENCH_MIN_BITS; i <= DMA_BENCH_MAX_BITS; ++i) { 212 size_t size = (1UL << i); 213 214 ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, DMA_BENCH_NUM_REPS); 215 216 uint8_t idx = 0; 217 do { 218 tsc_start = bench_tsc(); 219 dma_done_flag = 0x0; 220 221 struct dma_req_setup setup = { 222 .done_cb = dma_done_cb, 223 .cb_arg = &request_counter, 224 .args = { 225 .memcpy = { 226 .src = src, 227 .dst = dst, 228 .bytes = size 229 } 230 } 231 }; 232 233 dma_req_id_t id; 234 235 err = dma_request_memcpy_chan(chan, &setup, &id); 236 if (err_is_fail(err)) { 237 USER_PANIC_ERR(err, "could not set the memcy request"); 238 } 239 240 tsc_end = bench_tsc(); 241 result = calculate_time(tsc_start, tsc_end); 242 243 while (!dma_done_flag) { 244 dma_channel_poll(chan); 245 } 246 247 tsc_end = bench_tsc(); 248 result = calculate_time(tsc_start, tsc_end); 249 idx++; 250 request_counter++; 251 } while (!bench_ctl_add_run(ctl, &result)); 252 char buf[50]; 253 254 snprintf(buf, sizeof(buf), "%u", i); 255 bench_ctl_dump_analysis(ctl, 0, buf, tscperus); 256 257 bench_ctl_destroy(ctl); 258 259 } 260 261 return SYS_ERR_OK; 262} 263 264errval_t dma_bench_run_memcpy(void *dst, void *src) 265{ 266 errval_t err; 267 cycles_t tsc_start, tsc_end; 268 uint64_t tscperus; 269 bench_ctl_t *ctl; 270 271 cycles_t result; 272 273 bench_init(); 274 275 err = sys_debug_get_tsc_per_ms(&tscperus); 276 assert(err_is_ok(err)); 277 tscperus /= 1000; 278 debug_printf("starting benchmark memcpy\n"); 279 debug_printf("======================================\n"); 280 281 for (uint8_t i = DMA_BENCH_MIN_BITS; i <= DMA_BENCH_MAX_BITS; ++i) { 282 size_t size = (1UL << i); 283 284 ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, DMA_BENCH_NUM_REPS); 285 286 uint8_t idx = 0; 287 do { 288 tsc_start = bench_tsc(); 289 memcpy(dst, src, size); 290 tsc_end = bench_tsc(); 291 292 result = calculate_time(tsc_start, tsc_end); 293 idx++; 294 } while (!bench_ctl_add_run(ctl, &result)); 295 char buf[50]; 296 297 snprintf(buf, sizeof(buf), "%u", i); 298 bench_ctl_dump_analysis(ctl, 0, buf, tscperus); 299 300 bench_ctl_destroy(ctl); 301 } 302 debug_printf("======================================\n"); 303 304 return SYS_ERR_OK; 305} 306