1/*
2 * Copyright (c) 2014 ETH Zurich.
3 * All rights reserved.
4 *
5 * This file is distributed under the terms in the attached LICENSE file.
6 * If you do not find this file, copies can be found by writing to:
7 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
8 */
9#include <limits.h>
10#include <string.h>
11#include <stdio.h>
12
13#include <barrelfish/barrelfish.h>
14#include <barrelfish/dispatch.h>
15#include <barrelfish/sys_debug.h>
16#include <bench/bench.h>
17#include <xeon_phi/xeon_phi.h>
18
19#include <dma_internal.h>
20#include <dma/dma_bench.h>
21#include <dma_device_internal.h>
22#include <dma_channel_internal.h>
23#include <dma_request_internal.h>
24
25#include <debug.h>
26
/*
 * Benchmark output helper: forwards all of its arguments to debug_printf().
 * Swap in the empty definition below to silence it.
 */
#define DMA_BENCH_PRINT(...) debug_printf(__VA_ARGS__)
//#define DMA_BENCH_PRINT(...)

/*
 * Completion flag: cleared by dma_bench_run() before a request is issued and
 * raised by dma_done_cb() once that request has finished.
 */
static volatile int dma_done_flag = 0;

/*
 * Incremented by dma_bench_run() after every completed request; its address
 * is also handed to the DMA request as the callback argument.
 */
uint64_t request_counter = 0;
33
34static void dma_done_cb(errval_t err,
35                        dma_req_id_t id,
36                        void *arg)
37{
38    assert(err_is_ok(err));
39    dma_done_flag = 1;
40}
41
42static inline cycles_t calculate_time(cycles_t tsc_start,
43                                      cycles_t tsc_end)
44{
45    cycles_t result;
46    if (tsc_end < tsc_start) {
47        result = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead();
48    } else {
49        result = (tsc_end - tsc_start - bench_tscoverhead());
50    }
51    return result;
52}
53
54
55
/*
 * ============================================================================
 * Benchmark entry points
 * ============================================================================
 */
61
62errval_t dma_bench_run_default_xphi(struct dma_device *dev)
63{
64    debug_printf("dma_bench_run_default_xphi\n");
65#ifdef __k1om__
66
67    if (disp_xeon_phi_id() == 0) {
68        return SYS_ERR_OK;
69    }
70#else
71    return SYS_ERR_OK;
72    if (disp_get_core_id() >= 20) {
73        return SYS_ERR_OK;
74    }
75#endif
76
77    debug_printf("DMA BENCHMARK started\n");
78    debug_printf("================================\n");
79    debug_printf("\n");
80    debug_printf("DMA-BENCH: xphi[0] -> xphi[0]\n");
81    debug_printf("\n");
82    debug_printf("--------------------------------\n");
83    debug_printf("\n");
84
85    dma_bench_run(dev, DMA_BENCH_XPHI_BASE_OFFSET,
86                  DMA_BENCH_XPHI_BASE_OFFSET + DMA_BENCH_BUFFER_SIZE);
87    debug_printf("\n");
88    debug_printf("--------------------------------\n");
89    debug_printf("\n");
90
91    debug_printf("DMA-BENCH: xphi[0] -> host[0]\n");
92    debug_printf("\n");
93    debug_printf("--------------------------------\n");
94    debug_printf("\n");
95    dma_bench_run(dev, DMA_BENCH_XPHI_BASE_OFFSET,
96                  XEON_PHI_SYSMEM_BASE + DMA_BENCH_HOST_BASE);
97    debug_printf("\n");
98    debug_printf("--------------------------------\n");
99    debug_printf("\n");
100
101    debug_printf("DMA-BENCH: host[0] -> xphi[0]\n");
102    debug_printf("\n");
103    debug_printf("--------------------------------\n");
104    debug_printf("\n");
105    dma_bench_run(dev, XEON_PHI_SYSMEM_BASE + DMA_BENCH_HOST_BASE,
106                  DMA_BENCH_XPHI_BASE_OFFSET);
107    debug_printf("\n");
108    debug_printf("--------------------------------\n");
109    debug_printf("\n");
110
111    debug_printf("DMA-BENCH: xphi[0] -> xphi[1]\n");
112    debug_printf("\n");
113    debug_printf("--------------------------------\n");
114    debug_printf("\n");
115    dma_bench_run(dev, DMA_BENCH_XPHI_BASE_OFFSET,
116                  XEON_PHI_SYSMEM_BASE + 31 * XEON_PHI_SYSMEM_PAGE_SIZE);
117    debug_printf("\n");
118    debug_printf("--------------------------------\n");
119    debug_printf("\n");
120
121    debug_printf("DMA-BENCH: xphi[1] -> xphi[0]\n");
122    debug_printf("\n");
123    debug_printf("--------------------------------\n");
124    debug_printf("\n");
125    dma_bench_run(dev,XEON_PHI_SYSMEM_BASE + 31 * XEON_PHI_SYSMEM_PAGE_SIZE,
126                  DMA_BENCH_XPHI_BASE_OFFSET);
127    debug_printf("\n");
128    debug_printf("--------------------------------\n");
129    debug_printf("\n");
130
131    return SYS_ERR_OK;
132}
133
134errval_t dma_bench_run_default(struct dma_device *dev)
135{
136    debug_printf("DMA BENCHMARK started\n");
137    debug_printf("================================\n");
138    debug_printf("\n");
139    debug_printf("DMA-BENCH: host[0] -> host[0]\n");
140    debug_printf("\n");
141    debug_printf("--------------------------------\n");
142    debug_printf("\n");
143    //dma_bench_run(dev, DMA_BENCH_HOST_BASE,
144    //              DMA_BENCH_HOST_BASE + DMA_BENCH_BUFFER_SIZE);
145    return SYS_ERR_OK;
146    debug_printf("\n");
147    debug_printf("--------------------------------\n");
148    debug_printf("\n");
149    debug_printf("DMA-BENCH: host[0] -> host[1]\n");
150    debug_printf("\n");
151    debug_printf("--------------------------------\n");
152    debug_printf("\n");
153    dma_bench_run(dev, DMA_BENCH_HOST_BASE, DMA_BENCH_HOST_BASE2);
154    debug_printf("\n");
155    debug_printf("--------------------------------\n");
156    debug_printf("\n");
157    debug_printf("DMA-BENCH: host[1] -> host[0]\n");
158    debug_printf("\n");
159    debug_printf("--------------------------------\n");
160    debug_printf("\n");
161    dma_bench_run(dev, DMA_BENCH_HOST_BASE2, DMA_BENCH_HOST_BASE);
162    debug_printf("\n");
163    debug_printf("--------------------------------\n");
164    debug_printf("\n");
165    debug_printf("DMA-BENCH: host[1] -> host[1]\n");
166    debug_printf("\n");
167    debug_printf("--------------------------------\n");
168    debug_printf("\n");
169    dma_bench_run(dev, DMA_BENCH_HOST_BASE2,DMA_BENCH_HOST_BASE2+ DMA_BENCH_BUFFER_SIZE);
170    debug_printf("\n");
171    debug_printf("--------------------------------\n");
172    debug_printf("\n");
173    debug_printf("DMA-BENCH: host->card\n");
174    debug_printf("\n");
175    debug_printf("--------------------------------\n");
176    debug_printf("\n");
177    dma_bench_run(dev, DMA_BENCH_HOST_BASE, DMA_BENCH_HOST_XEON_PHI_BASE);
178    debug_printf("\n");
179    debug_printf("--------------------------------\n");
180    debug_printf("\n");
181    debug_printf("DMA-BENCH: card->card\n");
182    debug_printf("\n");
183    debug_printf("--------------------------------\n");
184    debug_printf("\n");
185   // dma_bench_run(dev, DMA_BENCH_HOST_XEON_PHI_BASE,DMA_BENCH_HOST_XEON_PHI_BASE + DMA_BENCH_BUFFER_SIZE);
186    debug_printf("\n");
187    debug_printf("--------------------------------\n");
188    debug_printf("\n");
189    debug_printf("DONE.\n");
190    debug_printf("=========================================================");
191    return SYS_ERR_OK;
192}
193
194errval_t dma_bench_run(struct dma_device *dev, lpaddr_t src, lpaddr_t dst)
195{
196    errval_t err;
197     cycles_t tsc_start, tsc_end;
198     uint64_t tscperus;
199     bench_ctl_t *ctl;
200
201     cycles_t result;
202
203     bench_init();
204
205     struct dma_channel *chan = dma_device_get_channel((struct dma_device *)dev);
206
207     err = sys_debug_get_tsc_per_ms(&tscperus);
208     assert(err_is_ok(err));
209     tscperus /= 1000;
210
211     for (uint8_t i = DMA_BENCH_MIN_BITS; i <= DMA_BENCH_MAX_BITS; ++i) {
212         size_t size = (1UL << i);
213
214         ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, DMA_BENCH_NUM_REPS);
215
216         uint8_t idx = 0;
217         do {
218             tsc_start = bench_tsc();
219             dma_done_flag = 0x0;
220
221             struct dma_req_setup setup = {
222                 .done_cb = dma_done_cb,
223                 .cb_arg = &request_counter,
224                 .args = {
225                     .memcpy = {
226                         .src = src,
227                         .dst = dst,
228                         .bytes = size
229                     }
230                 }
231             };
232
233             dma_req_id_t id;
234
235             err = dma_request_memcpy_chan(chan, &setup, &id);
236             if (err_is_fail(err)) {
237                 USER_PANIC_ERR(err, "could not set the memcy request");
238             }
239
240             tsc_end = bench_tsc();
241             result = calculate_time(tsc_start, tsc_end);
242
243             while (!dma_done_flag) {
244                 dma_channel_poll(chan);
245             }
246
247             tsc_end = bench_tsc();
248             result = calculate_time(tsc_start, tsc_end);
249             idx++;
250             request_counter++;
251         } while (!bench_ctl_add_run(ctl, &result));
252         char buf[50];
253
254         snprintf(buf, sizeof(buf), "%u", i);
255         bench_ctl_dump_analysis(ctl, 0, buf, tscperus);
256
257         bench_ctl_destroy(ctl);
258
259     }
260
261     return SYS_ERR_OK;
262}
263
264errval_t dma_bench_run_memcpy(void *dst, void *src)
265{
266    errval_t err;
267    cycles_t tsc_start, tsc_end;
268    uint64_t tscperus;
269    bench_ctl_t *ctl;
270
271    cycles_t result;
272
273    bench_init();
274
275    err = sys_debug_get_tsc_per_ms(&tscperus);
276    assert(err_is_ok(err));
277    tscperus /= 1000;
278    debug_printf("starting benchmark memcpy\n");
279    debug_printf("======================================\n");
280
281    for (uint8_t i = DMA_BENCH_MIN_BITS; i <= DMA_BENCH_MAX_BITS; ++i) {
282        size_t size = (1UL << i);
283
284        ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, DMA_BENCH_NUM_REPS);
285
286        uint8_t idx = 0;
287        do {
288            tsc_start = bench_tsc();
289            memcpy(dst, src, size);
290            tsc_end = bench_tsc();
291
292            result = calculate_time(tsc_start, tsc_end);
293            idx++;
294        } while (!bench_ctl_add_run(ctl, &result));
295        char buf[50];
296
297        snprintf(buf, sizeof(buf), "%u", i);
298        bench_ctl_dump_analysis(ctl, 0, buf, tscperus);
299
300        bench_ctl_destroy(ctl);
301    }
302    debug_printf("======================================\n");
303
304    return SYS_ERR_OK;
305}
306