1/*
2    Copyright (c) 2014 Intel Corporation.  All Rights Reserved.
3
4    Redistribution and use in source and binary forms, with or without
5    modification, are permitted provided that the following conditions
6    are met:
7
8      * Redistributions of source code must retain the above copyright
9        notice, this list of conditions and the following disclaimer.
10      * Redistributions in binary form must reproduce the above copyright
11        notice, this list of conditions and the following disclaimer in the
12        documentation and/or other materials provided with the distribution.
13      * Neither the name of Intel Corporation nor the names of its
14        contributors may be used to endorse or promote products derived
15        from this software without specific prior written permission.
16
17    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*/
29
30
31#include "offload_engine.h"
32#include <signal.h>
33#include <errno.h>
34
35#include <algorithm>
36#include <vector>
37
38#include "offload_host.h"
39#include "offload_table.h"
40
41const char* Engine::m_func_names[Engine::c_funcs_total] =
42{
43    "server_compute",
44#ifdef MYO_SUPPORT
45    "server_myoinit",
46    "server_myofini",
47#endif // MYO_SUPPORT
48    "server_init",
49    "server_var_table_size",
50    "server_var_table_copy"
51};
52
53// Symbolic representation of system signals. Fix for CQ233593
54const char* Engine::c_signal_names[Engine::c_signal_max] =
55{
56    "Unknown SIGNAL",
57    "SIGHUP",    /*  1, Hangup (POSIX).  */
58    "SIGINT",    /*  2, Interrupt (ANSI).  */
59    "SIGQUIT",   /*  3, Quit (POSIX).  */
60    "SIGILL",    /*  4, Illegal instruction (ANSI).  */
61    "SIGTRAP",   /*  5, Trace trap (POSIX).  */
62    "SIGABRT",   /*  6, Abort (ANSI).  */
63    "SIGBUS",    /*  7, BUS error (4.2 BSD).  */
64    "SIGFPE",    /*  8, Floating-point exception (ANSI).  */
65    "SIGKILL",   /*  9, Kill, unblockable (POSIX).  */
66    "SIGUSR1",   /* 10, User-defined signal 1 (POSIX).  */
67    "SIGSEGV",   /* 11, Segmentation violation (ANSI).  */
68    "SIGUSR2",   /* 12, User-defined signal 2 (POSIX).  */
69    "SIGPIPE",   /* 13, Broken pipe (POSIX).  */
70    "SIGALRM",   /* 14, Alarm clock (POSIX).  */
71    "SIGTERM",   /* 15, Termination (ANSI).  */
72    "SIGSTKFLT", /* 16, Stack fault.  */
73    "SIGCHLD",   /* 17, Child status has changed (POSIX).  */
74    "SIGCONT",   /* 18, Continue (POSIX).  */
75    "SIGSTOP",   /* 19, Stop, unblockable (POSIX).  */
76    "SIGTSTP",   /* 20, Keyboard stop (POSIX).  */
77    "SIGTTIN",   /* 21, Background read from tty (POSIX).  */
78    "SIGTTOU",   /* 22, Background write to tty (POSIX).  */
79    "SIGURG",    /* 23, Urgent condition on socket (4.2 BSD).  */
80    "SIGXCPU",   /* 24, CPU limit exceeded (4.2 BSD).  */
81    "SIGXFSZ",   /* 25, File size limit exceeded (4.2 BSD).  */
82    "SIGVTALRM", /* 26, Virtual alarm clock (4.2 BSD).  */
83    "SIGPROF",   /* 27, Profiling alarm clock (4.2 BSD).  */
84    "SIGWINCH",  /* 28, Window size change (4.3 BSD, Sun).  */
85    "SIGIO",     /* 29, I/O now possible (4.2 BSD).  */
86    "SIGPWR",    /* 30, Power failure restart (System V).  */
87    "SIGSYS"     /* 31, Bad system call.  */
88};
89
90void Engine::init(void)
91{
92    if (!m_ready) {
93        mutex_locker_t locker(m_lock);
94
95        if (!m_ready) {
96            // start process if not done yet
97            if (m_process == 0) {
98                init_process();
99            }
100
101            // load penging images
102            load_libraries();
103
104            // and (re)build pointer table
105            init_ptr_data();
106
107            // it is ready now
108            m_ready = true;
109        }
110    }
111}
112
113void Engine::init_process(void)
114{
115    COIENGINE engine;
116    COIRESULT res;
117    const char **environ;
118
119    // create environment for the target process
120    environ = (const char**) mic_env_vars.create_environ_for_card(m_index);
121    if (environ != 0) {
122        for (const char **p = environ; *p != 0; p++) {
123            OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p);
124        }
125    }
126
127    // Create execution context in the specified device
128    OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index,
129                        m_physical_index);
130    res = COI::EngineGetHandle(COI_ISA_KNC, m_physical_index, &engine);
131    check_result(res, c_get_engine_handle, m_index, res);
132
133    // Target executable should be available by the time when we
134    // attempt to initialize the device
135    if (__target_exe == 0) {
136        LIBOFFLOAD_ERROR(c_no_target_exe);
137        exit(1);
138    }
139
140    OFFLOAD_DEBUG_TRACE(2,
141        "Loading target executable \"%s\" from %p, size %lld\n",
142        __target_exe->name, __target_exe->data, __target_exe->size);
143
144    res = COI::ProcessCreateFromMemory(
145        engine,                 // in_Engine
146        __target_exe->name,     // in_pBinaryName
147        __target_exe->data,     // in_pBinaryBuffer
148        __target_exe->size,     // in_BinaryBufferLength,
149        0,                      // in_Argc
150        0,                      // in_ppArgv
151        environ == 0,           // in_DupEnv
152        environ,                // in_ppAdditionalEnv
153        mic_proxy_io,           // in_ProxyActive
154        mic_proxy_fs_root,      // in_ProxyfsRoot
155        mic_buffer_size,        // in_BufferSpace
156        mic_library_path,       // in_LibrarySearchPath
157        __target_exe->origin,   // in_FileOfOrigin
158        __target_exe->offset,   // in_FileOfOriginOffset
159        &m_process              // out_pProcess
160    );
161    check_result(res, c_process_create, m_index, res);
162
163    // get function handles
164    res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total,
165                                         m_func_names, m_funcs);
166    check_result(res, c_process_get_func_handles, m_index, res);
167
168    // initialize device side
169    pid_t pid = init_device();
170
171    // For IDB
172    if (__dbg_is_attached) {
173        // TODO: we have in-memory executable now.
174        // Check with IDB team what should we provide them now?
175        if (strlen(__target_exe->name) < MAX_TARGET_NAME) {
176            strcpy(__dbg_target_exe_name, __target_exe->name);
177        }
178        __dbg_target_so_pid = pid;
179        __dbg_target_id = m_physical_index;
180        __dbg_target_so_loaded();
181    }
182}
183
184void Engine::fini_process(bool verbose)
185{
186    if (m_process != 0) {
187        uint32_t sig;
188        int8_t ret;
189
190        // destroy target process
191        OFFLOAD_DEBUG_TRACE(2, "Destroying process on the device %d\n",
192                            m_index);
193
194        COIRESULT res = COI::ProcessDestroy(m_process, -1, 0, &ret, &sig);
195        m_process = 0;
196
197        if (res == COI_SUCCESS) {
198            OFFLOAD_DEBUG_TRACE(3, "Device process: signal %d, exit code %d\n",
199                                sig, ret);
200            if (verbose) {
201                if (sig != 0) {
202                    LIBOFFLOAD_ERROR(
203                        c_mic_process_exit_sig, m_index, sig,
204                        c_signal_names[sig >= c_signal_max ? 0 : sig]);
205                }
206                else {
207                    LIBOFFLOAD_ERROR(c_mic_process_exit_ret, m_index, ret);
208                }
209            }
210
211            // for idb
212            if (__dbg_is_attached) {
213                __dbg_target_so_unloaded();
214            }
215        }
216        else {
217            if (verbose) {
218                LIBOFFLOAD_ERROR(c_mic_process_exit, m_index);
219            }
220        }
221    }
222}
223
224void Engine::load_libraries()
225{
226    // load libraries collected so far
227    for (TargetImageList::iterator it = m_images.begin();
228         it != m_images.end(); it++) {
229        OFFLOAD_DEBUG_TRACE(2, "Loading library \"%s\" from %p, size %llu\n",
230                            it->name, it->data, it->size);
231
232        // load library to the device
233        COILIBRARY lib;
234        COIRESULT res;
235        res = COI::ProcessLoadLibraryFromMemory(m_process,
236                                                it->data,
237                                                it->size,
238                                                it->name,
239                                                mic_library_path,
240                                                it->origin,
241                                                it->offset,
242                                                COI_LOADLIBRARY_V1_FLAGS,
243                                                &lib);
244
245        if (res != COI_SUCCESS && res != COI_ALREADY_EXISTS) {
246            check_result(res, c_load_library, m_index, res);
247        }
248    }
249    m_images.clear();
250}
251
252static bool target_entry_cmp(
253    const VarList::BufEntry &l,
254    const VarList::BufEntry &r
255)
256{
257    const char *l_name = reinterpret_cast<const char*>(l.name);
258    const char *r_name = reinterpret_cast<const char*>(r.name);
259    return strcmp(l_name, r_name) < 0;
260}
261
262static bool host_entry_cmp(
263    const VarTable::Entry *l,
264    const VarTable::Entry *r
265)
266{
267    return strcmp(l->name, r->name) < 0;
268}
269
270void Engine::init_ptr_data(void)
271{
272    COIRESULT res;
273    COIEVENT event;
274
275    // Prepare table of host entries
276    std::vector<const VarTable::Entry*> host_table(__offload_vars.begin(),
277                                                   __offload_vars.end());
278
279    // no need to do anything further is host table is empty
280    if (host_table.size() <= 0) {
281        return;
282    }
283
284    // Get var table entries from the target.
285    // First we need to get size for the buffer to copy data
286    struct {
287        int64_t nelems;
288        int64_t length;
289    } params;
290
291    res = COI::PipelineRunFunction(get_pipeline(),
292                                   m_funcs[c_func_var_table_size],
293                                   0, 0, 0,
294                                   0, 0,
295                                   0, 0,
296                                   &params, sizeof(params),
297                                   &event);
298    check_result(res, c_pipeline_run_func, m_index, res);
299
300    res = COI::EventWait(1, &event, -1, 1, 0, 0);
301    check_result(res, c_event_wait, res);
302
303    if (params.length == 0) {
304        return;
305    }
306
307    // create buffer for target entries and copy data to host
308    COIBUFFER buffer;
309    res = COI::BufferCreate(params.length, COI_BUFFER_NORMAL, 0, 0, 1,
310                            &m_process, &buffer);
311    check_result(res, c_buf_create, m_index, res);
312
313    COI_ACCESS_FLAGS flags = COI_SINK_WRITE;
314    res = COI::PipelineRunFunction(get_pipeline(),
315                                   m_funcs[c_func_var_table_copy],
316                                   1, &buffer, &flags,
317                                   0, 0,
318                                   &params.nelems, sizeof(params.nelems),
319                                   0, 0,
320                                   &event);
321    check_result(res, c_pipeline_run_func, m_index, res);
322
323    res = COI::EventWait(1, &event, -1, 1, 0, 0);
324    check_result(res, c_event_wait, res);
325
326    // patch names in target data
327    VarList::BufEntry *target_table;
328    COIMAPINSTANCE map_inst;
329    res = COI::BufferMap(buffer, 0, params.length, COI_MAP_READ_ONLY, 0, 0,
330                         0, &map_inst,
331                         reinterpret_cast<void**>(&target_table));
332    check_result(res, c_buf_map, res);
333
334    VarList::table_patch_names(target_table, params.nelems);
335
336    // and sort entries
337    std::sort(target_table, target_table + params.nelems, target_entry_cmp);
338    std::sort(host_table.begin(), host_table.end(), host_entry_cmp);
339
340    // merge host and target entries and enter matching vars map
341    std::vector<const VarTable::Entry*>::const_iterator hi =
342        host_table.begin();
343    std::vector<const VarTable::Entry*>::const_iterator he =
344        host_table.end();
345    const VarList::BufEntry *ti = target_table;
346    const VarList::BufEntry *te = target_table + params.nelems;
347
348    while (hi != he && ti != te) {
349        int res = strcmp((*hi)->name, reinterpret_cast<const char*>(ti->name));
350        if (res == 0) {
351            // add matching entry to var map
352            std::pair<PtrSet::iterator, bool> res =
353                m_ptr_set.insert(PtrData((*hi)->addr, (*hi)->size));
354
355            // store address for new entries
356            if (res.second) {
357                PtrData *ptr = const_cast<PtrData*>(res.first.operator->());
358                ptr->mic_addr = ti->addr;
359                ptr->is_static = true;
360            }
361
362            hi++;
363            ti++;
364        }
365        else if (res < 0) {
366            hi++;
367        }
368        else {
369            ti++;
370        }
371    }
372
373    // cleanup
374    res = COI::BufferUnmap(map_inst, 0, 0, 0);
375    check_result(res, c_buf_unmap, res);
376
377    res = COI::BufferDestroy(buffer);
378    check_result(res, c_buf_destroy, res);
379}
380
381COIRESULT Engine::compute(
382    const std::list<COIBUFFER> &buffers,
383    const void*         data,
384    uint16_t            data_size,
385    void*               ret,
386    uint16_t            ret_size,
387    uint32_t            num_deps,
388    const COIEVENT*     deps,
389    COIEVENT*           event
390) /* const */
391{
392    COIBUFFER *bufs;
393    COI_ACCESS_FLAGS *flags;
394    COIRESULT res;
395
396    // convert buffers list to array
397    int num_bufs = buffers.size();
398    if (num_bufs > 0) {
399        bufs = (COIBUFFER*) alloca(num_bufs * sizeof(COIBUFFER));
400        flags = (COI_ACCESS_FLAGS*) alloca(num_bufs *
401                                           sizeof(COI_ACCESS_FLAGS));
402
403        int i = 0;
404        for (std::list<COIBUFFER>::const_iterator it = buffers.begin();
405             it != buffers.end(); it++) {
406            bufs[i] = *it;
407
408            // TODO: this should be fixed
409            flags[i++] = COI_SINK_WRITE;
410        }
411    }
412    else {
413        bufs = 0;
414        flags = 0;
415    }
416
417    // start computation
418    res = COI::PipelineRunFunction(get_pipeline(),
419                                   m_funcs[c_func_compute],
420                                   num_bufs, bufs, flags,
421                                   num_deps, deps,
422                                   data, data_size,
423                                   ret, ret_size,
424                                   event);
425    return res;
426}
427
428pid_t Engine::init_device(void)
429{
430    struct init_data {
431        int  device_index;
432        int  devices_total;
433        int  console_level;
434        int  offload_report_level;
435    } data;
436    COIRESULT res;
437    COIEVENT event;
438    pid_t pid;
439
440    OFFLOAD_DEBUG_TRACE_1(2, 0, c_offload_init,
441                          "Initializing device with logical index %d "
442                          "and physical index %d\n",
443                           m_index, m_physical_index);
444
445    // setup misc data
446    data.device_index = m_index;
447    data.devices_total = mic_engines_total;
448    data.console_level = console_enabled;
449    data.offload_report_level = offload_report_level;
450
451    res = COI::PipelineRunFunction(get_pipeline(),
452                                   m_funcs[c_func_init],
453                                   0, 0, 0, 0, 0,
454                                   &data, sizeof(data),
455                                   &pid, sizeof(pid),
456                                   &event);
457    check_result(res, c_pipeline_run_func, m_index, res);
458
459    res = COI::EventWait(1, &event, -1, 1, 0, 0);
460    check_result(res, c_event_wait, res);
461
462    OFFLOAD_DEBUG_TRACE(2, "Device process pid is %d\n", pid);
463
464    return pid;
465}
466
467// data associated with each thread
468struct Thread {
469    Thread(long* addr_coipipe_counter) {
470        m_addr_coipipe_counter = addr_coipipe_counter;
471        memset(m_pipelines, 0, sizeof(m_pipelines));
472    }
473
474    ~Thread() {
475#ifndef TARGET_WINNT
476        __sync_sub_and_fetch(m_addr_coipipe_counter, 1);
477#else // TARGET_WINNT
478        _InterlockedDecrement(m_addr_coipipe_counter);
479#endif // TARGET_WINNT
480        for (int i = 0; i < mic_engines_total; i++) {
481            if (m_pipelines[i] != 0) {
482                COI::PipelineDestroy(m_pipelines[i]);
483            }
484        }
485    }
486
487    COIPIPELINE get_pipeline(int index) const {
488        return m_pipelines[index];
489    }
490
491    void set_pipeline(int index, COIPIPELINE pipeline) {
492        m_pipelines[index] = pipeline;
493    }
494
495    AutoSet& get_auto_vars() {
496        return m_auto_vars;
497    }
498
499private:
500    long*       m_addr_coipipe_counter;
501    AutoSet     m_auto_vars;
502    COIPIPELINE m_pipelines[MIC_ENGINES_MAX];
503};
504
505COIPIPELINE Engine::get_pipeline(void)
506{
507    Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
508    if (thread == 0) {
509        thread = new Thread(&m_proc_number);
510        thread_setspecific(mic_thread_key, thread);
511    }
512
513    COIPIPELINE pipeline = thread->get_pipeline(m_index);
514    if (pipeline == 0) {
515        COIRESULT res;
516        int proc_num;
517
518#ifndef TARGET_WINNT
519        proc_num = __sync_fetch_and_add(&m_proc_number, 1);
520#else // TARGET_WINNT
521        proc_num = _InterlockedIncrement(&m_proc_number);
522#endif // TARGET_WINNT
523
524        if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
525            LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
526            LIBOFFLOAD_ABORT;
527        }
528        // create pipeline for this thread
529        res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline);
530        check_result(res, c_pipeline_create, m_index, res);
531
532        thread->set_pipeline(m_index, pipeline);
533    }
534    return pipeline;
535}
536
537AutoSet& Engine::get_auto_vars(void)
538{
539    Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
540    if (thread == 0) {
541        thread = new Thread(&m_proc_number);
542        thread_setspecific(mic_thread_key, thread);
543    }
544
545    return thread->get_auto_vars();
546}
547
548void Engine::destroy_thread_data(void *data)
549{
550    delete static_cast<Thread*>(data);
551}
552