1/* 2 Copyright (c) 2014 Intel Corporation. All Rights Reserved. 3 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions 6 are met: 7 8 * Redistributions of source code must retain the above copyright 9 notice, this list of conditions and the following disclaimer. 10 * Redistributions in binary form must reproduce the above copyright 11 notice, this list of conditions and the following disclaimer in the 12 documentation and/or other materials provided with the distribution. 13 * Neither the name of Intel Corporation nor the names of its 14 contributors may be used to endorse or promote products derived 15 from this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28*/ 29 30 31#include "offload_engine.h" 32#include <signal.h> 33#include <errno.h> 34 35#include <algorithm> 36#include <vector> 37 38#include "offload_host.h" 39#include "offload_table.h" 40 41const char* Engine::m_func_names[Engine::c_funcs_total] = 42{ 43 "server_compute", 44#ifdef MYO_SUPPORT 45 "server_myoinit", 46 "server_myofini", 47#endif // MYO_SUPPORT 48 "server_init", 49 "server_var_table_size", 50 "server_var_table_copy" 51}; 52 53// Symbolic representation of system signals. Fix for CQ233593 54const char* Engine::c_signal_names[Engine::c_signal_max] = 55{ 56 "Unknown SIGNAL", 57 "SIGHUP", /* 1, Hangup (POSIX). */ 58 "SIGINT", /* 2, Interrupt (ANSI). */ 59 "SIGQUIT", /* 3, Quit (POSIX). */ 60 "SIGILL", /* 4, Illegal instruction (ANSI). */ 61 "SIGTRAP", /* 5, Trace trap (POSIX). */ 62 "SIGABRT", /* 6, Abort (ANSI). */ 63 "SIGBUS", /* 7, BUS error (4.2 BSD). */ 64 "SIGFPE", /* 8, Floating-point exception (ANSI). */ 65 "SIGKILL", /* 9, Kill, unblockable (POSIX). */ 66 "SIGUSR1", /* 10, User-defined signal 1 (POSIX). */ 67 "SIGSEGV", /* 11, Segmentation violation (ANSI). */ 68 "SIGUSR2", /* 12, User-defined signal 2 (POSIX). */ 69 "SIGPIPE", /* 13, Broken pipe (POSIX). */ 70 "SIGALRM", /* 14, Alarm clock (POSIX). */ 71 "SIGTERM", /* 15, Termination (ANSI). */ 72 "SIGSTKFLT", /* 16, Stack fault. */ 73 "SIGCHLD", /* 17, Child status has changed (POSIX). */ 74 "SIGCONT", /* 18, Continue (POSIX). */ 75 "SIGSTOP", /* 19, Stop, unblockable (POSIX). */ 76 "SIGTSTP", /* 20, Keyboard stop (POSIX). */ 77 "SIGTTIN", /* 21, Background read from tty (POSIX). */ 78 "SIGTTOU", /* 22, Background write to tty (POSIX). */ 79 "SIGURG", /* 23, Urgent condition on socket (4.2 BSD). */ 80 "SIGXCPU", /* 24, CPU limit exceeded (4.2 BSD). */ 81 "SIGXFSZ", /* 25, File size limit exceeded (4.2 BSD). */ 82 "SIGVTALRM", /* 26, Virtual alarm clock (4.2 BSD). */ 83 "SIGPROF", /* 27, Profiling alarm clock (4.2 BSD). */ 84 "SIGWINCH", /* 28, Window size change (4.3 BSD, Sun). */ 85 "SIGIO", /* 29, I/O now possible (4.2 BSD). */ 86 "SIGPWR", /* 30, Power failure restart (System V). */ 87 "SIGSYS" /* 31, Bad system call. */ 88}; 89 90void Engine::init(void) 91{ 92 if (!m_ready) { 93 mutex_locker_t locker(m_lock); 94 95 if (!m_ready) { 96 // start process if not done yet 97 if (m_process == 0) { 98 init_process(); 99 } 100 101 // load penging images 102 load_libraries(); 103 104 // and (re)build pointer table 105 init_ptr_data(); 106 107 // it is ready now 108 m_ready = true; 109 } 110 } 111} 112 113void Engine::init_process(void) 114{ 115 COIENGINE engine; 116 COIRESULT res; 117 const char **environ; 118 119 // create environment for the target process 120 environ = (const char**) mic_env_vars.create_environ_for_card(m_index); 121 if (environ != 0) { 122 for (const char **p = environ; *p != 0; p++) { 123 OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p); 124 } 125 } 126 127 // Create execution context in the specified device 128 OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index, 129 m_physical_index); 130 res = COI::EngineGetHandle(COI_ISA_KNC, m_physical_index, &engine); 131 check_result(res, c_get_engine_handle, m_index, res); 132 133 // Target executable should be available by the time when we 134 // attempt to initialize the device 135 if (__target_exe == 0) { 136 LIBOFFLOAD_ERROR(c_no_target_exe); 137 exit(1); 138 } 139 140 OFFLOAD_DEBUG_TRACE(2, 141 "Loading target executable \"%s\" from %p, size %lld\n", 142 __target_exe->name, __target_exe->data, __target_exe->size); 143 144 res = COI::ProcessCreateFromMemory( 145 engine, // in_Engine 146 __target_exe->name, // in_pBinaryName 147 __target_exe->data, // in_pBinaryBuffer 148 __target_exe->size, // in_BinaryBufferLength, 149 0, // in_Argc 150 0, // in_ppArgv 151 environ == 0, // in_DupEnv 152 environ, // in_ppAdditionalEnv 153 mic_proxy_io, // in_ProxyActive 154 mic_proxy_fs_root, // in_ProxyfsRoot 155 mic_buffer_size, // in_BufferSpace 156 mic_library_path, // in_LibrarySearchPath 157 __target_exe->origin, // in_FileOfOrigin 158 __target_exe->offset, // in_FileOfOriginOffset 159 &m_process // out_pProcess 160 ); 161 check_result(res, c_process_create, m_index, res); 162 163 // get function handles 164 res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total, 165 m_func_names, m_funcs); 166 check_result(res, c_process_get_func_handles, m_index, res); 167 168 // initialize device side 169 pid_t pid = init_device(); 170 171 // For IDB 172 if (__dbg_is_attached) { 173 // TODO: we have in-memory executable now. 174 // Check with IDB team what should we provide them now? 175 if (strlen(__target_exe->name) < MAX_TARGET_NAME) { 176 strcpy(__dbg_target_exe_name, __target_exe->name); 177 } 178 __dbg_target_so_pid = pid; 179 __dbg_target_id = m_physical_index; 180 __dbg_target_so_loaded(); 181 } 182} 183 184void Engine::fini_process(bool verbose) 185{ 186 if (m_process != 0) { 187 uint32_t sig; 188 int8_t ret; 189 190 // destroy target process 191 OFFLOAD_DEBUG_TRACE(2, "Destroying process on the device %d\n", 192 m_index); 193 194 COIRESULT res = COI::ProcessDestroy(m_process, -1, 0, &ret, &sig); 195 m_process = 0; 196 197 if (res == COI_SUCCESS) { 198 OFFLOAD_DEBUG_TRACE(3, "Device process: signal %d, exit code %d\n", 199 sig, ret); 200 if (verbose) { 201 if (sig != 0) { 202 LIBOFFLOAD_ERROR( 203 c_mic_process_exit_sig, m_index, sig, 204 c_signal_names[sig >= c_signal_max ? 0 : sig]); 205 } 206 else { 207 LIBOFFLOAD_ERROR(c_mic_process_exit_ret, m_index, ret); 208 } 209 } 210 211 // for idb 212 if (__dbg_is_attached) { 213 __dbg_target_so_unloaded(); 214 } 215 } 216 else { 217 if (verbose) { 218 LIBOFFLOAD_ERROR(c_mic_process_exit, m_index); 219 } 220 } 221 } 222} 223 224void Engine::load_libraries() 225{ 226 // load libraries collected so far 227 for (TargetImageList::iterator it = m_images.begin(); 228 it != m_images.end(); it++) { 229 OFFLOAD_DEBUG_TRACE(2, "Loading library \"%s\" from %p, size %llu\n", 230 it->name, it->data, it->size); 231 232 // load library to the device 233 COILIBRARY lib; 234 COIRESULT res; 235 res = COI::ProcessLoadLibraryFromMemory(m_process, 236 it->data, 237 it->size, 238 it->name, 239 mic_library_path, 240 it->origin, 241 it->offset, 242 COI_LOADLIBRARY_V1_FLAGS, 243 &lib); 244 245 if (res != COI_SUCCESS && res != COI_ALREADY_EXISTS) { 246 check_result(res, c_load_library, m_index, res); 247 } 248 } 249 m_images.clear(); 250} 251 252static bool target_entry_cmp( 253 const VarList::BufEntry &l, 254 const VarList::BufEntry &r 255) 256{ 257 const char *l_name = reinterpret_cast<const char*>(l.name); 258 const char *r_name = reinterpret_cast<const char*>(r.name); 259 return strcmp(l_name, r_name) < 0; 260} 261 262static bool host_entry_cmp( 263 const VarTable::Entry *l, 264 const VarTable::Entry *r 265) 266{ 267 return strcmp(l->name, r->name) < 0; 268} 269 270void Engine::init_ptr_data(void) 271{ 272 COIRESULT res; 273 COIEVENT event; 274 275 // Prepare table of host entries 276 std::vector<const VarTable::Entry*> host_table(__offload_vars.begin(), 277 __offload_vars.end()); 278 279 // no need to do anything further is host table is empty 280 if (host_table.size() <= 0) { 281 return; 282 } 283 284 // Get var table entries from the target. 285 // First we need to get size for the buffer to copy data 286 struct { 287 int64_t nelems; 288 int64_t length; 289 } params; 290 291 res = COI::PipelineRunFunction(get_pipeline(), 292 m_funcs[c_func_var_table_size], 293 0, 0, 0, 294 0, 0, 295 0, 0, 296 ¶ms, sizeof(params), 297 &event); 298 check_result(res, c_pipeline_run_func, m_index, res); 299 300 res = COI::EventWait(1, &event, -1, 1, 0, 0); 301 check_result(res, c_event_wait, res); 302 303 if (params.length == 0) { 304 return; 305 } 306 307 // create buffer for target entries and copy data to host 308 COIBUFFER buffer; 309 res = COI::BufferCreate(params.length, COI_BUFFER_NORMAL, 0, 0, 1, 310 &m_process, &buffer); 311 check_result(res, c_buf_create, m_index, res); 312 313 COI_ACCESS_FLAGS flags = COI_SINK_WRITE; 314 res = COI::PipelineRunFunction(get_pipeline(), 315 m_funcs[c_func_var_table_copy], 316 1, &buffer, &flags, 317 0, 0, 318 ¶ms.nelems, sizeof(params.nelems), 319 0, 0, 320 &event); 321 check_result(res, c_pipeline_run_func, m_index, res); 322 323 res = COI::EventWait(1, &event, -1, 1, 0, 0); 324 check_result(res, c_event_wait, res); 325 326 // patch names in target data 327 VarList::BufEntry *target_table; 328 COIMAPINSTANCE map_inst; 329 res = COI::BufferMap(buffer, 0, params.length, COI_MAP_READ_ONLY, 0, 0, 330 0, &map_inst, 331 reinterpret_cast<void**>(&target_table)); 332 check_result(res, c_buf_map, res); 333 334 VarList::table_patch_names(target_table, params.nelems); 335 336 // and sort entries 337 std::sort(target_table, target_table + params.nelems, target_entry_cmp); 338 std::sort(host_table.begin(), host_table.end(), host_entry_cmp); 339 340 // merge host and target entries and enter matching vars map 341 std::vector<const VarTable::Entry*>::const_iterator hi = 342 host_table.begin(); 343 std::vector<const VarTable::Entry*>::const_iterator he = 344 host_table.end(); 345 const VarList::BufEntry *ti = target_table; 346 const VarList::BufEntry *te = target_table + params.nelems; 347 348 while (hi != he && ti != te) { 349 int res = strcmp((*hi)->name, reinterpret_cast<const char*>(ti->name)); 350 if (res == 0) { 351 // add matching entry to var map 352 std::pair<PtrSet::iterator, bool> res = 353 m_ptr_set.insert(PtrData((*hi)->addr, (*hi)->size)); 354 355 // store address for new entries 356 if (res.second) { 357 PtrData *ptr = const_cast<PtrData*>(res.first.operator->()); 358 ptr->mic_addr = ti->addr; 359 ptr->is_static = true; 360 } 361 362 hi++; 363 ti++; 364 } 365 else if (res < 0) { 366 hi++; 367 } 368 else { 369 ti++; 370 } 371 } 372 373 // cleanup 374 res = COI::BufferUnmap(map_inst, 0, 0, 0); 375 check_result(res, c_buf_unmap, res); 376 377 res = COI::BufferDestroy(buffer); 378 check_result(res, c_buf_destroy, res); 379} 380 381COIRESULT Engine::compute( 382 const std::list<COIBUFFER> &buffers, 383 const void* data, 384 uint16_t data_size, 385 void* ret, 386 uint16_t ret_size, 387 uint32_t num_deps, 388 const COIEVENT* deps, 389 COIEVENT* event 390) /* const */ 391{ 392 COIBUFFER *bufs; 393 COI_ACCESS_FLAGS *flags; 394 COIRESULT res; 395 396 // convert buffers list to array 397 int num_bufs = buffers.size(); 398 if (num_bufs > 0) { 399 bufs = (COIBUFFER*) alloca(num_bufs * sizeof(COIBUFFER)); 400 flags = (COI_ACCESS_FLAGS*) alloca(num_bufs * 401 sizeof(COI_ACCESS_FLAGS)); 402 403 int i = 0; 404 for (std::list<COIBUFFER>::const_iterator it = buffers.begin(); 405 it != buffers.end(); it++) { 406 bufs[i] = *it; 407 408 // TODO: this should be fixed 409 flags[i++] = COI_SINK_WRITE; 410 } 411 } 412 else { 413 bufs = 0; 414 flags = 0; 415 } 416 417 // start computation 418 res = COI::PipelineRunFunction(get_pipeline(), 419 m_funcs[c_func_compute], 420 num_bufs, bufs, flags, 421 num_deps, deps, 422 data, data_size, 423 ret, ret_size, 424 event); 425 return res; 426} 427 428pid_t Engine::init_device(void) 429{ 430 struct init_data { 431 int device_index; 432 int devices_total; 433 int console_level; 434 int offload_report_level; 435 } data; 436 COIRESULT res; 437 COIEVENT event; 438 pid_t pid; 439 440 OFFLOAD_DEBUG_TRACE_1(2, 0, c_offload_init, 441 "Initializing device with logical index %d " 442 "and physical index %d\n", 443 m_index, m_physical_index); 444 445 // setup misc data 446 data.device_index = m_index; 447 data.devices_total = mic_engines_total; 448 data.console_level = console_enabled; 449 data.offload_report_level = offload_report_level; 450 451 res = COI::PipelineRunFunction(get_pipeline(), 452 m_funcs[c_func_init], 453 0, 0, 0, 0, 0, 454 &data, sizeof(data), 455 &pid, sizeof(pid), 456 &event); 457 check_result(res, c_pipeline_run_func, m_index, res); 458 459 res = COI::EventWait(1, &event, -1, 1, 0, 0); 460 check_result(res, c_event_wait, res); 461 462 OFFLOAD_DEBUG_TRACE(2, "Device process pid is %d\n", pid); 463 464 return pid; 465} 466 467// data associated with each thread 468struct Thread { 469 Thread(long* addr_coipipe_counter) { 470 m_addr_coipipe_counter = addr_coipipe_counter; 471 memset(m_pipelines, 0, sizeof(m_pipelines)); 472 } 473 474 ~Thread() { 475#ifndef TARGET_WINNT 476 __sync_sub_and_fetch(m_addr_coipipe_counter, 1); 477#else // TARGET_WINNT 478 _InterlockedDecrement(m_addr_coipipe_counter); 479#endif // TARGET_WINNT 480 for (int i = 0; i < mic_engines_total; i++) { 481 if (m_pipelines[i] != 0) { 482 COI::PipelineDestroy(m_pipelines[i]); 483 } 484 } 485 } 486 487 COIPIPELINE get_pipeline(int index) const { 488 return m_pipelines[index]; 489 } 490 491 void set_pipeline(int index, COIPIPELINE pipeline) { 492 m_pipelines[index] = pipeline; 493 } 494 495 AutoSet& get_auto_vars() { 496 return m_auto_vars; 497 } 498 499private: 500 long* m_addr_coipipe_counter; 501 AutoSet m_auto_vars; 502 COIPIPELINE m_pipelines[MIC_ENGINES_MAX]; 503}; 504 505COIPIPELINE Engine::get_pipeline(void) 506{ 507 Thread* thread = (Thread*) thread_getspecific(mic_thread_key); 508 if (thread == 0) { 509 thread = new Thread(&m_proc_number); 510 thread_setspecific(mic_thread_key, thread); 511 } 512 513 COIPIPELINE pipeline = thread->get_pipeline(m_index); 514 if (pipeline == 0) { 515 COIRESULT res; 516 int proc_num; 517 518#ifndef TARGET_WINNT 519 proc_num = __sync_fetch_and_add(&m_proc_number, 1); 520#else // TARGET_WINNT 521 proc_num = _InterlockedIncrement(&m_proc_number); 522#endif // TARGET_WINNT 523 524 if (proc_num > COI_PIPELINE_MAX_PIPELINES) { 525 LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES); 526 LIBOFFLOAD_ABORT; 527 } 528 // create pipeline for this thread 529 res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline); 530 check_result(res, c_pipeline_create, m_index, res); 531 532 thread->set_pipeline(m_index, pipeline); 533 } 534 return pipeline; 535} 536 537AutoSet& Engine::get_auto_vars(void) 538{ 539 Thread* thread = (Thread*) thread_getspecific(mic_thread_key); 540 if (thread == 0) { 541 thread = new Thread(&m_proc_number); 542 thread_setspecific(mic_thread_key, thread); 543 } 544 545 return thread->get_auto_vars(); 546} 547 548void Engine::destroy_thread_data(void *data) 549{ 550 delete static_cast<Thread*>(data); 551} 552