1/* Copyright (C) 2013-2020 Free Software Foundation, Inc. 2 Contributed by Jakub Jelinek <jakub@redhat.com>. 3 4 This file is part of the GNU Offloading and Multi Processing Library 5 (libgomp). 6 7 Libgomp is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 more details. 16 17 Under Section 7 of GPL version 3, you are granted additional 18 permissions described in the GCC Runtime Library Exception, version 19 3.1, as published by the Free Software Foundation. 20 21 You should have received a copy of the GNU General Public License and 22 a copy of the GCC Runtime Library Exception along with this program; 23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 <http://www.gnu.org/licenses/>. */ 25 26/* This file contains the support of offloading. */ 27 28#include "libgomp.h" 29#include "oacc-plugin.h" 30#include "oacc-int.h" 31#include "gomp-constants.h" 32#include <limits.h> 33#include <stdbool.h> 34#include <stdlib.h> 35#ifdef HAVE_INTTYPES_H 36# include <inttypes.h> /* For PRIu64. */ 37#endif 38#include <string.h> 39#include <assert.h> 40#include <errno.h> 41 42#ifdef PLUGIN_SUPPORT 43#include <dlfcn.h> 44#include "plugin-suffix.h" 45#endif 46 47#define FIELD_TGT_EMPTY (~(size_t) 0) 48 49static void gomp_target_init (void); 50 51/* The whole initialization code for offloading plugins is only run one. */ 52static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT; 53 54/* Mutex for offload image registration. */ 55static gomp_mutex_t register_lock; 56 57/* This structure describes an offload image. 58 It contains type of the target device, pointer to host table descriptor, and 59 pointer to target data. */ 60struct offload_image_descr { 61 unsigned version; 62 enum offload_target_type type; 63 const void *host_table; 64 const void *target_data; 65}; 66 67/* Array of descriptors of offload images. */ 68static struct offload_image_descr *offload_images; 69 70/* Total number of offload images. */ 71static int num_offload_images; 72 73/* Array of descriptors for all available devices. */ 74static struct gomp_device_descr *devices; 75 76/* Total number of available devices. */ 77static int num_devices; 78 79/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */ 80static int num_devices_openmp; 81 82/* Similar to gomp_realloc, but release register_lock before gomp_fatal. */ 83 84static void * 85gomp_realloc_unlock (void *old, size_t size) 86{ 87 void *ret = realloc (old, size); 88 if (ret == NULL) 89 { 90 gomp_mutex_unlock (®ister_lock); 91 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size); 92 } 93 return ret; 94} 95 96attribute_hidden void 97gomp_init_targets_once (void) 98{ 99 (void) pthread_once (&gomp_is_initialized, gomp_target_init); 100} 101 102attribute_hidden int 103gomp_get_num_devices (void) 104{ 105 gomp_init_targets_once (); 106 return num_devices_openmp; 107} 108 109static struct gomp_device_descr * 110resolve_device (int device_id) 111{ 112 if (device_id == GOMP_DEVICE_ICV) 113 { 114 struct gomp_task_icv *icv = gomp_icv (false); 115 device_id = icv->default_device_var; 116 } 117 118 if (device_id < 0 || device_id >= gomp_get_num_devices ()) 119 return NULL; 120 121 gomp_mutex_lock (&devices[device_id].lock); 122 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED) 123 gomp_init_device (&devices[device_id]); 124 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED) 125 { 126 gomp_mutex_unlock (&devices[device_id].lock); 127 return NULL; 128 } 129 gomp_mutex_unlock (&devices[device_id].lock); 130 131 return &devices[device_id]; 132} 133 134 135static inline splay_tree_key 136gomp_map_lookup (splay_tree mem_map, splay_tree_key key) 137{ 138 if (key->host_start != key->host_end) 139 return splay_tree_lookup (mem_map, key); 140 141 key->host_end++; 142 splay_tree_key n = splay_tree_lookup (mem_map, key); 143 key->host_end--; 144 if (n) 145 return n; 146 key->host_start--; 147 n = splay_tree_lookup (mem_map, key); 148 key->host_start++; 149 if (n) 150 return n; 151 return splay_tree_lookup (mem_map, key); 152} 153 154static inline splay_tree_key 155gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key) 156{ 157 if (key->host_start != key->host_end) 158 return splay_tree_lookup (mem_map, key); 159 160 key->host_end++; 161 splay_tree_key n = splay_tree_lookup (mem_map, key); 162 key->host_end--; 163 return n; 164} 165 166static inline void 167gomp_device_copy (struct gomp_device_descr *devicep, 168 bool (*copy_func) (int, void *, const void *, size_t), 169 const char *dst, void *dstaddr, 170 const char *src, const void *srcaddr, 171 size_t size) 172{ 173 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size)) 174 { 175 gomp_mutex_unlock (&devicep->lock); 176 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", 177 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); 178 } 179} 180 181static inline void 182goacc_device_copy_async (struct gomp_device_descr *devicep, 183 bool (*copy_func) (int, void *, const void *, size_t, 184 struct goacc_asyncqueue *), 185 const char *dst, void *dstaddr, 186 const char *src, const void *srcaddr, 187 size_t size, struct goacc_asyncqueue *aq) 188{ 189 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq)) 190 { 191 gomp_mutex_unlock (&devicep->lock); 192 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", 193 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); 194 } 195} 196 197/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses) 198 host to device memory transfers. */ 199 200struct gomp_coalesce_chunk 201{ 202 /* The starting and ending point of a coalesced chunk of memory. */ 203 size_t start, end; 204}; 205 206struct gomp_coalesce_buf 207{ 208 /* Buffer into which gomp_copy_host2dev will memcpy data and from which 209 it will be copied to the device. */ 210 void *buf; 211 struct target_mem_desc *tgt; 212 /* Array with offsets, chunks[i].start is the starting offset and 213 chunks[i].end ending offset relative to tgt->tgt_start device address 214 of chunks which are to be copied to buf and later copied to device. */ 215 struct gomp_coalesce_chunk *chunks; 216 /* Number of chunks in chunks array, or -1 if coalesce buffering should not 217 be performed. */ 218 long chunk_cnt; 219 /* During construction of chunks array, how many memory regions are within 220 the last chunk. If there is just one memory region for a chunk, we copy 221 it directly to device rather than going through buf. */ 222 long use_cnt; 223}; 224 225/* Maximum size of memory region considered for coalescing. Larger copies 226 are performed directly. */ 227#define MAX_COALESCE_BUF_SIZE (32 * 1024) 228 229/* Maximum size of a gap in between regions to consider them being copied 230 within the same chunk. All the device offsets considered are within 231 newly allocated device memory, so it isn't fatal if we copy some padding 232 in between from host to device. The gaps come either from alignment 233 padding or from memory regions which are not supposed to be copied from 234 host to device (e.g. map(alloc:), map(from:) etc.). */ 235#define MAX_COALESCE_BUF_GAP (4 * 1024) 236 237/* Add region with device tgt_start relative offset and length to CBUF. */ 238 239static inline void 240gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len) 241{ 242 if (len > MAX_COALESCE_BUF_SIZE || len == 0) 243 return; 244 if (cbuf->chunk_cnt) 245 { 246 if (cbuf->chunk_cnt < 0) 247 return; 248 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end) 249 { 250 cbuf->chunk_cnt = -1; 251 return; 252 } 253 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP) 254 { 255 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len; 256 cbuf->use_cnt++; 257 return; 258 } 259 /* If the last chunk is only used by one mapping, discard it, 260 as it will be one host to device copy anyway and 261 memcpying it around will only waste cycles. */ 262 if (cbuf->use_cnt == 1) 263 cbuf->chunk_cnt--; 264 } 265 cbuf->chunks[cbuf->chunk_cnt].start = start; 266 cbuf->chunks[cbuf->chunk_cnt].end = start + len; 267 cbuf->chunk_cnt++; 268 cbuf->use_cnt = 1; 269} 270 271/* Return true for mapping kinds which need to copy data from the 272 host to device for regions that weren't previously mapped. */ 273 274static inline bool 275gomp_to_device_kind_p (int kind) 276{ 277 switch (kind) 278 { 279 case GOMP_MAP_ALLOC: 280 case GOMP_MAP_FROM: 281 case GOMP_MAP_FORCE_ALLOC: 282 case GOMP_MAP_FORCE_FROM: 283 case GOMP_MAP_ALWAYS_FROM: 284 return false; 285 default: 286 return true; 287 } 288} 289 290attribute_hidden void 291gomp_copy_host2dev (struct gomp_device_descr *devicep, 292 struct goacc_asyncqueue *aq, 293 void *d, const void *h, size_t sz, 294 struct gomp_coalesce_buf *cbuf) 295{ 296 if (cbuf) 297 { 298 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start; 299 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end) 300 { 301 long first = 0; 302 long last = cbuf->chunk_cnt - 1; 303 while (first <= last) 304 { 305 long middle = (first + last) >> 1; 306 if (cbuf->chunks[middle].end <= doff) 307 first = middle + 1; 308 else if (cbuf->chunks[middle].start <= doff) 309 { 310 if (doff + sz > cbuf->chunks[middle].end) 311 gomp_fatal ("internal libgomp cbuf error"); 312 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start), 313 h, sz); 314 return; 315 } 316 else 317 last = middle - 1; 318 } 319 } 320 } 321 if (__builtin_expect (aq != NULL, 0)) 322 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func, 323 "dev", d, "host", h, sz, aq); 324 else 325 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz); 326} 327 328attribute_hidden void 329gomp_copy_dev2host (struct gomp_device_descr *devicep, 330 struct goacc_asyncqueue *aq, 331 void *h, const void *d, size_t sz) 332{ 333 if (__builtin_expect (aq != NULL, 0)) 334 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func, 335 "host", h, "dev", d, sz, aq); 336 else 337 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); 338} 339 340static void 341gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) 342{ 343 if (!devicep->free_func (devicep->target_id, devptr)) 344 { 345 gomp_mutex_unlock (&devicep->lock); 346 gomp_fatal ("error in freeing device memory block at %p", devptr); 347 } 348} 349 350/* Handle the case where gomp_map_lookup, splay_tree_lookup or 351 gomp_map_0len_lookup found oldn for newn. 352 Helper function of gomp_map_vars. */ 353 354static inline void 355gomp_map_vars_existing (struct gomp_device_descr *devicep, 356 struct goacc_asyncqueue *aq, splay_tree_key oldn, 357 splay_tree_key newn, struct target_var_desc *tgt_var, 358 unsigned char kind, struct gomp_coalesce_buf *cbuf) 359{ 360 assert (kind != GOMP_MAP_ATTACH); 361 362 tgt_var->key = oldn; 363 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind); 364 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind); 365 tgt_var->is_attach = false; 366 tgt_var->offset = newn->host_start - oldn->host_start; 367 tgt_var->length = newn->host_end - newn->host_start; 368 369 if ((kind & GOMP_MAP_FLAG_FORCE) 370 || oldn->host_start > newn->host_start 371 || oldn->host_end < newn->host_end) 372 { 373 gomp_mutex_unlock (&devicep->lock); 374 gomp_fatal ("Trying to map into device [%p..%p) object when " 375 "[%p..%p) is already mapped", 376 (void *) newn->host_start, (void *) newn->host_end, 377 (void *) oldn->host_start, (void *) oldn->host_end); 378 } 379 380 if (GOMP_MAP_ALWAYS_TO_P (kind)) 381 gomp_copy_host2dev (devicep, aq, 382 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset 383 + newn->host_start - oldn->host_start), 384 (void *) newn->host_start, 385 newn->host_end - newn->host_start, cbuf); 386 387 if (oldn->refcount != REFCOUNT_INFINITY) 388 oldn->refcount++; 389} 390 391static int 392get_kind (bool short_mapkind, void *kinds, int idx) 393{ 394 return short_mapkind ? ((unsigned short *) kinds)[idx] 395 : ((unsigned char *) kinds)[idx]; 396} 397 398static void 399gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, 400 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias, 401 struct gomp_coalesce_buf *cbuf) 402{ 403 struct gomp_device_descr *devicep = tgt->device_descr; 404 struct splay_tree_s *mem_map = &devicep->mem_map; 405 struct splay_tree_key_s cur_node; 406 407 cur_node.host_start = host_ptr; 408 if (cur_node.host_start == (uintptr_t) NULL) 409 { 410 cur_node.tgt_offset = (uintptr_t) NULL; 411 gomp_copy_host2dev (devicep, aq, 412 (void *) (tgt->tgt_start + target_offset), 413 (void *) &cur_node.tgt_offset, 414 sizeof (void *), cbuf); 415 return; 416 } 417 /* Add bias to the pointer value. */ 418 cur_node.host_start += bias; 419 cur_node.host_end = cur_node.host_start; 420 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 421 if (n == NULL) 422 { 423 gomp_mutex_unlock (&devicep->lock); 424 gomp_fatal ("Pointer target of array section wasn't mapped"); 425 } 426 cur_node.host_start -= n->host_start; 427 cur_node.tgt_offset 428 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start; 429 /* At this point tgt_offset is target address of the 430 array section. Now subtract bias to get what we want 431 to initialize the pointer with. */ 432 cur_node.tgt_offset -= bias; 433 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset), 434 (void *) &cur_node.tgt_offset, sizeof (void *), cbuf); 435} 436 437static void 438gomp_map_fields_existing (struct target_mem_desc *tgt, 439 struct goacc_asyncqueue *aq, splay_tree_key n, 440 size_t first, size_t i, void **hostaddrs, 441 size_t *sizes, void *kinds, 442 struct gomp_coalesce_buf *cbuf) 443{ 444 struct gomp_device_descr *devicep = tgt->device_descr; 445 struct splay_tree_s *mem_map = &devicep->mem_map; 446 struct splay_tree_key_s cur_node; 447 int kind; 448 const bool short_mapkind = true; 449 const int typemask = short_mapkind ? 0xff : 0x7; 450 451 cur_node.host_start = (uintptr_t) hostaddrs[i]; 452 cur_node.host_end = cur_node.host_start + sizes[i]; 453 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node); 454 kind = get_kind (short_mapkind, kinds, i); 455 if (n2 456 && n2->tgt == n->tgt 457 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) 458 { 459 gomp_map_vars_existing (devicep, aq, n2, &cur_node, 460 &tgt->list[i], kind & typemask, cbuf); 461 return; 462 } 463 if (sizes[i] == 0) 464 { 465 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1]) 466 { 467 cur_node.host_start--; 468 n2 = splay_tree_lookup (mem_map, &cur_node); 469 cur_node.host_start++; 470 if (n2 471 && n2->tgt == n->tgt 472 && n2->host_start - n->host_start 473 == n2->tgt_offset - n->tgt_offset) 474 { 475 gomp_map_vars_existing (devicep, aq, n2, &cur_node, 476 &tgt->list[i], kind & typemask, cbuf); 477 return; 478 } 479 } 480 cur_node.host_end++; 481 n2 = splay_tree_lookup (mem_map, &cur_node); 482 cur_node.host_end--; 483 if (n2 484 && n2->tgt == n->tgt 485 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) 486 { 487 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], 488 kind & typemask, cbuf); 489 return; 490 } 491 } 492 gomp_mutex_unlock (&devicep->lock); 493 gomp_fatal ("Trying to map into device [%p..%p) structure element when " 494 "other mapped elements from the same structure weren't mapped " 495 "together with it", (void *) cur_node.host_start, 496 (void *) cur_node.host_end); 497} 498 499attribute_hidden void 500gomp_attach_pointer (struct gomp_device_descr *devicep, 501 struct goacc_asyncqueue *aq, splay_tree mem_map, 502 splay_tree_key n, uintptr_t attach_to, size_t bias, 503 struct gomp_coalesce_buf *cbufp) 504{ 505 struct splay_tree_key_s s; 506 size_t size, idx; 507 508 if (n == NULL) 509 { 510 gomp_mutex_unlock (&devicep->lock); 511 gomp_fatal ("enclosing struct not mapped for attach"); 512 } 513 514 size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *); 515 /* We might have a pointer in a packed struct: however we cannot have more 516 than one such pointer in each pointer-sized portion of the struct, so 517 this is safe. */ 518 idx = (attach_to - n->host_start) / sizeof (void *); 519 520 if (!n->aux) 521 n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux)); 522 523 if (!n->aux->attach_count) 524 n->aux->attach_count 525 = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size); 526 527 if (n->aux->attach_count[idx] < UINTPTR_MAX) 528 n->aux->attach_count[idx]++; 529 else 530 { 531 gomp_mutex_unlock (&devicep->lock); 532 gomp_fatal ("attach count overflow"); 533 } 534 535 if (n->aux->attach_count[idx] == 1) 536 { 537 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to 538 - n->host_start; 539 uintptr_t target = (uintptr_t) *(void **) attach_to; 540 splay_tree_key tn; 541 uintptr_t data; 542 543 if ((void *) target == NULL) 544 { 545 gomp_mutex_unlock (&devicep->lock); 546 gomp_fatal ("attempt to attach null pointer"); 547 } 548 549 s.host_start = target + bias; 550 s.host_end = s.host_start + 1; 551 tn = splay_tree_lookup (mem_map, &s); 552 553 if (!tn) 554 { 555 gomp_mutex_unlock (&devicep->lock); 556 gomp_fatal ("pointer target not mapped for attach"); 557 } 558 559 data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start; 560 561 gomp_debug (1, 562 "%s: attaching host %p, target %p (struct base %p) to %p\n", 563 __FUNCTION__, (void *) attach_to, (void *) devptr, 564 (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data); 565 566 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, 567 sizeof (void *), cbufp); 568 } 569 else 570 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, 571 (void *) attach_to, (int) n->aux->attach_count[idx]); 572} 573 574attribute_hidden void 575gomp_detach_pointer (struct gomp_device_descr *devicep, 576 struct goacc_asyncqueue *aq, splay_tree_key n, 577 uintptr_t detach_from, bool finalize, 578 struct gomp_coalesce_buf *cbufp) 579{ 580 size_t idx; 581 582 if (n == NULL) 583 { 584 gomp_mutex_unlock (&devicep->lock); 585 gomp_fatal ("enclosing struct not mapped for detach"); 586 } 587 588 idx = (detach_from - n->host_start) / sizeof (void *); 589 590 if (!n->aux || !n->aux->attach_count) 591 { 592 gomp_mutex_unlock (&devicep->lock); 593 gomp_fatal ("no attachment counters for struct"); 594 } 595 596 if (finalize) 597 n->aux->attach_count[idx] = 1; 598 599 if (n->aux->attach_count[idx] == 0) 600 { 601 gomp_mutex_unlock (&devicep->lock); 602 gomp_fatal ("attach count underflow"); 603 } 604 else 605 n->aux->attach_count[idx]--; 606 607 if (n->aux->attach_count[idx] == 0) 608 { 609 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from 610 - n->host_start; 611 uintptr_t target = (uintptr_t) *(void **) detach_from; 612 613 gomp_debug (1, 614 "%s: detaching host %p, target %p (struct base %p) to %p\n", 615 __FUNCTION__, (void *) detach_from, (void *) devptr, 616 (void *) (n->tgt->tgt_start + n->tgt_offset), 617 (void *) target); 618 619 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target, 620 sizeof (void *), cbufp); 621 } 622 else 623 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, 624 (void *) detach_from, (int) n->aux->attach_count[idx]); 625} 626 627attribute_hidden uintptr_t 628gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i) 629{ 630 if (tgt->list[i].key != NULL) 631 return tgt->list[i].key->tgt->tgt_start 632 + tgt->list[i].key->tgt_offset 633 + tgt->list[i].offset; 634 635 switch (tgt->list[i].offset) 636 { 637 case OFFSET_INLINED: 638 return (uintptr_t) hostaddrs[i]; 639 640 case OFFSET_POINTER: 641 return 0; 642 643 case OFFSET_STRUCT: 644 return tgt->list[i + 1].key->tgt->tgt_start 645 + tgt->list[i + 1].key->tgt_offset 646 + tgt->list[i + 1].offset 647 + (uintptr_t) hostaddrs[i] 648 - (uintptr_t) hostaddrs[i + 1]; 649 650 default: 651 return tgt->tgt_start + tgt->list[i].offset; 652 } 653} 654 655static inline __attribute__((always_inline)) struct target_mem_desc * 656gomp_map_vars_internal (struct gomp_device_descr *devicep, 657 struct goacc_asyncqueue *aq, size_t mapnum, 658 void **hostaddrs, void **devaddrs, size_t *sizes, 659 void *kinds, bool short_mapkind, 660 enum gomp_map_vars_kind pragma_kind) 661{ 662 size_t i, tgt_align, tgt_size, not_found_cnt = 0; 663 bool has_firstprivate = false; 664 const int rshift = short_mapkind ? 8 : 3; 665 const int typemask = short_mapkind ? 0xff : 0x7; 666 struct splay_tree_s *mem_map = &devicep->mem_map; 667 struct splay_tree_key_s cur_node; 668 struct target_mem_desc *tgt 669 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); 670 tgt->list_count = mapnum; 671 tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; 672 tgt->device_descr = devicep; 673 tgt->prev = NULL; 674 struct gomp_coalesce_buf cbuf, *cbufp = NULL; 675 676 if (mapnum == 0) 677 { 678 tgt->tgt_start = 0; 679 tgt->tgt_end = 0; 680 return tgt; 681 } 682 683 tgt_align = sizeof (void *); 684 tgt_size = 0; 685 cbuf.chunks = NULL; 686 cbuf.chunk_cnt = -1; 687 cbuf.use_cnt = 0; 688 cbuf.buf = NULL; 689 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET) 690 { 691 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk); 692 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size); 693 cbuf.chunk_cnt = 0; 694 } 695 if (pragma_kind == GOMP_MAP_VARS_TARGET) 696 { 697 size_t align = 4 * sizeof (void *); 698 tgt_align = align; 699 tgt_size = mapnum * sizeof (void *); 700 cbuf.chunk_cnt = 1; 701 cbuf.use_cnt = 1 + (mapnum > 1); 702 cbuf.chunks[0].start = 0; 703 cbuf.chunks[0].end = tgt_size; 704 } 705 706 gomp_mutex_lock (&devicep->lock); 707 if (devicep->state == GOMP_DEVICE_FINALIZED) 708 { 709 gomp_mutex_unlock (&devicep->lock); 710 free (tgt); 711 return NULL; 712 } 713 714 for (i = 0; i < mapnum; i++) 715 { 716 int kind = get_kind (short_mapkind, kinds, i); 717 if (hostaddrs[i] == NULL 718 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT) 719 { 720 tgt->list[i].key = NULL; 721 tgt->list[i].offset = OFFSET_INLINED; 722 continue; 723 } 724 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR 725 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) 726 { 727 tgt->list[i].key = NULL; 728 if (!not_found_cnt) 729 { 730 /* In OpenMP < 5.0 and OpenACC the mapping has to be done 731 on a separate construct prior to using use_device_{addr,ptr}. 732 In OpenMP 5.0, map directives need to be ordered by the 733 middle-end before the use_device_* clauses. If 734 !not_found_cnt, all mappings requested (if any) are already 735 mapped, so use_device_{addr,ptr} can be resolved right away. 736 Otherwise, if not_found_cnt, gomp_map_lookup might fail 737 now but would succeed after performing the mappings in the 738 following loop. We can't defer this always to the second 739 loop, because it is not even invoked when !not_found_cnt 740 after the first loop. */ 741 cur_node.host_start = (uintptr_t) hostaddrs[i]; 742 cur_node.host_end = cur_node.host_start; 743 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 744 if (n != NULL) 745 { 746 cur_node.host_start -= n->host_start; 747 hostaddrs[i] 748 = (void *) (n->tgt->tgt_start + n->tgt_offset 749 + cur_node.host_start); 750 } 751 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) 752 { 753 gomp_mutex_unlock (&devicep->lock); 754 gomp_fatal ("use_device_ptr pointer wasn't mapped"); 755 } 756 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) 757 /* If not present, continue using the host address. */ 758 ; 759 else 760 __builtin_unreachable (); 761 tgt->list[i].offset = OFFSET_INLINED; 762 } 763 else 764 tgt->list[i].offset = 0; 765 continue; 766 } 767 else if ((kind & typemask) == GOMP_MAP_STRUCT) 768 { 769 size_t first = i + 1; 770 size_t last = i + sizes[i]; 771 cur_node.host_start = (uintptr_t) hostaddrs[i]; 772 cur_node.host_end = (uintptr_t) hostaddrs[last] 773 + sizes[last]; 774 tgt->list[i].key = NULL; 775 tgt->list[i].offset = OFFSET_STRUCT; 776 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); 777 if (n == NULL) 778 { 779 size_t align = (size_t) 1 << (kind >> rshift); 780 if (tgt_align < align) 781 tgt_align = align; 782 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start; 783 tgt_size = (tgt_size + align - 1) & ~(align - 1); 784 tgt_size += cur_node.host_end - cur_node.host_start; 785 not_found_cnt += last - i; 786 for (i = first; i <= last; i++) 787 { 788 tgt->list[i].key = NULL; 789 if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i) 790 & typemask)) 791 gomp_coalesce_buf_add (&cbuf, 792 tgt_size - cur_node.host_end 793 + (uintptr_t) hostaddrs[i], 794 sizes[i]); 795 } 796 i--; 797 continue; 798 } 799 for (i = first; i <= last; i++) 800 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, 801 sizes, kinds, NULL); 802 i--; 803 continue; 804 } 805 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER) 806 { 807 tgt->list[i].key = NULL; 808 tgt->list[i].offset = OFFSET_POINTER; 809 has_firstprivate = true; 810 continue; 811 } 812 else if ((kind & typemask) == GOMP_MAP_ATTACH) 813 { 814 tgt->list[i].key = NULL; 815 has_firstprivate = true; 816 continue; 817 } 818 cur_node.host_start = (uintptr_t) hostaddrs[i]; 819 if (!GOMP_MAP_POINTER_P (kind & typemask)) 820 cur_node.host_end = cur_node.host_start + sizes[i]; 821 else 822 cur_node.host_end = cur_node.host_start + sizeof (void *); 823 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE) 824 { 825 tgt->list[i].key = NULL; 826 827 size_t align = (size_t) 1 << (kind >> rshift); 828 if (tgt_align < align) 829 tgt_align = align; 830 tgt_size = (tgt_size + align - 1) & ~(align - 1); 831 gomp_coalesce_buf_add (&cbuf, tgt_size, 832 cur_node.host_end - cur_node.host_start); 833 tgt_size += cur_node.host_end - cur_node.host_start; 834 has_firstprivate = true; 835 continue; 836 } 837 splay_tree_key n; 838 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) 839 { 840 n = gomp_map_0len_lookup (mem_map, &cur_node); 841 if (!n) 842 { 843 tgt->list[i].key = NULL; 844 tgt->list[i].offset = OFFSET_POINTER; 845 continue; 846 } 847 } 848 else 849 n = splay_tree_lookup (mem_map, &cur_node); 850 if (n && n->refcount != REFCOUNT_LINK) 851 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i], 852 kind & typemask, NULL); 853 else 854 { 855 tgt->list[i].key = NULL; 856 857 if ((kind & typemask) == GOMP_MAP_IF_PRESENT) 858 { 859 /* Not present, hence, skip entry - including its MAP_POINTER, 860 when existing. */ 861 tgt->list[i].offset = OFFSET_POINTER; 862 if (i + 1 < mapnum 863 && ((typemask & get_kind (short_mapkind, kinds, i + 1)) 864 == GOMP_MAP_POINTER)) 865 { 866 ++i; 867 tgt->list[i].key = NULL; 868 tgt->list[i].offset = 0; 869 } 870 continue; 871 } 872 size_t align = (size_t) 1 << (kind >> rshift); 873 not_found_cnt++; 874 if (tgt_align < align) 875 tgt_align = align; 876 tgt_size = (tgt_size + align - 1) & ~(align - 1); 877 if (gomp_to_device_kind_p (kind & typemask)) 878 gomp_coalesce_buf_add (&cbuf, tgt_size, 879 cur_node.host_end - cur_node.host_start); 880 tgt_size += cur_node.host_end - cur_node.host_start; 881 if ((kind & typemask) == GOMP_MAP_TO_PSET) 882 { 883 size_t j; 884 for (j = i + 1; j < mapnum; j++) 885 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j) 886 & typemask)) 887 break; 888 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start 889 || ((uintptr_t) hostaddrs[j] + sizeof (void *) 890 > cur_node.host_end)) 891 break; 892 else 893 { 894 tgt->list[j].key = NULL; 895 i++; 896 } 897 } 898 } 899 } 900 901 if (devaddrs) 902 { 903 if (mapnum != 1) 904 { 905 gomp_mutex_unlock (&devicep->lock); 906 gomp_fatal ("unexpected aggregation"); 907 } 908 tgt->to_free = devaddrs[0]; 909 tgt->tgt_start = (uintptr_t) tgt->to_free; 910 tgt->tgt_end = tgt->tgt_start + sizes[0]; 911 } 912 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET) 913 { 914 /* Allocate tgt_align aligned tgt_size block of memory. */ 915 /* FIXME: Perhaps change interface to allocate properly aligned 916 memory. */ 917 tgt->to_free = devicep->alloc_func (devicep->target_id, 918 tgt_size + tgt_align - 1); 919 if (!tgt->to_free) 920 { 921 gomp_mutex_unlock (&devicep->lock); 922 gomp_fatal ("device memory allocation fail"); 923 } 924 925 tgt->tgt_start = (uintptr_t) tgt->to_free; 926 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); 927 tgt->tgt_end = tgt->tgt_start + tgt_size; 928 929 if (cbuf.use_cnt == 1) 930 cbuf.chunk_cnt--; 931 if (cbuf.chunk_cnt > 0) 932 { 933 cbuf.buf 934 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start); 935 if (cbuf.buf) 936 { 937 cbuf.tgt = tgt; 938 cbufp = &cbuf; 939 } 940 } 941 } 942 else 943 { 944 tgt->to_free = NULL; 945 tgt->tgt_start = 0; 946 tgt->tgt_end = 0; 947 } 948 949 tgt_size = 0; 950 if (pragma_kind == GOMP_MAP_VARS_TARGET) 951 tgt_size = mapnum * sizeof (void *); 952 953 tgt->array = NULL; 954 if (not_found_cnt || has_firstprivate) 955 { 956 if (not_found_cnt) 957 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); 958 splay_tree_node array = tgt->array; 959 size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0; 960 uintptr_t field_tgt_base = 0; 961 962 for (i = 0; i < mapnum; i++) 963 if (tgt->list[i].key == NULL) 964 { 965 int kind = get_kind (short_mapkind, kinds, i); 966 if (hostaddrs[i] == NULL) 967 continue; 968 switch (kind & typemask) 969 { 970 size_t align, len, first, last; 971 splay_tree_key n; 972 case GOMP_MAP_FIRSTPRIVATE: 973 align = (size_t) 1 << (kind >> rshift); 974 tgt_size = (tgt_size + align - 1) & ~(align - 1); 975 tgt->list[i].offset = tgt_size; 976 len = sizes[i]; 977 gomp_copy_host2dev (devicep, aq, 978 (void *) (tgt->tgt_start + tgt_size), 979 (void *) hostaddrs[i], len, cbufp); 980 tgt_size += len; 981 continue; 982 case GOMP_MAP_FIRSTPRIVATE_INT: 983 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: 984 continue; 985 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT: 986 /* The OpenACC 'host_data' construct only allows 'use_device' 987 "mapping" clauses, so in the first loop, 'not_found_cnt' 988 must always have been zero, so all OpenACC 'use_device' 989 clauses have already been handled. (We can only easily test 990 'use_device' with 'if_present' clause here.) */ 991 assert (tgt->list[i].offset == OFFSET_INLINED); 992 /* Nevertheless, FALLTHRU to the normal handling, to keep the 993 code conceptually simple, similar to the first loop. */ 994 case GOMP_MAP_USE_DEVICE_PTR: 995 if (tgt->list[i].offset == 0) 996 { 997 cur_node.host_start = (uintptr_t) hostaddrs[i]; 998 cur_node.host_end = cur_node.host_start; 999 n = gomp_map_lookup (mem_map, &cur_node); 1000 if (n != NULL) 1001 { 1002 cur_node.host_start -= n->host_start; 1003 hostaddrs[i] 1004 = (void *) (n->tgt->tgt_start + n->tgt_offset 1005 + cur_node.host_start); 1006 } 1007 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) 1008 { 1009 gomp_mutex_unlock (&devicep->lock); 1010 gomp_fatal ("use_device_ptr pointer wasn't mapped"); 1011 } 1012 else if ((kind & typemask) 1013 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) 1014 /* If not present, continue using the host address. */ 1015 ; 1016 else 1017 __builtin_unreachable (); 1018 tgt->list[i].offset = OFFSET_INLINED; 1019 } 1020 continue; 1021 case GOMP_MAP_STRUCT: 1022 first = i + 1; 1023 last = i + sizes[i]; 1024 cur_node.host_start = (uintptr_t) hostaddrs[i]; 1025 cur_node.host_end = (uintptr_t) hostaddrs[last] 1026 + sizes[last]; 1027 if (tgt->list[first].key != NULL) 1028 continue; 1029 n = splay_tree_lookup (mem_map, &cur_node); 1030 if (n == NULL) 1031 { 1032 size_t align = (size_t) 1 << (kind >> rshift); 1033 tgt_size -= (uintptr_t) hostaddrs[first] 1034 - (uintptr_t) hostaddrs[i]; 1035 tgt_size = (tgt_size + align - 1) & ~(align - 1); 1036 tgt_size += (uintptr_t) hostaddrs[first] 1037 - (uintptr_t) hostaddrs[i]; 1038 field_tgt_base = (uintptr_t) hostaddrs[first]; 1039 field_tgt_offset = tgt_size; 1040 field_tgt_clear = last; 1041 tgt_size += cur_node.host_end 1042 - (uintptr_t) hostaddrs[first]; 1043 continue; 1044 } 1045 for (i = first; i <= last; i++) 1046 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, 1047 sizes, kinds, cbufp); 1048 i--; 1049 continue; 1050 case GOMP_MAP_ALWAYS_POINTER: 1051 cur_node.host_start = (uintptr_t) hostaddrs[i]; 1052 cur_node.host_end = cur_node.host_start + sizeof (void *); 1053 n = splay_tree_lookup (mem_map, &cur_node); 1054 if (n == NULL 1055 || n->host_start > cur_node.host_start 1056 || n->host_end < cur_node.host_end) 1057 { 1058 gomp_mutex_unlock (&devicep->lock); 1059 gomp_fatal ("always pointer not mapped"); 1060 } 1061 if ((get_kind (short_mapkind, kinds, i - 1) & typemask) 1062 != GOMP_MAP_ALWAYS_POINTER) 1063 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1); 1064 if (cur_node.tgt_offset) 1065 cur_node.tgt_offset -= sizes[i]; 1066 gomp_copy_host2dev (devicep, aq, 1067 (void *) (n->tgt->tgt_start 1068 + n->tgt_offset 1069 + cur_node.host_start 1070 - n->host_start), 1071 (void *) &cur_node.tgt_offset, 1072 sizeof (void *), cbufp); 1073 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset 1074 + cur_node.host_start - n->host_start; 1075 continue; 1076 case GOMP_MAP_IF_PRESENT: 1077 /* Not present - otherwise handled above. Skip over its 1078 MAP_POINTER as well. */ 1079 if (i + 1 < mapnum 1080 && ((typemask & get_kind (short_mapkind, kinds, i + 1)) 1081 == GOMP_MAP_POINTER)) 1082 ++i; 1083 continue; 1084 case GOMP_MAP_ATTACH: 1085 { 1086 cur_node.host_start = (uintptr_t) hostaddrs[i]; 1087 cur_node.host_end = cur_node.host_start + sizeof (void *); 1088 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); 1089 if (n != NULL) 1090 { 1091 tgt->list[i].key = n; 1092 tgt->list[i].offset = cur_node.host_start - n->host_start; 1093 tgt->list[i].length = n->host_end - n->host_start; 1094 tgt->list[i].copy_from = false; 1095 tgt->list[i].always_copy_from = false; 1096 tgt->list[i].is_attach = true; 1097 /* OpenACC 'attach'/'detach' doesn't affect 1098 structured/dynamic reference counts ('n->refcount', 1099 'n->dynamic_refcount'). */ 1100 } 1101 else 1102 { 1103 gomp_mutex_unlock (&devicep->lock); 1104 gomp_fatal ("outer struct not mapped for attach"); 1105 } 1106 gomp_attach_pointer (devicep, aq, mem_map, n, 1107 (uintptr_t) hostaddrs[i], sizes[i], 1108 cbufp); 1109 continue; 1110 } 1111 default: 1112 break; 1113 } 1114 splay_tree_key k = &array->key; 1115 k->host_start = (uintptr_t) hostaddrs[i]; 1116 if (!GOMP_MAP_POINTER_P (kind & typemask)) 1117 k->host_end = k->host_start + sizes[i]; 1118 else 1119 k->host_end = k->host_start + sizeof (void *); 1120 splay_tree_key n = splay_tree_lookup (mem_map, k); 1121 if (n && n->refcount != REFCOUNT_LINK) 1122 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i], 1123 kind & typemask, cbufp); 1124 else 1125 { 1126 k->aux = NULL; 1127 if (n && n->refcount == REFCOUNT_LINK) 1128 { 1129 /* Replace target address of the pointer with target address 1130 of mapped object in the splay tree. */ 1131 splay_tree_remove (mem_map, n); 1132 k->aux 1133 = gomp_malloc_cleared (sizeof (struct splay_tree_aux)); 1134 k->aux->link_key = n; 1135 } 1136 size_t align = (size_t) 1 << (kind >> rshift); 1137 tgt->list[i].key = k; 1138 k->tgt = tgt; 1139 if (field_tgt_clear != FIELD_TGT_EMPTY) 1140 { 1141 k->tgt_offset = k->host_start - field_tgt_base 1142 + field_tgt_offset; 1143 if (i == field_tgt_clear) 1144 field_tgt_clear = FIELD_TGT_EMPTY; 1145 } 1146 else 1147 { 1148 tgt_size = (tgt_size + align - 1) & ~(align - 1); 1149 k->tgt_offset = tgt_size; 1150 tgt_size += k->host_end - k->host_start; 1151 } 1152 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); 1153 tgt->list[i].always_copy_from 1154 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); 1155 tgt->list[i].is_attach = false; 1156 tgt->list[i].offset = 0; 1157 tgt->list[i].length = k->host_end - k->host_start; 1158 k->refcount = 1; 1159 k->dynamic_refcount = 0; 1160 tgt->refcount++; 1161 array->left = NULL; 1162 array->right = NULL; 1163 splay_tree_insert (mem_map, array); 1164 switch (kind & typemask) 1165 { 1166 case GOMP_MAP_ALLOC: 1167 case GOMP_MAP_FROM: 1168 case GOMP_MAP_FORCE_ALLOC: 1169 case GOMP_MAP_FORCE_FROM: 1170 case GOMP_MAP_ALWAYS_FROM: 1171 break; 1172 case GOMP_MAP_TO: 1173 case GOMP_MAP_TOFROM: 1174 case GOMP_MAP_FORCE_TO: 1175 case GOMP_MAP_FORCE_TOFROM: 1176 case GOMP_MAP_ALWAYS_TO: 1177 case GOMP_MAP_ALWAYS_TOFROM: 1178 gomp_copy_host2dev (devicep, aq, 1179 (void *) (tgt->tgt_start 1180 + k->tgt_offset), 1181 (void *) k->host_start, 1182 k->host_end - k->host_start, cbufp); 1183 break; 1184 case GOMP_MAP_POINTER: 1185 gomp_map_pointer (tgt, aq, 1186 (uintptr_t) *(void **) k->host_start, 1187 k->tgt_offset, sizes[i], cbufp); 1188 break; 1189 case GOMP_MAP_TO_PSET: 1190 gomp_copy_host2dev (devicep, aq, 1191 (void *) (tgt->tgt_start 1192 + k->tgt_offset), 1193 (void *) k->host_start, 1194 k->host_end - k->host_start, cbufp); 1195 1196 for (j = i + 1; j < mapnum; j++) 1197 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, 1198 j) 1199 & typemask)) 1200 break; 1201 else if ((uintptr_t) hostaddrs[j] < k->host_start 1202 || ((uintptr_t) hostaddrs[j] + sizeof (void *) 1203 > k->host_end)) 1204 break; 1205 else 1206 { 1207 tgt->list[j].key = k; 1208 tgt->list[j].copy_from = false; 1209 tgt->list[j].always_copy_from = false; 1210 tgt->list[j].is_attach = false; 1211 if (k->refcount != REFCOUNT_INFINITY) 1212 k->refcount++; 1213 gomp_map_pointer (tgt, aq, 1214 (uintptr_t) *(void **) hostaddrs[j], 1215 k->tgt_offset 1216 + ((uintptr_t) hostaddrs[j] 1217 - k->host_start), 1218 sizes[j], cbufp); 1219 i++; 1220 } 1221 break; 1222 case GOMP_MAP_FORCE_PRESENT: 1223 { 1224 /* We already looked up the memory region above and it 1225 was missing. */ 1226 size_t size = k->host_end - k->host_start; 1227 gomp_mutex_unlock (&devicep->lock); 1228#ifdef HAVE_INTTYPES_H 1229 gomp_fatal ("present clause: !acc_is_present (%p, " 1230 "%"PRIu64" (0x%"PRIx64"))", 1231 (void *) k->host_start, 1232 (uint64_t) size, (uint64_t) size); 1233#else 1234 gomp_fatal ("present clause: !acc_is_present (%p, " 1235 "%lu (0x%lx))", (void *) k->host_start, 1236 (unsigned long) size, (unsigned long) size); 1237#endif 1238 } 1239 break; 1240 case GOMP_MAP_FORCE_DEVICEPTR: 1241 assert (k->host_end - k->host_start == sizeof (void *)); 1242 gomp_copy_host2dev (devicep, aq, 1243 (void *) (tgt->tgt_start 1244 + k->tgt_offset), 1245 (void *) k->host_start, 1246 sizeof (void *), cbufp); 1247 break; 1248 default: 1249 gomp_mutex_unlock (&devicep->lock); 1250 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__, 1251 kind); 1252 } 1253 1254 if (k->aux && k->aux->link_key) 1255 { 1256 /* Set link pointer on target to the device address of the 1257 mapped object. */ 1258 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset); 1259 /* We intentionally do not use coalescing here, as it's not 1260 data allocated by the current call to this function. */ 1261 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset, 1262 &tgt_addr, sizeof (void *), NULL); 1263 } 1264 array++; 1265 } 1266 } 1267 } 1268 1269 if (pragma_kind == GOMP_MAP_VARS_TARGET) 1270 { 1271 for (i = 0; i < mapnum; i++) 1272 { 1273 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i); 1274 gomp_copy_host2dev (devicep, aq, 1275 (void *) (tgt->tgt_start + i * sizeof (void *)), 1276 (void *) &cur_node.tgt_offset, sizeof (void *), 1277 cbufp); 1278 } 1279 } 1280 1281 if (cbufp) 1282 { 1283 long c = 0; 1284 for (c = 0; c < cbuf.chunk_cnt; ++c) 1285 gomp_copy_host2dev (devicep, aq, 1286 (void *) (tgt->tgt_start + cbuf.chunks[c].start), 1287 (char *) cbuf.buf + (cbuf.chunks[c].start 1288 - cbuf.chunks[0].start), 1289 cbuf.chunks[c].end - cbuf.chunks[c].start, NULL); 1290 free (cbuf.buf); 1291 cbuf.buf = NULL; 1292 cbufp = NULL; 1293 } 1294 1295 /* If the variable from "omp target enter data" map-list was already mapped, 1296 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or 1297 gomp_exit_data. */ 1298 if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) 1299 { 1300 free (tgt); 1301 tgt = NULL; 1302 } 1303 1304 gomp_mutex_unlock (&devicep->lock); 1305 return tgt; 1306} 1307 1308attribute_hidden struct target_mem_desc * 1309gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, 1310 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, 1311 bool short_mapkind, enum gomp_map_vars_kind pragma_kind) 1312{ 1313 return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs, 1314 sizes, kinds, short_mapkind, pragma_kind); 1315} 1316 1317attribute_hidden struct target_mem_desc * 1318gomp_map_vars_async (struct gomp_device_descr *devicep, 1319 struct goacc_asyncqueue *aq, size_t mapnum, 1320 void **hostaddrs, void **devaddrs, size_t *sizes, 1321 void *kinds, bool short_mapkind, 1322 enum gomp_map_vars_kind pragma_kind) 1323{ 1324 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs, 1325 sizes, kinds, short_mapkind, pragma_kind); 1326} 1327 1328static void 1329gomp_unmap_tgt (struct target_mem_desc *tgt) 1330{ 1331 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */ 1332 if (tgt->tgt_end) 1333 gomp_free_device_memory (tgt->device_descr, tgt->to_free); 1334 1335 free (tgt->array); 1336 free (tgt); 1337} 1338 1339static bool 1340gomp_unref_tgt (void *ptr) 1341{ 1342 bool is_tgt_unmapped = false; 1343 1344 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr; 1345 1346 if (tgt->refcount > 1) 1347 tgt->refcount--; 1348 else 1349 { 1350 gomp_unmap_tgt (tgt); 1351 is_tgt_unmapped = true; 1352 } 1353 1354 return is_tgt_unmapped; 1355} 1356 1357static void 1358gomp_unref_tgt_void (void *ptr) 1359{ 1360 (void) gomp_unref_tgt (ptr); 1361} 1362 1363static inline __attribute__((always_inline)) bool 1364gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k, 1365 struct goacc_asyncqueue *aq) 1366{ 1367 bool is_tgt_unmapped = false; 1368 splay_tree_remove (&devicep->mem_map, k); 1369 if (k->aux) 1370 { 1371 if (k->aux->link_key) 1372 splay_tree_insert (&devicep->mem_map, 1373 (splay_tree_node) k->aux->link_key); 1374 if (k->aux->attach_count) 1375 free (k->aux->attach_count); 1376 free (k->aux); 1377 k->aux = NULL; 1378 } 1379 if (aq) 1380 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void, 1381 (void *) k->tgt); 1382 else 1383 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt); 1384 return is_tgt_unmapped; 1385} 1386 1387attribute_hidden bool 1388gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k) 1389{ 1390 return gomp_remove_var_internal (devicep, k, NULL); 1391} 1392 1393/* Remove a variable asynchronously. This actually removes the variable 1394 mapping immediately, but retains the linked target_mem_desc until the 1395 asynchronous operation has completed (as it may still refer to target 1396 memory). The device lock must be held before entry, and remains locked on 1397 exit. */ 1398 1399attribute_hidden void 1400gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k, 1401 struct goacc_asyncqueue *aq) 1402{ 1403 (void) gomp_remove_var_internal (devicep, k, aq); 1404} 1405 1406/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant 1407 variables back from device to host: if it is false, it is assumed that this 1408 has been done already. */ 1409 1410static inline __attribute__((always_inline)) void 1411gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom, 1412 struct goacc_asyncqueue *aq) 1413{ 1414 struct gomp_device_descr *devicep = tgt->device_descr; 1415 1416 if (tgt->list_count == 0) 1417 { 1418 free (tgt); 1419 return; 1420 } 1421 1422 gomp_mutex_lock (&devicep->lock); 1423 if (devicep->state == GOMP_DEVICE_FINALIZED) 1424 { 1425 gomp_mutex_unlock (&devicep->lock); 1426 free (tgt->array); 1427 free (tgt); 1428 return; 1429 } 1430 1431 size_t i; 1432 1433 /* We must perform detachments before any copies back to the host. */ 1434 for (i = 0; i < tgt->list_count; i++) 1435 { 1436 splay_tree_key k = tgt->list[i].key; 1437 1438 if (k != NULL && tgt->list[i].is_attach) 1439 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start 1440 + tgt->list[i].offset, 1441 false, NULL); 1442 } 1443 1444 for (i = 0; i < tgt->list_count; i++) 1445 { 1446 splay_tree_key k = tgt->list[i].key; 1447 if (k == NULL) 1448 continue; 1449 1450 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference 1451 counts ('n->refcount', 'n->dynamic_refcount'). */ 1452 if (tgt->list[i].is_attach) 1453 continue; 1454 1455 bool do_unmap = false; 1456 if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) 1457 k->refcount--; 1458 else if (k->refcount == 1) 1459 { 1460 k->refcount--; 1461 do_unmap = true; 1462 } 1463 1464 if ((do_unmap && do_copyfrom && tgt->list[i].copy_from) 1465 || tgt->list[i].always_copy_from) 1466 gomp_copy_dev2host (devicep, aq, 1467 (void *) (k->host_start + tgt->list[i].offset), 1468 (void *) (k->tgt->tgt_start + k->tgt_offset 1469 + tgt->list[i].offset), 1470 tgt->list[i].length); 1471 if (do_unmap) 1472 { 1473 struct target_mem_desc *k_tgt = k->tgt; 1474 bool is_tgt_unmapped = gomp_remove_var (devicep, k); 1475 /* It would be bad if TGT got unmapped while we're still iterating 1476 over its LIST_COUNT, and also expect to use it in the following 1477 code. */ 1478 assert (!is_tgt_unmapped 1479 || k_tgt != tgt); 1480 } 1481 } 1482 1483 if (aq) 1484 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void, 1485 (void *) tgt); 1486 else 1487 gomp_unref_tgt ((void *) tgt); 1488 1489 gomp_mutex_unlock (&devicep->lock); 1490} 1491 1492attribute_hidden void 1493gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) 1494{ 1495 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL); 1496} 1497 1498attribute_hidden void 1499gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom, 1500 struct goacc_asyncqueue *aq) 1501{ 1502 gomp_unmap_vars_internal (tgt, do_copyfrom, aq); 1503} 1504 1505static void 1506gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, 1507 size_t *sizes, void *kinds, bool short_mapkind) 1508{ 1509 size_t i; 1510 struct splay_tree_key_s cur_node; 1511 const int typemask = short_mapkind ? 0xff : 0x7; 1512 1513 if (!devicep) 1514 return; 1515 1516 if (mapnum == 0) 1517 return; 1518 1519 gomp_mutex_lock (&devicep->lock); 1520 if (devicep->state == GOMP_DEVICE_FINALIZED) 1521 { 1522 gomp_mutex_unlock (&devicep->lock); 1523 return; 1524 } 1525 1526 for (i = 0; i < mapnum; i++) 1527 if (sizes[i]) 1528 { 1529 cur_node.host_start = (uintptr_t) hostaddrs[i]; 1530 cur_node.host_end = cur_node.host_start + sizes[i]; 1531 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node); 1532 if (n) 1533 { 1534 int kind = get_kind (short_mapkind, kinds, i); 1535 if (n->host_start > cur_node.host_start 1536 || n->host_end < cur_node.host_end) 1537 { 1538 gomp_mutex_unlock (&devicep->lock); 1539 gomp_fatal ("Trying to update [%p..%p) object when " 1540 "only [%p..%p) is mapped", 1541 (void *) cur_node.host_start, 1542 (void *) cur_node.host_end, 1543 (void *) n->host_start, 1544 (void *) n->host_end); 1545 } 1546 1547 1548 void *hostaddr = (void *) cur_node.host_start; 1549 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset 1550 + cur_node.host_start - n->host_start); 1551 size_t size = cur_node.host_end - cur_node.host_start; 1552 1553 if (GOMP_MAP_COPY_TO_P (kind & typemask)) 1554 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size, 1555 NULL); 1556 if (GOMP_MAP_COPY_FROM_P (kind & typemask)) 1557 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size); 1558 } 1559 } 1560 gomp_mutex_unlock (&devicep->lock); 1561} 1562 1563/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP. 1564 And insert to splay tree the mapping between addresses from HOST_TABLE and 1565 from loaded target image. We rely in the host and device compiler 1566 emitting variable and functions in the same order. */ 1567 1568static void 1569gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, 1570 const void *host_table, const void *target_data, 1571 bool is_register_lock) 1572{ 1573 void **host_func_table = ((void ***) host_table)[0]; 1574 void **host_funcs_end = ((void ***) host_table)[1]; 1575 void **host_var_table = ((void ***) host_table)[2]; 1576 void **host_vars_end = ((void ***) host_table)[3]; 1577 1578 /* The func table contains only addresses, the var table contains addresses 1579 and corresponding sizes. */ 1580 int num_funcs = host_funcs_end - host_func_table; 1581 int num_vars = (host_vars_end - host_var_table) / 2; 1582 1583 /* Load image to device and get target addresses for the image. */ 1584 struct addr_pair *target_table = NULL; 1585 int i, num_target_entries; 1586 1587 num_target_entries 1588 = devicep->load_image_func (devicep->target_id, version, 1589 target_data, &target_table); 1590 1591 if (num_target_entries != num_funcs + num_vars) 1592 { 1593 gomp_mutex_unlock (&devicep->lock); 1594 if (is_register_lock) 1595 gomp_mutex_unlock (®ister_lock); 1596 gomp_fatal ("Cannot map target functions or variables" 1597 " (expected %u, have %u)", num_funcs + num_vars, 1598 num_target_entries); 1599 } 1600 1601 /* Insert host-target address mapping into splay tree. */ 1602 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); 1603 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array)); 1604 tgt->refcount = REFCOUNT_INFINITY; 1605 tgt->tgt_start = 0; 1606 tgt->tgt_end = 0; 1607 tgt->to_free = NULL; 1608 tgt->prev = NULL; 1609 tgt->list_count = 0; 1610 tgt->device_descr = devicep; 1611 splay_tree_node array = tgt->array; 1612 1613 for (i = 0; i < num_funcs; i++) 1614 { 1615 splay_tree_key k = &array->key; 1616 k->host_start = (uintptr_t) host_func_table[i]; 1617 k->host_end = k->host_start + 1; 1618 k->tgt = tgt; 1619 k->tgt_offset = target_table[i].start; 1620 k->refcount = REFCOUNT_INFINITY; 1621 k->dynamic_refcount = 0; 1622 k->aux = NULL; 1623 array->left = NULL; 1624 array->right = NULL; 1625 splay_tree_insert (&devicep->mem_map, array); 1626 array++; 1627 } 1628 1629 /* Most significant bit of the size in host and target tables marks 1630 "omp declare target link" variables. */ 1631 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1); 1632 const uintptr_t size_mask = ~link_bit; 1633 1634 for (i = 0; i < num_vars; i++) 1635 { 1636 struct addr_pair *target_var = &target_table[num_funcs + i]; 1637 uintptr_t target_size = target_var->end - target_var->start; 1638 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1]; 1639 1640 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size) 1641 { 1642 gomp_mutex_unlock (&devicep->lock); 1643 if (is_register_lock) 1644 gomp_mutex_unlock (®ister_lock); 1645 gomp_fatal ("Cannot map target variables (size mismatch)"); 1646 } 1647 1648 splay_tree_key k = &array->key; 1649 k->host_start = (uintptr_t) host_var_table[i * 2]; 1650 k->host_end 1651 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]); 1652 k->tgt = tgt; 1653 k->tgt_offset = target_var->start; 1654 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY; 1655 k->dynamic_refcount = 0; 1656 k->aux = NULL; 1657 array->left = NULL; 1658 array->right = NULL; 1659 splay_tree_insert (&devicep->mem_map, array); 1660 array++; 1661 } 1662 1663 free (target_table); 1664} 1665 1666/* Unload the mappings described by target_data from device DEVICE_P. 1667 The device must be locked. */ 1668 1669static void 1670gomp_unload_image_from_device (struct gomp_device_descr *devicep, 1671 unsigned version, 1672 const void *host_table, const void *target_data) 1673{ 1674 void **host_func_table = ((void ***) host_table)[0]; 1675 void **host_funcs_end = ((void ***) host_table)[1]; 1676 void **host_var_table = ((void ***) host_table)[2]; 1677 void **host_vars_end = ((void ***) host_table)[3]; 1678 1679 /* The func table contains only addresses, the var table contains addresses 1680 and corresponding sizes. */ 1681 int num_funcs = host_funcs_end - host_func_table; 1682 int num_vars = (host_vars_end - host_var_table) / 2; 1683 1684 struct splay_tree_key_s k; 1685 splay_tree_key node = NULL; 1686 1687 /* Find mapping at start of node array */ 1688 if (num_funcs || num_vars) 1689 { 1690 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0] 1691 : (uintptr_t) host_var_table[0]); 1692 k.host_end = k.host_start + 1; 1693 node = splay_tree_lookup (&devicep->mem_map, &k); 1694 } 1695 1696 if (!devicep->unload_image_func (devicep->target_id, version, target_data)) 1697 { 1698 gomp_mutex_unlock (&devicep->lock); 1699 gomp_fatal ("image unload fail"); 1700 } 1701 1702 /* Remove mappings from splay tree. */ 1703 int i; 1704 for (i = 0; i < num_funcs; i++) 1705 { 1706 k.host_start = (uintptr_t) host_func_table[i]; 1707 k.host_end = k.host_start + 1; 1708 splay_tree_remove (&devicep->mem_map, &k); 1709 } 1710 1711 /* Most significant bit of the size in host and target tables marks 1712 "omp declare target link" variables. */ 1713 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1); 1714 const uintptr_t size_mask = ~link_bit; 1715 bool is_tgt_unmapped = false; 1716 1717 for (i = 0; i < num_vars; i++) 1718 { 1719 k.host_start = (uintptr_t) host_var_table[i * 2]; 1720 k.host_end 1721 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]); 1722 1723 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1])) 1724 splay_tree_remove (&devicep->mem_map, &k); 1725 else 1726 { 1727 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k); 1728 is_tgt_unmapped = gomp_remove_var (devicep, n); 1729 } 1730 } 1731 1732 if (node && !is_tgt_unmapped) 1733 { 1734 free (node->tgt); 1735 free (node); 1736 } 1737} 1738 1739/* This function should be called from every offload image while loading. 1740 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of 1741 the target, and TARGET_DATA needed by target plugin. */ 1742 1743void 1744GOMP_offload_register_ver (unsigned version, const void *host_table, 1745 int target_type, const void *target_data) 1746{ 1747 int i; 1748 1749 if (GOMP_VERSION_LIB (version) > GOMP_VERSION) 1750 gomp_fatal ("Library too old for offload (version %u < %u)", 1751 GOMP_VERSION, GOMP_VERSION_LIB (version)); 1752 1753 gomp_mutex_lock (®ister_lock); 1754 1755 /* Load image to all initialized devices. */ 1756 for (i = 0; i < num_devices; i++) 1757 { 1758 struct gomp_device_descr *devicep = &devices[i]; 1759 gomp_mutex_lock (&devicep->lock); 1760 if (devicep->type == target_type 1761 && devicep->state == GOMP_DEVICE_INITIALIZED) 1762 gomp_load_image_to_device (devicep, version, 1763 host_table, target_data, true); 1764 gomp_mutex_unlock (&devicep->lock); 1765 } 1766 1767 /* Insert image to array of pending images. */ 1768 offload_images 1769 = gomp_realloc_unlock (offload_images, 1770 (num_offload_images + 1) 1771 * sizeof (struct offload_image_descr)); 1772 offload_images[num_offload_images].version = version; 1773 offload_images[num_offload_images].type = target_type; 1774 offload_images[num_offload_images].host_table = host_table; 1775 offload_images[num_offload_images].target_data = target_data; 1776 1777 num_offload_images++; 1778 gomp_mutex_unlock (®ister_lock); 1779} 1780 1781void 1782GOMP_offload_register (const void *host_table, int target_type, 1783 const void *target_data) 1784{ 1785 GOMP_offload_register_ver (0, host_table, target_type, target_data); 1786} 1787 1788/* This function should be called from every offload image while unloading. 1789 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of 1790 the target, and TARGET_DATA needed by target plugin. */ 1791 1792void 1793GOMP_offload_unregister_ver (unsigned version, const void *host_table, 1794 int target_type, const void *target_data) 1795{ 1796 int i; 1797 1798 gomp_mutex_lock (®ister_lock); 1799 1800 /* Unload image from all initialized devices. */ 1801 for (i = 0; i < num_devices; i++) 1802 { 1803 struct gomp_device_descr *devicep = &devices[i]; 1804 gomp_mutex_lock (&devicep->lock); 1805 if (devicep->type == target_type 1806 && devicep->state == GOMP_DEVICE_INITIALIZED) 1807 gomp_unload_image_from_device (devicep, version, 1808 host_table, target_data); 1809 gomp_mutex_unlock (&devicep->lock); 1810 } 1811 1812 /* Remove image from array of pending images. */ 1813 for (i = 0; i < num_offload_images; i++) 1814 if (offload_images[i].target_data == target_data) 1815 { 1816 offload_images[i] = offload_images[--num_offload_images]; 1817 break; 1818 } 1819 1820 gomp_mutex_unlock (®ister_lock); 1821} 1822 1823void 1824GOMP_offload_unregister (const void *host_table, int target_type, 1825 const void *target_data) 1826{ 1827 GOMP_offload_unregister_ver (0, host_table, target_type, target_data); 1828} 1829 1830/* This function initializes the target device, specified by DEVICEP. DEVICEP 1831 must be locked on entry, and remains locked on return. */ 1832 1833attribute_hidden void 1834gomp_init_device (struct gomp_device_descr *devicep) 1835{ 1836 int i; 1837 if (!devicep->init_device_func (devicep->target_id)) 1838 { 1839 gomp_mutex_unlock (&devicep->lock); 1840 gomp_fatal ("device initialization failed"); 1841 } 1842 1843 /* Load to device all images registered by the moment. */ 1844 for (i = 0; i < num_offload_images; i++) 1845 { 1846 struct offload_image_descr *image = &offload_images[i]; 1847 if (image->type == devicep->type) 1848 gomp_load_image_to_device (devicep, image->version, 1849 image->host_table, image->target_data, 1850 false); 1851 } 1852 1853 /* Initialize OpenACC asynchronous queues. */ 1854 goacc_init_asyncqueues (devicep); 1855 1856 devicep->state = GOMP_DEVICE_INITIALIZED; 1857} 1858 1859/* This function finalizes the target device, specified by DEVICEP. DEVICEP 1860 must be locked on entry, and remains locked on return. */ 1861 1862attribute_hidden bool 1863gomp_fini_device (struct gomp_device_descr *devicep) 1864{ 1865 bool ret = goacc_fini_asyncqueues (devicep); 1866 ret &= devicep->fini_device_func (devicep->target_id); 1867 devicep->state = GOMP_DEVICE_FINALIZED; 1868 return ret; 1869} 1870 1871attribute_hidden void 1872gomp_unload_device (struct gomp_device_descr *devicep) 1873{ 1874 if (devicep->state == GOMP_DEVICE_INITIALIZED) 1875 { 1876 unsigned i; 1877 1878 /* Unload from device all images registered at the moment. */ 1879 for (i = 0; i < num_offload_images; i++) 1880 { 1881 struct offload_image_descr *image = &offload_images[i]; 1882 if (image->type == devicep->type) 1883 gomp_unload_image_from_device (devicep, image->version, 1884 image->host_table, 1885 image->target_data); 1886 } 1887 } 1888} 1889 1890/* Host fallback for GOMP_target{,_ext} routines. */ 1891 1892static void 1893gomp_target_fallback (void (*fn) (void *), void **hostaddrs) 1894{ 1895 struct gomp_thread old_thr, *thr = gomp_thread (); 1896 old_thr = *thr; 1897 memset (thr, '\0', sizeof (*thr)); 1898 if (gomp_places_list) 1899 { 1900 thr->place = old_thr.place; 1901 thr->ts.place_partition_len = gomp_places_list_len; 1902 } 1903 fn (hostaddrs); 1904 gomp_free_thread (thr); 1905 *thr = old_thr; 1906} 1907 1908/* Calculate alignment and size requirements of a private copy of data shared 1909 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */ 1910 1911static inline void 1912calculate_firstprivate_requirements (size_t mapnum, size_t *sizes, 1913 unsigned short *kinds, size_t *tgt_align, 1914 size_t *tgt_size) 1915{ 1916 size_t i; 1917 for (i = 0; i < mapnum; i++) 1918 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) 1919 { 1920 size_t align = (size_t) 1 << (kinds[i] >> 8); 1921 if (*tgt_align < align) 1922 *tgt_align = align; 1923 *tgt_size = (*tgt_size + align - 1) & ~(align - 1); 1924 *tgt_size += sizes[i]; 1925 } 1926} 1927 1928/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */ 1929 1930static inline void 1931copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs, 1932 size_t *sizes, unsigned short *kinds, size_t tgt_align, 1933 size_t tgt_size) 1934{ 1935 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); 1936 if (al) 1937 tgt += tgt_align - al; 1938 tgt_size = 0; 1939 size_t i; 1940 for (i = 0; i < mapnum; i++) 1941 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) 1942 { 1943 size_t align = (size_t) 1 << (kinds[i] >> 8); 1944 tgt_size = (tgt_size + align - 1) & ~(align - 1); 1945 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); 1946 hostaddrs[i] = tgt + tgt_size; 1947 tgt_size = tgt_size + sizes[i]; 1948 } 1949} 1950 1951/* Helper function of GOMP_target{,_ext} routines. */ 1952 1953static void * 1954gomp_get_target_fn_addr (struct gomp_device_descr *devicep, 1955 void (*host_fn) (void *)) 1956{ 1957 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) 1958 return (void *) host_fn; 1959 else 1960 { 1961 gomp_mutex_lock (&devicep->lock); 1962 if (devicep->state == GOMP_DEVICE_FINALIZED) 1963 { 1964 gomp_mutex_unlock (&devicep->lock); 1965 return NULL; 1966 } 1967 1968 struct splay_tree_key_s k; 1969 k.host_start = (uintptr_t) host_fn; 1970 k.host_end = k.host_start + 1; 1971 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); 1972 gomp_mutex_unlock (&devicep->lock); 1973 if (tgt_fn == NULL) 1974 return NULL; 1975 1976 return (void *) tgt_fn->tgt_offset; 1977 } 1978} 1979 1980/* Called when encountering a target directive. If DEVICE 1981 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is 1982 GOMP_DEVICE_HOST_FALLBACK (or any value 1983 larger than last available hw device), use host fallback. 1984 FN is address of host code, UNUSED is part of the current ABI, but 1985 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays 1986 with MAPNUM entries, with addresses of the host objects, 1987 sizes of the host objects (resp. for pointer kind pointer bias 1988 and assumed sizeof (void *) size) and kinds. */ 1989 1990void 1991GOMP_target (int device, void (*fn) (void *), const void *unused, 1992 size_t mapnum, void **hostaddrs, size_t *sizes, 1993 unsigned char *kinds) 1994{ 1995 struct gomp_device_descr *devicep = resolve_device (device); 1996 1997 void *fn_addr; 1998 if (devicep == NULL 1999 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2000 /* All shared memory devices should use the GOMP_target_ext function. */ 2001 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM 2002 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) 2003 return gomp_target_fallback (fn, hostaddrs); 2004 2005 struct target_mem_desc *tgt_vars 2006 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, 2007 GOMP_MAP_VARS_TARGET); 2008 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start, 2009 NULL); 2010 gomp_unmap_vars (tgt_vars, true); 2011} 2012 2013static inline unsigned int 2014clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags) 2015{ 2016 /* If we cannot run asynchronously, simply ignore nowait. */ 2017 if (devicep != NULL && devicep->async_run_func == NULL) 2018 flags &= ~GOMP_TARGET_FLAG_NOWAIT; 2019 2020 return flags; 2021} 2022 2023/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, 2024 and several arguments have been added: 2025 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. 2026 DEPEND is array of dependencies, see GOMP_task for details. 2027 2028 ARGS is a pointer to an array consisting of a variable number of both 2029 device-independent and device-specific arguments, which can take one two 2030 elements where the first specifies for which device it is intended, the type 2031 and optionally also the value. If the value is not present in the first 2032 one, the whole second element the actual value. The last element of the 2033 array is a single NULL. Among the device independent can be for example 2034 NUM_TEAMS and THREAD_LIMIT. 2035 2036 NUM_TEAMS is positive if GOMP_teams will be called in the body with 2037 that value, or 1 if teams construct is not present, or 0, if 2038 teams construct does not have num_teams clause and so the choice is 2039 implementation defined, and -1 if it can't be determined on the host 2040 what value will GOMP_teams have on the device. 2041 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the 2042 body with that value, or 0, if teams construct does not have thread_limit 2043 clause or the teams construct is not present, or -1 if it can't be 2044 determined on the host what value will GOMP_teams have on the device. */ 2045 2046void 2047GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, 2048 void **hostaddrs, size_t *sizes, unsigned short *kinds, 2049 unsigned int flags, void **depend, void **args) 2050{ 2051 struct gomp_device_descr *devicep = resolve_device (device); 2052 size_t tgt_align = 0, tgt_size = 0; 2053 bool fpc_done = false; 2054 2055 flags = clear_unsupported_flags (devicep, flags); 2056 2057 if (flags & GOMP_TARGET_FLAG_NOWAIT) 2058 { 2059 struct gomp_thread *thr = gomp_thread (); 2060 /* Create a team if we don't have any around, as nowait 2061 target tasks make sense to run asynchronously even when 2062 outside of any parallel. */ 2063 if (__builtin_expect (thr->ts.team == NULL, 0)) 2064 { 2065 struct gomp_team *team = gomp_new_team (1); 2066 struct gomp_task *task = thr->task; 2067 struct gomp_task **implicit_task = &task; 2068 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv; 2069 team->prev_ts = thr->ts; 2070 thr->ts.team = team; 2071 thr->ts.team_id = 0; 2072 thr->ts.work_share = &team->work_shares[0]; 2073 thr->ts.last_work_share = NULL; 2074#ifdef HAVE_SYNC_BUILTINS 2075 thr->ts.single_count = 0; 2076#endif 2077 thr->ts.static_trip = 0; 2078 thr->task = &team->implicit_task[0]; 2079 gomp_init_task (thr->task, NULL, icv); 2080 while (*implicit_task 2081 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT) 2082 implicit_task = &(*implicit_task)->parent; 2083 if (*implicit_task) 2084 { 2085 thr->task = *implicit_task; 2086 gomp_end_task (); 2087 free (*implicit_task); 2088 thr->task = &team->implicit_task[0]; 2089 } 2090 else 2091 pthread_setspecific (gomp_thread_destructor, thr); 2092 if (implicit_task != &task) 2093 { 2094 *implicit_task = thr->task; 2095 thr->task = task; 2096 } 2097 } 2098 if (thr->ts.team 2099 && !thr->task->final_task) 2100 { 2101 gomp_create_target_task (devicep, fn, mapnum, hostaddrs, 2102 sizes, kinds, flags, depend, args, 2103 GOMP_TARGET_TASK_BEFORE_MAP); 2104 return; 2105 } 2106 } 2107 2108 /* If there are depend clauses, but nowait is not present 2109 (or we are in a final task), block the parent task until the 2110 dependencies are resolved and then just continue with the rest 2111 of the function as if it is a merged task. */ 2112 if (depend != NULL) 2113 { 2114 struct gomp_thread *thr = gomp_thread (); 2115 if (thr->task && thr->task->depend_hash) 2116 { 2117 /* If we might need to wait, copy firstprivate now. */ 2118 calculate_firstprivate_requirements (mapnum, sizes, kinds, 2119 &tgt_align, &tgt_size); 2120 if (tgt_align) 2121 { 2122 char *tgt = gomp_alloca (tgt_size + tgt_align - 1); 2123 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, 2124 tgt_align, tgt_size); 2125 } 2126 fpc_done = true; 2127 gomp_task_maybe_wait_for_dependencies (depend); 2128 } 2129 } 2130 2131 void *fn_addr; 2132 if (devicep == NULL 2133 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2134 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)) 2135 || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) 2136 { 2137 if (!fpc_done) 2138 { 2139 calculate_firstprivate_requirements (mapnum, sizes, kinds, 2140 &tgt_align, &tgt_size); 2141 if (tgt_align) 2142 { 2143 char *tgt = gomp_alloca (tgt_size + tgt_align - 1); 2144 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, 2145 tgt_align, tgt_size); 2146 } 2147 } 2148 gomp_target_fallback (fn, hostaddrs); 2149 return; 2150 } 2151 2152 struct target_mem_desc *tgt_vars; 2153 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2154 { 2155 if (!fpc_done) 2156 { 2157 calculate_firstprivate_requirements (mapnum, sizes, kinds, 2158 &tgt_align, &tgt_size); 2159 if (tgt_align) 2160 { 2161 char *tgt = gomp_alloca (tgt_size + tgt_align - 1); 2162 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, 2163 tgt_align, tgt_size); 2164 } 2165 } 2166 tgt_vars = NULL; 2167 } 2168 else 2169 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, 2170 true, GOMP_MAP_VARS_TARGET); 2171 devicep->run_func (devicep->target_id, fn_addr, 2172 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs, 2173 args); 2174 if (tgt_vars) 2175 gomp_unmap_vars (tgt_vars, true); 2176} 2177 2178/* Host fallback for GOMP_target_data{,_ext} routines. */ 2179 2180static void 2181gomp_target_data_fallback (void) 2182{ 2183 struct gomp_task_icv *icv = gomp_icv (false); 2184 if (icv->target_data) 2185 { 2186 /* Even when doing a host fallback, if there are any active 2187 #pragma omp target data constructs, need to remember the 2188 new #pragma omp target data, otherwise GOMP_target_end_data 2189 would get out of sync. */ 2190 struct target_mem_desc *tgt 2191 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, 2192 GOMP_MAP_VARS_DATA); 2193 tgt->prev = icv->target_data; 2194 icv->target_data = tgt; 2195 } 2196} 2197 2198void 2199GOMP_target_data (int device, const void *unused, size_t mapnum, 2200 void **hostaddrs, size_t *sizes, unsigned char *kinds) 2201{ 2202 struct gomp_device_descr *devicep = resolve_device (device); 2203 2204 if (devicep == NULL 2205 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2206 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)) 2207 return gomp_target_data_fallback (); 2208 2209 struct target_mem_desc *tgt 2210 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, 2211 GOMP_MAP_VARS_DATA); 2212 struct gomp_task_icv *icv = gomp_icv (true); 2213 tgt->prev = icv->target_data; 2214 icv->target_data = tgt; 2215} 2216 2217void 2218GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, 2219 size_t *sizes, unsigned short *kinds) 2220{ 2221 struct gomp_device_descr *devicep = resolve_device (device); 2222 2223 if (devicep == NULL 2224 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2225 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2226 return gomp_target_data_fallback (); 2227 2228 struct target_mem_desc *tgt 2229 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, 2230 GOMP_MAP_VARS_DATA); 2231 struct gomp_task_icv *icv = gomp_icv (true); 2232 tgt->prev = icv->target_data; 2233 icv->target_data = tgt; 2234} 2235 2236void 2237GOMP_target_end_data (void) 2238{ 2239 struct gomp_task_icv *icv = gomp_icv (false); 2240 if (icv->target_data) 2241 { 2242 struct target_mem_desc *tgt = icv->target_data; 2243 icv->target_data = tgt->prev; 2244 gomp_unmap_vars (tgt, true); 2245 } 2246} 2247 2248void 2249GOMP_target_update (int device, const void *unused, size_t mapnum, 2250 void **hostaddrs, size_t *sizes, unsigned char *kinds) 2251{ 2252 struct gomp_device_descr *devicep = resolve_device (device); 2253 2254 if (devicep == NULL 2255 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2256 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2257 return; 2258 2259 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); 2260} 2261 2262void 2263GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, 2264 size_t *sizes, unsigned short *kinds, 2265 unsigned int flags, void **depend) 2266{ 2267 struct gomp_device_descr *devicep = resolve_device (device); 2268 2269 /* If there are depend clauses, but nowait is not present, 2270 block the parent task until the dependencies are resolved 2271 and then just continue with the rest of the function as if it 2272 is a merged task. Until we are able to schedule task during 2273 variable mapping or unmapping, ignore nowait if depend clauses 2274 are not present. */ 2275 if (depend != NULL) 2276 { 2277 struct gomp_thread *thr = gomp_thread (); 2278 if (thr->task && thr->task->depend_hash) 2279 { 2280 if ((flags & GOMP_TARGET_FLAG_NOWAIT) 2281 && thr->ts.team 2282 && !thr->task->final_task) 2283 { 2284 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, 2285 mapnum, hostaddrs, sizes, kinds, 2286 flags | GOMP_TARGET_FLAG_UPDATE, 2287 depend, NULL, GOMP_TARGET_TASK_DATA)) 2288 return; 2289 } 2290 else 2291 { 2292 struct gomp_team *team = thr->ts.team; 2293 /* If parallel or taskgroup has been cancelled, don't start new 2294 tasks. */ 2295 if (__builtin_expect (gomp_cancel_var, 0) && team) 2296 { 2297 if (gomp_team_barrier_cancelled (&team->barrier)) 2298 return; 2299 if (thr->task->taskgroup) 2300 { 2301 if (thr->task->taskgroup->cancelled) 2302 return; 2303 if (thr->task->taskgroup->workshare 2304 && thr->task->taskgroup->prev 2305 && thr->task->taskgroup->prev->cancelled) 2306 return; 2307 } 2308 } 2309 2310 gomp_task_maybe_wait_for_dependencies (depend); 2311 } 2312 } 2313 } 2314 2315 if (devicep == NULL 2316 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2317 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2318 return; 2319 2320 struct gomp_thread *thr = gomp_thread (); 2321 struct gomp_team *team = thr->ts.team; 2322 /* If parallel or taskgroup has been cancelled, don't start new tasks. */ 2323 if (__builtin_expect (gomp_cancel_var, 0) && team) 2324 { 2325 if (gomp_team_barrier_cancelled (&team->barrier)) 2326 return; 2327 if (thr->task->taskgroup) 2328 { 2329 if (thr->task->taskgroup->cancelled) 2330 return; 2331 if (thr->task->taskgroup->workshare 2332 && thr->task->taskgroup->prev 2333 && thr->task->taskgroup->prev->cancelled) 2334 return; 2335 } 2336 } 2337 2338 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true); 2339} 2340 2341static void 2342gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, 2343 void **hostaddrs, size_t *sizes, unsigned short *kinds) 2344{ 2345 const int typemask = 0xff; 2346 size_t i; 2347 gomp_mutex_lock (&devicep->lock); 2348 if (devicep->state == GOMP_DEVICE_FINALIZED) 2349 { 2350 gomp_mutex_unlock (&devicep->lock); 2351 return; 2352 } 2353 2354 for (i = 0; i < mapnum; i++) 2355 { 2356 struct splay_tree_key_s cur_node; 2357 unsigned char kind = kinds[i] & typemask; 2358 switch (kind) 2359 { 2360 case GOMP_MAP_FROM: 2361 case GOMP_MAP_ALWAYS_FROM: 2362 case GOMP_MAP_DELETE: 2363 case GOMP_MAP_RELEASE: 2364 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: 2365 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION: 2366 cur_node.host_start = (uintptr_t) hostaddrs[i]; 2367 cur_node.host_end = cur_node.host_start + sizes[i]; 2368 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION 2369 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) 2370 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node) 2371 : splay_tree_lookup (&devicep->mem_map, &cur_node); 2372 if (!k) 2373 continue; 2374 2375 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY) 2376 k->refcount--; 2377 if ((kind == GOMP_MAP_DELETE 2378 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION) 2379 && k->refcount != REFCOUNT_INFINITY) 2380 k->refcount = 0; 2381 2382 if ((kind == GOMP_MAP_FROM && k->refcount == 0) 2383 || kind == GOMP_MAP_ALWAYS_FROM) 2384 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start, 2385 (void *) (k->tgt->tgt_start + k->tgt_offset 2386 + cur_node.host_start 2387 - k->host_start), 2388 cur_node.host_end - cur_node.host_start); 2389 if (k->refcount == 0) 2390 gomp_remove_var (devicep, k); 2391 2392 break; 2393 default: 2394 gomp_mutex_unlock (&devicep->lock); 2395 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x", 2396 kind); 2397 } 2398 } 2399 2400 gomp_mutex_unlock (&devicep->lock); 2401} 2402 2403void 2404GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, 2405 size_t *sizes, unsigned short *kinds, 2406 unsigned int flags, void **depend) 2407{ 2408 struct gomp_device_descr *devicep = resolve_device (device); 2409 2410 /* If there are depend clauses, but nowait is not present, 2411 block the parent task until the dependencies are resolved 2412 and then just continue with the rest of the function as if it 2413 is a merged task. Until we are able to schedule task during 2414 variable mapping or unmapping, ignore nowait if depend clauses 2415 are not present. */ 2416 if (depend != NULL) 2417 { 2418 struct gomp_thread *thr = gomp_thread (); 2419 if (thr->task && thr->task->depend_hash) 2420 { 2421 if ((flags & GOMP_TARGET_FLAG_NOWAIT) 2422 && thr->ts.team 2423 && !thr->task->final_task) 2424 { 2425 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, 2426 mapnum, hostaddrs, sizes, kinds, 2427 flags, depend, NULL, 2428 GOMP_TARGET_TASK_DATA)) 2429 return; 2430 } 2431 else 2432 { 2433 struct gomp_team *team = thr->ts.team; 2434 /* If parallel or taskgroup has been cancelled, don't start new 2435 tasks. */ 2436 if (__builtin_expect (gomp_cancel_var, 0) && team) 2437 { 2438 if (gomp_team_barrier_cancelled (&team->barrier)) 2439 return; 2440 if (thr->task->taskgroup) 2441 { 2442 if (thr->task->taskgroup->cancelled) 2443 return; 2444 if (thr->task->taskgroup->workshare 2445 && thr->task->taskgroup->prev 2446 && thr->task->taskgroup->prev->cancelled) 2447 return; 2448 } 2449 } 2450 2451 gomp_task_maybe_wait_for_dependencies (depend); 2452 } 2453 } 2454 } 2455 2456 if (devicep == NULL 2457 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2458 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2459 return; 2460 2461 struct gomp_thread *thr = gomp_thread (); 2462 struct gomp_team *team = thr->ts.team; 2463 /* If parallel or taskgroup has been cancelled, don't start new tasks. */ 2464 if (__builtin_expect (gomp_cancel_var, 0) && team) 2465 { 2466 if (gomp_team_barrier_cancelled (&team->barrier)) 2467 return; 2468 if (thr->task->taskgroup) 2469 { 2470 if (thr->task->taskgroup->cancelled) 2471 return; 2472 if (thr->task->taskgroup->workshare 2473 && thr->task->taskgroup->prev 2474 && thr->task->taskgroup->prev->cancelled) 2475 return; 2476 } 2477 } 2478 2479 /* The variables are mapped separately such that they can be released 2480 independently. */ 2481 size_t i, j; 2482 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) 2483 for (i = 0; i < mapnum; i++) 2484 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT) 2485 { 2486 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i], 2487 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); 2488 i += sizes[i]; 2489 } 2490 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET) 2491 { 2492 for (j = i + 1; j < mapnum; j++) 2493 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)) 2494 break; 2495 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i], 2496 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); 2497 i += j - i - 1; 2498 } 2499 else 2500 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i], 2501 true, GOMP_MAP_VARS_ENTER_DATA); 2502 else 2503 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds); 2504} 2505 2506bool 2507gomp_target_task_fn (void *data) 2508{ 2509 struct gomp_target_task *ttask = (struct gomp_target_task *) data; 2510 struct gomp_device_descr *devicep = ttask->devicep; 2511 2512 if (ttask->fn != NULL) 2513 { 2514 void *fn_addr; 2515 if (devicep == NULL 2516 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2517 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn)) 2518 || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) 2519 { 2520 ttask->state = GOMP_TARGET_TASK_FALLBACK; 2521 gomp_target_fallback (ttask->fn, ttask->hostaddrs); 2522 return false; 2523 } 2524 2525 if (ttask->state == GOMP_TARGET_TASK_FINISHED) 2526 { 2527 if (ttask->tgt) 2528 gomp_unmap_vars (ttask->tgt, true); 2529 return false; 2530 } 2531 2532 void *actual_arguments; 2533 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2534 { 2535 ttask->tgt = NULL; 2536 actual_arguments = ttask->hostaddrs; 2537 } 2538 else 2539 { 2540 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, 2541 NULL, ttask->sizes, ttask->kinds, true, 2542 GOMP_MAP_VARS_TARGET); 2543 actual_arguments = (void *) ttask->tgt->tgt_start; 2544 } 2545 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN; 2546 2547 assert (devicep->async_run_func); 2548 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments, 2549 ttask->args, (void *) ttask); 2550 return true; 2551 } 2552 else if (devicep == NULL 2553 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2554 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2555 return false; 2556 2557 size_t i; 2558 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE) 2559 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, 2560 ttask->kinds, true); 2561 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) 2562 for (i = 0; i < ttask->mapnum; i++) 2563 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT) 2564 { 2565 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i], 2566 NULL, &ttask->sizes[i], &ttask->kinds[i], true, 2567 GOMP_MAP_VARS_ENTER_DATA); 2568 i += ttask->sizes[i]; 2569 } 2570 else 2571 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i], 2572 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); 2573 else 2574 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, 2575 ttask->kinds); 2576 return false; 2577} 2578 2579void 2580GOMP_teams (unsigned int num_teams, unsigned int thread_limit) 2581{ 2582 if (thread_limit) 2583 { 2584 struct gomp_task_icv *icv = gomp_icv (true); 2585 icv->thread_limit_var 2586 = thread_limit > INT_MAX ? UINT_MAX : thread_limit; 2587 } 2588 (void) num_teams; 2589} 2590 2591void * 2592omp_target_alloc (size_t size, int device_num) 2593{ 2594 if (device_num == GOMP_DEVICE_HOST_FALLBACK) 2595 return malloc (size); 2596 2597 if (device_num < 0) 2598 return NULL; 2599 2600 struct gomp_device_descr *devicep = resolve_device (device_num); 2601 if (devicep == NULL) 2602 return NULL; 2603 2604 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2605 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2606 return malloc (size); 2607 2608 gomp_mutex_lock (&devicep->lock); 2609 void *ret = devicep->alloc_func (devicep->target_id, size); 2610 gomp_mutex_unlock (&devicep->lock); 2611 return ret; 2612} 2613 2614void 2615omp_target_free (void *device_ptr, int device_num) 2616{ 2617 if (device_ptr == NULL) 2618 return; 2619 2620 if (device_num == GOMP_DEVICE_HOST_FALLBACK) 2621 { 2622 free (device_ptr); 2623 return; 2624 } 2625 2626 if (device_num < 0) 2627 return; 2628 2629 struct gomp_device_descr *devicep = resolve_device (device_num); 2630 if (devicep == NULL) 2631 return; 2632 2633 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2634 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2635 { 2636 free (device_ptr); 2637 return; 2638 } 2639 2640 gomp_mutex_lock (&devicep->lock); 2641 gomp_free_device_memory (devicep, device_ptr); 2642 gomp_mutex_unlock (&devicep->lock); 2643} 2644 2645int 2646omp_target_is_present (const void *ptr, int device_num) 2647{ 2648 if (ptr == NULL) 2649 return 1; 2650 2651 if (device_num == GOMP_DEVICE_HOST_FALLBACK) 2652 return 1; 2653 2654 if (device_num < 0) 2655 return 0; 2656 2657 struct gomp_device_descr *devicep = resolve_device (device_num); 2658 if (devicep == NULL) 2659 return 0; 2660 2661 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2662 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2663 return 1; 2664 2665 gomp_mutex_lock (&devicep->lock); 2666 struct splay_tree_s *mem_map = &devicep->mem_map; 2667 struct splay_tree_key_s cur_node; 2668 2669 cur_node.host_start = (uintptr_t) ptr; 2670 cur_node.host_end = cur_node.host_start; 2671 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node); 2672 int ret = n != NULL; 2673 gomp_mutex_unlock (&devicep->lock); 2674 return ret; 2675} 2676 2677int 2678omp_target_memcpy (void *dst, const void *src, size_t length, 2679 size_t dst_offset, size_t src_offset, int dst_device_num, 2680 int src_device_num) 2681{ 2682 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; 2683 bool ret; 2684 2685 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) 2686 { 2687 if (dst_device_num < 0) 2688 return EINVAL; 2689 2690 dst_devicep = resolve_device (dst_device_num); 2691 if (dst_devicep == NULL) 2692 return EINVAL; 2693 2694 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2695 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2696 dst_devicep = NULL; 2697 } 2698 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) 2699 { 2700 if (src_device_num < 0) 2701 return EINVAL; 2702 2703 src_devicep = resolve_device (src_device_num); 2704 if (src_devicep == NULL) 2705 return EINVAL; 2706 2707 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2708 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2709 src_devicep = NULL; 2710 } 2711 if (src_devicep == NULL && dst_devicep == NULL) 2712 { 2713 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length); 2714 return 0; 2715 } 2716 if (src_devicep == NULL) 2717 { 2718 gomp_mutex_lock (&dst_devicep->lock); 2719 ret = dst_devicep->host2dev_func (dst_devicep->target_id, 2720 (char *) dst + dst_offset, 2721 (char *) src + src_offset, length); 2722 gomp_mutex_unlock (&dst_devicep->lock); 2723 return (ret ? 0 : EINVAL); 2724 } 2725 if (dst_devicep == NULL) 2726 { 2727 gomp_mutex_lock (&src_devicep->lock); 2728 ret = src_devicep->dev2host_func (src_devicep->target_id, 2729 (char *) dst + dst_offset, 2730 (char *) src + src_offset, length); 2731 gomp_mutex_unlock (&src_devicep->lock); 2732 return (ret ? 0 : EINVAL); 2733 } 2734 if (src_devicep == dst_devicep) 2735 { 2736 gomp_mutex_lock (&src_devicep->lock); 2737 ret = src_devicep->dev2dev_func (src_devicep->target_id, 2738 (char *) dst + dst_offset, 2739 (char *) src + src_offset, length); 2740 gomp_mutex_unlock (&src_devicep->lock); 2741 return (ret ? 0 : EINVAL); 2742 } 2743 return EINVAL; 2744} 2745 2746static int 2747omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size, 2748 int num_dims, const size_t *volume, 2749 const size_t *dst_offsets, 2750 const size_t *src_offsets, 2751 const size_t *dst_dimensions, 2752 const size_t *src_dimensions, 2753 struct gomp_device_descr *dst_devicep, 2754 struct gomp_device_descr *src_devicep) 2755{ 2756 size_t dst_slice = element_size; 2757 size_t src_slice = element_size; 2758 size_t j, dst_off, src_off, length; 2759 int i, ret; 2760 2761 if (num_dims == 1) 2762 { 2763 if (__builtin_mul_overflow (element_size, volume[0], &length) 2764 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off) 2765 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off)) 2766 return EINVAL; 2767 if (dst_devicep == NULL && src_devicep == NULL) 2768 { 2769 memcpy ((char *) dst + dst_off, (const char *) src + src_off, 2770 length); 2771 ret = 1; 2772 } 2773 else if (src_devicep == NULL) 2774 ret = dst_devicep->host2dev_func (dst_devicep->target_id, 2775 (char *) dst + dst_off, 2776 (const char *) src + src_off, 2777 length); 2778 else if (dst_devicep == NULL) 2779 ret = src_devicep->dev2host_func (src_devicep->target_id, 2780 (char *) dst + dst_off, 2781 (const char *) src + src_off, 2782 length); 2783 else if (src_devicep == dst_devicep) 2784 ret = src_devicep->dev2dev_func (src_devicep->target_id, 2785 (char *) dst + dst_off, 2786 (const char *) src + src_off, 2787 length); 2788 else 2789 ret = 0; 2790 return ret ? 0 : EINVAL; 2791 } 2792 2793 /* FIXME: it would be nice to have some plugin function to handle 2794 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can 2795 be handled in the generic recursion below, and for host-host it 2796 should be used even for any num_dims >= 2. */ 2797 2798 for (i = 1; i < num_dims; i++) 2799 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice) 2800 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice)) 2801 return EINVAL; 2802 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off) 2803 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off)) 2804 return EINVAL; 2805 for (j = 0; j < volume[0]; j++) 2806 { 2807 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off, 2808 (const char *) src + src_off, 2809 element_size, num_dims - 1, 2810 volume + 1, dst_offsets + 1, 2811 src_offsets + 1, dst_dimensions + 1, 2812 src_dimensions + 1, dst_devicep, 2813 src_devicep); 2814 if (ret) 2815 return ret; 2816 dst_off += dst_slice; 2817 src_off += src_slice; 2818 } 2819 return 0; 2820} 2821 2822int 2823omp_target_memcpy_rect (void *dst, const void *src, size_t element_size, 2824 int num_dims, const size_t *volume, 2825 const size_t *dst_offsets, 2826 const size_t *src_offsets, 2827 const size_t *dst_dimensions, 2828 const size_t *src_dimensions, 2829 int dst_device_num, int src_device_num) 2830{ 2831 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; 2832 2833 if (!dst && !src) 2834 return INT_MAX; 2835 2836 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) 2837 { 2838 if (dst_device_num < 0) 2839 return EINVAL; 2840 2841 dst_devicep = resolve_device (dst_device_num); 2842 if (dst_devicep == NULL) 2843 return EINVAL; 2844 2845 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2846 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2847 dst_devicep = NULL; 2848 } 2849 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) 2850 { 2851 if (src_device_num < 0) 2852 return EINVAL; 2853 2854 src_devicep = resolve_device (src_device_num); 2855 if (src_devicep == NULL) 2856 return EINVAL; 2857 2858 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2859 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2860 src_devicep = NULL; 2861 } 2862 2863 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep) 2864 return EINVAL; 2865 2866 if (src_devicep) 2867 gomp_mutex_lock (&src_devicep->lock); 2868 else if (dst_devicep) 2869 gomp_mutex_lock (&dst_devicep->lock); 2870 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, 2871 volume, dst_offsets, src_offsets, 2872 dst_dimensions, src_dimensions, 2873 dst_devicep, src_devicep); 2874 if (src_devicep) 2875 gomp_mutex_unlock (&src_devicep->lock); 2876 else if (dst_devicep) 2877 gomp_mutex_unlock (&dst_devicep->lock); 2878 return ret; 2879} 2880 2881int 2882omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, 2883 size_t size, size_t device_offset, int device_num) 2884{ 2885 if (device_num == GOMP_DEVICE_HOST_FALLBACK) 2886 return EINVAL; 2887 2888 if (device_num < 0) 2889 return EINVAL; 2890 2891 struct gomp_device_descr *devicep = resolve_device (device_num); 2892 if (devicep == NULL) 2893 return EINVAL; 2894 2895 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2896 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 2897 return EINVAL; 2898 2899 gomp_mutex_lock (&devicep->lock); 2900 2901 struct splay_tree_s *mem_map = &devicep->mem_map; 2902 struct splay_tree_key_s cur_node; 2903 int ret = EINVAL; 2904 2905 cur_node.host_start = (uintptr_t) host_ptr; 2906 cur_node.host_end = cur_node.host_start + size; 2907 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 2908 if (n) 2909 { 2910 if (n->tgt->tgt_start + n->tgt_offset 2911 == (uintptr_t) device_ptr + device_offset 2912 && n->host_start <= cur_node.host_start 2913 && n->host_end >= cur_node.host_end) 2914 ret = 0; 2915 } 2916 else 2917 { 2918 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); 2919 tgt->array = gomp_malloc (sizeof (*tgt->array)); 2920 tgt->refcount = 1; 2921 tgt->tgt_start = 0; 2922 tgt->tgt_end = 0; 2923 tgt->to_free = NULL; 2924 tgt->prev = NULL; 2925 tgt->list_count = 0; 2926 tgt->device_descr = devicep; 2927 splay_tree_node array = tgt->array; 2928 splay_tree_key k = &array->key; 2929 k->host_start = cur_node.host_start; 2930 k->host_end = cur_node.host_end; 2931 k->tgt = tgt; 2932 k->tgt_offset = (uintptr_t) device_ptr + device_offset; 2933 k->refcount = REFCOUNT_INFINITY; 2934 k->dynamic_refcount = 0; 2935 k->aux = NULL; 2936 array->left = NULL; 2937 array->right = NULL; 2938 splay_tree_insert (&devicep->mem_map, array); 2939 ret = 0; 2940 } 2941 gomp_mutex_unlock (&devicep->lock); 2942 return ret; 2943} 2944 2945int 2946omp_target_disassociate_ptr (const void *ptr, int device_num) 2947{ 2948 if (device_num == GOMP_DEVICE_HOST_FALLBACK) 2949 return EINVAL; 2950 2951 if (device_num < 0) 2952 return EINVAL; 2953 2954 struct gomp_device_descr *devicep = resolve_device (device_num); 2955 if (devicep == NULL) 2956 return EINVAL; 2957 2958 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) 2959 return EINVAL; 2960 2961 gomp_mutex_lock (&devicep->lock); 2962 2963 struct splay_tree_s *mem_map = &devicep->mem_map; 2964 struct splay_tree_key_s cur_node; 2965 int ret = EINVAL; 2966 2967 cur_node.host_start = (uintptr_t) ptr; 2968 cur_node.host_end = cur_node.host_start; 2969 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 2970 if (n 2971 && n->host_start == cur_node.host_start 2972 && n->refcount == REFCOUNT_INFINITY 2973 && n->tgt->tgt_start == 0 2974 && n->tgt->to_free == NULL 2975 && n->tgt->refcount == 1 2976 && n->tgt->list_count == 0) 2977 { 2978 splay_tree_remove (&devicep->mem_map, n); 2979 gomp_unmap_tgt (n->tgt); 2980 ret = 0; 2981 } 2982 2983 gomp_mutex_unlock (&devicep->lock); 2984 return ret; 2985} 2986 2987int 2988omp_pause_resource (omp_pause_resource_t kind, int device_num) 2989{ 2990 (void) kind; 2991 if (device_num == GOMP_DEVICE_HOST_FALLBACK) 2992 return gomp_pause_host (); 2993 if (device_num < 0 || device_num >= gomp_get_num_devices ()) 2994 return -1; 2995 /* Do nothing for target devices for now. */ 2996 return 0; 2997} 2998 2999int 3000omp_pause_resource_all (omp_pause_resource_t kind) 3001{ 3002 (void) kind; 3003 if (gomp_pause_host ()) 3004 return -1; 3005 /* Do nothing for target devices for now. */ 3006 return 0; 3007} 3008 3009ialias (omp_pause_resource) 3010ialias (omp_pause_resource_all) 3011 3012#ifdef PLUGIN_SUPPORT 3013 3014/* This function tries to load a plugin for DEVICE. Name of plugin is passed 3015 in PLUGIN_NAME. 3016 The handles of the found functions are stored in the corresponding fields 3017 of DEVICE. The function returns TRUE on success and FALSE otherwise. */ 3018 3019static bool 3020gomp_load_plugin_for_device (struct gomp_device_descr *device, 3021 const char *plugin_name) 3022{ 3023 const char *err = NULL, *last_missing = NULL; 3024 3025 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY); 3026 if (!plugin_handle) 3027 goto dl_fail; 3028 3029 /* Check if all required functions are available in the plugin and store 3030 their handlers. None of the symbols can legitimately be NULL, 3031 so we don't need to check dlerror all the time. */ 3032#define DLSYM(f) \ 3033 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \ 3034 goto dl_fail 3035 /* Similar, but missing functions are not an error. Return false if 3036 failed, true otherwise. */ 3037#define DLSYM_OPT(f, n) \ 3038 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \ 3039 || (last_missing = #n, 0)) 3040 3041 DLSYM (version); 3042 if (device->version_func () != GOMP_VERSION) 3043 { 3044 err = "plugin version mismatch"; 3045 goto fail; 3046 } 3047 3048 DLSYM (get_name); 3049 DLSYM (get_caps); 3050 DLSYM (get_type); 3051 DLSYM (get_num_devices); 3052 DLSYM (init_device); 3053 DLSYM (fini_device); 3054 DLSYM (load_image); 3055 DLSYM (unload_image); 3056 DLSYM (alloc); 3057 DLSYM (free); 3058 DLSYM (dev2host); 3059 DLSYM (host2dev); 3060 device->capabilities = device->get_caps_func (); 3061 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3062 { 3063 DLSYM (run); 3064 DLSYM_OPT (async_run, async_run); 3065 DLSYM_OPT (can_run, can_run); 3066 DLSYM (dev2dev); 3067 } 3068 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) 3069 { 3070 if (!DLSYM_OPT (openacc.exec, openacc_exec) 3071 || !DLSYM_OPT (openacc.create_thread_data, 3072 openacc_create_thread_data) 3073 || !DLSYM_OPT (openacc.destroy_thread_data, 3074 openacc_destroy_thread_data) 3075 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct) 3076 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct) 3077 || !DLSYM_OPT (openacc.async.test, openacc_async_test) 3078 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize) 3079 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize) 3080 || !DLSYM_OPT (openacc.async.queue_callback, 3081 openacc_async_queue_callback) 3082 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec) 3083 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host) 3084 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev) 3085 || !DLSYM_OPT (openacc.get_property, openacc_get_property)) 3086 { 3087 /* Require all the OpenACC handlers if we have 3088 GOMP_OFFLOAD_CAP_OPENACC_200. */ 3089 err = "plugin missing OpenACC handler function"; 3090 goto fail; 3091 } 3092 3093 unsigned cuda = 0; 3094 cuda += DLSYM_OPT (openacc.cuda.get_current_device, 3095 openacc_cuda_get_current_device); 3096 cuda += DLSYM_OPT (openacc.cuda.get_current_context, 3097 openacc_cuda_get_current_context); 3098 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream); 3099 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream); 3100 if (cuda && cuda != 4) 3101 { 3102 /* Make sure all the CUDA functions are there if any of them are. */ 3103 err = "plugin missing OpenACC CUDA handler function"; 3104 goto fail; 3105 } 3106 } 3107#undef DLSYM 3108#undef DLSYM_OPT 3109 3110 return 1; 3111 3112 dl_fail: 3113 err = dlerror (); 3114 fail: 3115 gomp_error ("while loading %s: %s", plugin_name, err); 3116 if (last_missing) 3117 gomp_error ("missing function was %s", last_missing); 3118 if (plugin_handle) 3119 dlclose (plugin_handle); 3120 3121 return 0; 3122} 3123 3124/* This function finalizes all initialized devices. */ 3125 3126static void 3127gomp_target_fini (void) 3128{ 3129 int i; 3130 for (i = 0; i < num_devices; i++) 3131 { 3132 bool ret = true; 3133 struct gomp_device_descr *devicep = &devices[i]; 3134 gomp_mutex_lock (&devicep->lock); 3135 if (devicep->state == GOMP_DEVICE_INITIALIZED) 3136 ret = gomp_fini_device (devicep); 3137 gomp_mutex_unlock (&devicep->lock); 3138 if (!ret) 3139 gomp_fatal ("device finalization failed"); 3140 } 3141} 3142 3143/* This function initializes the runtime for offloading. 3144 It parses the list of offload plugins, and tries to load these. 3145 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP 3146 will be set, and the array DEVICES initialized, containing descriptors for 3147 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows 3148 by the others. */ 3149 3150static void 3151gomp_target_init (void) 3152{ 3153 const char *prefix ="libgomp-plugin-"; 3154 const char *suffix = SONAME_SUFFIX (1); 3155 const char *cur, *next; 3156 char *plugin_name; 3157 int i, new_num_devices; 3158 3159 num_devices = 0; 3160 devices = NULL; 3161 3162 cur = OFFLOAD_PLUGINS; 3163 if (*cur) 3164 do 3165 { 3166 struct gomp_device_descr current_device; 3167 size_t prefix_len, suffix_len, cur_len; 3168 3169 next = strchr (cur, ','); 3170 3171 prefix_len = strlen (prefix); 3172 cur_len = next ? next - cur : strlen (cur); 3173 suffix_len = strlen (suffix); 3174 3175 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1); 3176 if (!plugin_name) 3177 { 3178 num_devices = 0; 3179 break; 3180 } 3181 3182 memcpy (plugin_name, prefix, prefix_len); 3183 memcpy (plugin_name + prefix_len, cur, cur_len); 3184 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1); 3185 3186 if (gomp_load_plugin_for_device (¤t_device, plugin_name)) 3187 { 3188 new_num_devices = current_device.get_num_devices_func (); 3189 if (new_num_devices >= 1) 3190 { 3191 /* Augment DEVICES and NUM_DEVICES. */ 3192 3193 devices = realloc (devices, (num_devices + new_num_devices) 3194 * sizeof (struct gomp_device_descr)); 3195 if (!devices) 3196 { 3197 num_devices = 0; 3198 free (plugin_name); 3199 break; 3200 } 3201 3202 current_device.name = current_device.get_name_func (); 3203 /* current_device.capabilities has already been set. */ 3204 current_device.type = current_device.get_type_func (); 3205 current_device.mem_map.root = NULL; 3206 current_device.state = GOMP_DEVICE_UNINITIALIZED; 3207 for (i = 0; i < new_num_devices; i++) 3208 { 3209 current_device.target_id = i; 3210 devices[num_devices] = current_device; 3211 gomp_mutex_init (&devices[num_devices].lock); 3212 num_devices++; 3213 } 3214 } 3215 } 3216 3217 free (plugin_name); 3218 cur = next + 1; 3219 } 3220 while (next); 3221 3222 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set 3223 NUM_DEVICES_OPENMP. */ 3224 struct gomp_device_descr *devices_s 3225 = malloc (num_devices * sizeof (struct gomp_device_descr)); 3226 if (!devices_s) 3227 { 3228 num_devices = 0; 3229 free (devices); 3230 devices = NULL; 3231 } 3232 num_devices_openmp = 0; 3233 for (i = 0; i < num_devices; i++) 3234 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3235 devices_s[num_devices_openmp++] = devices[i]; 3236 int num_devices_after_openmp = num_devices_openmp; 3237 for (i = 0; i < num_devices; i++) 3238 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) 3239 devices_s[num_devices_after_openmp++] = devices[i]; 3240 free (devices); 3241 devices = devices_s; 3242 3243 for (i = 0; i < num_devices; i++) 3244 { 3245 /* The 'devices' array can be moved (by the realloc call) until we have 3246 found all the plugins, so registering with the OpenACC runtime (which 3247 takes a copy of the pointer argument) must be delayed until now. */ 3248 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) 3249 goacc_register (&devices[i]); 3250 } 3251 3252 if (atexit (gomp_target_fini) != 0) 3253 gomp_fatal ("atexit failed"); 3254} 3255 3256#else /* PLUGIN_SUPPORT */ 3257/* If dlfcn.h is unavailable we always fallback to host execution. 3258 GOMP_target* routines are just stubs for this case. */ 3259static void 3260gomp_target_init (void) 3261{ 3262} 3263#endif /* PLUGIN_SUPPORT */ 3264