1/* OpenACC Runtime initialization routines 2 3 Copyright (C) 2013-2015 Free Software Foundation, Inc. 4 5 Contributed by Mentor Embedded. 6 7 This file is part of the GNU Offloading and Multi Processing Library 8 (libgomp). 9 10 Libgomp is free software; you can redistribute it and/or modify it 11 under the terms of the GNU General Public License as published by 12 the Free Software Foundation; either version 3, or (at your option) 13 any later version. 14 15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 18 more details. 19 20 Under Section 7 of GPL version 3, you are granted additional 21 permissions described in the GCC Runtime Library Exception, version 22 3.1, as published by the Free Software Foundation. 23 24 You should have received a copy of the GNU General Public License and 25 a copy of the GCC Runtime Library Exception along with this program; 26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 27 <http://www.gnu.org/licenses/>. */ 28 29#include "openacc.h" 30#include "config.h" 31#include "libgomp.h" 32#include "gomp-constants.h" 33#include "oacc-int.h" 34#include "splay-tree.h" 35#include <stdint.h> 36#include <assert.h> 37 38/* Return block containing [H->S), or NULL if not contained. */ 39 40static splay_tree_key 41lookup_host (struct gomp_device_descr *dev, void *h, size_t s) 42{ 43 struct splay_tree_key_s node; 44 splay_tree_key key; 45 46 node.host_start = (uintptr_t) h; 47 node.host_end = (uintptr_t) h + s; 48 49 gomp_mutex_lock (&dev->lock); 50 key = splay_tree_lookup (&dev->mem_map, &node); 51 gomp_mutex_unlock (&dev->lock); 52 53 return key; 54} 55 56/* Return block containing [D->S), or NULL if not contained. 57 The list isn't ordered by device address, so we have to iterate 58 over the whole array. This is not expected to be a common 59 operation. */ 60 61static splay_tree_key 62lookup_dev (struct target_mem_desc *tgt, void *d, size_t s) 63{ 64 int i; 65 struct target_mem_desc *t; 66 67 if (!tgt) 68 return NULL; 69 70 gomp_mutex_lock (&tgt->device_descr->lock); 71 72 for (t = tgt; t != NULL; t = t->prev) 73 { 74 if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s) 75 break; 76 } 77 78 gomp_mutex_unlock (&tgt->device_descr->lock); 79 80 if (!t) 81 return NULL; 82 83 for (i = 0; i < t->list_count; i++) 84 { 85 void * offset; 86 87 splay_tree_key k = &t->array[i].key; 88 offset = d - t->tgt_start + k->tgt_offset; 89 90 if (k->host_start + offset <= (void *) k->host_end) 91 return k; 92 } 93 94 return NULL; 95} 96 97/* OpenACC is silent on how memory exhaustion is indicated. We return 98 NULL. */ 99 100void * 101acc_malloc (size_t s) 102{ 103 if (!s) 104 return NULL; 105 106 goacc_lazy_initialize (); 107 108 struct goacc_thread *thr = goacc_thread (); 109 110 assert (thr->dev); 111 112 return thr->dev->alloc_func (thr->dev->target_id, s); 113} 114 115/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event 116 the device address is mapped. We choose to check if it mapped, 117 and if it is, to unmap it. */ 118void 119acc_free (void *d) 120{ 121 splay_tree_key k; 122 struct goacc_thread *thr = goacc_thread (); 123 124 if (!d) 125 return; 126 127 assert (thr && thr->dev); 128 129 /* We don't have to call lazy open here, as the ptr value must have 130 been returned by acc_malloc. It's not permitted to pass NULL in 131 (unless you got that null from acc_malloc). */ 132 if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1))) 133 { 134 void *offset; 135 136 offset = d - k->tgt->tgt_start + k->tgt_offset; 137 138 acc_unmap_data ((void *)(k->host_start + offset)); 139 } 140 141 thr->dev->free_func (thr->dev->target_id, d); 142} 143 144void 145acc_memcpy_to_device (void *d, void *h, size_t s) 146{ 147 /* No need to call lazy open here, as the device pointer must have 148 been obtained from a routine that did that. */ 149 struct goacc_thread *thr = goacc_thread (); 150 151 assert (thr && thr->dev); 152 153 thr->dev->host2dev_func (thr->dev->target_id, d, h, s); 154} 155 156void 157acc_memcpy_from_device (void *h, void *d, size_t s) 158{ 159 /* No need to call lazy open here, as the device pointer must have 160 been obtained from a routine that did that. */ 161 struct goacc_thread *thr = goacc_thread (); 162 163 assert (thr && thr->dev); 164 165 thr->dev->dev2host_func (thr->dev->target_id, h, d, s); 166} 167 168/* Return the device pointer that corresponds to host data H. Or NULL 169 if no mapping. */ 170 171void * 172acc_deviceptr (void *h) 173{ 174 splay_tree_key n; 175 void *d; 176 void *offset; 177 178 goacc_lazy_initialize (); 179 180 struct goacc_thread *thr = goacc_thread (); 181 182 n = lookup_host (thr->dev, h, 1); 183 184 if (!n) 185 return NULL; 186 187 offset = h - n->host_start; 188 189 d = n->tgt->tgt_start + n->tgt_offset + offset; 190 191 return d; 192} 193 194/* Return the host pointer that corresponds to device data D. Or NULL 195 if no mapping. */ 196 197void * 198acc_hostptr (void *d) 199{ 200 splay_tree_key n; 201 void *h; 202 void *offset; 203 204 goacc_lazy_initialize (); 205 206 struct goacc_thread *thr = goacc_thread (); 207 208 n = lookup_dev (thr->dev->openacc.data_environ, d, 1); 209 210 if (!n) 211 return NULL; 212 213 offset = d - n->tgt->tgt_start + n->tgt_offset; 214 215 h = n->host_start + offset; 216 217 return h; 218} 219 220/* Return 1 if host data [H,+S] is present on the device. */ 221 222int 223acc_is_present (void *h, size_t s) 224{ 225 splay_tree_key n; 226 227 if (!s || !h) 228 return 0; 229 230 goacc_lazy_initialize (); 231 232 struct goacc_thread *thr = goacc_thread (); 233 struct gomp_device_descr *acc_dev = thr->dev; 234 235 n = lookup_host (acc_dev, h, s); 236 237 if (n && ((uintptr_t)h < n->host_start 238 || (uintptr_t)h + s > n->host_end 239 || s > n->host_end - n->host_start)) 240 n = NULL; 241 242 return n != NULL; 243} 244 245/* Create a mapping for host [H,+S] -> device [D,+S] */ 246 247void 248acc_map_data (void *h, void *d, size_t s) 249{ 250 struct target_mem_desc *tgt; 251 size_t mapnum = 1; 252 void *hostaddrs = h; 253 void *devaddrs = d; 254 size_t sizes = s; 255 unsigned short kinds = GOMP_MAP_ALLOC; 256 257 goacc_lazy_initialize (); 258 259 struct goacc_thread *thr = goacc_thread (); 260 struct gomp_device_descr *acc_dev = thr->dev; 261 262 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 263 { 264 if (d != h) 265 gomp_fatal ("cannot map data on shared-memory system"); 266 267 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false); 268 } 269 else 270 { 271 struct goacc_thread *thr = goacc_thread (); 272 273 if (!d || !h || !s) 274 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map", 275 (void *)h, (int)s, (void *)d, (int)s); 276 277 if (lookup_host (acc_dev, h, s)) 278 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h, 279 (int)s); 280 281 if (lookup_dev (thr->dev->openacc.data_environ, d, s)) 282 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d, 283 (int)s); 284 285 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, 286 &kinds, true, false); 287 } 288 289 tgt->prev = acc_dev->openacc.data_environ; 290 acc_dev->openacc.data_environ = tgt; 291} 292 293void 294acc_unmap_data (void *h) 295{ 296 struct goacc_thread *thr = goacc_thread (); 297 struct gomp_device_descr *acc_dev = thr->dev; 298 299 /* No need to call lazy open, as the address must have been mapped. */ 300 301 size_t host_size; 302 splay_tree_key n = lookup_host (acc_dev, h, 1); 303 struct target_mem_desc *t; 304 305 if (!n) 306 gomp_fatal ("%p is not a mapped block", (void *)h); 307 308 host_size = n->host_end - n->host_start; 309 310 if (n->host_start != (uintptr_t) h) 311 gomp_fatal ("[%p,%d] surrounds1 %p", 312 (void *) n->host_start, (int) host_size, (void *) h); 313 314 t = n->tgt; 315 316 if (t->refcount == 2) 317 { 318 struct target_mem_desc *tp; 319 320 /* This is the last reference, so pull the descriptor off the 321 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from 322 freeing the device memory. */ 323 t->tgt_end = 0; 324 t->to_free = 0; 325 326 gomp_mutex_lock (&acc_dev->lock); 327 328 for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; 329 tp = t, t = t->prev) 330 if (n->tgt == t) 331 { 332 if (tp) 333 tp->prev = t->prev; 334 else 335 acc_dev->openacc.data_environ = t->prev; 336 337 break; 338 } 339 340 gomp_mutex_unlock (&acc_dev->lock); 341 } 342 343 gomp_unmap_vars (t, true); 344} 345 346#define FLAG_PRESENT (1 << 0) 347#define FLAG_CREATE (1 << 1) 348#define FLAG_COPY (1 << 2) 349 350static void * 351present_create_copy (unsigned f, void *h, size_t s) 352{ 353 void *d; 354 splay_tree_key n; 355 356 if (!h || !s) 357 gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s); 358 359 goacc_lazy_initialize (); 360 361 struct goacc_thread *thr = goacc_thread (); 362 struct gomp_device_descr *acc_dev = thr->dev; 363 364 n = lookup_host (acc_dev, h, s); 365 if (n) 366 { 367 /* Present. */ 368 d = (void *) (n->tgt->tgt_start + n->tgt_offset); 369 370 if (!(f & FLAG_PRESENT)) 371 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]", 372 (void *)h, (int)s, (void *)d, (int)s); 373 if ((h + s) > (void *)n->host_end) 374 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); 375 } 376 else if (!(f & FLAG_CREATE)) 377 { 378 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); 379 } 380 else 381 { 382 struct target_mem_desc *tgt; 383 size_t mapnum = 1; 384 unsigned short kinds; 385 void *hostaddrs = h; 386 387 if (f & FLAG_COPY) 388 kinds = GOMP_MAP_TO; 389 else 390 kinds = GOMP_MAP_ALLOC; 391 392 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true, 393 false); 394 395 gomp_mutex_lock (&acc_dev->lock); 396 397 d = tgt->to_free; 398 tgt->prev = acc_dev->openacc.data_environ; 399 acc_dev->openacc.data_environ = tgt; 400 401 gomp_mutex_unlock (&acc_dev->lock); 402 } 403 404 return d; 405} 406 407void * 408acc_create (void *h, size_t s) 409{ 410 return present_create_copy (FLAG_CREATE, h, s); 411} 412 413void * 414acc_copyin (void *h, size_t s) 415{ 416 return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s); 417} 418 419void * 420acc_present_or_create (void *h, size_t s) 421{ 422 return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s); 423} 424 425void * 426acc_present_or_copyin (void *h, size_t s) 427{ 428 return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s); 429} 430 431#define FLAG_COPYOUT (1 << 0) 432 433static void 434delete_copyout (unsigned f, void *h, size_t s) 435{ 436 size_t host_size; 437 splay_tree_key n; 438 void *d; 439 struct goacc_thread *thr = goacc_thread (); 440 struct gomp_device_descr *acc_dev = thr->dev; 441 442 n = lookup_host (acc_dev, h, s); 443 444 /* No need to call lazy open, as the data must already have been 445 mapped. */ 446 447 if (!n) 448 gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s); 449 450 d = (void *) (n->tgt->tgt_start + n->tgt_offset 451 + (uintptr_t) h - n->host_start); 452 453 host_size = n->host_end - n->host_start; 454 455 if (n->host_start != (uintptr_t) h || host_size != s) 456 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]", 457 (void *) n->host_start, (int) host_size, (void *) h, (int) s); 458 459 if (f & FLAG_COPYOUT) 460 acc_dev->dev2host_func (acc_dev->target_id, h, d, s); 461 462 acc_unmap_data (h); 463 464 acc_dev->free_func (acc_dev->target_id, d); 465} 466 467void 468acc_delete (void *h , size_t s) 469{ 470 delete_copyout (0, h, s); 471} 472 473void acc_copyout (void *h, size_t s) 474{ 475 delete_copyout (FLAG_COPYOUT, h, s); 476} 477 478static void 479update_dev_host (int is_dev, void *h, size_t s) 480{ 481 splay_tree_key n; 482 void *d; 483 struct goacc_thread *thr = goacc_thread (); 484 struct gomp_device_descr *acc_dev = thr->dev; 485 486 n = lookup_host (acc_dev, h, s); 487 488 /* No need to call lazy open, as the data must already have been 489 mapped. */ 490 491 if (!n) 492 gomp_fatal ("[%p,%d] is not mapped", h, (int)s); 493 494 d = (void *) (n->tgt->tgt_start + n->tgt_offset 495 + (uintptr_t) h - n->host_start); 496 497 if (is_dev) 498 acc_dev->host2dev_func (acc_dev->target_id, d, h, s); 499 else 500 acc_dev->dev2host_func (acc_dev->target_id, h, d, s); 501} 502 503void 504acc_update_device (void *h, size_t s) 505{ 506 update_dev_host (1, h, s); 507} 508 509void 510acc_update_self (void *h, size_t s) 511{ 512 update_dev_host (0, h, s); 513} 514 515void 516gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, 517 void *kinds) 518{ 519 struct target_mem_desc *tgt; 520 struct goacc_thread *thr = goacc_thread (); 521 struct gomp_device_descr *acc_dev = thr->dev; 522 523 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); 524 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, 525 NULL, sizes, kinds, true, false); 526 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); 527 tgt->prev = acc_dev->openacc.data_environ; 528 acc_dev->openacc.data_environ = tgt; 529} 530 531void 532gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) 533{ 534 struct goacc_thread *thr = goacc_thread (); 535 struct gomp_device_descr *acc_dev = thr->dev; 536 splay_tree_key n; 537 struct target_mem_desc *t; 538 int minrefs = (mapnum == 1) ? 2 : 3; 539 540 n = lookup_host (acc_dev, h, 1); 541 542 if (!n) 543 gomp_fatal ("%p is not a mapped block", (void *)h); 544 545 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); 546 547 t = n->tgt; 548 549 struct target_mem_desc *tp; 550 551 gomp_mutex_lock (&acc_dev->lock); 552 553 if (t->refcount == minrefs) 554 { 555 /* This is the last reference, so pull the descriptor off the 556 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from 557 freeing the device memory. */ 558 t->tgt_end = 0; 559 t->to_free = 0; 560 561 for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; 562 tp = t, t = t->prev) 563 { 564 if (n->tgt == t) 565 { 566 if (tp) 567 tp->prev = t->prev; 568 else 569 acc_dev->openacc.data_environ = t->prev; 570 break; 571 } 572 } 573 } 574 575 if (force_copyfrom) 576 t->list[0]->copy_from = 1; 577 578 gomp_mutex_unlock (&acc_dev->lock); 579 580 /* If running synchronously, unmap immediately. */ 581 if (async < acc_async_noval) 582 gomp_unmap_vars (t, true); 583 else 584 { 585 gomp_copy_from_async (t); 586 acc_dev->openacc.register_async_cleanup_func (t); 587 } 588 589 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); 590} 591