1/* Copyright (C) 2013-2015 Free Software Foundation, Inc. 2 3 Contributed by Mentor Embedded. 4 5 This file is part of the GNU Offloading and Multi Processing Library 6 (libgomp). 7 8 Libgomp is free software; you can redistribute it and/or modify it 9 under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3, or (at your option) 11 any later version. 12 13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 16 more details. 17 18 Under Section 7 of GPL version 3, you are granted additional 19 permissions described in the GCC Runtime Library Exception, version 20 3.1, as published by the Free Software Foundation. 21 22 You should have received a copy of the GNU General Public License and 23 a copy of the GCC Runtime Library Exception along with this program; 24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 25 <http://www.gnu.org/licenses/>. */ 26 27/* This file handles OpenACC constructs. */ 28 29#include "openacc.h" 30#include "libgomp.h" 31#include "libgomp_g.h" 32#include "gomp-constants.h" 33#include "oacc-int.h" 34#ifdef HAVE_INTTYPES_H 35# include <inttypes.h> /* For PRIu64. */ 36#endif 37#include <string.h> 38#include <stdarg.h> 39#include <assert.h> 40 41static int 42find_pset (int pos, size_t mapnum, unsigned short *kinds) 43{ 44 if (pos + 1 >= mapnum) 45 return 0; 46 47 unsigned char kind = kinds[pos+1] & 0xff; 48 49 return kind == GOMP_MAP_TO_PSET; 50} 51 52static void goacc_wait (int async, int num_waits, va_list ap); 53 54void 55GOACC_parallel (int device, void (*fn) (void *), 56 size_t mapnum, void **hostaddrs, size_t *sizes, 57 unsigned short *kinds, 58 int num_gangs, int num_workers, int vector_length, 59 int async, int num_waits, ...) 60{ 61 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; 62 va_list ap; 63 struct goacc_thread *thr; 64 struct gomp_device_descr *acc_dev; 65 struct target_mem_desc *tgt; 66 void **devaddrs; 67 unsigned int i; 68 struct splay_tree_key_s k; 69 splay_tree_key tgt_fn_key; 70 void (*tgt_fn); 71 72 if (num_gangs != 1) 73 gomp_fatal ("num_gangs (%d) different from one is not yet supported", 74 num_gangs); 75 if (num_workers != 1) 76 gomp_fatal ("num_workers (%d) different from one is not yet supported", 77 num_workers); 78 79#ifdef HAVE_INTTYPES_H 80 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p, " 81 "async = %d\n", 82 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds, async); 83#else 84 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n", 85 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds, 86 async); 87#endif 88 goacc_lazy_initialize (); 89 90 thr = goacc_thread (); 91 acc_dev = thr->dev; 92 93 /* Host fallback if "if" clause is false or if the current device is set to 94 the host. */ 95 if (host_fallback) 96 { 97 goacc_save_and_set_bind (acc_device_host); 98 fn (hostaddrs); 99 goacc_restore_bind (); 100 return; 101 } 102 else if (acc_device_type (acc_dev->type) == acc_device_host) 103 { 104 fn (hostaddrs); 105 return; 106 } 107 108 va_start (ap, num_waits); 109 110 if (num_waits > 0) 111 goacc_wait (async, num_waits, ap); 112 113 va_end (ap); 114 115 acc_dev->openacc.async_set_async_func (async); 116 117 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) 118 { 119 k.host_start = (uintptr_t) fn; 120 k.host_end = k.host_start + 1; 121 gomp_mutex_lock (&acc_dev->lock); 122 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k); 123 gomp_mutex_unlock (&acc_dev->lock); 124 125 if (tgt_fn_key == NULL) 126 gomp_fatal ("target function wasn't mapped"); 127 128 tgt_fn = (void (*)) tgt_fn_key->tgt_offset; 129 } 130 else 131 tgt_fn = (void (*)) fn; 132 133 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, 134 false); 135 136 devaddrs = gomp_alloca (sizeof (void *) * mapnum); 137 for (i = 0; i < mapnum; i++) 138 devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start 139 + tgt->list[i]->tgt_offset); 140 141 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds, 142 num_gangs, num_workers, vector_length, async, 143 tgt); 144 145 /* If running synchronously, unmap immediately. */ 146 if (async < acc_async_noval) 147 gomp_unmap_vars (tgt, true); 148 else 149 { 150 gomp_copy_from_async (tgt); 151 acc_dev->openacc.register_async_cleanup_func (tgt); 152 } 153 154 acc_dev->openacc.async_set_async_func (acc_async_sync); 155} 156 157void 158GOACC_data_start (int device, size_t mapnum, 159 void **hostaddrs, size_t *sizes, unsigned short *kinds) 160{ 161 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; 162 struct target_mem_desc *tgt; 163 164#ifdef HAVE_INTTYPES_H 165 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", 166 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); 167#else 168 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", 169 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); 170#endif 171 172 goacc_lazy_initialize (); 173 174 struct goacc_thread *thr = goacc_thread (); 175 struct gomp_device_descr *acc_dev = thr->dev; 176 177 /* Host fallback or 'do nothing'. */ 178 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 179 || host_fallback) 180 { 181 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false); 182 tgt->prev = thr->mapped_data; 183 thr->mapped_data = tgt; 184 185 return; 186 } 187 188 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); 189 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, 190 false); 191 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); 192 tgt->prev = thr->mapped_data; 193 thr->mapped_data = tgt; 194} 195 196void 197GOACC_data_end (void) 198{ 199 struct goacc_thread *thr = goacc_thread (); 200 struct target_mem_desc *tgt = thr->mapped_data; 201 202 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); 203 thr->mapped_data = tgt->prev; 204 gomp_unmap_vars (tgt, true); 205 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); 206} 207 208void 209GOACC_enter_exit_data (int device, size_t mapnum, 210 void **hostaddrs, size_t *sizes, unsigned short *kinds, 211 int async, int num_waits, ...) 212{ 213 struct goacc_thread *thr; 214 struct gomp_device_descr *acc_dev; 215 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; 216 bool data_enter = false; 217 size_t i; 218 219 goacc_lazy_initialize (); 220 221 thr = goacc_thread (); 222 acc_dev = thr->dev; 223 224 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 225 || host_fallback) 226 return; 227 228 if (num_waits > 0) 229 { 230 va_list ap; 231 232 va_start (ap, num_waits); 233 234 goacc_wait (async, num_waits, ap); 235 236 va_end (ap); 237 } 238 239 acc_dev->openacc.async_set_async_func (async); 240 241 /* Determine if this is an "acc enter data". */ 242 for (i = 0; i < mapnum; ++i) 243 { 244 unsigned char kind = kinds[i] & 0xff; 245 246 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) 247 continue; 248 249 if (kind == GOMP_MAP_FORCE_ALLOC 250 || kind == GOMP_MAP_FORCE_PRESENT 251 || kind == GOMP_MAP_FORCE_TO) 252 { 253 data_enter = true; 254 break; 255 } 256 257 if (kind == GOMP_MAP_FORCE_DEALLOC 258 || kind == GOMP_MAP_FORCE_FROM) 259 break; 260 261 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", 262 kind); 263 } 264 265 if (data_enter) 266 { 267 for (i = 0; i < mapnum; i++) 268 { 269 unsigned char kind = kinds[i] & 0xff; 270 271 /* Scan for PSETs. */ 272 int psets = find_pset (i, mapnum, kinds); 273 274 if (!psets) 275 { 276 switch (kind) 277 { 278 case GOMP_MAP_POINTER: 279 gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i], 280 &kinds[i]); 281 break; 282 case GOMP_MAP_FORCE_ALLOC: 283 acc_create (hostaddrs[i], sizes[i]); 284 break; 285 case GOMP_MAP_FORCE_PRESENT: 286 acc_present_or_copyin (hostaddrs[i], sizes[i]); 287 break; 288 case GOMP_MAP_FORCE_TO: 289 acc_present_or_copyin (hostaddrs[i], sizes[i]); 290 break; 291 default: 292 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", 293 kind); 294 break; 295 } 296 } 297 else 298 { 299 gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]); 300 /* Increment 'i' by two because OpenACC requires fortran 301 arrays to be contiguous, so each PSET is associated with 302 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and 303 one MAP_POINTER. */ 304 i += 2; 305 } 306 } 307 } 308 else 309 for (i = 0; i < mapnum; ++i) 310 { 311 unsigned char kind = kinds[i] & 0xff; 312 313 int psets = find_pset (i, mapnum, kinds); 314 315 if (!psets) 316 { 317 switch (kind) 318 { 319 case GOMP_MAP_POINTER: 320 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) 321 == GOMP_MAP_FORCE_FROM, 322 async, 1); 323 break; 324 case GOMP_MAP_FORCE_DEALLOC: 325 acc_delete (hostaddrs[i], sizes[i]); 326 break; 327 case GOMP_MAP_FORCE_FROM: 328 acc_copyout (hostaddrs[i], sizes[i]); 329 break; 330 default: 331 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", 332 kind); 333 break; 334 } 335 } 336 else 337 { 338 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) 339 == GOMP_MAP_FORCE_FROM, async, 3); 340 /* See the above comment. */ 341 i += 2; 342 } 343 } 344 345 acc_dev->openacc.async_set_async_func (acc_async_sync); 346} 347 348static void 349goacc_wait (int async, int num_waits, va_list ap) 350{ 351 struct goacc_thread *thr = goacc_thread (); 352 struct gomp_device_descr *acc_dev = thr->dev; 353 int i; 354 355 assert (num_waits >= 0); 356 357 if (async == acc_async_sync && num_waits == 0) 358 { 359 acc_wait_all (); 360 return; 361 } 362 363 if (async == acc_async_sync && num_waits) 364 { 365 for (i = 0; i < num_waits; i++) 366 { 367 int qid = va_arg (ap, int); 368 369 if (acc_async_test (qid)) 370 continue; 371 372 acc_wait (qid); 373 } 374 return; 375 } 376 377 if (async == acc_async_noval && num_waits == 0) 378 { 379 acc_dev->openacc.async_wait_all_async_func (acc_async_noval); 380 return; 381 } 382 383 for (i = 0; i < num_waits; i++) 384 { 385 int qid = va_arg (ap, int); 386 387 if (acc_async_test (qid)) 388 continue; 389 390 /* If we're waiting on the same asynchronous queue as we're launching on, 391 the queue itself will order work as required, so there's no need to 392 wait explicitly. */ 393 if (qid != async) 394 acc_dev->openacc.async_wait_async_func (qid, async); 395 } 396} 397 398void 399GOACC_update (int device, size_t mapnum, 400 void **hostaddrs, size_t *sizes, unsigned short *kinds, 401 int async, int num_waits, ...) 402{ 403 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; 404 size_t i; 405 406 goacc_lazy_initialize (); 407 408 struct goacc_thread *thr = goacc_thread (); 409 struct gomp_device_descr *acc_dev = thr->dev; 410 411 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 412 || host_fallback) 413 return; 414 415 if (num_waits > 0) 416 { 417 va_list ap; 418 419 va_start (ap, num_waits); 420 421 goacc_wait (async, num_waits, ap); 422 423 va_end (ap); 424 } 425 426 acc_dev->openacc.async_set_async_func (async); 427 428 for (i = 0; i < mapnum; ++i) 429 { 430 unsigned char kind = kinds[i] & 0xff; 431 432 switch (kind) 433 { 434 case GOMP_MAP_POINTER: 435 case GOMP_MAP_TO_PSET: 436 break; 437 438 case GOMP_MAP_FORCE_TO: 439 acc_update_device (hostaddrs[i], sizes[i]); 440 break; 441 442 case GOMP_MAP_FORCE_FROM: 443 acc_update_self (hostaddrs[i], sizes[i]); 444 break; 445 446 default: 447 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); 448 break; 449 } 450 } 451 452 acc_dev->openacc.async_set_async_func (acc_async_sync); 453} 454 455void 456GOACC_wait (int async, int num_waits, ...) 457{ 458 va_list ap; 459 460 va_start (ap, num_waits); 461 462 goacc_wait (async, num_waits, ap); 463 464 va_end (ap); 465} 466 467int 468GOACC_get_num_threads (void) 469{ 470 return 1; 471} 472 473int 474GOACC_get_thread_num (void) 475{ 476 return 0; 477} 478