/* Copyright (C) 2013-2020 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}


static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
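
/* An illustrative sketch (not part of libgomp) of user code that exercises
   the zero-length path in the two helpers above: "p[0:0]" is a zero-length
   array section, so the lookup key arrives with host_start == host_end.
   If p[0:n] is already mapped as below, gomp_map_lookup also accepts a
   mapping that merely ends at the key's address, whereas
   gomp_map_0len_lookup only accepts one extending past it.  */
#if 0
void
zero_len_example (int *p, size_t n)
{
  #pragma omp target enter data map(to: p[0:n])
  /* Zero-length section: reuses the mapping of p[0:n] instead of
     creating a new one.  */
  #pragma omp target map(tofrom: p[0:0])
  p[0] += 1;
}
#endif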

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

/* Add a region with device tgt_start relative offset START and length LEN
   to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
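
/* A hypothetical trace of the chunk merging above (made-up offsets; this
   block is illustration only and is not compiled):  */
#if 0
static void
cbuf_example (void)
{
  struct gomp_coalesce_chunk chunks[4];
  struct gomp_coalesce_buf cb = { NULL, NULL, chunks, 0, 0 };
  gomp_coalesce_buf_add (&cb, 0, 4096);    /* chunks[0] = [0, 4096)  */
  gomp_coalesce_buf_add (&cb, 4160, 4096); /* gap of 64 <= MAX_COALESCE_BUF_GAP,
					      so chunks[0] grows to [0, 8256)
					      and use_cnt becomes 2  */
  gomp_coalesce_buf_add (&cb, 40960, 100); /* gap too large: new chunk
					      chunks[1] = [40960, 41060)  */
}
#endif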

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}
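
/* For instance (an illustrative user-side sketch, not part of libgomp):
   "to" answers true above and its data is copied in, while "alloc" only
   reserves device storage.  */
#if 0
void
kind_example (int *a, int *b, size_t n)
{
  #pragma omp target enter data map(alloc: a[0:n]) /* device storage only  */
  #pragma omp target enter data map(to: b[0:n])	   /* storage + host2dev copy  */
}
#endif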

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			     "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  assert (kind != GOMP_MAP_ATTACH);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep, aq,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
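
/* The kinds array encodes both the map kind and the alignment of each
   mapping.  With short_mapkind (OpenMP), each element is an unsigned short
   whose low 8 bits are the GOMP_MAP_* kind and whose high 8 bits are the
   base-2 logarithm of the alignment; with char kinds the split is 3/5
   instead (see rshift and typemask in gomp_map_vars_internal below).  As a
   worked example, a hypothetical short kind of ((3 << 8) | GOMP_MAP_TO)
   decodes to kind GOMP_MAP_TO with alignment 1 << 3 == 8 bytes.  */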

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
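
/* A worked example of the bias (made-up numbers; how the compiler chooses
   the bias is an assumption here): for an array section such as "p[16:8]"
   the pointer P points 16 elements before the start of the mapped section,
   so the attendant GOMP_MAP_POINTER entry carries
   BIAS == 16 * sizeof (*p).  Adding the bias makes the lookup key land
   inside the mapped range; subtracting it again afterwards stores a device
   pointer value for which p[16] on the device resolves to the first mapped
   element.  */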

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, cbuf);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("pointer target not mapped for attach");
	}

      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}
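
/* An illustrative user-side sketch of attachment (assumes OpenACC; not
   part of libgomp): once both the struct and the pointed-to block are
   present, "attach" makes the device copy of n->data point at the device
   copy of the block via the single pointer-sized host2dev copy above.  */
#if 0
struct node { int *data; size_t len; };

void
attach_example (struct node *n)
{
  #pragma acc enter data copyin(n[0:1])
  #pragma acc enter data copyin(n->data[0:n->len])
  #pragma acc enter data attach(n->data)
}
#endif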

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}

attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
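
/* For example (illustrative): for a plain "map(to: x)" entry gomp_map_val
   yields the device address of x; for a firstprivate integer recorded with
   OFFSET_INLINED it yields the host value itself, which is passed to the
   kernel verbatim; for OFFSET_STRUCT it derives the member's device
   address from the following list entry.  */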

static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n != NULL)
		{
		  cur_node.host_start -= n->host_start;
		  hostaddrs[i]
		    = (void *) (n->tgt->tgt_start + n->tgt_offset
				+ cur_node.host_start);
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		/* If not present, continue using the host address.  */
		;
	      else
		__builtin_unreachable ();
	      tgt->list[i].offset = OFFSET_INLINED;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
					     & typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ATTACH)
	{
	  tgt->list[i].key = NULL;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  gomp_coalesce_buf_add (&cbuf, tgt_size,
				 cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_POINTER;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				kind & typemask, NULL);
      else
	{
	  tgt->list[i].key = NULL;

	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
	    {
	      /* Not present, hence, skip entry - including its MAP_POINTER,
		 when existing.  */
	      tgt->list[i].offset = OFFSET_POINTER;
	      if (i + 1 < mapnum
		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
		      == GOMP_MAP_POINTER))
		{
		  ++i;
		  tgt->list[i].key = NULL;
		  tgt->list[i].offset = 0;
		}
	      continue;
	    }
	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
		/* The OpenACC 'host_data' construct only allows 'use_device'
		   "mapping" clauses, so in the first loop, 'not_found_cnt'
		   must always have been zero, so all OpenACC 'use_device'
		   clauses have already been handled.  (We can only easily test
		   'use_device' with 'if_present' clause here.)  */
		assert (tgt->list[i].offset == OFFSET_INLINED);
		/* Nevertheless, FALLTHRU to the normal handling, to keep the
		   code conceptually simple, similar to the first loop.  */
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n != NULL)
		      {
			cur_node.host_start -= n->host_start;
			hostaddrs[i]
			  = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start);
		      }
		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    else if ((kind & typemask)
			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		      /* If not present, continue using the host address.  */
		      ;
		    else
		      __builtin_unreachable ();
		    tgt->list[i].offset = OFFSET_INLINED;
		  }
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      case GOMP_MAP_IF_PRESENT:
		/* Not present - otherwise handled above. Skip over its
		   MAP_POINTER as well.  */
		if (i + 1 < mapnum
		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
			== GOMP_MAP_POINTER))
		  ++i;
		continue;
	      case GOMP_MAP_ATTACH:
		{
		  cur_node.host_start = (uintptr_t) hostaddrs[i];
		  cur_node.host_end = cur_node.host_start + sizeof (void *);
		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
		  if (n != NULL)
		    {
		      tgt->list[i].key = n;
		      tgt->list[i].offset = cur_node.host_start - n->host_start;
		      tgt->list[i].length = n->host_end - n->host_start;
		      tgt->list[i].copy_from = false;
		      tgt->list[i].always_copy_from = false;
		      tgt->list[i].is_attach = true;
		      /* OpenACC 'attach'/'detach' doesn't affect
			 structured/dynamic reference counts ('n->refcount',
			 'n->dynamic_refcount').  */
		    }
		  else
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("outer struct not mapped for attach");
		    }
		  gomp_attach_pointer (devicep, aq, mem_map, n,
				       (uintptr_t) hostaddrs[i], sizes[i],
				       cbufp);
		  continue;
		}
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, cbufp);
	    else
	      {
		k->aux = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->aux
		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
		    k->aux->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != FIELD_TGT_EMPTY)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = FIELD_TGT_EMPTY;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].is_attach = false;
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->dynamic_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, aq,
				      (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  tgt->list[j].is_attach = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt, aq,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j], cbufp);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->aux && k->aux->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), NULL);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      cbufp);
	}
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
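
/* An illustrative sketch (not part of libgomp) of what reaches
   gomp_map_vars_internal: for the construct below the compiler emits,
   roughly (ignoring any attendant pointer mappings for A and B
   themselves), hostaddrs pointing at the two sections, sizes of
   n * sizeof (int) each, kinds encoding GOMP_MAP_TO and GOMP_MAP_FROM,
   and pragma_kind == GOMP_MAP_VARS_TARGET, so the first
   mapnum * sizeof (void *) bytes of the device block are filled with the
   device addresses computed by gomp_map_val for the kernel's argument
   array.  */
#if 0
void
map_vars_example (int *a, int *b, size_t n)
{
  #pragma omp target map(to: a[0:n]) map(from: b[0:n])
  for (size_t i = 0; i < n; i++)
    b[i] = 2 * a[i];
}
#endif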

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, size_t mapnum,
		     void **hostaddrs, void **devaddrs, size_t *sizes,
		     void *kinds, bool short_mapkind,
		     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->aux)
    {
      if (k->aux->link_key)
	splay_tree_insert (&devicep->mem_map,
			   (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
	free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
					     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
		       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}
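
/* For instance (illustrative): on exit from the structured data region
   below, gomp_unmap_vars runs with DO_COPYFROM true; the "from" entry has
   copy_from set, so its data is copied back before the reference counts
   drop and, once they reach zero, the device block is freed via
   gomp_unmap_tgt.  */
#if 0
void
unmap_example (int *b, size_t n)
{
  #pragma omp target data map(from: b[0:n])
  {
    #pragma omp target
    b[0] = 42;
  }
  /* Here b[0:n] has been copied back and unmapped.  */
}
#endif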

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				  NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
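
/* An illustrative sketch (not part of libgomp): "target update" lowers to
   gomp_update; "to" motion clauses satisfy GOMP_MAP_COPY_TO_P and "from"
   clauses GOMP_MAP_COPY_FROM_P, refreshing one side without touching any
   reference counts.  */
#if 0
void
update_example (int *a, size_t n)
{
  #pragma omp target enter data map(to: a[0:n])
  a[0] = 1;				  /* host-side change ...     */
  #pragma omp target update to(a[0:n])	  /* ... pushed to the device */
  #pragma omp target update from(a[0:n])  /* device copy pulled back  */
}
#endif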

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mappings between addresses
   from HOST_TABLE and the loaded target image.  We rely on the host and
   device compilers emitting variables and functions in the same order.  */


static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;
      bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];

      if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
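
/* An illustrative sketch of the HOST_TABLE layout consumed above (the
   exact defining symbols are an assumption; only the shape is implied by
   the code):

     host_table[0] -> first host function address
     host_table[1] -> one past the last host function address
     host_table[2] -> first (address, size) pair describing a variable
     host_table[3] -> one past the last variable pair

   Each variable thus occupies two consecutive void * slots, which is why
   num_vars halves the table span and host_var_table is indexed with
   i * 2 and i * 2 + 1.  */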

/* Unload the mappings described by TARGET_DATA from the device DEVICEP.
   The device lock must be held on entry.  */

1669static void
1670gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1671			       unsigned version,
1672			       const void *host_table, const void *target_data)
1673{
1674  void **host_func_table = ((void ***) host_table)[0];
1675  void **host_funcs_end  = ((void ***) host_table)[1];
1676  void **host_var_table  = ((void ***) host_table)[2];
1677  void **host_vars_end   = ((void ***) host_table)[3];
1678
1679  /* The func table contains only addresses, the var table contains addresses
1680     and corresponding sizes.  */
1681  int num_funcs = host_funcs_end - host_func_table;
1682  int num_vars  = (host_vars_end - host_var_table) / 2;
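
  /* A sketch of the layout assumed above (the symbol names are
     illustrative, not what the compiler actually emits):

	 void *host_table[] = { func_table, func_table_end,
				var_table, var_table_end };

     where the func table is a plain array of host function addresses and
     the var table interleaves each variable's address with its size:
     { &var0, (void *) sizeof_var0, &var1, ... }.  */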

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload failed");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image into array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load all images registered so far to the device.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload all currently registered images from the device.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}
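
/* Worked example for the routine above (illustrative numbers): for two
   firstprivate entries with (align, size) of (8, 4) and (16, 32),
   starting from *tgt_size == 0, entry 0 rounds 0 up to 0 and adds 4
   (-> 4), entry 1 rounds 4 up to 16 and adds 32 (-> 48), and *tgt_align
   ends up 16.  The caller then allocates tgt_size + tgt_align - 1 bytes
   so that copy_firstprivate_data below can align the buffer start
   itself.  */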

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE into the buffer TGT.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}

/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects, sizes of the
   host objects (or, for entries of pointer kind, the pointer bias, with
   the size assumed to be sizeof (void *)) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}

static inline unsigned int
clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
{
  /* If we cannot run asynchronously, simply ignore nowait.  */
  if (devicep != NULL && devicep->async_run_func == NULL)
    flags &= ~GOMP_TARGET_FLAG_NOWAIT;

  return flags;
}

/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is an array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element holds the actual value.  The
   last element of the array is a single NULL.  The device-independent
   arguments include for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if the teams construct is not present, or 0 if the
   teams construct does not have a num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0 if the teams construct does not have a
   thread_limit clause or the teams construct is not present, or -1 if it
   can't be determined on the host what value GOMP_teams will have on the
   device.  */
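
/* A sketch of one possible ARGS encoding under the description above,
   using the GOMP_TARGET_ARG_* constants from gomp-constants.h (the
   concrete value 4 is illustrative): num_teams = 4 for all devices fits
   into a single element,

       void *args[]
	 = { (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
				   | GOMP_TARGET_ARG_NUM_TEAMS
				   | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)),
	     NULL };

   whereas a value too wide for the identifier element would set
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM and carry the value in the following
   element.  */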

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task **implicit_task = &task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  while (*implicit_task
		 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
	    implicit_task = &(*implicit_task)->parent;
	  if (*implicit_task)
	    {
	      thr->task = *implicit_task;
	      gomp_end_task ();
	      free (*implicit_task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	  if (implicit_task != &task)
	    {
	      *implicit_task = thr->task;
	      thr->task = task;
	    }
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
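
/* For reference, a hedged sketch of user code that a compiler may lower
   to a GOMP_target_ext call roughly like the one handled above:

       #pragma omp target map(tofrom: x) nowait depend(inout: x)
       x += 1;

   FN would be the outlined body, HOSTADDRS/SIZES/KINDS would describe
   the mapping of x, GOMP_TARGET_FLAG_NOWAIT would be set in FLAGS, and
   DEPEND would carry the dependence on x.  The exact lowering is up to
   the compiler.  */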

/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, we need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    gomp_remove_var (devicep, k);

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  /* The variables are mapped separately such that they can be released
     independently.  */
  size_t i, j;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
	{
	  for (j = i + 1; j < mapnum; j++)
	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff))
	      break;
	  gomp_map_vars (devicep, j - i, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += j - i - 1;
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      assert (devicep->async_run_func);
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}

void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
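
/* Hedged usage sketch for omp_target_alloc and omp_target_free above
   (DEV, N and the error handling are illustrative; omp_get_default_device
   is from <omp.h>):

       int dev = omp_get_default_device ();
       double *p = (double *) omp_target_alloc (n * sizeof (double), dev);
       if (p == NULL)
	 return;	// no such device, or the allocation failed
       ...
       omp_target_free (p, dev);  */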

int
omp_target_is_present (const void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}
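
/* Hedged usage sketch for omp_target_memcpy above, copying LEN bytes
   from a host buffer into the device allocation P from the previous
   sketch (assuming omp_get_initial_device () designates the host):

       int err = omp_target_memcpy (p, host_buf, len, 0, 0,
				    dev, omp_get_initial_device ());
       if (err != 0)
	 ...		// EINVAL: bad device number or failed copy
   */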

static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-to-host
     copies the generic recursion should be used even for any
     num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
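
/* Worked example for the recursion above (illustrative numbers):
   copying a 2x3 block of 4-byte elements from a 10x10 source into an
   8x8 destination gives src_slice = 4 * 10 = 40 and dst_slice
   = 4 * 8 = 32 bytes per row, so the loop performs volume[0] == 2
   one-dimensional copies of 3 * 4 = 12 bytes each, advancing the
   offsets by one slice per iteration.  */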

int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
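
/* Hedged usage sketch for omp_target_memcpy_rect above: copy a 2x3
   float submatrix out of a 10x10 host matrix into an 8x8 device matrix
   at offset (1, 1) (all names and numbers illustrative):

       size_t volume[] = { 2, 3 };
       size_t dst_off[] = { 1, 1 }, src_off[] = { 0, 0 };
       size_t dst_dim[] = { 8, 8 }, src_dim[] = { 10, 10 };
       int err = omp_target_memcpy_rect (dst, src, sizeof (float), 2,
					 volume, dst_off, src_off,
					 dst_dim, src_dim, dev,
					 omp_get_initial_device ());  */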

int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
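
/* Hedged usage sketch for the two routines above: publish an existing
   device allocation DEV_PTR as the device counterpart of HOST_OBJ, then
   undo the association (names illustrative):

       if (omp_target_associate_ptr (&host_obj, dev_ptr, sizeof host_obj,
				     0, dev) == 0)
	 {
	   ...	// target regions on DEV now use dev_ptr for host_obj
	   omp_target_disassociate_ptr (&host_obj, dev);
	 }
   */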

int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return gomp_pause_host ();
  if (device_num < 0 || device_num >= gomp_get_num_devices ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  The name of the plugin
   is passed in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handles.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but a missing function is not an error.  Evaluates to false
     if the lookup failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))
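
/* For example, DLSYM (alloc) expands (apart from the trailing goto) to

       device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc");

   so every plugin exports its entry points under the fixed
   "GOMP_OFFLOAD_" prefix.  */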

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}

/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}

/* This function initializes the runtime for offloading.
   It parses the list of offload plugins, and tries to load them.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devices = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
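
	/* E.g., with cur pointing at "nvptx" this builds
	   "libgomp-plugin-nvptx" followed by SONAME_SUFFIX (1), i.e.
	   something like "libgomp-plugin-nvptx.so.1" on typical ELF
	   targets (the suffix is configuration-dependent).  */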

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    new_num_devices = current_device.get_num_devices_func ();
	    if (new_num_devices >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devices = realloc (devices, (num_devices + new_num_devices)
				   * sizeof (struct gomp_device_descr));
		if (!devices)
		  {
		    num_devices = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		for (i = 0; i < new_num_devices; i++)
		  {
		    current_device.target_id = i;
		    devices[num_devices] = current_device;
		    gomp_mutex_init (&devices[num_devices].lock);
		    num_devices++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devices[i]);
    }

  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}

#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   The GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */
