1/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2
3   Contributed by Mentor Embedded.
4
5   This file is part of the GNU Offloading and Multi Processing Library
6   (libgomp).
7
8   Libgomp is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 3, or (at your option)
11   any later version.
12
13   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16   more details.
17
18   Under Section 7 of GPL version 3, you are granted additional
19   permissions described in the GCC Runtime Library Exception, version
20   3.1, as published by the Free Software Foundation.
21
22   You should have received a copy of the GNU General Public License and
23   a copy of the GCC Runtime Library Exception along with this program;
24   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
25   <http://www.gnu.org/licenses/>.  */
26
27/* This file handles OpenACC constructs.  */
28
29#include "openacc.h"
30#include "libgomp.h"
31#include "libgomp_g.h"
32#include "gomp-constants.h"
33#include "oacc-int.h"
34#ifdef HAVE_INTTYPES_H
35# include <inttypes.h>  /* For PRIu64.  */
36#endif
37#include <string.h>
38#include <stdarg.h>
39#include <assert.h>
40
41static int
42find_pset (int pos, size_t mapnum, unsigned short *kinds)
43{
44  if (pos + 1 >= mapnum)
45    return 0;
46
47  unsigned char kind = kinds[pos+1] & 0xff;
48
49  return kind == GOMP_MAP_TO_PSET;
50}
51
52static void goacc_wait (int async, int num_waits, va_list ap);
53
54void
55GOACC_parallel (int device, void (*fn) (void *),
56		size_t mapnum, void **hostaddrs, size_t *sizes,
57		unsigned short *kinds,
58		int num_gangs, int num_workers, int vector_length,
59		int async, int num_waits, ...)
60{
61  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
62  va_list ap;
63  struct goacc_thread *thr;
64  struct gomp_device_descr *acc_dev;
65  struct target_mem_desc *tgt;
66  void **devaddrs;
67  unsigned int i;
68  struct splay_tree_key_s k;
69  splay_tree_key tgt_fn_key;
70  void (*tgt_fn);
71
72  if (num_gangs != 1)
73    gomp_fatal ("num_gangs (%d) different from one is not yet supported",
74		num_gangs);
75  if (num_workers != 1)
76    gomp_fatal ("num_workers (%d) different from one is not yet supported",
77		num_workers);
78
79#ifdef HAVE_INTTYPES_H
80  gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p, "
81		 "async = %d\n",
82	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds, async);
83#else
84  gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
85	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds,
86	      async);
87#endif
88  goacc_lazy_initialize ();
89
90  thr = goacc_thread ();
91  acc_dev = thr->dev;
92
93  /* Host fallback if "if" clause is false or if the current device is set to
94     the host.  */
95  if (host_fallback)
96    {
97      goacc_save_and_set_bind (acc_device_host);
98      fn (hostaddrs);
99      goacc_restore_bind ();
100      return;
101    }
102  else if (acc_device_type (acc_dev->type) == acc_device_host)
103    {
104      fn (hostaddrs);
105      return;
106    }
107
108  va_start (ap, num_waits);
109
110  if (num_waits > 0)
111    goacc_wait (async, num_waits, ap);
112
113  va_end (ap);
114
115  acc_dev->openacc.async_set_async_func (async);
116
117  if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
118    {
119      k.host_start = (uintptr_t) fn;
120      k.host_end = k.host_start + 1;
121      gomp_mutex_lock (&acc_dev->lock);
122      tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
123      gomp_mutex_unlock (&acc_dev->lock);
124
125      if (tgt_fn_key == NULL)
126	gomp_fatal ("target function wasn't mapped");
127
128      tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
129    }
130  else
131    tgt_fn = (void (*)) fn;
132
133  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
134		       false);
135
136  devaddrs = gomp_alloca (sizeof (void *) * mapnum);
137  for (i = 0; i < mapnum; i++)
138    devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
139			    + tgt->list[i]->tgt_offset);
140
141  acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
142			      num_gangs, num_workers, vector_length, async,
143			      tgt);
144
145  /* If running synchronously, unmap immediately.  */
146  if (async < acc_async_noval)
147    gomp_unmap_vars (tgt, true);
148  else
149    {
150      gomp_copy_from_async (tgt);
151      acc_dev->openacc.register_async_cleanup_func (tgt);
152    }
153
154  acc_dev->openacc.async_set_async_func (acc_async_sync);
155}
156
157void
158GOACC_data_start (int device, size_t mapnum,
159		  void **hostaddrs, size_t *sizes, unsigned short *kinds)
160{
161  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
162  struct target_mem_desc *tgt;
163
164#ifdef HAVE_INTTYPES_H
165  gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
166	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
167#else
168  gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
169	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
170#endif
171
172  goacc_lazy_initialize ();
173
174  struct goacc_thread *thr = goacc_thread ();
175  struct gomp_device_descr *acc_dev = thr->dev;
176
177  /* Host fallback or 'do nothing'.  */
178  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
179      || host_fallback)
180    {
181      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
182      tgt->prev = thr->mapped_data;
183      thr->mapped_data = tgt;
184
185      return;
186    }
187
188  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
189  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
190		       false);
191  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
192  tgt->prev = thr->mapped_data;
193  thr->mapped_data = tgt;
194}
195
196void
197GOACC_data_end (void)
198{
199  struct goacc_thread *thr = goacc_thread ();
200  struct target_mem_desc *tgt = thr->mapped_data;
201
202  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
203  thr->mapped_data = tgt->prev;
204  gomp_unmap_vars (tgt, true);
205  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
206}
207
208void
209GOACC_enter_exit_data (int device, size_t mapnum,
210		       void **hostaddrs, size_t *sizes, unsigned short *kinds,
211		       int async, int num_waits, ...)
212{
213  struct goacc_thread *thr;
214  struct gomp_device_descr *acc_dev;
215  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
216  bool data_enter = false;
217  size_t i;
218
219  goacc_lazy_initialize ();
220
221  thr = goacc_thread ();
222  acc_dev = thr->dev;
223
224  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
225      || host_fallback)
226    return;
227
228  if (num_waits > 0)
229    {
230      va_list ap;
231
232      va_start (ap, num_waits);
233
234      goacc_wait (async, num_waits, ap);
235
236      va_end (ap);
237    }
238
239  acc_dev->openacc.async_set_async_func (async);
240
241  /* Determine if this is an "acc enter data".  */
242  for (i = 0; i < mapnum; ++i)
243    {
244      unsigned char kind = kinds[i] & 0xff;
245
246      if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
247	continue;
248
249      if (kind == GOMP_MAP_FORCE_ALLOC
250	  || kind == GOMP_MAP_FORCE_PRESENT
251	  || kind == GOMP_MAP_FORCE_TO)
252	{
253	  data_enter = true;
254	  break;
255	}
256
257      if (kind == GOMP_MAP_FORCE_DEALLOC
258	  || kind == GOMP_MAP_FORCE_FROM)
259	break;
260
261      gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
262		      kind);
263    }
264
265  if (data_enter)
266    {
267      for (i = 0; i < mapnum; i++)
268	{
269	  unsigned char kind = kinds[i] & 0xff;
270
271	  /* Scan for PSETs.  */
272	  int psets = find_pset (i, mapnum, kinds);
273
274	  if (!psets)
275	    {
276	      switch (kind)
277		{
278		case GOMP_MAP_POINTER:
279		  gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
280					&kinds[i]);
281		  break;
282		case GOMP_MAP_FORCE_ALLOC:
283		  acc_create (hostaddrs[i], sizes[i]);
284		  break;
285		case GOMP_MAP_FORCE_PRESENT:
286		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
287		  break;
288		case GOMP_MAP_FORCE_TO:
289		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
290		  break;
291		default:
292		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
293			      kind);
294		  break;
295		}
296	    }
297	  else
298	    {
299	      gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
300	      /* Increment 'i' by two because OpenACC requires fortran
301		 arrays to be contiguous, so each PSET is associated with
302		 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
303		 one MAP_POINTER.  */
304	      i += 2;
305	    }
306	}
307    }
308  else
309    for (i = 0; i < mapnum; ++i)
310      {
311	unsigned char kind = kinds[i] & 0xff;
312
313	int psets = find_pset (i, mapnum, kinds);
314
315	if (!psets)
316	  {
317	    switch (kind)
318	      {
319	      case GOMP_MAP_POINTER:
320		gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
321					 == GOMP_MAP_FORCE_FROM,
322					 async, 1);
323		break;
324	      case GOMP_MAP_FORCE_DEALLOC:
325		acc_delete (hostaddrs[i], sizes[i]);
326		break;
327	      case GOMP_MAP_FORCE_FROM:
328		acc_copyout (hostaddrs[i], sizes[i]);
329		break;
330	      default:
331		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
332			    kind);
333		break;
334	      }
335	  }
336	else
337	  {
338	    gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
339				     == GOMP_MAP_FORCE_FROM, async, 3);
340	    /* See the above comment.  */
341	    i += 2;
342	  }
343      }
344
345  acc_dev->openacc.async_set_async_func (acc_async_sync);
346}
347
348static void
349goacc_wait (int async, int num_waits, va_list ap)
350{
351  struct goacc_thread *thr = goacc_thread ();
352  struct gomp_device_descr *acc_dev = thr->dev;
353  int i;
354
355  assert (num_waits >= 0);
356
357  if (async == acc_async_sync && num_waits == 0)
358    {
359      acc_wait_all ();
360      return;
361    }
362
363  if (async == acc_async_sync && num_waits)
364    {
365      for (i = 0; i < num_waits; i++)
366        {
367          int qid = va_arg (ap, int);
368
369          if (acc_async_test (qid))
370            continue;
371
372          acc_wait (qid);
373        }
374      return;
375    }
376
377  if (async == acc_async_noval && num_waits == 0)
378    {
379      acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
380      return;
381    }
382
383  for (i = 0; i < num_waits; i++)
384    {
385      int qid = va_arg (ap, int);
386
387      if (acc_async_test (qid))
388	continue;
389
390      /* If we're waiting on the same asynchronous queue as we're launching on,
391         the queue itself will order work as required, so there's no need to
392	 wait explicitly.  */
393      if (qid != async)
394	acc_dev->openacc.async_wait_async_func (qid, async);
395    }
396}
397
398void
399GOACC_update (int device, size_t mapnum,
400	      void **hostaddrs, size_t *sizes, unsigned short *kinds,
401	      int async, int num_waits, ...)
402{
403  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
404  size_t i;
405
406  goacc_lazy_initialize ();
407
408  struct goacc_thread *thr = goacc_thread ();
409  struct gomp_device_descr *acc_dev = thr->dev;
410
411  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
412      || host_fallback)
413    return;
414
415  if (num_waits > 0)
416    {
417      va_list ap;
418
419      va_start (ap, num_waits);
420
421      goacc_wait (async, num_waits, ap);
422
423      va_end (ap);
424    }
425
426  acc_dev->openacc.async_set_async_func (async);
427
428  for (i = 0; i < mapnum; ++i)
429    {
430      unsigned char kind = kinds[i] & 0xff;
431
432      switch (kind)
433	{
434	case GOMP_MAP_POINTER:
435	case GOMP_MAP_TO_PSET:
436	  break;
437
438	case GOMP_MAP_FORCE_TO:
439	  acc_update_device (hostaddrs[i], sizes[i]);
440	  break;
441
442	case GOMP_MAP_FORCE_FROM:
443	  acc_update_self (hostaddrs[i], sizes[i]);
444	  break;
445
446	default:
447	  gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
448	  break;
449	}
450    }
451
452  acc_dev->openacc.async_set_async_func (acc_async_sync);
453}
454
/* Entry point for a "wait" construct: gather the NUM_WAITS variadic
   arguments and pass them, together with ASYNC, to goacc_wait.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list waits;

  va_start (waits, num_waits);
  goacc_wait (async, num_waits, waits);
  va_end (waits);
}
466
/* Return the number of threads; this implementation always reports
   one.  */

int
GOACC_get_num_threads (void)
{
  const int thread_count = 1;

  return thread_count;
}
472
/* Return the thread number; this implementation always reports zero.  */

int
GOACC_get_thread_num (void)
{
  const int thread_index = 0;

  return thread_index;
}
478