1/* OpenACC Runtime initialization routines
2
3   Copyright (C) 2013-2015 Free Software Foundation, Inc.
4
5   Contributed by Mentor Embedded.
6
7   This file is part of the GNU Offloading and Multi Processing Library
8   (libgomp).
9
10   Libgomp is free software; you can redistribute it and/or modify it
11   under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 3, or (at your option)
13   any later version.
14
15   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18   more details.
19
20   Under Section 7 of GPL version 3, you are granted additional
21   permissions described in the GCC Runtime Library Exception, version
22   3.1, as published by the Free Software Foundation.
23
24   You should have received a copy of the GNU General Public License and
25   a copy of the GCC Runtime Library Exception along with this program;
26   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27   <http://www.gnu.org/licenses/>.  */
28
29#include "openacc.h"
30#include "config.h"
31#include "libgomp.h"
32#include "gomp-constants.h"
33#include "oacc-int.h"
34#include "splay-tree.h"
35#include <stdint.h>
36#include <assert.h>
37
38/* Return block containing [H->S), or NULL if not contained.  */
39
40static splay_tree_key
41lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
42{
43  struct splay_tree_key_s node;
44  splay_tree_key key;
45
46  node.host_start = (uintptr_t) h;
47  node.host_end = (uintptr_t) h + s;
48
49  gomp_mutex_lock (&dev->lock);
50  key = splay_tree_lookup (&dev->mem_map, &node);
51  gomp_mutex_unlock (&dev->lock);
52
53  return key;
54}
55
56/* Return block containing [D->S), or NULL if not contained.
57   The list isn't ordered by device address, so we have to iterate
58   over the whole array.  This is not expected to be a common
59   operation.  */
60
61static splay_tree_key
62lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
63{
64  int i;
65  struct target_mem_desc *t;
66
67  if (!tgt)
68    return NULL;
69
70  gomp_mutex_lock (&tgt->device_descr->lock);
71
72  for (t = tgt; t != NULL; t = t->prev)
73    {
74      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
75        break;
76    }
77
78  gomp_mutex_unlock (&tgt->device_descr->lock);
79
80  if (!t)
81    return NULL;
82
83  for (i = 0; i < t->list_count; i++)
84    {
85      void * offset;
86
87      splay_tree_key k = &t->array[i].key;
88      offset = d - t->tgt_start + k->tgt_offset;
89
90      if (k->host_start + offset <= (void *) k->host_end)
91        return k;
92    }
93
94  return NULL;
95}
96
97/* OpenACC is silent on how memory exhaustion is indicated.  We return
98   NULL.  */
99
100void *
101acc_malloc (size_t s)
102{
103  if (!s)
104    return NULL;
105
106  goacc_lazy_initialize ();
107
108  struct goacc_thread *thr = goacc_thread ();
109
110  assert (thr->dev);
111
112  return thr->dev->alloc_func (thr->dev->target_id, s);
113}
114
115/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
116   the device address is mapped. We choose to check if it mapped,
117   and if it is, to unmap it. */
118void
119acc_free (void *d)
120{
121  splay_tree_key k;
122  struct goacc_thread *thr = goacc_thread ();
123
124  if (!d)
125    return;
126
127  assert (thr && thr->dev);
128
129  /* We don't have to call lazy open here, as the ptr value must have
130     been returned by acc_malloc.  It's not permitted to pass NULL in
131     (unless you got that null from acc_malloc).  */
132  if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1)))
133   {
134     void *offset;
135
136     offset = d - k->tgt->tgt_start + k->tgt_offset;
137
138     acc_unmap_data ((void *)(k->host_start + offset));
139   }
140
141  thr->dev->free_func (thr->dev->target_id, d);
142}
143
144void
145acc_memcpy_to_device (void *d, void *h, size_t s)
146{
147  /* No need to call lazy open here, as the device pointer must have
148     been obtained from a routine that did that.  */
149  struct goacc_thread *thr = goacc_thread ();
150
151  assert (thr && thr->dev);
152
153  thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
154}
155
156void
157acc_memcpy_from_device (void *h, void *d, size_t s)
158{
159  /* No need to call lazy open here, as the device pointer must have
160     been obtained from a routine that did that.  */
161  struct goacc_thread *thr = goacc_thread ();
162
163  assert (thr && thr->dev);
164
165  thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
166}
167
168/* Return the device pointer that corresponds to host data H.  Or NULL
169   if no mapping.  */
170
171void *
172acc_deviceptr (void *h)
173{
174  splay_tree_key n;
175  void *d;
176  void *offset;
177
178  goacc_lazy_initialize ();
179
180  struct goacc_thread *thr = goacc_thread ();
181
182  n = lookup_host (thr->dev, h, 1);
183
184  if (!n)
185    return NULL;
186
187  offset = h - n->host_start;
188
189  d = n->tgt->tgt_start + n->tgt_offset + offset;
190
191  return d;
192}
193
194/* Return the host pointer that corresponds to device data D.  Or NULL
195   if no mapping.  */
196
197void *
198acc_hostptr (void *d)
199{
200  splay_tree_key n;
201  void *h;
202  void *offset;
203
204  goacc_lazy_initialize ();
205
206  struct goacc_thread *thr = goacc_thread ();
207
208  n = lookup_dev (thr->dev->openacc.data_environ, d, 1);
209
210  if (!n)
211    return NULL;
212
213  offset = d - n->tgt->tgt_start + n->tgt_offset;
214
215  h = n->host_start + offset;
216
217  return h;
218}
219
220/* Return 1 if host data [H,+S] is present on the device.  */
221
222int
223acc_is_present (void *h, size_t s)
224{
225  splay_tree_key n;
226
227  if (!s || !h)
228    return 0;
229
230  goacc_lazy_initialize ();
231
232  struct goacc_thread *thr = goacc_thread ();
233  struct gomp_device_descr *acc_dev = thr->dev;
234
235  n = lookup_host (acc_dev, h, s);
236
237  if (n && ((uintptr_t)h < n->host_start
238	    || (uintptr_t)h + s > n->host_end
239	    || s > n->host_end - n->host_start))
240    n = NULL;
241
242  return n != NULL;
243}
244
245/* Create a mapping for host [H,+S] -> device [D,+S] */
246
247void
248acc_map_data (void *h, void *d, size_t s)
249{
250  struct target_mem_desc *tgt;
251  size_t mapnum = 1;
252  void *hostaddrs = h;
253  void *devaddrs = d;
254  size_t sizes = s;
255  unsigned short kinds = GOMP_MAP_ALLOC;
256
257  goacc_lazy_initialize ();
258
259  struct goacc_thread *thr = goacc_thread ();
260  struct gomp_device_descr *acc_dev = thr->dev;
261
262  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
263    {
264      if (d != h)
265        gomp_fatal ("cannot map data on shared-memory system");
266
267      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
268    }
269  else
270    {
271      struct goacc_thread *thr = goacc_thread ();
272
273      if (!d || !h || !s)
274	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
275                    (void *)h, (int)s, (void *)d, (int)s);
276
277      if (lookup_host (acc_dev, h, s))
278	gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
279		    (int)s);
280
281      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
282	gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
283		    (int)s);
284
285      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
286			   &kinds, true, false);
287    }
288
289  tgt->prev = acc_dev->openacc.data_environ;
290  acc_dev->openacc.data_environ = tgt;
291}
292
293void
294acc_unmap_data (void *h)
295{
296  struct goacc_thread *thr = goacc_thread ();
297  struct gomp_device_descr *acc_dev = thr->dev;
298
299  /* No need to call lazy open, as the address must have been mapped.  */
300
301  size_t host_size;
302  splay_tree_key n = lookup_host (acc_dev, h, 1);
303  struct target_mem_desc *t;
304
305  if (!n)
306    gomp_fatal ("%p is not a mapped block", (void *)h);
307
308  host_size = n->host_end - n->host_start;
309
310  if (n->host_start != (uintptr_t) h)
311    gomp_fatal ("[%p,%d] surrounds1 %p",
312		(void *) n->host_start, (int) host_size, (void *) h);
313
314  t = n->tgt;
315
316  if (t->refcount == 2)
317    {
318      struct target_mem_desc *tp;
319
320      /* This is the last reference, so pull the descriptor off the
321         chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
322         freeing the device memory. */
323      t->tgt_end = 0;
324      t->to_free = 0;
325
326      gomp_mutex_lock (&acc_dev->lock);
327
328      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
329	   tp = t, t = t->prev)
330	if (n->tgt == t)
331	  {
332	    if (tp)
333	      tp->prev = t->prev;
334	    else
335	      acc_dev->openacc.data_environ = t->prev;
336
337	    break;
338	  }
339
340      gomp_mutex_unlock (&acc_dev->lock);
341    }
342
343  gomp_unmap_vars (t, true);
344}
345
346#define FLAG_PRESENT (1 << 0)
347#define FLAG_CREATE (1 << 1)
348#define FLAG_COPY (1 << 2)
349
350static void *
351present_create_copy (unsigned f, void *h, size_t s)
352{
353  void *d;
354  splay_tree_key n;
355
356  if (!h || !s)
357    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);
358
359  goacc_lazy_initialize ();
360
361  struct goacc_thread *thr = goacc_thread ();
362  struct gomp_device_descr *acc_dev = thr->dev;
363
364  n = lookup_host (acc_dev, h, s);
365  if (n)
366    {
367      /* Present. */
368      d = (void *) (n->tgt->tgt_start + n->tgt_offset);
369
370      if (!(f & FLAG_PRESENT))
371        gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
372            (void *)h, (int)s, (void *)d, (int)s);
373      if ((h + s) > (void *)n->host_end)
374        gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
375    }
376  else if (!(f & FLAG_CREATE))
377    {
378      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
379    }
380  else
381    {
382      struct target_mem_desc *tgt;
383      size_t mapnum = 1;
384      unsigned short kinds;
385      void *hostaddrs = h;
386
387      if (f & FLAG_COPY)
388	kinds = GOMP_MAP_TO;
389      else
390	kinds = GOMP_MAP_ALLOC;
391
392      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
393			   false);
394
395      gomp_mutex_lock (&acc_dev->lock);
396
397      d = tgt->to_free;
398      tgt->prev = acc_dev->openacc.data_environ;
399      acc_dev->openacc.data_environ = tgt;
400
401      gomp_mutex_unlock (&acc_dev->lock);
402    }
403
404  return d;
405}
406
407void *
408acc_create (void *h, size_t s)
409{
410  return present_create_copy (FLAG_CREATE, h, s);
411}
412
413void *
414acc_copyin (void *h, size_t s)
415{
416  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
417}
418
419void *
420acc_present_or_create (void *h, size_t s)
421{
422  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
423}
424
425void *
426acc_present_or_copyin (void *h, size_t s)
427{
428  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
429}
430
431#define FLAG_COPYOUT (1 << 0)
432
433static void
434delete_copyout (unsigned f, void *h, size_t s)
435{
436  size_t host_size;
437  splay_tree_key n;
438  void *d;
439  struct goacc_thread *thr = goacc_thread ();
440  struct gomp_device_descr *acc_dev = thr->dev;
441
442  n = lookup_host (acc_dev, h, s);
443
444  /* No need to call lazy open, as the data must already have been
445     mapped.  */
446
447  if (!n)
448    gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
449
450  d = (void *) (n->tgt->tgt_start + n->tgt_offset
451		+ (uintptr_t) h - n->host_start);
452
453  host_size = n->host_end - n->host_start;
454
455  if (n->host_start != (uintptr_t) h || host_size != s)
456    gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
457		(void *) n->host_start, (int) host_size, (void *) h, (int) s);
458
459  if (f & FLAG_COPYOUT)
460    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
461
462  acc_unmap_data (h);
463
464  acc_dev->free_func (acc_dev->target_id, d);
465}
466
/* Unmap host range [H,+S] and free its device memory without copying
   anything back (delete_copyout with no flags set).  */

void
acc_delete (void *h, size_t s)
{
  const unsigned no_flags = 0;

  delete_copyout (no_flags, h, s);
}
472
473void acc_copyout (void *h, size_t s)
474{
475  delete_copyout (FLAG_COPYOUT, h, s);
476}
477
478static void
479update_dev_host (int is_dev, void *h, size_t s)
480{
481  splay_tree_key n;
482  void *d;
483  struct goacc_thread *thr = goacc_thread ();
484  struct gomp_device_descr *acc_dev = thr->dev;
485
486  n = lookup_host (acc_dev, h, s);
487
488  /* No need to call lazy open, as the data must already have been
489     mapped.  */
490
491  if (!n)
492    gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
493
494  d = (void *) (n->tgt->tgt_start + n->tgt_offset
495		+ (uintptr_t) h - n->host_start);
496
497  if (is_dev)
498    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
499  else
500    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
501}
502
/* Refresh the device copy of mapped host range [H,+S] from host memory
   (update_dev_host in the host-to-device direction).  */

void
acc_update_device (void *h, size_t s)
{
  const int to_device = 1;

  update_dev_host (to_device, h, s);
}
508
/* Refresh host memory for mapped range [H,+S] from the device copy
   (update_dev_host in the device-to-host direction).  */

void
acc_update_self (void *h, size_t s)
{
  const int to_device = 0;

  update_dev_host (to_device, h, s);
}
514
515void
516gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
517			 void *kinds)
518{
519  struct target_mem_desc *tgt;
520  struct goacc_thread *thr = goacc_thread ();
521  struct gomp_device_descr *acc_dev = thr->dev;
522
523  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
524  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
525		       NULL, sizes, kinds, true, false);
526  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
527  tgt->prev = acc_dev->openacc.data_environ;
528  acc_dev->openacc.data_environ = tgt;
529}
530
531void
532gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
533{
534  struct goacc_thread *thr = goacc_thread ();
535  struct gomp_device_descr *acc_dev = thr->dev;
536  splay_tree_key n;
537  struct target_mem_desc *t;
538  int minrefs = (mapnum == 1) ? 2 : 3;
539
540  n = lookup_host (acc_dev, h, 1);
541
542  if (!n)
543    gomp_fatal ("%p is not a mapped block", (void *)h);
544
545  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
546
547  t = n->tgt;
548
549  struct target_mem_desc *tp;
550
551  gomp_mutex_lock (&acc_dev->lock);
552
553  if (t->refcount == minrefs)
554    {
555      /* This is the last reference, so pull the descriptor off the
556	 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
557	 freeing the device memory. */
558      t->tgt_end = 0;
559      t->to_free = 0;
560
561      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
562	   tp = t, t = t->prev)
563	{
564	  if (n->tgt == t)
565	    {
566	      if (tp)
567		tp->prev = t->prev;
568	      else
569		acc_dev->openacc.data_environ = t->prev;
570	      break;
571	    }
572	}
573    }
574
575  if (force_copyfrom)
576    t->list[0]->copy_from = 1;
577
578  gomp_mutex_unlock (&acc_dev->lock);
579
580  /* If running synchronously, unmap immediately.  */
581  if (async < acc_async_noval)
582    gomp_unmap_vars (t, true);
583  else
584    {
585      gomp_copy_from_async (t);
586      acc_dev->openacc.register_async_cleanup_func (t);
587    }
588
589  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
590}
591