1/* OpenACC Runtime initialization routines
2
3   Copyright (C) 2013-2015 Free Software Foundation, Inc.
4
5   Contributed by Mentor Embedded.
6
7   This file is part of the GNU Offloading and Multi Processing Library
8   (libgomp).
9
10   Libgomp is free software; you can redistribute it and/or modify it
11   under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 3, or (at your option)
13   any later version.
14
15   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18   more details.
19
20   Under Section 7 of GPL version 3, you are granted additional
21   permissions described in the GCC Runtime Library Exception, version
22   3.1, as published by the Free Software Foundation.
23
24   You should have received a copy of the GNU General Public License and
25   a copy of the GCC Runtime Library Exception along with this program;
26   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27   <http://www.gnu.org/licenses/>.  */
28
29#include "libgomp.h"
30#include "oacc-int.h"
31#include "openacc.h"
32#include "plugin/plugin-host.h"
33#include <assert.h>
34#include <stdlib.h>
35#include <strings.h>
36#include <stdbool.h>
37#include <string.h>
38
/* Held while a device type is registered (goacc_register) and by acc_init,
   acc_shutdown, acc_set_device_type and acc_set_device_num while they
   initialise or shut down devices and update cached_base_dev.  */
static gomp_mutex_t acc_device_lock;

/* A cached version of the dispatcher for the global "current" accelerator type,
   e.g. used as the default when creating new host threads.  This is the
   device-type equivalent of goacc_device_num (which specifies which device to
   use out of potentially several of the same type).  If there are several
   devices of a given type, this points at the first one.  */

static struct gomp_device_descr *cached_base_dev = NULL;

/* Per-host-thread OpenACC state: a thread-local pointer when native or
   emulated TLS is available, otherwise a pthread key.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct goacc_thread *goacc_tls_data;
#else
pthread_key_t goacc_tls_key;
#endif
/* Key created with goacc_destroy_thread as its destructor (see
   goacc_runtime_initialize); its value is set to the thread block in
   goacc_new_thread.  */
static pthread_key_t goacc_cleanup_key;

/* Head of the singly-linked list (via the "next" field) of all thread blocks
   created by goacc_new_thread.  The list is modified and walked with
   goacc_thread_lock held.  */
static struct goacc_thread *goacc_threads;
static gomp_mutex_t goacc_thread_lock;

/* An array of dispatchers for device types, indexed by the type.  This array
   only references "base" devices, and other instances of the same type are
   found by simply indexing from each such device (which are stored linearly,
   grouped by device in target.c:devices).  */
static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 };
64
65attribute_hidden void
66goacc_register (struct gomp_device_descr *disp)
67{
68  /* Only register the 0th device here.  */
69  if (disp->target_id != 0)
70    return;
71
72  gomp_mutex_lock (&acc_device_lock);
73
74  assert (acc_device_type (disp->type) != acc_device_none
75	  && acc_device_type (disp->type) != acc_device_default
76	  && acc_device_type (disp->type) != acc_device_not_host);
77  assert (!dispatchers[disp->type]);
78  dispatchers[disp->type] = disp;
79
80  gomp_mutex_unlock (&acc_device_lock);
81}
82
/* Translate a plugin name into the spelling OpenACC uses for it: "nvptx" is
   reported as "nvidia"; any other NAME is returned unchanged (the very same
   pointer that was passed in).  */

static const char *
get_openacc_name (const char *name)
{
  return strcmp (name, "nvptx") != 0 ? name : "nvidia";
}
93
94static const char *
95name_of_acc_device_t (enum acc_device_t type)
96{
97  switch (type)
98    {
99    case acc_device_none: return "none";
100    case acc_device_default: return "default";
101    case acc_device_host: return "host";
102    case acc_device_host_nonshm: return "host_nonshm";
103    case acc_device_not_host: return "not_host";
104    case acc_device_nvidia: return "nvidia";
105    default: gomp_fatal ("unknown device type %u", (unsigned) type);
106    }
107}
108
109static struct gomp_device_descr *
110resolve_device (acc_device_t d)
111{
112  acc_device_t d_arg = d;
113
114  switch (d)
115    {
116    case acc_device_default:
117      {
118	if (goacc_device_type)
119	  {
120	    /* Lookup the named device.  */
121	    while (++d != _ACC_device_hwm)
122	      if (dispatchers[d]
123		  && !strcasecmp (goacc_device_type,
124				  get_openacc_name (dispatchers[d]->name))
125		  && dispatchers[d]->get_num_devices_func () > 0)
126		goto found;
127
128	    gomp_fatal ("device type %s not supported", goacc_device_type);
129	  }
130
131	/* No default device specified, so start scanning for any non-host
132	   device that is available.  */
133	d = acc_device_not_host;
134      }
135      /* FALLTHROUGH */
136
137    case acc_device_not_host:
138      /* Find the first available device after acc_device_not_host.  */
139      while (++d != _ACC_device_hwm)
140	if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0)
141	  goto found;
142      if (d_arg == acc_device_default)
143	{
144	  d = acc_device_host;
145	  goto found;
146	}
147      gomp_fatal ("no device found");
148      break;
149
150    case acc_device_host:
151      break;
152
153    default:
154      if (d > _ACC_device_hwm)
155	gomp_fatal ("device %u out of range", (unsigned)d);
156      break;
157    }
158 found:
159
160  assert (d != acc_device_none
161	  && d != acc_device_default
162	  && d != acc_device_not_host);
163
164  return dispatchers[d];
165}
166
167/* This is called when plugins have been initialized, and serves to call
168   (indirectly) the target's device_init hook.  Calling multiple times without
169   an intervening acc_shutdown_1 call is an error.  */
170
171static struct gomp_device_descr *
172acc_init_1 (acc_device_t d)
173{
174  struct gomp_device_descr *base_dev, *acc_dev;
175  int ndevs;
176
177  base_dev = resolve_device (d);
178
179  ndevs = base_dev->get_num_devices_func ();
180
181  if (!base_dev || ndevs <= 0 || goacc_device_num >= ndevs)
182    gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
183
184  acc_dev = &base_dev[goacc_device_num];
185
186  if (acc_dev->is_initialized)
187    gomp_fatal ("device already active");
188
189  gomp_init_device (acc_dev);
190
191  return base_dev;
192}
193
194static void
195acc_shutdown_1 (acc_device_t d)
196{
197  struct gomp_device_descr *base_dev;
198  struct goacc_thread *walk;
199  int ndevs, i;
200  bool devices_active = false;
201
202  /* Get the base device for this device type.  */
203  base_dev = resolve_device (d);
204
205  if (!base_dev)
206    gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
207
208  gomp_mutex_lock (&goacc_thread_lock);
209
210  /* Free target-specific TLS data and close all devices.  */
211  for (walk = goacc_threads; walk != NULL; walk = walk->next)
212    {
213      if (walk->target_tls)
214	base_dev->openacc.destroy_thread_data_func (walk->target_tls);
215
216      walk->target_tls = NULL;
217
218      /* This would mean the user is shutting down OpenACC in the middle of an
219         "acc data" pragma.  Likely not intentional.  */
220      if (walk->mapped_data)
221	gomp_fatal ("shutdown in 'acc data' region");
222
223      /* Similarly, if this happens then user code has done something weird.  */
224      if (walk->saved_bound_dev)
225        gomp_fatal ("shutdown during host fallback");
226
227      if (walk->dev)
228	{
229	  gomp_mutex_lock (&walk->dev->lock);
230	  gomp_free_memmap (&walk->dev->mem_map);
231	  gomp_mutex_unlock (&walk->dev->lock);
232
233	  walk->dev = NULL;
234	  walk->base_dev = NULL;
235	}
236    }
237
238  gomp_mutex_unlock (&goacc_thread_lock);
239
240  ndevs = base_dev->get_num_devices_func ();
241
242  /* Close all the devices of this type that have been opened.  */
243  for (i = 0; i < ndevs; i++)
244    {
245      struct gomp_device_descr *acc_dev = &base_dev[i];
246      if (acc_dev->is_initialized)
247        {
248	  devices_active = true;
249	  gomp_fini_device (acc_dev);
250	}
251    }
252
253  if (!devices_active)
254    gomp_fatal ("no device initialized");
255}
256
257static struct goacc_thread *
258goacc_new_thread (void)
259{
260  struct goacc_thread *thr = gomp_malloc (sizeof (struct gomp_thread));
261
262#if defined HAVE_TLS || defined USE_EMUTLS
263  goacc_tls_data = thr;
264#else
265  pthread_setspecific (goacc_tls_key, thr);
266#endif
267
268  pthread_setspecific (goacc_cleanup_key, thr);
269
270  gomp_mutex_lock (&goacc_thread_lock);
271  thr->next = goacc_threads;
272  goacc_threads = thr;
273  gomp_mutex_unlock (&goacc_thread_lock);
274
275  return thr;
276}
277
278static void
279goacc_destroy_thread (void *data)
280{
281  struct goacc_thread *thr = data, *walk, *prev;
282
283  gomp_mutex_lock (&goacc_thread_lock);
284
285  if (thr)
286    {
287      struct gomp_device_descr *acc_dev = thr->dev;
288
289      if (acc_dev && thr->target_tls)
290	{
291	  acc_dev->openacc.destroy_thread_data_func (thr->target_tls);
292	  thr->target_tls = NULL;
293	}
294
295      assert (!thr->mapped_data);
296
297      /* Remove from thread list.  */
298      for (prev = NULL, walk = goacc_threads; walk;
299	   prev = walk, walk = walk->next)
300	if (walk == thr)
301	  {
302	    if (prev == NULL)
303	      goacc_threads = walk->next;
304	    else
305	      prev->next = walk->next;
306
307	    free (thr);
308
309	    break;
310	  }
311
312      assert (walk);
313    }
314
315  gomp_mutex_unlock (&goacc_thread_lock);
316}
317
318/* Use the ORD'th device instance for the current host thread (or -1 for the
319   current global default).  The device (and the runtime) must be initialised
320   before calling this function.  */
321
322void
323goacc_attach_host_thread_to_device (int ord)
324{
325  struct goacc_thread *thr = goacc_thread ();
326  struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL;
327  int num_devices;
328
329  if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0))
330    return;
331
332  if (ord < 0)
333    ord = goacc_device_num;
334
335  /* Decide which type of device to use.  If the current thread has a device
336     type already (e.g. set by acc_set_device_type), use that, else use the
337     global default.  */
338  if (thr && thr->base_dev)
339    base_dev = thr->base_dev;
340  else
341    {
342      assert (cached_base_dev);
343      base_dev = cached_base_dev;
344    }
345
346  num_devices = base_dev->get_num_devices_func ();
347  if (num_devices <= 0 || ord >= num_devices)
348    gomp_fatal ("device %u out of range", ord);
349
350  if (!thr)
351    thr = goacc_new_thread ();
352
353  thr->base_dev = base_dev;
354  thr->dev = acc_dev = &base_dev[ord];
355  thr->saved_bound_dev = NULL;
356  thr->mapped_data = NULL;
357
358  thr->target_tls
359    = acc_dev->openacc.create_thread_data_func (ord);
360
361  acc_dev->openacc.async_set_async_func (acc_async_sync);
362}
363
364/* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
365   init/shutdown is per-process or per-thread.  We choose per-process.  */
366
367void
368acc_init (acc_device_t d)
369{
370  if (!cached_base_dev)
371    gomp_init_targets_once ();
372
373  gomp_mutex_lock (&acc_device_lock);
374
375  cached_base_dev = acc_init_1 (d);
376
377  gomp_mutex_unlock (&acc_device_lock);
378
379  goacc_attach_host_thread_to_device (-1);
380}
381
382ialias (acc_init)
383
384void
385acc_shutdown (acc_device_t d)
386{
387  gomp_mutex_lock (&acc_device_lock);
388
389  acc_shutdown_1 (d);
390
391  gomp_mutex_unlock (&acc_device_lock);
392}
393
394ialias (acc_shutdown)
395
396int
397acc_get_num_devices (acc_device_t d)
398{
399  int n = 0;
400  struct gomp_device_descr *acc_dev;
401
402  if (d == acc_device_none)
403    return 0;
404
405  gomp_init_targets_once ();
406
407  acc_dev = resolve_device (d);
408  if (!acc_dev)
409    return 0;
410
411  n = acc_dev->get_num_devices_func ();
412  if (n < 0)
413    n = 0;
414
415  return n;
416}
417
418ialias (acc_get_num_devices)
419
420/* Set the device type for the current thread only (using the current global
421   default device number), initialising that device if necessary.  Also set the
422   default device type for new threads to D.  */
423
424void
425acc_set_device_type (acc_device_t d)
426{
427  struct gomp_device_descr *base_dev, *acc_dev;
428  struct goacc_thread *thr = goacc_thread ();
429
430  gomp_mutex_lock (&acc_device_lock);
431
432  if (!cached_base_dev)
433    gomp_init_targets_once ();
434
435  cached_base_dev = base_dev = resolve_device (d);
436  acc_dev = &base_dev[goacc_device_num];
437
438  if (!acc_dev->is_initialized)
439    gomp_init_device (acc_dev);
440
441  gomp_mutex_unlock (&acc_device_lock);
442
443  /* We're changing device type: invalidate the current thread's dev and
444     base_dev pointers.  */
445  if (thr && thr->base_dev != base_dev)
446    {
447      thr->base_dev = thr->dev = NULL;
448      if (thr->mapped_data)
449        gomp_fatal ("acc_set_device_type in 'acc data' region");
450    }
451
452  goacc_attach_host_thread_to_device (-1);
453}
454
455ialias (acc_set_device_type)
456
457acc_device_t
458acc_get_device_type (void)
459{
460  acc_device_t res = acc_device_none;
461  struct gomp_device_descr *dev;
462  struct goacc_thread *thr = goacc_thread ();
463
464  if (thr && thr->base_dev)
465    res = acc_device_type (thr->base_dev->type);
466  else
467    {
468      gomp_init_targets_once ();
469
470      dev = resolve_device (acc_device_default);
471      res = acc_device_type (dev->type);
472    }
473
474  assert (res != acc_device_default
475	  && res != acc_device_not_host);
476
477  return res;
478}
479
480ialias (acc_get_device_type)
481
482int
483acc_get_device_num (acc_device_t d)
484{
485  const struct gomp_device_descr *dev;
486  struct goacc_thread *thr = goacc_thread ();
487
488  if (d >= _ACC_device_hwm)
489    gomp_fatal ("device %u out of range", (unsigned)d);
490
491  if (!cached_base_dev)
492    gomp_init_targets_once ();
493
494  dev = resolve_device (d);
495  if (!dev)
496    gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
497
498  if (thr && thr->base_dev == dev && thr->dev)
499    return thr->dev->target_id;
500
501  return goacc_device_num;
502}
503
504ialias (acc_get_device_num)
505
506void
507acc_set_device_num (int ord, acc_device_t d)
508{
509  struct gomp_device_descr *base_dev, *acc_dev;
510  int num_devices;
511
512  if (!cached_base_dev)
513    gomp_init_targets_once ();
514
515  if (ord < 0)
516    ord = goacc_device_num;
517
518  if ((int) d == 0)
519    /* Set whatever device is being used by the current host thread to use
520       device instance ORD.  It's unclear if this is supposed to affect other
521       host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num).  */
522    goacc_attach_host_thread_to_device (ord);
523  else
524    {
525      gomp_mutex_lock (&acc_device_lock);
526
527      cached_base_dev = base_dev = resolve_device (d);
528
529      num_devices = base_dev->get_num_devices_func ();
530
531      if (ord >= num_devices)
532        gomp_fatal ("device %u out of range", ord);
533
534      acc_dev = &base_dev[ord];
535
536      if (!acc_dev->is_initialized)
537        gomp_init_device (acc_dev);
538
539      gomp_mutex_unlock (&acc_device_lock);
540
541      goacc_attach_host_thread_to_device (ord);
542    }
543
544  goacc_device_num = ord;
545}
546
547ialias (acc_set_device_num)
548
549int
550acc_on_device (acc_device_t dev)
551{
552  struct goacc_thread *thr = goacc_thread ();
553
554  /* We only want to appear to be the "host_nonshm" plugin from "offloaded"
555     code -- i.e. within a parallel region.  Test a flag set by the
556     openacc_parallel hook of the host_nonshm plugin to determine that.  */
557  if (acc_get_device_type () == acc_device_host_nonshm
558      && thr && thr->target_tls
559      && ((struct nonshm_thread *)thr->target_tls)->nonshm_exec)
560    return dev == acc_device_host_nonshm || dev == acc_device_not_host;
561
562  /* For OpenACC, libgomp is only built for the host, so this is sufficient.  */
563  return dev == acc_device_host || dev == acc_device_none;
564}
565
566ialias (acc_on_device)
567
568attribute_hidden void
569goacc_runtime_initialize (void)
570{
571  gomp_mutex_init (&acc_device_lock);
572
573#if !(defined HAVE_TLS || defined USE_EMUTLS)
574  pthread_key_create (&goacc_tls_key, NULL);
575#endif
576
577  pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread);
578
579  cached_base_dev = NULL;
580
581  goacc_threads = NULL;
582  gomp_mutex_init (&goacc_thread_lock);
583
584  /* Initialize and register the 'host' device type.  */
585  goacc_host_init ();
586}
587
588/* Compiler helper functions */
589
590attribute_hidden void
591goacc_save_and_set_bind (acc_device_t d)
592{
593  struct goacc_thread *thr = goacc_thread ();
594
595  assert (!thr->saved_bound_dev);
596
597  thr->saved_bound_dev = thr->dev;
598  thr->dev = dispatchers[d];
599}
600
601attribute_hidden void
602goacc_restore_bind (void)
603{
604  struct goacc_thread *thr = goacc_thread ();
605
606  thr->dev = thr->saved_bound_dev;
607  thr->saved_bound_dev = NULL;
608}
609
610/* This is called from any OpenACC support function that may need to implicitly
611   initialize the libgomp runtime, either globally or from a new host thread.
612   On exit "goacc_thread" will return a valid & populated thread block.  */
613
614attribute_hidden void
615goacc_lazy_initialize (void)
616{
617  struct goacc_thread *thr = goacc_thread ();
618
619  if (thr && thr->dev)
620    return;
621
622  if (!cached_base_dev)
623    acc_init (acc_device_default);
624  else
625    goacc_attach_host_thread_to_device (-1);
626}
627