/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include "libgomp.h"


/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
		enum gomp_schedule_type sched, long chunk_size)
{
  ws->sched = sched;
  ws->chunk_size = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
	    ? start : end;
  ws->incr = incr;
  ws->next = start;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size *= incr;

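      /* Note: in the HAVE_SYNC_BUILTINS block below, a nonzero ws->mode
	 records that ws->end plus one extra chunk per thread provably
	 cannot overflow a long.  gomp_iter_dynamic_next (iter.c) can then
	 claim a chunk with a single atomic fetch-and-add; otherwise it
	 must fall back to a compare-and-swap loop with explicit overflow
	 checks.  */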
#ifdef HAVE_SYNC_BUILTINS
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

	if (__builtin_expect (incr > 0, 1))
	  {
	    /* Cheap overflow protection.  */
	    if (__builtin_expect ((nthreads | ws->chunk_size)
				  >= 1UL << (sizeof (long)
					     * __CHAR_BIT__ / 2 - 1), 0))
	      ws->mode = 0;
	    else
	      ws->mode = ws->end < (LONG_MAX
				    - (nthreads + 1) * ws->chunk_size);
	  }
	/* Cheap overflow protection.  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size)
				   >= 1UL << (sizeof (long)
					      * __CHAR_BIT__ / 2 - 1), 0))
	  ws->mode = 0;
	else
	  ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
      }
#endif
    }
}

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see that the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */

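/* For illustration, a sketch of the lowering a compiler performs for a
   loop that is not bound to a parallel; the variable names here are
   hypothetical, not the compiler's actual temporaries:

	#pragma omp for schedule(dynamic, 4)
	for (i = 0; i < n; i++)
	  body (i);

   becomes, roughly,

	long istart, iend;
	if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart, &iend))
	  do
	    for (i = istart; i < iend; i++)
	      body (i);
	  while (GOMP_loop_dynamic_next (&istart, &iend));
	GOMP_loop_end ();  */
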
static bool
gomp_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_start (long start, long end, long incr,
			 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier,
				     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier,
				      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier,
				     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could play with
	 feedback-driven choice.  */
      return gomp_loop_static_start (start, end, incr, 0, istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

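/* Sketch of the corresponding lowering: for

	#pragma omp for ordered schedule(dynamic)

   the compiler calls GOMP_loop_ordered_dynamic_start instead, and each
   ORDERED region in the loop body is additionally bracketed by
   GOMP_ordered_start and GOMP_ordered_end (see ordered.c).  */
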
static bool
gomp_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_start (long start, long end, long incr,
				 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_ordered_static_start (start, end, incr,
					     icv->run_sched_modifier,
					     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_start (start, end, incr,
					      icv->run_sched_modifier,
					      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_start (start, end, incr,
					     icv->run_sched_modifier,
					     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could play with
	 feedback-driven choice.  */
      return gomp_loop_ordered_static_start (start, end, incr,
					     0, istart, iend);
    default:
      abort ();
    }
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this
   may be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

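/* Sketch of that bound-to-parallel case, with hypothetical names as
   before: the work share was pre-initialized by gomp_parallel_loop_start
   below, so the outlined function skips the *_start call entirely:

	long istart, iend;
	while (GOMP_loop_dynamic_next (&istart, &iend))
	  for (i = istart; i < iend; i++)
	    body (i);
	GOMP_loop_end_nowait ();  */
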
static bool
gomp_loop_static_next (long *istart, long *iend)
{
  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_dynamic_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_guided_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ordered_static_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ordered_dynamic_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
   to avoid one synchronization once we get into the loop.  */

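/* Sketch of the lowering of the combined form, with hypothetical names:

	#pragma omp parallel for schedule(guided, 8)
	for (i = 0; i < n; i++)
	  body (i);

   becomes, roughly,

	GOMP_parallel_loop_guided (subfn, &data, 0, 0, n, 1, 8, 0);

   where subfn fetches its chunks with GOMP_loop_guided_next as sketched
   above, without a *_start call.  */
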
static void
gomp_parallel_loop_start (void (*fn) (void *), void *data,
			  unsigned num_threads, long start, long end,
			  long incr, enum gomp_schedule_type sched,
			  long chunk_size, unsigned int flags)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, 0);
  team = gomp_new_team (num_threads);
  gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
  gomp_team_start (fn, data, num_threads, flags, team);
}

void
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_STATIC, chunk_size, 0);
}

void
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, 0);
}

void
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, 0);
}

void
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var, icv->run_sched_modifier, 0);
}

ialias_redirect (GOMP_parallel_end)

void
GOMP_parallel_loop_static (void (*fn) (void *), void *data,
			   unsigned num_threads, long start, long end,
			   long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_STATIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
			    unsigned num_threads, long start, long end,
			    long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
			   unsigned num_threads, long start, long end,
			   long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
			    unsigned num_threads, long start, long end,
			    long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var, icv->run_sched_modifier,
			    flags);
  fn (data);
  GOMP_parallel_end ();
}

/* The GOMP_loop_end* routines are called after the thread is told that
   all loop iterations are complete.  The first two versions synchronize
   all threads; the nowait version does not.  GOMP_loop_end_cancel
   additionally returns true if the construct was cancelled.  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}

bool
GOMP_loop_end_cancel (void)
{
  return gomp_work_share_end_cancel ();
}

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}


/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

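/* In other words: if the exported GOMP_loop_static_start were called by
   name from GOMP_loop_runtime_start, a user-supplied definition of that
   symbol could intercept the runtime schedule's dispatch.  Calling the
   static gomp_loop_static_start keeps the dispatch internal; the alias
   only provides the exported name.  */
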
#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
	__attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
	__attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
	__attribute__((alias ("gomp_loop_guided_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
	__attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
	__attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
	__attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
	__attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
	__attribute__((alias ("gomp_loop_guided_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
	__attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
	__attribute__((alias ("gomp_loop_ordered_guided_next")));
#else
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif