/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include "libgomp.h"

typedef unsigned long long gomp_ull;

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
		    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
		    gomp_ull chunk_size)
{
  ws->sched = sched;
  ws->chunk_size_ull = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end_ull = ((up && start > end) || (!up && start < end))
		? start : end;
  ws->incr_ull = incr;
  ws->next_ull = start;
  ws->mode = 0;
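  /* WS->MODE starts out zero.  For dynamic schedules the code below sets
     it nonzero only when END_ULL plus (NTHREADS + 1) chunks provably
     cannot wrap around the unsigned long long range, which allows the
     ULL iteration routines (see iter_ull.c) to use a lock-free fast path
     without per-step overflow checks; bit 1 is set at the end of this
     function to mark a downward loop.  */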
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size_ull *= incr;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

	if (__builtin_expect (up, 1))
	  {
	    /* Cheap overflow protection.  */
	    if (__builtin_expect ((nthreads | ws->chunk_size_ull)
				  < 1ULL << (sizeof (gomp_ull)
					     * __CHAR_BIT__ / 2 - 1), 1))
	      ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
					- (nthreads + 1) * ws->chunk_size_ull);
	  }
	/* Cheap overflow protection.  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
				   < 1ULL << (sizeof (gomp_ull)
					      * __CHAR_BIT__ / 2 - 1), 1))
	  ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
				    - (__LONG_LONG_MAX__ * 2ULL + 1));
      }
#endif
    }
  if (!up)
    ws->mode |= 2;
}

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see that the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */
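
/* For illustration only (not part of this file's interface): a loop lowered
   to these entry points is, roughly, driven as in the sketch below, where
   N, CHUNK and BODY are placeholders and GOMP_loop_end comes from loop.c;
   the exact sequence the compiler emits depends on the schedule and other
   clauses such as nowait.

       gomp_ull istart, iend;
       if (GOMP_loop_ull_dynamic_start (true, 0, n, 1, chunk, &istart, &iend))
	 do
	   {
	     gomp_ull i;
	     for (i = istart; i < iend; i++)
	       body (i);
	   }
	 while (GOMP_loop_ull_dynamic_next (&istart, &iend));
       GOMP_loop_end ();
*/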

static bool
gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (false))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_ull_static_start (up, start, end, incr,
					 icv->run_sched_modifier,
					 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_start (up, start, end, incr,
					  icv->run_sched_modifier,
					  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_start (up, start, end, incr,
					 icv->run_sched_modifier,
					 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ull_static_start (up, start, end, incr,
					 0, istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

static bool
gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (true))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var)
    {
    case GFS_STATIC:
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
						 icv->run_sched_modifier,
						 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
						  icv->run_sched_modifier,
						  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
						 icv->run_sched_modifier,
						 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
						 0, istart, iend);
    default:
      abort ();
    }
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before entering the parallel region, in
   which case this may be the first iteration block for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
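  /* Per the convention documented for gomp_iter_static_next in iter.c,
     the ULL variant returns zero when a new block was assigned, a
     positive value when there is no block for this thread on this trip,
     and a negative value once this thread has already received its last
     block; the ordered turn is advanced below only in the first two
     cases.  */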
  test = gomp_iter_ull_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
	__attribute__((alias ("gomp_loop_ull_static_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));

extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
	__attribute__((alias ("gomp_loop_ull_ordered_guided_start")));

extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
	__attribute__((alias ("gomp_loop_ull_static_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));

extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
	__attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
#else
bool
GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
				      iend);
}

bool
GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_next (istart, iend);
}

bool
GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_next (istart, iend);
}
#endif