1/* Copyright (C) 2005 Free Software Foundation, Inc.
2   Contributed by Richard Henderson <rth@redhat.com>.
3
4   This file is part of the GNU OpenMP Library (libgomp).
5
6   Libgomp is free software; you can redistribute it and/or modify it
7   under the terms of the GNU Lesser General Public License as published by
8   the Free Software Foundation; either version 2.1 of the License, or
9   (at your option) any later version.
10
11   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13   FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
14   more details.
15
16   You should have received a copy of the GNU Lesser General Public License
17   along with libgomp; see the file COPYING.LIB.  If not, write to the
18   Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19   MA 02110-1301, USA.  */
20
21/* As a special exception, if you link this library with other files, some
22   of which are compiled with GCC, to produce an executable, this library
23   does not by itself cause the resulting executable to be covered by the
24   GNU General Public License.  This exception does not however invalidate
25   any other reasons why the executable file might be covered by the GNU
26   General Public License.  */
27
28/* This file handles the LOOP (FOR/DO) construct.  */
29
30#include "libgomp.h"
31#include <stdlib.h>
32
33
34/* Initialize the given work share construct from the given arguments.  */
35
36static inline void
37gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
38		enum gomp_schedule_type sched, long chunk_size)
39{
40  ws->sched = sched;
41  ws->chunk_size = chunk_size;
42  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
43  ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
44	    ? start : end;
45  ws->incr = incr;
46  ws->next = start;
47}
48
49/* The *_start routines are called when first encountering a loop construct
50   that is not bound directly to a parallel construct.  The first thread
51   that arrives will create the work-share construct; subsequent threads
52   will see the construct exists and allocate work from it.
53
54   START, END, INCR are the bounds of the loop; due to the restrictions of
55   OpenMP, these values must be the same in every thread.  This is not
56   verified (nor is it entirely verifiable, since START is not necessarily
57   retained intact in the work-share data structure).  CHUNK_SIZE is the
58   scheduling parameter; again this must be identical in all threads.
59
60   Returns true if there's any work for this thread to perform.  If so,
61   *ISTART and *IEND are filled with the bounds of the iteration block
62   allocated to this thread.  Returns false if all work was assigned to
63   other threads prior to this thread's arrival.  */
64
/* Start a non-ordered STATIC-scheduled loop.  Returns true if *ISTART
   and *IEND were filled with an iteration block for this thread.  */

static bool
gomp_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  /* gomp_work_share_start returns true for the first thread to arrive;
     that thread initializes the work share.  NOTE(review): the work-share
     lock appears to be held on return from gomp_work_share_start in either
     case — it is released unconditionally below.  Confirm in work.c.  */
  if (gomp_work_share_start (false))
    gomp_loop_init (thr->ts.work_share, start, end, incr,
		    GFS_STATIC, chunk_size);
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  /* gomp_iter_static_next returns zero when a block was assigned.  Static
     assignment is per-thread, so no lock is needed while iterating.  */
  return !gomp_iter_static_next (istart, iend);
}
78
/* Start a non-ordered DYNAMIC-scheduled loop.  Returns true if *ISTART
   and *IEND were filled with an iteration block for this thread.  */

static bool
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    gomp_loop_init (thr->ts.work_share, start, end, incr,
		    GFS_DYNAMIC, chunk_size);

#ifdef HAVE_SYNC_BUILTINS
  /* With atomic builtins the iterator advances without the lock, so
     release the work-share lock first.  */
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  /* Without them, use the *_locked variant while still holding the lock
     returned held by gomp_work_share_start, then release it.  */
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
100
/* Start a non-ordered GUIDED-scheduled loop.  Returns true if *ISTART
   and *IEND were filled with an iteration block for this thread.  */

static bool
gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    gomp_loop_init (thr->ts.work_share, start, end, incr,
		    GFS_GUIDED, chunk_size);

#ifdef HAVE_SYNC_BUILTINS
  /* Lock-free path: drop the work-share lock before advancing.  */
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next (istart, iend);
#else
  /* Locked path: advance under the lock, then release it.  */
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
122
123bool
124GOMP_loop_runtime_start (long start, long end, long incr,
125			 long *istart, long *iend)
126{
127  switch (gomp_run_sched_var)
128    {
129    case GFS_STATIC:
130      return gomp_loop_static_start (start, end, incr, gomp_run_sched_chunk,
131				     istart, iend);
132    case GFS_DYNAMIC:
133      return gomp_loop_dynamic_start (start, end, incr, gomp_run_sched_chunk,
134				      istart, iend);
135    case GFS_GUIDED:
136      return gomp_loop_guided_start (start, end, incr, gomp_run_sched_chunk,
137				     istart, iend);
138    default:
139      abort ();
140    }
141}
142
143/* The *_ordered_*_start routines are similar.  The only difference is that
144   this work-share construct is initialized to expect an ORDERED section.  */
145
/* Start an ordered STATIC-scheduled loop.  Returns true if *ISTART and
   *IEND were filled with an iteration block for this thread.  */

static bool
gomp_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  /* The creating thread also sets up the ordered-section state; for
     static schedules this can be done once at initialization (see
     gomp_ordered_static_init).  */
  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
    }
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return !gomp_iter_static_next (istart, iend);
}
162
/* Start an ordered DYNAMIC-scheduled loop.  Returns true if *ISTART and
   *IEND were filled with an iteration block for this thread.  */

static bool
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    gomp_loop_init (thr->ts.work_share, start, end, incr,
		    GFS_DYNAMIC, chunk_size);

  /* Unlike the non-ordered variant, always use the locked iterator here:
     gomp_ordered_first must run under the same lock as the block claim
     so the ordered queue matches the assignment order.  */
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
181
/* Start an ordered GUIDED-scheduled loop.  Returns true if *ISTART and
   *IEND were filled with an iteration block for this thread.  */

static bool
gomp_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    gomp_loop_init (thr->ts.work_share, start, end, incr,
		    GFS_GUIDED, chunk_size);

  /* Claim a block and register in the ordered queue under one lock hold,
     so queue order matches assignment order.  */
  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
200
201bool
202GOMP_loop_ordered_runtime_start (long start, long end, long incr,
203				 long *istart, long *iend)
204{
205  switch (gomp_run_sched_var)
206    {
207    case GFS_STATIC:
208      return gomp_loop_ordered_static_start (start, end, incr,
209					     gomp_run_sched_chunk,
210					     istart, iend);
211    case GFS_DYNAMIC:
212      return gomp_loop_ordered_dynamic_start (start, end, incr,
213					      gomp_run_sched_chunk,
214					      istart, iend);
215    case GFS_GUIDED:
216      return gomp_loop_ordered_guided_start (start, end, incr,
217					     gomp_run_sched_chunk,
218					     istart, iend);
219    default:
220      abort ();
221    }
222}
223
224/* The *_next routines are called when the thread completes processing of
225   the iteration block currently assigned to it.  If the work-share
226   construct is bound directly to a parallel construct, then the iteration
227   bounds may have been set up before the parallel.  In which case, this
228   may be the first iteration for the thread.
229
230   Returns true if there is work remaining to be performed; *ISTART and
231   *IEND are filled with a new iteration block.  Returns false if all work
232   has been assigned.  */
233
234static bool
235gomp_loop_static_next (long *istart, long *iend)
236{
237  return !gomp_iter_static_next (istart, iend);
238}
239
/* Advance to this thread's next DYNAMIC iteration block.  Returns true
   if *ISTART/*IEND were filled in.  */

static bool
gomp_loop_dynamic_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  /* Atomic builtins let the iterator advance without taking the lock.  */
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  /* Otherwise serialize the advance on the work-share lock.  */
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
256
/* Advance to this thread's next GUIDED iteration block.  Returns true
   if *ISTART/*IEND were filled in.  */

static bool
gomp_loop_guided_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  /* Atomic builtins let the iterator advance without taking the lock.  */
  ret = gomp_iter_guided_next (istart, iend);
#else
  /* Otherwise serialize the advance on the work-share lock.  */
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
273
274bool
275GOMP_loop_runtime_next (long *istart, long *iend)
276{
277  struct gomp_thread *thr = gomp_thread ();
278
279  switch (thr->ts.work_share->sched)
280    {
281    case GFS_STATIC:
282      return gomp_loop_static_next (istart, iend);
283    case GFS_DYNAMIC:
284      return gomp_loop_dynamic_next (istart, iend);
285    case GFS_GUIDED:
286      return gomp_loop_guided_next (istart, iend);
287    default:
288      abort ();
289    }
290}
291
292/* The *_ordered_*_next routines are called when the thread completes
293   processing of the iteration block currently assigned to it.
294
295   Returns true if there is work remaining to be performed; *ISTART and
296   *IEND are filled with a new iteration block.  Returns false if all work
297   has been assigned.  */
298
/* Advance to this thread's next ordered STATIC iteration block.  Returns
   true if *ISTART/*IEND were filled in.  */

static bool
gomp_loop_ordered_static_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  /* Wait for our turn in the ordered sequence before claiming.  */
  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  /* gomp_iter_static_next returns 0 when a block was assigned.
     NOTE(review): the test >= 0 guard implies a negative return means
     this thread should not advance the ordered state — presumably it
     will receive no further blocks; confirm against iter.c.  */
  test = gomp_iter_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}
314
/* Advance to this thread's next ordered DYNAMIC iteration block.  Returns
   true if *ISTART/*IEND were filled in.  */

static bool
gomp_loop_ordered_dynamic_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  /* Wait for our turn in the ordered sequence before claiming.  */
  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  /* Under the same lock, either register the new block in the ordered
     queue or note that this thread has claimed its last block.  */
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
332
/* Advance to this thread's next ordered GUIDED iteration block.  Returns
   true if *ISTART/*IEND were filled in.  */

static bool
gomp_loop_ordered_guided_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  /* Wait for our turn in the ordered sequence before claiming.  */
  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  /* Under the same lock, either register the new block in the ordered
     queue or note that this thread has claimed its last block.  */
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
350
351bool
352GOMP_loop_ordered_runtime_next (long *istart, long *iend)
353{
354  struct gomp_thread *thr = gomp_thread ();
355
356  switch (thr->ts.work_share->sched)
357    {
358    case GFS_STATIC:
359      return gomp_loop_ordered_static_next (istart, iend);
360    case GFS_DYNAMIC:
361      return gomp_loop_ordered_dynamic_next (istart, iend);
362    case GFS_GUIDED:
363      return gomp_loop_ordered_guided_next (istart, iend);
364    default:
365      abort ();
366    }
367}
368
369/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
370   to avoid one synchronization once we get into the loop.  */
371
372static void
373gomp_parallel_loop_start (void (*fn) (void *), void *data,
374			  unsigned num_threads, long start, long end,
375			  long incr, enum gomp_schedule_type sched,
376			  long chunk_size)
377{
378  struct gomp_work_share *ws;
379
380  num_threads = gomp_resolve_num_threads (num_threads);
381  ws = gomp_new_work_share (false, num_threads);
382  gomp_loop_init (ws, start, end, incr, sched, chunk_size);
383  gomp_team_start (fn, data, num_threads, ws);
384}
385
/* Exported entry: combined parallel + static-scheduled loop start.  */

void
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_STATIC, chunk_size);
}
394
/* Exported entry: combined parallel + dynamic-scheduled loop start.  */

void
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size);
}
403
/* Exported entry: combined parallel + guided-scheduled loop start.  */

void
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size);
}
412
/* Exported entry: combined parallel + runtime-scheduled loop start.
   The schedule kind and chunk come from the global run-sched settings.  */

void
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    gomp_run_sched_var, gomp_run_sched_chunk);
}
421
422/* The GOMP_loop_end* routines are called after the thread is told that
423   all loop iterations are complete.  This first version synchronizes
424   all threads; the nowait version does not.  */
425
/* End a loop work share, synchronizing (barrier) with the other threads
   in the team.  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}
431
/* End a loop work share without waiting for the other threads (the
   "nowait" clause).  */

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}
437
438
439/* We use static functions above so that we're sure that the "runtime"
440   function can defer to the proper routine without interposition.  We
441   export the static function with a strong alias when possible, or with
442   a wrapper function otherwise.  */
443
#ifdef HAVE_ATTRIBUTE_ALIAS
/* Alias support: export each GOMP_* symbol as a strong alias of the
   corresponding static implementation above, so the runtime-dispatch
   routines and external callers resolve to the same code.  */
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
	__attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
	__attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
	__attribute__((alias ("gomp_loop_guided_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
	__attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
	__attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
	__attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
	__attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
	__attribute__((alias ("gomp_loop_guided_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
	__attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
	__attribute__((alias ("gomp_loop_ordered_guided_next")));
#else
/* No alias support: export thin wrapper functions instead, at the cost
   of one extra call per entry point.  */
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif
554