/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

  NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
        it may change values between parallel regions.  __kmp_max_nth
        is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu, ld )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

#if USE_ITT_BUILD || defined KMP_DEBUG
static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}
#endif

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  // Clear monotonic/nonmonotonic bits (the modifier is ignored here)
  schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /*  this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Schedule enumerations above kmp_ord_upper exist that are not "distribute"
  // schedules, but the only useful ones among them are dynamic, and those can
  // never be seen here because this code path is executed only for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in a DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
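    // For example, kmp_distribute_static maps to kmp_sch_static and
    // kmp_distribute_static_chunked maps to kmp_sch_static_chunked (the two
    // enum blocks are laid out at a constant offset in kmp.h).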
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
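  // For example (hypothetical values): a loop normalized to lower = 10,
  // upper = 1, incr = -4 yields trip_count = (10 - 1) / 4 + 1 = 3, i.e. the
  // iterations i = 10, 6, 2.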

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
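  // A sketch of the two static policies above (hypothetical values):
  // trip_count = 7, nth = 3, incr = 1.
  //   balanced: small_chunk = 2, extras = 1 -> threads get 3, 2 and 2
  //             iterations and tid 2 runs the last one.
  //   greedy:   big_chunk = ceil(7/3) = 3  -> threads get 3, 3 and 1
  //             iterations; tid 2's upper bound is clipped to old_upper.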
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
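  // A worked example for the chunked case (hypothetical values):
  // trip_count = 10, chunk = 3, nth = 2, incr = 1 -> nchunks = 4, span = 3,
  // *pstride = 6. Chunks are assigned round-robin: tid 0 owns chunks 0 and 2
  // (iterations 0-2 and 6-8), tid 1 owns chunks 1 and 3 (3-5 and 9), and
  // tid 1 == (nchunks - 1) % nth reports the last iteration.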
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // round span up to the nearest multiple of chunk; the bit mask assumes
    // chunk is a power of two
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
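  // Illustration of the balanced-chunked adjustment (hypothetical values):
  // trip_count = 100, nth = 8 gives span = ceil(100/8) = 13; with chunk = 4
  // that rounds to (13 + 3) & ~3 = 16, so every thread is offered a block of
  // 16 iterations and out-of-range upper bounds are clipped to old_upper.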
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are maintained by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only the primary threads of some teams get a single iteration each;
    // other threads get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride to be used when computing subsequent chunks.
  // The last-iteration flag is set for the team that will execute the last
  // iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
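  // For example (hypothetical values): lb = 0, ub = 99, incr = 1, chunk = 10
  // and nteams = 4 give span = 10 and *p_st = 40; team 2 receives [20, 29]
  // first and would take [60, 69] next. The last of the ten chunks belongs to
  // team (99 / 10) % 4 == 1, so only team 1 sets *p_last.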
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are maintained by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param    loc       Source code location
@param    gtid      Global thread id of this thread
@param    schedtype  Scheduling type
@param    plastiter Pointer to the "last iteration" flag
@param    plower    Pointer to the lower bound
@param    pupper    Pointer to the upper bound
@param    pstride   Pointer to the stride
@param    incr      Loop increment
@param    chunk     The chunk size

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

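For illustration, the call sequence a compiler might emit for
#pragma omp for schedule(static) over for (i = 0; i < n; ++i) could look as
follows (a hedged sketch with hypothetical names; actual code generation
varies):

@code
kmp_int32 last = 0, lower = 0, upper = n - 1, stride = 1;
// incr = 1; the chunk argument is unused for plain kmp_sch_static
__kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower, &upper,
                         &stride, 1, 0);
for (kmp_int32 i = lower; i <= upper; ++i)
  body(i);
__kmpc_for_static_fini(&loc, gtid);
@endcode
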
@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif

/*!
@ingroup WORK_SHARING
@param    loc       Source code location
@param    gtid      Global thread id of this thread
@param    schedule  Scheduling type for the parallel loop
@param    plastiter Pointer to the "last iteration" flag
@param    plower    Pointer to the lower bound
@param    pupper    Pointer to the upper bound of loop chunk
@param    pupperD   Pointer to the upper bound of dist_chunk
@param    pstride   Pointer to the stride for parallel loop
@param    incr      Loop increment
@param    chunk     The chunk size for the parallel loop

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs.

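For illustration, a call a compiler might emit for a combined
#pragma omp distribute parallel for dist_schedule(static) schedule(static)
could look as follows (a hedged sketch with hypothetical names; actual code
generation varies):

@code
kmp_int32 last = 0, lower = 0, upper = n - 1, upperD = n - 1, stride = 1;
__kmpc_dist_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
                              &upper, &upperD, &stride, 1, 0);
for (kmp_int32 i = lower; i <= upper; ++i)
  body(i);
@endcode
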
@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//    Transfer call to template< type T >
//    __kmp_team_static_init( ident_t *loc, int gtid,
//        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb  pointer to Lower bound
@param p_ub  pointer to Upper bound
@param p_st  Step (or increment if you prefer)
@param incr  Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of a composite
distribute parallel loop construct. These functions are all identical apart
from the types of their arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"