/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lowest level thread
synchronizations of suspending a thread and awaking it. They are used to build
higher level operations such as barriers and fork/join.
*/

/*!
@ingroup WAIT_RELEASE
@{
*/

struct flag_properties {
  unsigned int type : 16;
  unsigned int reserved : 16;
};

template <enum flag_type FlagType> struct flag_traits {};

template <> struct flag_traits<flag32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<atomic_flag64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = atomic_flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};

template <> struct flag_traits<flag64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};

template <> struct flag_traits<flag_oncore> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag_oncore;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
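
/* Illustrative sketch (not part of the runtime): flag_traits lets generic code
   select the correct-width atomic primitive at compile time. A hypothetical
   helper that sets the sleep bit for a flag of any width could be written as:

     template <flag_type FT>
     typename flag_traits<FT>::flag_t
     example_set_sleep_bit(volatile typename flag_traits<FT>::flag_t *f) {
       return flag_traits<FT>::test_then_or(f, KMP_BARRIER_SLEEP_STATE);
     }
*/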

/*! Base class for all flags */
template <flag_type FlagType> class kmp_flag {
protected:
  flag_properties t; /**< "Type" of the flag in loc */
  kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this thread. */
  kmp_uint32 num_waiting_threads; /**< Num threads sleeping on this thread. */
  std::atomic<bool> *sleepLoc;

public:
  typedef flag_traits<FlagType> traits_type;
  kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
  kmp_flag(int nwaiters)
      : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
  kmp_flag(std::atomic<bool> *sloc)
      : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
  /*! @result the flag_type */
  flag_type get_type() { return (flag_type)(t.type); }

  /*! @param i in   index into waiting_threads
   *  @result the thread that is waiting at index i */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*! @result num_waiting_threads */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*! @param thr in   the thread which is now waiting
   *  Insert a waiting thread at index 0. */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

/*! Base class for wait/release volatile flag */
template <typename PtrType, flag_type FlagType, bool Sleepable>
class kmp_flag_native : public kmp_flag<FlagType> {
protected:
  volatile PtrType *loc;
  PtrType checker; /**< When flag==checker, it has been released. */
  typedef flag_traits<FlagType> traits_type;

public:
  typedef PtrType flag_t;
  kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
  kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(1), loc(p) {
    this->waiting_threads[0] = thr;
  }
  kmp_flag_native(volatile PtrType *p, PtrType c)
      : kmp_flag<FlagType>(), loc(p), checker(c) {}
  kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc)
      : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
  virtual ~kmp_flag_native() {}
  void *operator new(size_t size) { return __kmp_allocate(size); }
  void operator delete(void *p) { __kmp_free(p); }
  volatile PtrType *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); }
  void set(volatile PtrType *new_loc) { loc = new_loc; }
  PtrType load() { return *loc; }
  void store(PtrType val) { *loc = val; }
  /*! @result true if the flag object has been released. */
  virtual bool done_check() {
    if (Sleepable && !(this->sleepLoc))
      return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
             checker;
    else
      return traits_type::tcr(*(this->get())) == checker;
  }
  /*! @param old_loc in   old value of flag
   *  @result true if the flag's old value indicates it was released. */
  virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
  /*! @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode */
  virtual bool notdone_check() {
    return traits_type::tcr(*(this->get())) != checker;
  }
  /*! Trigger all waiting threads to run by modifying the flag to the release
   * state. */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile PtrType *)this->get());
  }
  /*! @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s). */
  PtrType set_sleeping() {
    if (this->sleepLoc) {
      this->sleepLoc->store(true);
      return *(this->get());
    }
    return traits_type::test_then_or((volatile PtrType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*! Notes that there are no longer threads sleeping on the flag by clearing
   * the sleep bit(s). */
  void unset_sleeping() {
    if (this->sleepLoc) {
      this->sleepLoc->store(false);
      return;
    }
    traits_type::test_then_and((volatile PtrType *)this->get(),
                               ~KMP_BARRIER_SLEEP_STATE);
  }
  /*! @param old_loc in   old value of flag
   * Test if there are threads sleeping on the flag's old value in old_loc. */
  bool is_sleeping_val(PtrType old_loc) {
    if (this->sleepLoc)
      return this->sleepLoc->load();
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*! Test whether there are threads sleeping on the flag. */
  bool is_sleeping() {
    if (this->sleepLoc)
      return this->sleepLoc->load();
    return is_sleeping_val(*(this->get()));
  }
  bool is_any_sleeping() {
    if (this->sleepLoc)
      return this->sleepLoc->load();
    return is_sleeping_val(*(this->get()));
  }
  kmp_uint8 *get_stolen() { return NULL; }
};
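
/* Illustrative sketch (not part of the runtime): for a sleepable flag the
   sleep bit(s) share the word with the flag value, so done_check() masks them
   off before comparing against the checker. Assuming KMP_BARRIER_SLEEP_STATE
   is the sleep bit defined in kmp.h:

     kmp_uint64 checker = 4;                                // release value
     kmp_uint64 value = checker | KMP_BARRIER_SLEEP_STATE;  // sleep bit set
     bool released =
         (value & ~(kmp_uint64)KMP_BARRIER_SLEEP_STATE) == checker;
     // released is true: the release is visible even though a waiter has
     // marked itself sleeping on the same word.
*/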

/*! Base class for wait/release atomic flag */
template <typename PtrType, flag_type FlagType, bool Sleepable>
class kmp_flag_atomic : public kmp_flag<FlagType> {
protected:
  std::atomic<PtrType> *loc; /**< Pointer to flag location to wait on */
  PtrType checker; /**< Flag == checker means it has been released. */
public:
  typedef flag_traits<FlagType> traits_type;
  typedef PtrType flag_t;
  kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
  kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(1), loc(p) {
    this->waiting_threads[0] = thr;
  }
  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
      : kmp_flag<FlagType>(), loc(p), checker(c) {}
  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc)
      : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
  /*! @result the pointer to the actual flag */
  std::atomic<PtrType> *get() { return loc; }
  /*! @result void* pointer to the actual flag */
  void *get_void_p() { return RCAST(void *, loc); }
  /*! @param new_loc in   set loc to point at new_loc */
  void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
  /*! @result flag value */
  PtrType load() { return loc->load(std::memory_order_acquire); }
  /*! @param val the new flag value to be stored */
  void store(PtrType val) { loc->store(val, std::memory_order_release); }
  /*! @result true if the flag object has been released. */
  bool done_check() {
    if (Sleepable && !(this->sleepLoc))
      return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
    else
      return this->load() == checker;
  }
  /*! @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released. */
  bool done_check_val(PtrType old_loc) { return old_loc == checker; }
  /*! @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode */
  bool notdone_check() { return this->load() != checker; }
  /*! Trigger all waiting threads to run by modifying the flag to the release
   * state. */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*! @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s). */
  PtrType set_sleeping() {
    if (this->sleepLoc) {
      this->sleepLoc->store(true);
      return *(this->get());
    }
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*! Notes that there are no longer threads sleeping on the flag by clearing
   * the sleep bit(s). */
  void unset_sleeping() {
    if (this->sleepLoc) {
      this->sleepLoc->store(false);
      return;
    }
    KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*! @param old_loc in   old value of flag
   * Test whether there are threads sleeping on flag's old value in old_loc. */
  bool is_sleeping_val(PtrType old_loc) {
    if (this->sleepLoc)
      return this->sleepLoc->load();
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*! Test whether there are threads sleeping on the flag. */
  bool is_sleeping() {
    if (this->sleepLoc)
      return this->sleepLoc->load();
    return is_sleeping_val(this->load());
  }
  bool is_any_sleeping() {
    if (this->sleepLoc)
      return this->sleepLoc->load();
    return is_sleeping_val(this->load());
  }
  kmp_uint8 *get_stolen() { return NULL; }
};
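
/* Illustrative sketch (hypothetical local names, not runtime code): the basic
   wait/release protocol on an atomic flag. The checker is the value the
   waiter spins for; internal_release() bumps the flag by 4, moving it into
   the released state.

     std::atomic<kmp_uint32> go(0);
     kmp_flag_atomic<kmp_uint32, flag32, false> f(&go, 4U);
     // waiting thread:
     while (f.notdone_check()) {
       // pause, yield, or execute tasks
     }
     // releasing thread (some other thread):
     f.internal_release(); // go becomes 4; the waiter's check now succeeds
*/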

#if OMPT_SUPPORT
OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        int flags = this_thr->th.ompt_thread_info.parallel_flags;
        flags = (flags & ompt_parallel_league) ? ompt_task_initial
                                               : ompt_task_implicit;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, flags);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif

/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_*  must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point.  */
template <class C, bool final_spin, bool Cancellable = false,
          bool Sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif
  kmp_uint64 time;

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

/* OMPT Behavior:
THIS function is called from
  __kmp_barrier (2 times)  (implicit or explicit barrier in parallel regions)
            these have join / fork behavior

       In these cases, we don't change the state or trigger events in THIS
       function. Events are triggered in the calling code (__kmp_barrier):

                state := ompt_state_overhead
            barrier-begin
            barrier-wait-begin
                state := ompt_state_wait_barrier
          call join-barrier-implementation (finally arrive here)
          {}
          call fork-barrier-implementation (finally arrive here)
          {}
                state := ompt_state_overhead
            barrier-wait-end
            barrier-end
                state := ompt_state_work_parallel


  __kmp_fork_barrier  (after thread creation, before executing implicit task)
          call fork-barrier-implementation (finally arrive here)
          {} // worker arrive here with state = ompt_state_idle


  __kmp_join_barrier  (implicit barrier at end of parallel region)
                state := ompt_state_barrier_implicit
            barrier-begin
            barrier-wait-begin
          call join-barrier-implementation (finally arrive here with
          final_spin=FALSE)
          {
          }
  __kmp_fork_barrier  (implicit barrier at end of parallel region)
          call fork-barrier-implementation (finally arrive here final_spin=TRUE)

       Worker after task-team is finished:
            barrier-wait-end
            barrier-end
            implicit-task-end
            idle-begin
                state := ompt_state_idle

       Before leaving, if state = ompt_state_idle
            idle-end
                state := ompt_state_overhead
*/
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team = NULL;
      if (this_thr->th.th_team)
        team = this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif

  KMP_INIT_YIELD(spins); // Setup for waiting
  KMP_INIT_BACKOFF(time);

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon.  */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
    (void)poll_count;
#endif // KMP_USE_MONITOR
  }

  KMP_MB();

  // Main wait spin loop
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
         __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region.  This could be because we are in a
         serialized region (perhaps the outer one), or else tasking was manually
         disabled (KMP_TASKING=0).  */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team)) {
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          } else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now; other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled
    if (Cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }

    // For a hidden helper thread, a NULL task_team means the main thread has
    // not released the barrier yet. We must not simply wait here: if we did,
    // the hidden helper threads would still be asleep when the main thread
    // releases all the child barriers, so follow-up setup such as task-team
    // sync would be skipped and this thread would be left without a task team.
    // Usually that is harmless, but in one corner case, when the first task
    // encountered is an untied task, the check in __kmp_task_alloc crashes
    // because it uses the task team pointer without checking for NULL
    // (presumably assuming it has already been set up).
    if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
        !TCR_4(__kmp_hidden_helper_team_done)) {
      // If there are still hidden helper tasks to be executed, the hidden
      // helper thread will not enter a waiting state.
      if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
        __kmp_hidden_helper_worker_thread_wait();
      }
      continue;
    }

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if (task_team != NULL && TCR_4(task_team->tt.tt_found_tasks) &&
        !__kmp_wpolicy_passive)
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if wait loop designated non-sleepable
    // in template parameters
    if (!Sleepable)
      continue;

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
      flag->mwait(th_gtid);
    } else {
#endif
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
      flag->suspend(th_gtid);
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    }
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // undo the previous decrement of unfinished_threads so that the
        // thread can decrement at the join barrier with no problem
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
// Set up a monitor on the flag variable causing the calling thread to wait in
// a less active state until the flag variable is modified.
template <class C>
static inline void __kmp_mwait_template(int th_gtid, C *flag) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
  kmp_info_t *th = __kmp_threads[th_gtid];

  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
                flag->get()));

  // User-level mwait is available
  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);

  __kmp_suspend_initialize_thread(th);
  __kmp_lock_suspend_mx(th);

  volatile void *spin = flag->get();
  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));

  if (!flag->done_check()) {
    // Mark thread as no longer active
    th->th.th_active = FALSE;
    if (th->th.th_active_in_pool) {
      th->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
    }
    flag->set_sleeping();
    KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
#if KMP_HAVE_UMWAIT
    if (__kmp_umwait_enabled) {
      __kmp_umonitor(cacheline);
    }
#elif KMP_HAVE_MWAIT
    if (__kmp_mwait_enabled) {
      __kmp_mm_monitor(cacheline, 0, 0);
    }
#endif
    // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
    // the address could happen after the last time we checked and before
    // monitoring started, in which case monitor can't detect the change.
    if (flag->done_check())
      flag->unset_sleeping();
    else {
      // if flag changes here, wake-up happens immediately
      TCW_PTR(th->th.th_sleep_loc, (void *)flag);
      th->th.th_sleep_loc_type = flag->get_type();
      __kmp_unlock_suspend_mx(th);
      KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
#if KMP_HAVE_UMWAIT
      if (__kmp_umwait_enabled) {
        __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
      }
#elif KMP_HAVE_MWAIT
      if (__kmp_mwait_enabled) {
        __kmp_mm_mwait(0, __kmp_mwait_hints);
      }
#endif
      KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
      __kmp_lock_suspend_mx(th);
      // Clean up sleep info; doesn't matter how/why this thread stopped waiting
      if (flag->is_sleeping())
        flag->unset_sleeping();
      TCW_PTR(th->th.th_sleep_loc, NULL);
      th->th.th_sleep_loc_type = flag_unset;
    }
    // Mark thread as active again
    th->th.th_active = TRUE;
    if (TCR_4(th->th.th_in_pool)) {
      KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
      th->th.th_active_in_pool = TRUE;
    }
  } // Drop out to main wait loop to check flag, handle tasks, etc.
  __kmp_unlock_suspend_mx(th);
  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
}
#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT

/* Release any threads waiting on the flag by moving the flag to the released
   state, and resume any waiting thread if the sleep bit(s) indicate it went to
   sleep. For every thread that calls __kmp_wait_template, some other thread
   must call this function to wake up the potentially sleeping thread and
   prevent deadlocks!  */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%p)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n", gtid,
                 flag->get(), flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}
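
/* Illustrative sketch (hypothetical local names, mirroring how the barrier
   code uses the typed flag wrappers defined below): a waiter and a releaser
   operate on the same shared location through separate flag objects.

     volatile kmp_uint64 b_go = KMP_INIT_BARRIER_STATE;
     // waiting thread: spins/sleeps until b_go reaches the checker value
     kmp_flag_64<> wait_flag(&b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
     wait_flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(NULL));
     // releasing thread: bumps b_go and wakes the waiter if it is sleeping
     kmp_flag_64<> go_flag(&b_go);
     go_flag.release();
*/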

template <bool Cancellable = false, bool Sleepable = true>
class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p)
      : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};

template <bool Cancellable = false, bool Sleepable = true>
class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
public:
  kmp_flag_64(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc)
      : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};

template <bool Cancellable = false, bool Sleepable = true>
class kmp_atomic_flag_64
    : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
public:
  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
      : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
      : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
      : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
                     std::atomic<bool> *loc)
      : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
  void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
  void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_atomic_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
                                 Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
                                 Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return atomic_flag64; }
};

// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
  kmp_uint32 offset; /**< Portion of flag of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread to redirect to different flag location. */
#if USE_ITT_BUILD
  void *itt_sync_obj; /**< ITT object to pass to new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
  }
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
        flag_switch(false),
        bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
        flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  virtual ~kmp_flag_oncore() override {}
  void *operator new(size_t size) { return __kmp_allocate(size); }
  void operator delete(void *p) { __kmp_free(p); }
  bool done_check_val(kmp_uint64 old_loc) override {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() override { return done_check_val(*get()); }
  bool notdone_check() override {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
                         (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
#if OMPD_SUPPORT
    int ret = __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_task_end();
    return ret;
#else
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
#endif
  }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};
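
/* Illustrative sketch (not part of the runtime): the on-core flag packs one
   byte per thread into a single 64-bit word, so several threads on a core can
   be released through one location. "offset" selects the byte belonging to a
   particular thread; a release writes a 1 into just that byte, using an
   atomic OR of a per-byte mask when other bytes may be written concurrently.

     kmp_uint64 word = 0;                      // shared on-core flag word
     size_t my_offset = 3;                     // this thread's byte index
     kmp_uint64 mask = 0;
     ((unsigned char *)&mask)[my_offset] = 1;  // per-byte release mask
     word |= mask;                             // stand-in for KMP_TEST_THEN_OR64
     bool released = ((unsigned char *)&word)[my_offset] == 1; // true
*/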

static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
  int gtid = __kmp_gtid_from_thread(thr);
  void *flag = CCAST(void *, thr->th.th_sleep_loc);
  flag_type type = thr->th.th_sleep_loc_type;
  if (!flag)
    return;
  // Attempt to wake up the thread: examine its flag type and call the
  // appropriate resume template.
  switch (type) {
  case flag32:
    __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
    break;
  case flag64:
    __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
    break;
  case atomic_flag64:
    __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
    break;
  case flag_unset:
    KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
    break;
  }
}
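
/* Illustrative sketch (hypothetical caller): wake a possibly sleeping thread
   without knowing which flag type it parked on. The wrapper reads the sleep
   location and flag type the thread recorded before suspending, and does
   nothing when no sleep location is recorded.

     kmp_info_t *victim = __kmp_threads[some_gtid]; // some_gtid: a valid gtid
     __kmp_null_resume_wrapper(victim);
*/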

/*!
@}
*/

#endif // KMP_WAIT_RELEASE_H