1345153Sdim/*! \file */
2345153Sdim/*
3345153Sdim * kmp.h -- KPTS runtime header file.
4345153Sdim */
5345153Sdim
6345153Sdim//===----------------------------------------------------------------------===//
7345153Sdim//
8353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9353358Sdim// See https://llvm.org/LICENSE.txt for license information.
10353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11345153Sdim//
12345153Sdim//===----------------------------------------------------------------------===//
13345153Sdim
14345153Sdim#ifndef KMP_H
15345153Sdim#define KMP_H
16345153Sdim
17345153Sdim#include "kmp_config.h"
18345153Sdim
19345153Sdim/* #define BUILD_PARALLEL_ORDERED 1 */
20345153Sdim
21345153Sdim/* This fix replaces gettimeofday with clock_gettime for better scalability on
22345153Sdim   the Altix.  Requires user code to be linked with -lrt. */
23345153Sdim//#define FIX_SGI_CLOCK
24345153Sdim
25345153Sdim/* Defines for OpenMP 3.0 tasking and auto scheduling */
26345153Sdim
27345153Sdim#ifndef KMP_STATIC_STEAL_ENABLED
28345153Sdim#define KMP_STATIC_STEAL_ENABLED 1
29345153Sdim#endif
30345153Sdim
31345153Sdim#define TASK_CURRENT_NOT_QUEUED 0
32345153Sdim#define TASK_CURRENT_QUEUED 1
33345153Sdim
34345153Sdim#ifdef BUILD_TIED_TASK_STACK
35345153Sdim#define TASK_STACK_EMPTY 0 // entries when the stack is empty
36345153Sdim#define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK
37345153Sdim// Number of entries in each task stack array
38345153Sdim#define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS)
39345153Sdim// Mask for determining index into stack block
40345153Sdim#define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1)
41345153Sdim#endif // BUILD_TIED_TASK_STACK
42345153Sdim
43345153Sdim#define TASK_NOT_PUSHED 1
44345153Sdim#define TASK_SUCCESSFULLY_PUSHED 0
45345153Sdim#define TASK_TIED 1
46345153Sdim#define TASK_UNTIED 0
47345153Sdim#define TASK_EXPLICIT 1
48345153Sdim#define TASK_IMPLICIT 0
49345153Sdim#define TASK_PROXY 1
50345153Sdim#define TASK_FULL 0
51353358Sdim#define TASK_DETACHABLE 1
52353358Sdim#define TASK_UNDETACHABLE 0
53345153Sdim
54345153Sdim#define KMP_CANCEL_THREADS
55345153Sdim#define KMP_THREAD_ATTR
56345153Sdim
57345153Sdim// Android does not have pthread_cancel.  Undefine KMP_CANCEL_THREADS if being
58345153Sdim// built on Android
59345153Sdim#if defined(__ANDROID__)
60345153Sdim#undef KMP_CANCEL_THREADS
61345153Sdim#endif
62345153Sdim
63345153Sdim#include <signal.h>
64345153Sdim#include <stdarg.h>
65345153Sdim#include <stddef.h>
66345153Sdim#include <stdio.h>
67345153Sdim#include <stdlib.h>
68345153Sdim#include <string.h>
69345153Sdim/* include <ctype.h> don't use; problems with /MD on Windows* OS NT due to bad
70345153Sdim   Microsoft library. Some macros provided below to replace these functions  */
71345153Sdim#ifndef __ABSOFT_WIN
72345153Sdim#include <sys/types.h>
73345153Sdim#endif
74345153Sdim#include <limits.h>
75345153Sdim#include <time.h>
76345153Sdim
77345153Sdim#include <errno.h>
78345153Sdim
79345153Sdim#include "kmp_os.h"
80345153Sdim
81345153Sdim#include "kmp_safe_c_api.h"
82345153Sdim
83345153Sdim#if KMP_STATS_ENABLED
84345153Sdimclass kmp_stats_list;
85345153Sdim#endif
86345153Sdim
87345153Sdim#if KMP_USE_HIER_SCHED
88345153Sdim// Only include hierarchical scheduling if affinity is supported
89345153Sdim#undef KMP_USE_HIER_SCHED
90345153Sdim#define KMP_USE_HIER_SCHED KMP_AFFINITY_SUPPORTED
91345153Sdim#endif
92345153Sdim
93345153Sdim#if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED
94345153Sdim#include "hwloc.h"
95345153Sdim#ifndef HWLOC_OBJ_NUMANODE
96345153Sdim#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
97345153Sdim#endif
98345153Sdim#ifndef HWLOC_OBJ_PACKAGE
99345153Sdim#define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
100345153Sdim#endif
101353358Sdim#if HWLOC_API_VERSION >= 0x00020000
102353358Sdim// hwloc 2.0 changed type of depth of object from unsigned to int
103353358Sdimtypedef int kmp_hwloc_depth_t;
104353358Sdim#else
105353358Sdimtypedef unsigned int kmp_hwloc_depth_t;
106345153Sdim#endif
107353358Sdim#endif
108345153Sdim
109345153Sdim#if KMP_ARCH_X86 || KMP_ARCH_X86_64
110345153Sdim#include <xmmintrin.h>
111345153Sdim#endif
112345153Sdim
113345153Sdim#include "kmp_debug.h"
114345153Sdim#include "kmp_lock.h"
115345153Sdim#include "kmp_version.h"
116345153Sdim#if USE_DEBUGGER
117345153Sdim#include "kmp_debugger.h"
118345153Sdim#endif
119345153Sdim#include "kmp_i18n.h"
120345153Sdim
121345153Sdim#define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS)
122345153Sdim
123345153Sdim#include "kmp_wrapper_malloc.h"
124345153Sdim#if KMP_OS_UNIX
125345153Sdim#include <unistd.h>
126345153Sdim#if !defined NSIG && defined _NSIG
127345153Sdim#define NSIG _NSIG
128345153Sdim#endif
129345153Sdim#endif
130345153Sdim
131345153Sdim#if KMP_OS_LINUX
132345153Sdim#pragma weak clock_gettime
133345153Sdim#endif
134345153Sdim
135345153Sdim#if OMPT_SUPPORT
136345153Sdim#include "ompt-internal.h"
137345153Sdim#endif
138345153Sdim
139345153Sdim// Affinity format function
140345153Sdim#include "kmp_str.h"
141345153Sdim
142345153Sdim// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
143345153Sdim// 3 - fast allocation using sync, non-sync free lists of any size, non-self
144345153Sdim// free lists of limited size.
145345153Sdim#ifndef USE_FAST_MEMORY
146345153Sdim#define USE_FAST_MEMORY 3
147345153Sdim#endif
148345153Sdim
149345153Sdim#ifndef KMP_NESTED_HOT_TEAMS
150345153Sdim#define KMP_NESTED_HOT_TEAMS 0
151345153Sdim#define USE_NESTED_HOT_ARG(x)
152345153Sdim#else
153345153Sdim#if KMP_NESTED_HOT_TEAMS
154345153Sdim#define USE_NESTED_HOT_ARG(x) , x
155345153Sdim#else
156345153Sdim#define USE_NESTED_HOT_ARG(x)
157345153Sdim#endif
158345153Sdim#endif
159345153Sdim
160345153Sdim// Assume using BGET compare_exchange instruction instead of lock by default.
161345153Sdim#ifndef USE_CMP_XCHG_FOR_BGET
162345153Sdim#define USE_CMP_XCHG_FOR_BGET 1
163345153Sdim#endif
164345153Sdim
165345153Sdim// Test to see if queuing lock is better than bootstrap lock for bget
166345153Sdim// #ifndef USE_QUEUING_LOCK_FOR_BGET
167345153Sdim// #define USE_QUEUING_LOCK_FOR_BGET
168345153Sdim// #endif
169345153Sdim
170345153Sdim#define KMP_NSEC_PER_SEC 1000000000L
171345153Sdim#define KMP_USEC_PER_SEC 1000000L
172345153Sdim
173345153Sdim/*!
174345153Sdim@ingroup BASIC_TYPES
175345153Sdim@{
176345153Sdim*/
177345153Sdim
178345153Sdim/*!
179345153SdimValues for bit flags used in the ident_t to describe the fields.
180345153Sdim*/
enum {
  /*! Use trampoline for internal microtasks */
  KMP_IDENT_IMB = 0x01,
  /*! Use c-style ident structure */
  KMP_IDENT_KMPC = 0x02,
  /* 0x04 is no longer used */
  /*! Entry point generated by auto-parallelization */
  KMP_IDENT_AUTOPAR = 0x08,
  /*! Compiler generates atomic reduction option for kmpc_reduce* */
  KMP_IDENT_ATOMIC_REDUCE = 0x10,
  /*! To mark a 'barrier' directive in user code */
  KMP_IDENT_BARRIER_EXPL = 0x20,
  /*! To Mark implicit barriers. */
  KMP_IDENT_BARRIER_IMPL = 0x0040,
  /*! Bits 6..8 (mask 0x01C0) form a small field identifying which construct
      an implicit barrier belongs to; the values below are the pre-shifted
      patterns for that field (all of them include the IMPL bit 0x0040). */
  KMP_IDENT_BARRIER_IMPL_MASK = 0x01C0,
  KMP_IDENT_BARRIER_IMPL_FOR = 0x0040,
  KMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,

  KMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140,
  KMP_IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0,

  /*! To mark a static loop in OMPT callbacks */
  KMP_IDENT_WORK_LOOP = 0x200,
  /*! To mark a sections directive in OMPT callbacks */
  KMP_IDENT_WORK_SECTIONS = 0x400,
  /*! To mark a distribute construct in OMPT callbacks */
  KMP_IDENT_WORK_DISTRIBUTE = 0x800,
  /*! Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and
      not currently used. If one day we need more bits, then we can use
      an invalid combination of hints to mean that another, larger field
      should be used in a different flag. */
  KMP_IDENT_ATOMIC_HINT_MASK = 0xFF0000,
  KMP_IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000,
  KMP_IDENT_ATOMIC_HINT_CONTENDED = 0x020000,
  KMP_IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000,
  KMP_IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000,
};
218345153Sdim
219345153Sdim/*!
220345153Sdim * The ident structure that describes a source location.
221345153Sdim */
typedef struct ident {
  kmp_int32 reserved_1; /**<  might be used in Fortran; see above  */
  /* flags is a bitwise OR of the KMP_IDENT_* values defined above. */
  kmp_int32 flags; /**<  also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
                      identifies this union member  */
  kmp_int32 reserved_2; /**<  not really used in Fortran any more; see above */
#if USE_ITT_BUILD
/*  but currently used for storing region-specific ITT */
/*  contextual information. */
#endif /* USE_ITT_BUILD */
  kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++  */
  char const *psource; /**< String describing the source location.
                       The string is composed of semi-colon separated fields
                       which describe the source file, the function and a pair
                       of line numbers that delimit the construct.
                       NOTE(review): exact field layout (e.g.
                       ";file;func;begin;end;;") not visible here - confirm
                       against the compiler that emits these literals. */
} ident_t;
237345153Sdim/*!
238345153Sdim@}
239345153Sdim*/
240345153Sdim
241345153Sdim// Some forward declarations.
242345153Sdimtypedef union kmp_team kmp_team_t;
243345153Sdimtypedef struct kmp_taskdata kmp_taskdata_t;
244345153Sdimtypedef union kmp_task_team kmp_task_team_t;
245345153Sdimtypedef union kmp_team kmp_team_p;
246345153Sdimtypedef union kmp_info kmp_info_p;
247345153Sdimtypedef union kmp_root kmp_root_p;
248345153Sdim
249345153Sdim#ifdef __cplusplus
250345153Sdimextern "C" {
251345153Sdim#endif
252345153Sdim
253345153Sdim/* ------------------------------------------------------------------------ */
254345153Sdim
255345153Sdim/* Pack two 32-bit signed integers into a 64-bit signed integer */
256345153Sdim/* ToDo: Fix word ordering for big-endian machines. */
#define KMP_PACK_64(HIGH_32, LOW_32)                                           \
  ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32)))

// Generic string manipulation macros. Assume that _x is of type char *.
// Each macro advances _x in place.
// Wrapped in do { } while (0) so every macro expands to a single statement:
// the previous bare { } form made "if (cond) SKIP_WS(x); else ..." a syntax
// error, because the semicolon after the block terminated the if before the
// else (CERT PRE10-C). Callers' existing "SKIP_WS(x);" usage is unchanged.
//
// Skip spaces and horizontal tabs.
#define SKIP_WS(_x)                                                            \
  do {                                                                         \
    while (*(_x) == ' ' || *(_x) == '\t')                                      \
      (_x)++;                                                                  \
  } while (0)
// Skip a run of decimal digits.
#define SKIP_DIGITS(_x)                                                        \
  do {                                                                         \
    while (*(_x) >= '0' && *(_x) <= '9')                                       \
      (_x)++;                                                                  \
  } while (0)
// Skip an identifier-like token: [0-9A-Za-z_]* (locale-independent on
// purpose; see the <ctype.h> note near the top of this file).
#define SKIP_TOKEN(_x)                                                         \
  do {                                                                         \
    while ((*(_x) >= '0' && *(_x) <= '9') || (*(_x) >= 'a' && *(_x) <= 'z') || \
           (*(_x) >= 'A' && *(_x) <= 'Z') || *(_x) == '_')                     \
      (_x)++;                                                                  \
  } while (0)
// Advance _x to the next occurrence of _c, or to the terminating NUL.
#define SKIP_TO(_x, _c)                                                        \
  do {                                                                         \
    while (*(_x) != '\0' && *(_x) != (_c))                                     \
      (_x)++;                                                                  \
  } while (0)
282345153Sdim
283345153Sdim/* ------------------------------------------------------------------------ */
284345153Sdim
// NOTE: function-like max/min; each argument is evaluated twice, so never
// pass expressions with side effects (e.g. KMP_MAX(i++, j)).
#define KMP_MAX(x, y) ((x) > (y) ? (x) : (y))
#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
287345153Sdim
288345153Sdim/* ------------------------------------------------------------------------ */
289345153Sdim/* Enumeration types */
290345153Sdim
// State of a runtime timer.
// NOTE(review): ts_last_state appears to be a count/terminator sentinel
// rather than a real state - confirm at the use sites.
enum kmp_state_timer {
  ts_stop,
  ts_start,
  ts_pause,

  ts_last_state
};
298345153Sdim
// Mechanisms for dynamic adjustment of the number of threads.
// dynamic_load_balance is only available when built with USE_LOAD_BALANCE.
// NOTE(review): dynamic_max appears to be a count sentinel - confirm.
enum dynamic_mode {
  dynamic_default,
#ifdef USE_LOAD_BALANCE
  dynamic_load_balance,
#endif /* USE_LOAD_BALANCE */
  dynamic_random,
  dynamic_thread_limit,
  dynamic_max
};
308345153Sdim
309345153Sdim/* external schedule constants, duplicate enum omp_sched in omp.h in order to
310345153Sdim * not include it here */
311345153Sdim#ifndef KMP_SCHED_TYPE_DEFINED
312345153Sdim#define KMP_SCHED_TYPE_DEFINED
typedef enum kmp_sched {
  kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check
  // Note: need to adjust __kmp_sch_map global array in case enum is changed
  kmp_sched_static = 1, // mapped to kmp_sch_static_chunked           (33)
  kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked          (35)
  kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked           (36)
  kmp_sched_auto = 4, // mapped to kmp_sch_auto                     (38)
  kmp_sched_upper_std = 5, // upper bound for standard schedules
  kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
  kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
#if KMP_STATIC_STEAL_ENABLED
  kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
#endif
  kmp_sched_upper,
  kmp_sched_default = kmp_sched_static, // default scheduling
  // Modifier bit, not a kind: or-ed into a kind by
  // __kmp_sched_apply_mods_stdkind and masked off by
  // __kmp_sched_without_mods (both defined below).
  kmp_sched_monotonic = 0x80000000
} kmp_sched_t;
330345153Sdim#endif
331345153Sdim
332345153Sdim/*!
333345153Sdim @ingroup WORK_SHARING
334345153Sdim * Describes the loop schedule to be used for a parallel for loop.
335345153Sdim */
enum sched_type : kmp_int32 {
  kmp_sch_lower = 32, /**< lower bound for unordered values */
  kmp_sch_static_chunked = 33,
  kmp_sch_static = 34, /**< static unspecialized */
  kmp_sch_dynamic_chunked = 35,
  kmp_sch_guided_chunked = 36, /**< guided unspecialized */
  kmp_sch_runtime = 37, /**< schedule deferred to runtime (OMP_SCHEDULE) */
  kmp_sch_auto = 38, /**< auto */
  kmp_sch_trapezoidal = 39,

  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_static_greedy = 40,
  kmp_sch_static_balanced = 41,
  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_guided_iterative_chunked = 42,
  kmp_sch_guided_analytical_chunked = 43,
  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_static_steal = 44,

  /* static with chunk adjustment (e.g., simd) */
  kmp_sch_static_balanced_chunked = 45,
  kmp_sch_guided_simd = 46, /**< guided with chunk adjustment */
  kmp_sch_runtime_simd = 47, /**< runtime with chunk adjustment */

  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_sch_upper, /**< upper bound for unordered values */

  kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */
  kmp_ord_static_chunked = 65,
  kmp_ord_static = 66, /**< ordered static unspecialized */
  kmp_ord_dynamic_chunked = 67,
  kmp_ord_guided_chunked = 68,
  kmp_ord_runtime = 69,
  kmp_ord_auto = 70, /**< ordered auto */
  kmp_ord_trapezoidal = 71,
  kmp_ord_upper, /**< upper bound for ordered values */

  /* Schedules for Distribute construct */
  kmp_distribute_static_chunked = 91, /**< distribute static chunked */
  kmp_distribute_static = 92, /**< distribute static unspecialized */

  /* For the "nomerge" versions, kmp_dispatch_next*() will always return a
     single iteration/chunk, even if the loop is serialized. For the schedule
     types listed above, the entire iteration vector is returned if the loop is
     serialized. This doesn't work for gcc/gcomp sections. */
  kmp_nm_lower = 160, /**< lower bound for nomerge values */

  kmp_nm_static_chunked =
      (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
  kmp_nm_static = 162, /**< static unspecialized */
  kmp_nm_dynamic_chunked = 163,
  kmp_nm_guided_chunked = 164, /**< guided unspecialized */
  kmp_nm_runtime = 165,
  kmp_nm_auto = 166, /**< auto */
  kmp_nm_trapezoidal = 167,

  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_nm_static_greedy = 168,
  kmp_nm_static_balanced = 169,
  /* accessible only through KMP_SCHEDULE environment variable */
  kmp_nm_guided_iterative_chunked = 170,
  kmp_nm_guided_analytical_chunked = 171,
  kmp_nm_static_steal =
      172, /* accessible only through OMP_SCHEDULE environment variable */

  kmp_nm_ord_static_chunked = 193,
  kmp_nm_ord_static = 194, /**< ordered static unspecialized */
  kmp_nm_ord_dynamic_chunked = 195,
  kmp_nm_ord_guided_chunked = 196,
  kmp_nm_ord_runtime = 197,
  kmp_nm_ord_auto = 198, /**< auto */
  kmp_nm_ord_trapezoidal = 199,
  kmp_nm_upper, /**< upper bound for nomerge values */

  /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. Since
     we need to distinguish the three possible cases (no modifier, monotonic
     modifier, nonmonotonic modifier), we need separate bits for each modifier.
     The absence of monotonic does not imply nonmonotonic, especially since 4.5
     says that the behaviour of the "no modifier" case is implementation defined
     in 4.5, but will become "nonmonotonic" in 5.0.

     Since we're passing a full 32 bit value, we can use a couple of high bits
     for these flags; out of paranoia we avoid the sign bit.

     These modifiers can be or-ed into non-static schedules by the compiler to
     pass the additional information. They will be stripped early in the
     processing in __kmp_dispatch_init when setting up schedules, so most of the
     code won't ever see schedules with these bits set.  */
  kmp_sch_modifier_monotonic =
      (1 << 29), /**< Set if the monotonic schedule modifier was present */
  kmp_sch_modifier_nonmonotonic =
      (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */

/* Helpers to strip, test, extract and apply the modifier bits above. */
#define SCHEDULE_WITHOUT_MODIFIERS(s)                                          \
  (enum sched_type)(                                                           \
      (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))
#define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0)
#define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0)
#define SCHEDULE_HAS_NO_MODIFIERS(s)                                           \
  (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
#define SCHEDULE_GET_MODIFIERS(s)                                              \
  ((enum sched_type)(                                                          \
      (s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)))
#define SCHEDULE_SET_MODIFIERS(s, m)                                           \
  (s = (enum sched_type)((kmp_int32)s | (kmp_int32)m))
#define SCHEDULE_NONMONOTONIC 0
#define SCHEDULE_MONOTONIC 1

  kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */
};
446345153Sdim
447353358Sdim// Apply modifiers on internal kind to standard kind
448353358Sdimstatic inline void
449353358Sdim__kmp_sched_apply_mods_stdkind(kmp_sched_t *kind,
450353358Sdim                               enum sched_type internal_kind) {
451353358Sdim  if (SCHEDULE_HAS_MONOTONIC(internal_kind)) {
452353358Sdim    *kind = (kmp_sched_t)((int)*kind | (int)kmp_sched_monotonic);
453353358Sdim  }
454353358Sdim}
455353358Sdim
456353358Sdim// Apply modifiers on standard kind to internal kind
457353358Sdimstatic inline void
458353358Sdim__kmp_sched_apply_mods_intkind(kmp_sched_t kind,
459353358Sdim                               enum sched_type *internal_kind) {
460353358Sdim  if ((int)kind & (int)kmp_sched_monotonic) {
461353358Sdim    *internal_kind = (enum sched_type)((int)*internal_kind |
462353358Sdim                                       (int)kmp_sch_modifier_monotonic);
463353358Sdim  }
464353358Sdim}
465353358Sdim
466353358Sdim// Get standard schedule without modifiers
467353358Sdimstatic inline kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind) {
468353358Sdim  return (kmp_sched_t)((int)kind & ~((int)kmp_sched_monotonic));
469353358Sdim}
470353358Sdim
/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
typedef union kmp_r_sched {
  struct {
    enum sched_type r_sched_type; // schedule kind
    int chunk; // chunk size
  };
  kmp_int64 sched; // the two fields above, viewed as one 64-bit value
} kmp_r_sched_t;
479345153Sdim
480345153Sdimextern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our
481345153Sdim// internal schedule types
482345153Sdim
// Runtime execution/wait policy.
// NOTE(review): presumably selected via the KMP_LIBRARY setting
// (serial / turnaround / throughput) - confirm against the settings code.
enum library_type {
  library_none,
  library_serial,
  library_turnaround,
  library_throughput
};
489345153Sdim
490345153Sdim#if KMP_OS_LINUX
// Which OS time source the runtime uses; see the FIX_SGI_CLOCK note near
// the top of this file (clock_gettime vs. gettimeofday, Linux only).
enum clock_function_type {
  clock_function_gettimeofday,
  clock_function_clock_gettime
};
495345153Sdim#endif /* KMP_OS_LINUX */
496345153Sdim
497345153Sdim#if KMP_MIC_SUPPORTED
// Intel MIC (Xeon Phi) variant; non_mic when not on MIC hardware.
// NOTE(review): semantics inferred from the names - confirm at use sites.
enum mic_type { non_mic, mic1, mic2, mic3, dummy };
499345153Sdim#endif
500345153Sdim
501345153Sdim/* -- fast reduction stuff ------------------------------------------------ */
502345153Sdim
503345153Sdim#undef KMP_FAST_REDUCTION_BARRIER
504345153Sdim#define KMP_FAST_REDUCTION_BARRIER 1
505345153Sdim
506345153Sdim#undef KMP_FAST_REDUCTION_CORE_DUO
507345153Sdim#if KMP_ARCH_X86 || KMP_ARCH_X86_64
508345153Sdim#define KMP_FAST_REDUCTION_CORE_DUO 1
509345153Sdim#endif
510345153Sdim
// Methods the runtime can choose to implement a reduction. Values occupy
// byte 1 only, leaving byte 0 free for a barrier type when packed into a
// PACKED_REDUCTION_METHOD_T (see the packed_reduction_method description
// below) - so packing/unpacking needs no shift.
enum _reduction_method {
  reduction_method_not_defined = 0,
  critical_reduce_block = (1 << 8),
  atomic_reduce_block = (2 << 8),
  tree_reduce_block = (3 << 8),
  empty_reduce_block = (4 << 8)
};
518345153Sdim
519345153Sdim// Description of the packed_reduction_method variable:
520345153Sdim// The packed_reduction_method variable consists of two enum types variables
521345153Sdim// that are packed together into 0-th byte and 1-st byte:
522345153Sdim// 0: (packed_reduction_method & 0x000000FF) is a 'enum barrier_type' value of
523345153Sdim// barrier that will be used in fast reduction: bs_plain_barrier or
524345153Sdim// bs_reduction_barrier
525345153Sdim// 1: (packed_reduction_method & 0x0000FF00) is a reduction method that will
526345153Sdim// be used in fast reduction;
527345153Sdim// Reduction method is of 'enum _reduction_method' type and it's defined the way
528345153Sdim// so that the bits of 0-th byte are empty, so no need to execute a shift
529345153Sdim// instruction while packing/unpacking
530345153Sdim
#if KMP_FAST_REDUCTION_BARRIER
// Pack method (byte 1) and barrier type (byte 0) into one value; the
// _reduction_method values leave byte 0 clear, so a plain OR suffices.
#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type)      \
  ((reduction_method) | (barrier_type))

// Extract the _reduction_method stored in byte 1.
#define UNPACK_REDUCTION_METHOD(packed_reduction_method)                       \
  ((enum _reduction_method)((packed_reduction_method) & (0x0000FF00)))

// Extract the barrier_type stored in byte 0.
#define UNPACK_REDUCTION_BARRIER(packed_reduction_method)                      \
  ((enum barrier_type)((packed_reduction_method) & (0x000000FF)))
#else
// Without fast-reduction barriers only the method is stored; the barrier
// is always bs_plain_barrier.
#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type)      \
  (reduction_method)

#define UNPACK_REDUCTION_METHOD(packed_reduction_method)                       \
  (packed_reduction_method)

#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) (bs_plain_barrier)
#endif

// True when the packed value's method equals which_reduction_block.
#define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block)  \
  ((UNPACK_REDUCTION_METHOD(packed_reduction_method)) ==                       \
   (which_reduction_block))

#if KMP_FAST_REDUCTION_BARRIER
#define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER                               \
  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier))

#define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER                                   \
  (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier))
#endif
561345153Sdim
562345153Sdimtypedef int PACKED_REDUCTION_METHOD_T;
563345153Sdim
564345153Sdim/* -- end of fast reduction stuff ----------------------------------------- */
565345153Sdim
566345153Sdim#if KMP_OS_WINDOWS
567345153Sdim#define USE_CBLKDATA
568345153Sdim#if KMP_MSVC_COMPAT
569345153Sdim#pragma warning(push)
570345153Sdim#pragma warning(disable : 271 310)
571345153Sdim#endif
572345153Sdim#include <windows.h>
573345153Sdim#if KMP_MSVC_COMPAT
574345153Sdim#pragma warning(pop)
575345153Sdim#endif
576345153Sdim#endif
577345153Sdim
578345153Sdim#if KMP_OS_UNIX
579345153Sdim#include <dlfcn.h>
580345153Sdim#include <pthread.h>
581345153Sdim#endif
582345153Sdim
583345153Sdim/* Only Linux* OS and Windows* OS support thread affinity. */
584345153Sdim#if KMP_AFFINITY_SUPPORTED
585345153Sdim
586345153Sdim// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
587345153Sdim#if KMP_OS_WINDOWS
588345153Sdim#if _MSC_VER < 1600 && KMP_MSVC_COMPAT
// Fallback declaration matching the Windows GROUP_AFFINITY structure for
// toolchains that predate VS2010 (see the guard above).
typedef struct GROUP_AFFINITY {
  KAFFINITY Mask; // bitmap of processors within the group
  WORD Group; // processor-group index
  WORD Reserved[3];
} GROUP_AFFINITY;
594345153Sdim#endif /* _MSC_VER < 1600 */
595345153Sdim#if KMP_GROUP_AFFINITY
596345153Sdimextern int __kmp_num_proc_groups;
597345153Sdim#else
598345153Sdimstatic const int __kmp_num_proc_groups = 1;
599345153Sdim#endif /* KMP_GROUP_AFFINITY */
600345153Sdimtypedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
601345153Sdimextern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
602345153Sdim
603345153Sdimtypedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
604345153Sdimextern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;
605345153Sdim
606345153Sdimtypedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
607345153Sdimextern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
608345153Sdim
609345153Sdimtypedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *,
610345153Sdim                                             GROUP_AFFINITY *);
611345153Sdimextern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
612345153Sdim#endif /* KMP_OS_WINDOWS */
613345153Sdim
614345153Sdim#if KMP_USE_HWLOC
615345153Sdimextern hwloc_topology_t __kmp_hwloc_topology;
616345153Sdimextern int __kmp_hwloc_error;
617345153Sdimextern int __kmp_numa_detected;
618345153Sdimextern int __kmp_tile_depth;
619345153Sdim#endif
620345153Sdim
621345153Sdimextern size_t __kmp_affin_mask_size;
622345153Sdim#define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
623345153Sdim#define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
624345153Sdim#define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
625345153Sdim#define KMP_CPU_SET_ITERATE(i, mask)                                           \
626345153Sdim  for (i = (mask)->begin(); (int)i != (mask)->end(); i = (mask)->next(i))
627345153Sdim#define KMP_CPU_SET(i, mask) (mask)->set(i)
628345153Sdim#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
629345153Sdim#define KMP_CPU_CLR(i, mask) (mask)->clear(i)
630345153Sdim#define KMP_CPU_ZERO(mask) (mask)->zero()
631345153Sdim#define KMP_CPU_COPY(dest, src) (dest)->copy(src)
632345153Sdim#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
633345153Sdim#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
634345153Sdim#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
635345153Sdim#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
636345153Sdim#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
637345153Sdim#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
638345153Sdim#define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
639345153Sdim#define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
640345153Sdim#define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
641345153Sdim#define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i)
642345153Sdim#define KMP_CPU_ALLOC_ARRAY(arr, n)                                            \
643345153Sdim  (arr = __kmp_affinity_dispatch->allocate_mask_array(n))
644345153Sdim#define KMP_CPU_FREE_ARRAY(arr, n)                                             \
645345153Sdim  __kmp_affinity_dispatch->deallocate_mask_array(arr)
646345153Sdim#define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)
647345153Sdim#define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)
648345153Sdim#define __kmp_get_system_affinity(mask, abort_bool)                            \
649345153Sdim  (mask)->get_system_affinity(abort_bool)
650345153Sdim#define __kmp_set_system_affinity(mask, abort_bool)                            \
651345153Sdim  (mask)->set_system_affinity(abort_bool)
652345153Sdim#define __kmp_get_proc_group(mask) (mask)->get_proc_group()
653345153Sdim
654345153Sdimclass KMPAffinity {
655345153Sdimpublic:
  // Abstract OS affinity mask. Concrete subclasses (reached through
  // __kmp_affinity_dispatch; see the KMP_CPU_* macros above) implement the
  // bit operations and the system get/set calls; the defaults here are
  // inert stubs.
  class Mask {
  public:
    void *operator new(size_t n);
    void operator delete(void *p);
    void *operator new[](size_t n);
    void operator delete[](void *p);
    virtual ~Mask() {}
    // Set bit i to 1
    virtual void set(int i) {}
    // Return bit i
    virtual bool is_set(int i) const { return false; }
    // Set bit i to 0
    virtual void clear(int i) {}
    // Zero out entire mask
    virtual void zero() {}
    // Copy src into this mask
    virtual void copy(const Mask *src) {}
    // this &= rhs
    virtual void bitwise_and(const Mask *rhs) {}
    // this |= rhs
    virtual void bitwise_or(const Mask *rhs) {}
    // this = ~this
    virtual void bitwise_not() {}
    // API for iterating over an affinity mask
    // for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
    virtual int begin() const { return 0; }
    virtual int end() const { return 0; }
    virtual int next(int previous) const { return 0; }
    // Set the system's affinity to this affinity mask's value
    // (base stub reports failure by returning -1)
    virtual int set_system_affinity(bool abort_on_error) const { return -1; }
    // Set this affinity mask to the current system affinity
    // (base stub reports failure by returning -1)
    virtual int get_system_affinity(bool abort_on_error) { return -1; }
    // Only 1 DWORD in the mask should have any procs set.
    // Return the appropriate index, or -1 for an invalid mask.
    virtual int get_proc_group() const { return -1; }
  };
692345153Sdim  void *operator new(size_t n);
693345153Sdim  void operator delete(void *p);
694345153Sdim  // Need virtual destructor
695345153Sdim  virtual ~KMPAffinity() = default;
696345153Sdim  // Determine if affinity is capable
697345153Sdim  virtual void determine_capable(const char *env_var) {}
698345153Sdim  // Bind the current thread to os proc
699345153Sdim  virtual void bind_thread(int proc) {}
700345153Sdim  // Factory functions to allocate/deallocate a mask
701345153Sdim  virtual Mask *allocate_mask() { return nullptr; }
702345153Sdim  virtual void deallocate_mask(Mask *m) {}
703345153Sdim  virtual Mask *allocate_mask_array(int num) { return nullptr; }
704345153Sdim  virtual void deallocate_mask_array(Mask *m) {}
705345153Sdim  virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; }
706345153Sdim  static void pick_api();
707345153Sdim  static void destroy_api();
708345153Sdim  enum api_type {
709345153Sdim    NATIVE_OS
710345153Sdim#if KMP_USE_HWLOC
711345153Sdim    ,
712345153Sdim    HWLOC
713345153Sdim#endif
714345153Sdim  };
715345153Sdim  virtual api_type get_api_type() const {
716345153Sdim    KMP_ASSERT(0);
717345153Sdim    return NATIVE_OS;
718345153Sdim  }
719345153Sdim
720345153Sdimprivate:
721345153Sdim  static bool picked_api;
722345153Sdim};
723345153Sdim
// Canonical mask type used throughout the runtime; the concrete class behind
// it is chosen at runtime through __kmp_affinity_dispatch.
typedef KMPAffinity::Mask kmp_affin_mask_t;
extern KMPAffinity *__kmp_affinity_dispatch;

// Declare local char buffers with this size for printing debug and info
// messages, using __kmp_affinity_print_mask().
#define KMP_AFFIN_MASK_PRINT_LEN 1024

// Thread-placement policy, parsed from KMP_AFFINITY / OMP_PROC_BIND.
enum affinity_type {
  affinity_none = 0,
  affinity_physical,
  affinity_logical,
  affinity_compact,
  affinity_scatter,
  affinity_explicit,
  affinity_balanced,
  affinity_disabled, // not used outside the env var parser
  affinity_default
};

// Granularity at which threads are pinned (finest first).
enum affinity_gran {
  affinity_gran_fine = 0,
  affinity_gran_thread,
  affinity_gran_core,
  affinity_gran_tile,
  affinity_gran_numa,
  affinity_gran_package,
  affinity_gran_node,
#if KMP_GROUP_AFFINITY
  // The "group" granularity isn't necessarily coarser than all of the
  // other levels, but we put it last in the enum.
  affinity_gran_group,
#endif /* KMP_GROUP_AFFINITY */
  affinity_gran_default
};
758345153Sdim
// Method used to discover the machine topology.
enum affinity_top_method {
  affinity_top_method_all = 0, // try all (supported) methods, in order
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  affinity_top_method_apicid,
  affinity_top_method_x2apicid,
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
  affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
#if KMP_GROUP_AFFINITY
  affinity_top_method_group,
#endif /* KMP_GROUP_AFFINITY */
  affinity_top_method_flat,
#if KMP_USE_HWLOC
  affinity_top_method_hwloc,
#endif
  affinity_top_method_default
};

#define affinity_respect_mask_default (-1)

// Global affinity settings, populated from the environment during startup.
extern enum affinity_type __kmp_affinity_type; /* Affinity type */
extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */
extern int __kmp_affinity_gran_levels; /* corresponding int value */
extern int __kmp_affinity_dups; /* Affinity duplicate masks */
extern enum affinity_top_method __kmp_affinity_top_method;
extern int __kmp_affinity_compact; /* Affinity 'compact' value */
extern int __kmp_affinity_offset; /* Affinity offset value  */
extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */
extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */
extern int __kmp_affinity_respect_mask; // Respect process' init affinity mask?
extern char *__kmp_affinity_proclist; /* proc ID list */
extern kmp_affin_mask_t *__kmp_affinity_masks; /* array of place masks */
extern unsigned __kmp_affinity_num_masks; /* number of entries in the array */
extern void __kmp_affinity_bind_thread(int which);

// Full mask of the process at startup; affinity file override if any.
extern kmp_affin_mask_t *__kmp_affin_fullMask;
extern char *__kmp_cpuinfo_file;

#endif /* KMP_AFFINITY_SUPPORTED */
797345153Sdim
// This needs to be kept in sync with the values in omp.h !!!
typedef enum kmp_proc_bind_t {
  proc_bind_false = 0,
  proc_bind_true,
  proc_bind_master,
  proc_bind_close,
  proc_bind_spread,
  proc_bind_intel, // use KMP_AFFINITY interface
  proc_bind_default
} kmp_proc_bind_t;

// Per-nesting-level proc-bind settings (OMP_PROC_BIND can give a list).
typedef struct kmp_nested_proc_bind_t {
  kmp_proc_bind_t *bind_types; // one entry per nesting level
  int size; // allocated length of bind_types
  int used; // number of entries actually specified
} kmp_nested_proc_bind_t;

extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;

// OMP 5.0 affinity display (OMP_DISPLAY_AFFINITY / OMP_AFFINITY_FORMAT).
extern int __kmp_display_affinity;
extern char *__kmp_affinity_format;
static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;

#if KMP_AFFINITY_SUPPORTED
#define KMP_PLACE_ALL (-1)
#define KMP_PLACE_UNDEFINED (-2)
// Is KMP_AFFINITY being used instead of OMP_PROC_BIND/OMP_PLACES?
#define KMP_AFFINITY_NON_PROC_BIND                                             \
  ((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false ||                 \
    __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) &&                \
   (__kmp_affinity_num_masks > 0 || __kmp_affinity_type == affinity_balanced))
#endif /* KMP_AFFINITY_SUPPORTED */

extern int __kmp_affinity_num_places;
832345153Sdim
// Construct kinds for OpenMP cancellation (matches omp_cancel semantics).
typedef enum kmp_cancel_kind_t {
  cancel_noreq = 0,
  cancel_parallel = 1,
  cancel_loop = 2,
  cancel_sections = 3,
  cancel_taskgroup = 4
} kmp_cancel_kind_t;

// KMP_HW_SUBSET support:
// One requested hardware level: how many units and at which offset.
typedef struct kmp_hws_item {
  int num;
  int offset;
} kmp_hws_item_t;

extern kmp_hws_item_t __kmp_hws_socket;
extern kmp_hws_item_t __kmp_hws_node;
extern kmp_hws_item_t __kmp_hws_tile;
extern kmp_hws_item_t __kmp_hws_core;
extern kmp_hws_item_t __kmp_hws_proc;
extern int __kmp_hws_requested;
extern int __kmp_hws_abs_flag; // absolute or per-item number requested
854345153Sdim
/* ------------------------------------------------------------------------ */

// Round sizeof(type) up to the next multiple of sz.
#define KMP_PAD(type, sz)                                                      \
  (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))

// We need to avoid using -1 as a GTID as +1 is added to the gtid
// when storing it in a lock, and the value 0 is reserved.
#define KMP_GTID_DNE (-2) /* Does not exist */
#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */
#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */
#define KMP_GTID_UNKNOWN (-5) /* Is not known */
#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */

/* OpenMP 5.0 Memory Management support */

#ifndef __OMP_H
// Duplicate type definitions from omp.h
typedef uintptr_t omp_uintptr_t;

// Allocator trait keys (omp_alloctrait_key_t in omp.h).
typedef enum {
  OMP_ATK_THREADMODEL = 1,
  OMP_ATK_ALIGNMENT = 2,
  OMP_ATK_ACCESS = 3,
  OMP_ATK_POOL_SIZE = 4,
  OMP_ATK_FALLBACK = 5,
  OMP_ATK_FB_DATA = 6,
  OMP_ATK_PINNED = 7,
  OMP_ATK_PARTITION = 8
} omp_alloctrait_key_t;

// Allocator trait values (omp_alloctrait_value_t in omp.h).
typedef enum {
  OMP_ATV_FALSE = 0,
  OMP_ATV_TRUE = 1,
  OMP_ATV_DEFAULT = 2,
  OMP_ATV_CONTENDED = 3,
  OMP_ATV_UNCONTENDED = 4,
  OMP_ATV_SEQUENTIAL = 5,
  OMP_ATV_PRIVATE = 6,
  OMP_ATV_ALL = 7,
  OMP_ATV_THREAD = 8,
  OMP_ATV_PTEAM = 9,
  OMP_ATV_CGROUP = 10,
  OMP_ATV_DEFAULT_MEM_FB = 11,
  OMP_ATV_NULL_FB = 12,
  OMP_ATV_ABORT_FB = 13,
  OMP_ATV_ALLOCATOR_FB = 14,
  OMP_ATV_ENVIRONMENT = 15,
  OMP_ATV_NEAREST = 16,
  OMP_ATV_BLOCKED = 17,
  OMP_ATV_INTERLEAVED = 18
} omp_alloctrait_value_t;

// Predefined memory spaces (opaque handles; defined in the runtime).
typedef void *omp_memspace_handle_t;
extern omp_memspace_handle_t const omp_default_mem_space;
extern omp_memspace_handle_t const omp_large_cap_mem_space;
extern omp_memspace_handle_t const omp_const_mem_space;
extern omp_memspace_handle_t const omp_high_bw_mem_space;
extern omp_memspace_handle_t const omp_low_lat_mem_space;

// One allocator trait: key plus an integer-or-pointer value.
typedef struct {
  omp_alloctrait_key_t key;
  omp_uintptr_t value;
} omp_alloctrait_t;

// Predefined allocators (opaque handles; defined in the runtime).
typedef void *omp_allocator_handle_t;
extern omp_allocator_handle_t const omp_null_allocator;
extern omp_allocator_handle_t const omp_default_mem_alloc;
extern omp_allocator_handle_t const omp_large_cap_mem_alloc;
extern omp_allocator_handle_t const omp_const_mem_alloc;
extern omp_allocator_handle_t const omp_high_bw_mem_alloc;
extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
extern omp_allocator_handle_t const omp_pteam_mem_alloc;
extern omp_allocator_handle_t const omp_thread_mem_alloc;
extern omp_allocator_handle_t const kmp_max_mem_alloc;
extern omp_allocator_handle_t __kmp_def_allocator;

// end of duplicate type definitions from omp.h
#endif
934353358Sdim
// Nonzero when the memkind library was found and initialized.
extern int __kmp_memkind_available;

typedef omp_memspace_handle_t kmp_memspace_t; // placeholder

// Internal representation of an OpenMP allocator created by
// __kmpc_init_allocator from a memspace plus a trait list.
typedef struct kmp_allocator_t {
  omp_memspace_handle_t memspace;
  void **memkind; // pointer to memkind
  int alignment; // requested alignment (OMP_ATK_ALIGNMENT)
  omp_alloctrait_value_t fb; // fallback policy (OMP_ATK_FALLBACK)
  kmp_allocator_t *fb_data; // fallback allocator (OMP_ATV_ALLOCATOR_FB)
  kmp_uint64 pool_size; // pool limit (OMP_ATK_POOL_SIZE)
  kmp_uint64 pool_used; // bytes currently allocated from the pool
} kmp_allocator_t;

// Allocator API entry points (compiler-facing __kmpc_* interface).
extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
                                                    omp_memspace_handle_t,
                                                    int ntraits,
                                                    omp_alloctrait_t traits[]);
extern void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
extern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al);
extern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid);
extern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);

// memkind library load/unload hooks.
extern void __kmp_init_memkind();
extern void __kmp_fini_memkind();
961345153Sdim
/* ------------------------------------------------------------------------ */

// NOTE(review): this expands to ~(1 << 63), i.e. 0x7FFF... — the maximum
// *signed* 64-bit value, not UINT64_MAX. Callers appear to rely on the sign
// bit being clear; confirm before changing.
#define KMP_UINT64_MAX                                                         \
  (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1)))

#define KMP_MIN_NTH 1

#ifndef KMP_MAX_NTH
#if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX
#define KMP_MAX_NTH PTHREAD_THREADS_MAX
#else
#define KMP_MAX_NTH INT_MAX
#endif
#endif /* KMP_MAX_NTH */

// Thread stack size bounds and per-architecture defaults.
#ifdef PTHREAD_STACK_MIN
#define KMP_MIN_STKSIZE PTHREAD_STACK_MIN
#else
#define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
#endif

#define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))

#if KMP_ARCH_X86
#define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
#elif KMP_ARCH_X86_64
#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
#define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
#else
#define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
#endif

#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024))
#define KMP_MIN_MALLOC_POOL_INCR ((size_t)(4 * 1024))
#define KMP_MAX_MALLOC_POOL_INCR                                               \
  (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))

#define KMP_MIN_STKOFFSET (0)
#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
#if KMP_OS_DARWIN
#define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
#else
#define KMP_DEFAULT_STKOFFSET CACHE_LINE
#endif

#define KMP_MIN_STKPADDING (0)
#define KMP_MAX_STKPADDING (2 * 1024 * 1024)

// KMP_BLOCKTIME: how long a thread spins before sleeping, in milliseconds.
#define KMP_BLOCKTIME_MULTIPLIER                                               \
  (1000) /* number of blocktime units per second */
#define KMP_MIN_BLOCKTIME (0)
#define KMP_MAX_BLOCKTIME                                                      \
  (INT_MAX) /* Must be this for "infinite" setting the work */
#define KMP_DEFAULT_BLOCKTIME (200) /*  __kmp_blocktime is in milliseconds  */

#if KMP_USE_MONITOR
#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
#define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second
#define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec

/* Calculate new number of monitor wakeups for a specific block time based on
   previous monitor_wakeups. Only allow increasing number of wakeups */
#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups)                 \
  (((blocktime) == KMP_MAX_BLOCKTIME)                                          \
       ? (monitor_wakeups)                                                     \
       : ((blocktime) == KMP_MIN_BLOCKTIME)                                    \
             ? KMP_MAX_MONITOR_WAKEUPS                                         \
             : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime)))  \
                   ? (monitor_wakeups)                                         \
                   : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))

/* Calculate number of intervals for a specific block time based on
   monitor_wakeups */
#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups)               \
  (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) /        \
   (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)))
#else
#define KMP_BLOCKTIME(team, tid)                                               \
  (get__bt_set(team, tid) ? get__blocktime(team, tid) : __kmp_dflt_blocktime)
#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
// HW TSC is used to reduce overhead (clock tick instead of nanosecond).
extern kmp_uint64 __kmp_ticks_per_msec;
#if KMP_COMPILER_ICC
#define KMP_NOW() ((kmp_uint64)_rdtsc())
#else
#define KMP_NOW() __kmp_hardware_timestamp()
#endif
#define KMP_NOW_MSEC() (KMP_NOW() / __kmp_ticks_per_msec)
#define KMP_BLOCKTIME_INTERVAL(team, tid)                                      \
  (KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_msec)
#define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
#else
// System time is retrieved sporadically while blocking.
extern kmp_uint64 __kmp_now_nsec();
#define KMP_NOW() __kmp_now_nsec()
#define KMP_NOW_MSEC() (KMP_NOW() / KMP_USEC_PER_SEC)
#define KMP_BLOCKTIME_INTERVAL(team, tid)                                      \
  (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
#define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
#endif
#endif // KMP_USE_MONITOR

// Bounds/defaults for KMP_STATS columns, intervals, and chunk sizes.
#define KMP_MIN_STATSCOLS 40
#define KMP_MAX_STATSCOLS 4096
#define KMP_DEFAULT_STATSCOLS 80

#define KMP_MIN_INTERVAL 0
#define KMP_MAX_INTERVAL (INT_MAX - 1)
#define KMP_DEFAULT_INTERVAL 0

#define KMP_MIN_CHUNK 1
#define KMP_MAX_CHUNK (INT_MAX - 1)
#define KMP_DEFAULT_CHUNK 1

#define KMP_DFLT_DISP_NUM_BUFF 7
#define KMP_MAX_ORDERED 8

#define KMP_MAX_FIELDS 32

#define KMP_MAX_BRANCH_BITS 31

#define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX

#define KMP_MAX_DEFAULT_DEVICE_LIMIT INT_MAX

#define KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX

/* Minimum number of threads before switch to TLS gtid (experimentally
   determined) */
/* josh TODO: what about OS X* tuning? */
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_TLS_GTID_MIN 5
#else
#define KMP_TLS_GTID_MIN INT_MAX
#endif

// tid is a team-local thread id; gtid is a global thread id. tid/gtid 0 is
// the master (initial) thread of the team / process, respectively.
#define KMP_MASTER_TID(tid) ((tid) == 0)
#define KMP_WORKER_TID(tid) ((tid) != 0)

#define KMP_MASTER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) == 0)
#define KMP_WORKER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) != 0)
#define KMP_INITIAL_GTID(gtid) ((gtid) == 0)

#ifndef TRUE
#define FALSE 0
#define TRUE (!FALSE)
#endif
1109345153Sdim
/* NOTE: all of the following constants must be even */

#if KMP_OS_WINDOWS
#define KMP_INIT_WAIT 64U /* initial number of spin-tests   */
#define KMP_NEXT_WAIT 32U /* subsequent number of spin-tests */
#elif KMP_OS_CNK
#define KMP_INIT_WAIT 16U /* initial number of spin-tests   */
#define KMP_NEXT_WAIT 8U /* subsequent number of spin-tests */
#elif KMP_OS_LINUX
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_DARWIN
/* TODO: tune for KMP_OS_DARWIN */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_DRAGONFLY
/* TODO: tune for KMP_OS_DRAGONFLY */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_FREEBSD
/* TODO: tune for KMP_OS_FREEBSD */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_NETBSD
/* TODO: tune for KMP_OS_NETBSD */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_HURD
/* TODO: tune for KMP_OS_HURD */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_OPENBSD
/* TODO: tune for KMP_OS_OPENBSD */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#endif
1146345153Sdim
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Raw CPUID register output (EAX/EBX/ECX/EDX).
typedef struct kmp_cpuid {
  kmp_uint32 eax;
  kmp_uint32 ebx;
  kmp_uint32 ecx;
  kmp_uint32 edx;
} kmp_cpuid_t;

// Decoded CPU identification, filled in by __kmp_query_cpuid().
typedef struct kmp_cpuinfo {
  int initialized; // If 0, other fields are not initialized.
  int signature; // CPUID(1).EAX
  int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family)
  int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended
  // Model << 4 ) + Model)
  int stepping; // CPUID(1).EAX[3:0] ( Stepping )
  int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise.
  int rtm; // 0 if RTM instructions are not supported, 1 otherwise.
  int cpu_stackoffset;
  int apic_id;
  int physical_id;
  int logical_id;
  kmp_uint64 frequency; // Nominal CPU frequency in Hz.
  char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)
} kmp_cpuinfo_t;

extern void __kmp_query_cpuid(kmp_cpuinfo_t *p);
#if KMP_OS_UNIX
// subleaf is only needed for cache and topology discovery and can be set to
// zero in most cases
// Execute CPUID(leaf, subleaf) and store the four result registers in *p.
static inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) {
  __asm__ __volatile__("cpuid"
                       : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx)
                       : "a"(leaf), "c"(subleaf));
}
// Load p into FPU control word
static inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) {
  __asm__ __volatile__("fldcw %0" : : "m"(*p));
}
// Store FPU control word into p
static inline void __kmp_store_x87_fpu_control_word(kmp_int16 *p) {
  __asm__ __volatile__("fstcw %0" : "=m"(*p));
}
// Clear the x87 FPU exception/status flags.
static inline void __kmp_clear_x87_fpu_status_word() {
#if KMP_MIC
  // 32-bit protected mode x87 FPU state
  // KNC lacks fnclex, so clear the status-word bits by round-tripping the
  // full FP environment through memory instead.
  struct x87_fpu_state {
    unsigned cw;
    unsigned sw;
    unsigned tw;
    unsigned fip;
    unsigned fips;
    unsigned fdp;
    unsigned fds;
  };
  struct x87_fpu_state fpu_state = {0, 0, 0, 0, 0, 0, 0};
  __asm__ __volatile__("fstenv %0\n\t" // store FP env
                       "andw $0x7f00, %1\n\t" // clear 0-7,15 bits of FP SW
                       "fldenv %0\n\t" // load FP env back
                       : "+m"(fpu_state), "+m"(fpu_state.sw));
#else
  __asm__ __volatile__("fnclex");
#endif // KMP_MIC
}
// MXCSR (SSE control/status) load/store; no-ops when SSE is unavailable.
#if __SSE__
static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
#else
static inline void __kmp_load_mxcsr(const kmp_uint32 *p) {}
static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = 0; }
#endif
#else
// Windows still has these as external functions in assembly file
extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);
extern void __kmp_load_x87_fpu_control_word(const kmp_int16 *p);
extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p);
extern void __kmp_clear_x87_fpu_status_word();
static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
#endif // KMP_OS_UNIX

#define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */
1229353358Sdim
// KMP_CPU_PAUSE: architecture-specific hint used inside spin-wait loops.
#if KMP_ARCH_X86
extern void __kmp_x86_pause(void);
#elif KMP_MIC
// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
// regression after removal of extra PAUSE from spin loops. Changing
// the delay from 100 to 300 showed even better performance than double PAUSE
// on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
#else
static inline void __kmp_x86_pause(void) { _mm_pause(); }
#endif
#define KMP_CPU_PAUSE() __kmp_x86_pause()
#elif KMP_ARCH_PPC64
// On PPC64, lower then restore the SMT thread priority around the spin.
#define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1")
#define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2")
#define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory")
#define KMP_CPU_PAUSE()                                                        \
  do {                                                                         \
    KMP_PPC64_PRI_LOW();                                                       \
    KMP_PPC64_PRI_MED();                                                       \
    KMP_PPC64_PRI_LOC_MB();                                                    \
  } while (0)
#else
#define KMP_CPU_PAUSE() /* nothing to do */
#endif

#define KMP_INIT_YIELD(count)                                                  \
  { (count) = __kmp_yield_init; }

// True when more runtime threads exist than available processors.
#define KMP_OVERSUBSCRIBED                                                     \
  (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))

// __kmp_use_yield: 1 = always yield; 2 = yield only when oversubscribed.
#define KMP_TRY_YIELD                                                          \
  ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))

#define KMP_TRY_YIELD_OVERSUB                                                  \
  ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))

#define KMP_YIELD(cond)                                                        \
  {                                                                            \
    KMP_CPU_PAUSE();                                                           \
    if ((cond) && (KMP_TRY_YIELD))                                             \
      __kmp_yield();                                                           \
  }

#define KMP_YIELD_OVERSUB()                                                    \
  {                                                                            \
    KMP_CPU_PAUSE();                                                           \
    if ((KMP_TRY_YIELD_OVERSUB))                                               \
      __kmp_yield();                                                           \
  }

// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,
// there should be no yielding since initial value from KMP_INIT_YIELD() is odd.
#define KMP_YIELD_SPIN(count)                                                  \
  {                                                                            \
    KMP_CPU_PAUSE();                                                           \
    if (KMP_TRY_YIELD) {                                                       \
      (count) -= 2;                                                            \
      if (!(count)) {                                                          \
        __kmp_yield();                                                         \
        (count) = __kmp_yield_next;                                            \
      }                                                                        \
    }                                                                          \
  }

#define KMP_YIELD_OVERSUB_ELSE_SPIN(count)                                     \
  {                                                                            \
    KMP_CPU_PAUSE();                                                           \
    if ((KMP_TRY_YIELD_OVERSUB))                                               \
      __kmp_yield();                                                           \
    else if (__kmp_use_yield == 1) {                                           \
      (count) -= 2;                                                            \
      if (!(count)) {                                                          \
        __kmp_yield();                                                         \
        (count) = __kmp_yield_next;                                            \
      }                                                                        \
    }                                                                          \
  }
1309345153Sdim
1310345153Sdim/* ------------------------------------------------------------------------ */
1311345153Sdim/* Support datatypes for the orphaned construct nesting checks.             */
1312345153Sdim/* ------------------------------------------------------------------------ */
1313345153Sdim
1314345153Sdimenum cons_type {
1315345153Sdim  ct_none,
1316345153Sdim  ct_parallel,
1317345153Sdim  ct_pdo,
1318345153Sdim  ct_pdo_ordered,
1319345153Sdim  ct_psections,
1320345153Sdim  ct_psingle,
1321345153Sdim  ct_critical,
1322345153Sdim  ct_ordered_in_parallel,
1323345153Sdim  ct_ordered_in_pdo,
1324345153Sdim  ct_master,
1325345153Sdim  ct_reduce,
1326345153Sdim  ct_barrier
1327345153Sdim};
1328345153Sdim
1329353358Sdim#define IS_CONS_TYPE_ORDERED(ct) ((ct) == ct_pdo_ordered)
1330345153Sdim
1331345153Sdimstruct cons_data {
1332345153Sdim  ident_t const *ident;
1333345153Sdim  enum cons_type type;
1334345153Sdim  int prev;
1335345153Sdim  kmp_user_lock_p
1336345153Sdim      name; /* address exclusively for critical section name comparison */
1337345153Sdim};
1338345153Sdim
1339345153Sdimstruct cons_header {
1340345153Sdim  int p_top, w_top, s_top;
1341345153Sdim  int stack_size, stack_top;
1342345153Sdim  struct cons_data *stack_data;
1343345153Sdim};
1344345153Sdim
1345345153Sdimstruct kmp_region_info {
1346345153Sdim  char *text;
1347345153Sdim  int offset[KMP_MAX_FIELDS];
1348345153Sdim  int length[KMP_MAX_FIELDS];
1349345153Sdim};
1350345153Sdim
1351345153Sdim/* ---------------------------------------------------------------------- */
1352345153Sdim/* ---------------------------------------------------------------------- */
1353345153Sdim
1354345153Sdim#if KMP_OS_WINDOWS
1355345153Sdimtypedef HANDLE kmp_thread_t;
1356345153Sdimtypedef DWORD kmp_key_t;
1357345153Sdim#endif /* KMP_OS_WINDOWS */
1358345153Sdim
1359345153Sdim#if KMP_OS_UNIX
1360345153Sdimtypedef pthread_t kmp_thread_t;
1361345153Sdimtypedef pthread_key_t kmp_key_t;
1362345153Sdim#endif
1363345153Sdim
1364345153Sdimextern kmp_key_t __kmp_gtid_threadprivate_key;
1365345153Sdim
1366345153Sdimtypedef struct kmp_sys_info {
1367345153Sdim  long maxrss; /* the maximum resident set size utilized (in kilobytes)     */
1368345153Sdim  long minflt; /* the number of page faults serviced without any I/O        */
1369345153Sdim  long majflt; /* the number of page faults serviced that required I/O      */
1370345153Sdim  long nswap; /* the number of times a process was "swapped" out of memory */
1371345153Sdim  long inblock; /* the number of times the file system had to perform input  */
1372345153Sdim  long oublock; /* the number of times the file system had to perform output */
1373345153Sdim  long nvcsw; /* the number of times a context switch was voluntarily      */
1374345153Sdim  long nivcsw; /* the number of times a context switch was forced           */
1375345153Sdim} kmp_sys_info_t;
1376345153Sdim
1377345153Sdim#if USE_ITT_BUILD
1378345153Sdim// We cannot include "kmp_itt.h" due to circular dependency. Declare the only
1379345153Sdim// required type here. Later we will check the type meets requirements.
1380345153Sdimtypedef int kmp_itt_mark_t;
1381345153Sdim#define KMP_ITT_DEBUG 0
1382345153Sdim#endif /* USE_ITT_BUILD */
1383345153Sdim
1384345153Sdimtypedef kmp_int32 kmp_critical_name[8];
1385345153Sdim
1386345153Sdim/*!
1387345153Sdim@ingroup PARALLEL
1388345153SdimThe type for a microtask which gets passed to @ref __kmpc_fork_call().
1389345153SdimThe arguments to the outlined function are
1390345153Sdim@param global_tid the global thread identity of the thread executing the
1391345153Sdimfunction.
1392360784Sdim@param bound_tid  the local identity of the thread executing the function
1393345153Sdim@param ... pointers to shared variables accessed by the function.
1394345153Sdim*/
1395345153Sdimtypedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...);
1396345153Sdimtypedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth,
1397345153Sdim                                 ...);
1398345153Sdim
1399345153Sdim/*!
1400345153Sdim@ingroup THREADPRIVATE
1401345153Sdim@{
1402345153Sdim*/
1403345153Sdim/* ---------------------------------------------------------------------------
1404345153Sdim */
1405345153Sdim/* Threadprivate initialization/finalization function declarations */
1406345153Sdim
1407345153Sdim/*  for non-array objects:  __kmpc_threadprivate_register()  */
1408345153Sdim
1409345153Sdim/*!
1410345153Sdim Pointer to the constructor function.
1411345153Sdim The first argument is the <tt>this</tt> pointer
1412345153Sdim*/
1413345153Sdimtypedef void *(*kmpc_ctor)(void *);
1414345153Sdim
1415345153Sdim/*!
1416345153Sdim Pointer to the destructor function.
1417345153Sdim The first argument is the <tt>this</tt> pointer
1418345153Sdim*/
1419345153Sdimtypedef void (*kmpc_dtor)(
1420345153Sdim    void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel
1421345153Sdim                              compiler */
1422345153Sdim/*!
1423345153Sdim Pointer to an alternate constructor.
1424345153Sdim The first argument is the <tt>this</tt> pointer.
1425345153Sdim*/
1426345153Sdimtypedef void *(*kmpc_cctor)(void *, void *);
1427345153Sdim
1428345153Sdim/* for array objects: __kmpc_threadprivate_register_vec() */
1429345153Sdim/* First arg: "this" pointer */
1430345153Sdim/* Last arg: number of array elements */
1431345153Sdim/*!
1432345153Sdim Array constructor.
1433345153Sdim First argument is the <tt>this</tt> pointer
1434345153Sdim Second argument the number of array elements.
1435345153Sdim*/
1436345153Sdimtypedef void *(*kmpc_ctor_vec)(void *, size_t);
1437345153Sdim/*!
1438345153Sdim Pointer to the array destructor function.
1439345153Sdim The first argument is the <tt>this</tt> pointer
1440345153Sdim Second argument the number of array elements.
1441345153Sdim*/
1442345153Sdimtypedef void (*kmpc_dtor_vec)(void *, size_t);
1443345153Sdim/*!
1444345153Sdim Array constructor.
1445345153Sdim First argument is the <tt>this</tt> pointer
1446345153Sdim Third argument the number of array elements.
1447345153Sdim*/
1448345153Sdimtypedef void *(*kmpc_cctor_vec)(void *, void *,
1449345153Sdim                                size_t); /* function unused by compiler */
1450345153Sdim
1451345153Sdim/*!
1452345153Sdim@}
1453345153Sdim*/
1454345153Sdim
1455345153Sdim/* keeps tracked of threadprivate cache allocations for cleanup later */
1456345153Sdimtypedef struct kmp_cached_addr {
1457345153Sdim  void **addr; /* address of allocated cache */
1458345153Sdim  void ***compiler_cache; /* pointer to compiler's cache */
1459345153Sdim  void *data; /* pointer to global data */
1460345153Sdim  struct kmp_cached_addr *next; /* pointer to next cached address */
1461345153Sdim} kmp_cached_addr_t;
1462345153Sdim
1463345153Sdimstruct private_data {
1464345153Sdim  struct private_data *next; /* The next descriptor in the list      */
1465345153Sdim  void *data; /* The data buffer for this descriptor  */
1466345153Sdim  int more; /* The repeat count for this descriptor */
1467345153Sdim  size_t size; /* The data size for this descriptor    */
1468345153Sdim};
1469345153Sdim
1470345153Sdimstruct private_common {
1471345153Sdim  struct private_common *next;
1472345153Sdim  struct private_common *link;
1473345153Sdim  void *gbl_addr;
1474345153Sdim  void *par_addr; /* par_addr == gbl_addr for MASTER thread */
1475345153Sdim  size_t cmn_size;
1476345153Sdim};
1477345153Sdim
1478345153Sdimstruct shared_common {
1479345153Sdim  struct shared_common *next;
1480345153Sdim  struct private_data *pod_init;
1481345153Sdim  void *obj_init;
1482345153Sdim  void *gbl_addr;
1483345153Sdim  union {
1484345153Sdim    kmpc_ctor ctor;
1485345153Sdim    kmpc_ctor_vec ctorv;
1486345153Sdim  } ct;
1487345153Sdim  union {
1488345153Sdim    kmpc_cctor cctor;
1489345153Sdim    kmpc_cctor_vec cctorv;
1490345153Sdim  } cct;
1491345153Sdim  union {
1492345153Sdim    kmpc_dtor dtor;
1493345153Sdim    kmpc_dtor_vec dtorv;
1494345153Sdim  } dt;
1495345153Sdim  size_t vec_len;
1496345153Sdim  int is_vec;
1497345153Sdim  size_t cmn_size;
1498345153Sdim};
1499345153Sdim
1500345153Sdim#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */
1501345153Sdim#define KMP_HASH_TABLE_SIZE                                                    \
1502345153Sdim  (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */
1503345153Sdim#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */
1504345153Sdim#define KMP_HASH(x)                                                            \
1505345153Sdim  ((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1))
1506345153Sdim
1507345153Sdimstruct common_table {
1508345153Sdim  struct private_common *data[KMP_HASH_TABLE_SIZE];
1509345153Sdim};
1510345153Sdim
1511345153Sdimstruct shared_table {
1512345153Sdim  struct shared_common *data[KMP_HASH_TABLE_SIZE];
1513345153Sdim};
1514345153Sdim
1515345153Sdim/* ------------------------------------------------------------------------ */
1516345153Sdim
1517345153Sdim#if KMP_USE_HIER_SCHED
1518345153Sdim// Shared barrier data that exists inside a single unit of the scheduling
1519345153Sdim// hierarchy
1520345153Sdimtypedef struct kmp_hier_private_bdata_t {
1521345153Sdim  kmp_int32 num_active;
1522345153Sdim  kmp_uint64 index;
1523345153Sdim  kmp_uint64 wait_val[2];
1524345153Sdim} kmp_hier_private_bdata_t;
1525345153Sdim#endif
1526345153Sdim
1527345153Sdimtypedef struct kmp_sched_flags {
1528345153Sdim  unsigned ordered : 1;
1529345153Sdim  unsigned nomerge : 1;
1530345153Sdim  unsigned contains_last : 1;
1531345153Sdim#if KMP_USE_HIER_SCHED
1532345153Sdim  unsigned use_hier : 1;
1533345153Sdim  unsigned unused : 28;
1534345153Sdim#else
1535345153Sdim  unsigned unused : 29;
1536345153Sdim#endif
1537345153Sdim} kmp_sched_flags_t;
1538345153Sdim
1539345153SdimKMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4);
1540345153Sdim
1541345153Sdim#if KMP_STATIC_STEAL_ENABLED
1542345153Sdimtypedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
1543345153Sdim  kmp_int32 count;
1544345153Sdim  kmp_int32 ub;
1545345153Sdim  /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1546345153Sdim  kmp_int32 lb;
1547345153Sdim  kmp_int32 st;
1548345153Sdim  kmp_int32 tc;
1549345153Sdim  kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put
1550345153Sdim                                     after ub */
1551345153Sdim
1552345153Sdim  // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on )
1553345153Sdim  //    a) parm3 is properly aligned and
1554345153Sdim  //    b) all parm1-4 are in the same cache line.
1555345153Sdim  // Because of parm1-4 are used together, performance seems to be better
1556345153Sdim  // if they are in the same line (not measured though).
1557345153Sdim
1558345153Sdim  struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template
1559345153Sdim    kmp_int32 parm1; //     structures in kmp_dispatch.cpp. This should
1560345153Sdim    kmp_int32 parm2; //     make no real change at least while padding is off.
1561345153Sdim    kmp_int32 parm3;
1562345153Sdim    kmp_int32 parm4;
1563345153Sdim  };
1564345153Sdim
1565345153Sdim  kmp_uint32 ordered_lower;
1566345153Sdim  kmp_uint32 ordered_upper;
1567345153Sdim#if KMP_OS_WINDOWS
1568345153Sdim  // This var can be placed in the hole between 'tc' and 'parm1', instead of
1569345153Sdim  // 'static_steal_counter'. It would be nice to measure execution times.
1570345153Sdim  // Conditional if/endif can be removed at all.
1571345153Sdim  kmp_int32 last_upper;
1572345153Sdim#endif /* KMP_OS_WINDOWS */
1573345153Sdim} dispatch_private_info32_t;
1574345153Sdim
1575345153Sdimtypedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
1576345153Sdim  kmp_int64 count; // current chunk number for static & static-steal scheduling
1577345153Sdim  kmp_int64 ub; /* upper-bound */
1578345153Sdim  /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1579345153Sdim  kmp_int64 lb; /* lower-bound */
1580345153Sdim  kmp_int64 st; /* stride */
1581345153Sdim  kmp_int64 tc; /* trip count (number of iterations) */
1582345153Sdim  kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put
1583345153Sdim                                     after ub */
1584345153Sdim
1585345153Sdim  /* parm[1-4] are used in different ways by different scheduling algorithms */
1586345153Sdim
1587345153Sdim  // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
1588345153Sdim  //    a) parm3 is properly aligned and
1589345153Sdim  //    b) all parm1-4 are in the same cache line.
1590345153Sdim  // Because of parm1-4 are used together, performance seems to be better
1591345153Sdim  // if they are in the same line (not measured though).
1592345153Sdim
1593345153Sdim  struct KMP_ALIGN(32) {
1594345153Sdim    kmp_int64 parm1;
1595345153Sdim    kmp_int64 parm2;
1596345153Sdim    kmp_int64 parm3;
1597345153Sdim    kmp_int64 parm4;
1598345153Sdim  };
1599345153Sdim
1600345153Sdim  kmp_uint64 ordered_lower;
1601345153Sdim  kmp_uint64 ordered_upper;
1602345153Sdim#if KMP_OS_WINDOWS
1603345153Sdim  // This var can be placed in the hole between 'tc' and 'parm1', instead of
1604345153Sdim  // 'static_steal_counter'. It would be nice to measure execution times.
1605345153Sdim  // Conditional if/endif can be removed at all.
1606345153Sdim  kmp_int64 last_upper;
1607345153Sdim#endif /* KMP_OS_WINDOWS */
1608345153Sdim} dispatch_private_info64_t;
1609345153Sdim#else /* KMP_STATIC_STEAL_ENABLED */
1610345153Sdimtypedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
1611345153Sdim  kmp_int32 lb;
1612345153Sdim  kmp_int32 ub;
1613345153Sdim  kmp_int32 st;
1614345153Sdim  kmp_int32 tc;
1615345153Sdim
1616345153Sdim  kmp_int32 parm1;
1617345153Sdim  kmp_int32 parm2;
1618345153Sdim  kmp_int32 parm3;
1619345153Sdim  kmp_int32 parm4;
1620345153Sdim
1621345153Sdim  kmp_int32 count;
1622345153Sdim
1623345153Sdim  kmp_uint32 ordered_lower;
1624345153Sdim  kmp_uint32 ordered_upper;
1625345153Sdim#if KMP_OS_WINDOWS
1626345153Sdim  kmp_int32 last_upper;
1627345153Sdim#endif /* KMP_OS_WINDOWS */
1628345153Sdim} dispatch_private_info32_t;
1629345153Sdim
1630345153Sdimtypedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
1631345153Sdim  kmp_int64 lb; /* lower-bound */
1632345153Sdim  kmp_int64 ub; /* upper-bound */
1633345153Sdim  kmp_int64 st; /* stride */
1634345153Sdim  kmp_int64 tc; /* trip count (number of iterations) */
1635345153Sdim
1636345153Sdim  /* parm[1-4] are used in different ways by different scheduling algorithms */
1637345153Sdim  kmp_int64 parm1;
1638345153Sdim  kmp_int64 parm2;
1639345153Sdim  kmp_int64 parm3;
1640345153Sdim  kmp_int64 parm4;
1641345153Sdim
1642345153Sdim  kmp_int64 count; /* current chunk number for static scheduling */
1643345153Sdim
1644345153Sdim  kmp_uint64 ordered_lower;
1645345153Sdim  kmp_uint64 ordered_upper;
1646345153Sdim#if KMP_OS_WINDOWS
1647345153Sdim  kmp_int64 last_upper;
1648345153Sdim#endif /* KMP_OS_WINDOWS */
1649345153Sdim} dispatch_private_info64_t;
1650345153Sdim#endif /* KMP_STATIC_STEAL_ENABLED */
1651345153Sdim
1652345153Sdimtypedef struct KMP_ALIGN_CACHE dispatch_private_info {
1653345153Sdim  union private_info {
1654345153Sdim    dispatch_private_info32_t p32;
1655345153Sdim    dispatch_private_info64_t p64;
1656345153Sdim  } u;
1657345153Sdim  enum sched_type schedule; /* scheduling algorithm */
1658345153Sdim  kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */
1659345153Sdim  kmp_int32 ordered_bumped;
1660345153Sdim  // To retain the structure size after making ordered_iteration scalar
1661345153Sdim  kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3];
1662345153Sdim  // Stack of buffers for nest of serial regions
1663345153Sdim  struct dispatch_private_info *next;
1664345153Sdim  kmp_int32 type_size; /* the size of types in private_info */
1665345153Sdim#if KMP_USE_HIER_SCHED
1666345153Sdim  kmp_int32 hier_id;
1667345153Sdim  void *parent; /* hierarchical scheduling parent pointer */
1668345153Sdim#endif
1669345153Sdim  enum cons_type pushed_ws;
1670345153Sdim} dispatch_private_info_t;
1671345153Sdim
1672345153Sdimtypedef struct dispatch_shared_info32 {
1673345153Sdim  /* chunk index under dynamic, number of idle threads under static-steal;
1674345153Sdim     iteration index otherwise */
1675345153Sdim  volatile kmp_uint32 iteration;
1676345153Sdim  volatile kmp_uint32 num_done;
1677345153Sdim  volatile kmp_uint32 ordered_iteration;
1678345153Sdim  // Dummy to retain the structure size after making ordered_iteration scalar
1679345153Sdim  kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1];
1680345153Sdim} dispatch_shared_info32_t;
1681345153Sdim
1682345153Sdimtypedef struct dispatch_shared_info64 {
1683345153Sdim  /* chunk index under dynamic, number of idle threads under static-steal;
1684345153Sdim     iteration index otherwise */
1685345153Sdim  volatile kmp_uint64 iteration;
1686345153Sdim  volatile kmp_uint64 num_done;
1687345153Sdim  volatile kmp_uint64 ordered_iteration;
1688345153Sdim  // Dummy to retain the structure size after making ordered_iteration scalar
1689345153Sdim  kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3];
1690345153Sdim} dispatch_shared_info64_t;
1691345153Sdim
1692345153Sdimtypedef struct dispatch_shared_info {
1693345153Sdim  union shared_info {
1694345153Sdim    dispatch_shared_info32_t s32;
1695345153Sdim    dispatch_shared_info64_t s64;
1696345153Sdim  } u;
1697345153Sdim  volatile kmp_uint32 buffer_index;
1698345153Sdim  volatile kmp_int32 doacross_buf_idx; // teamwise index
1699345153Sdim  volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)
1700345153Sdim  kmp_int32 doacross_num_done; // count finished threads
1701345153Sdim#if KMP_USE_HIER_SCHED
1702345153Sdim  void *hier;
1703345153Sdim#endif
1704345153Sdim#if KMP_USE_HWLOC
1705345153Sdim  // When linking with libhwloc, the ORDERED EPCC test slows down on big
1706345153Sdim  // machines (> 48 cores). Performance analysis showed that a cache thrash
1707345153Sdim  // was occurring and this padding helps alleviate the problem.
1708345153Sdim  char padding[64];
1709345153Sdim#endif
1710345153Sdim} dispatch_shared_info_t;
1711345153Sdim
1712345153Sdimtypedef struct kmp_disp {
1713345153Sdim  /* Vector for ORDERED SECTION */
1714345153Sdim  void (*th_deo_fcn)(int *gtid, int *cid, ident_t *);
1715345153Sdim  /* Vector for END ORDERED SECTION */
1716345153Sdim  void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *);
1717345153Sdim
1718345153Sdim  dispatch_shared_info_t *th_dispatch_sh_current;
1719345153Sdim  dispatch_private_info_t *th_dispatch_pr_current;
1720345153Sdim
1721345153Sdim  dispatch_private_info_t *th_disp_buffer;
1722345153Sdim  kmp_int32 th_disp_index;
1723345153Sdim  kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
1724345153Sdim  volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
1725345153Sdim  union { // we can use union here because doacross cannot be used in
1726345153Sdim    // nonmonotonic loops
1727345153Sdim    kmp_int64 *th_doacross_info; // info on loop bounds
1728345153Sdim    kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable)
1729345153Sdim  };
1730345153Sdim#if KMP_USE_INTERNODE_ALIGNMENT
1731345153Sdim  char more_padding[INTERNODE_CACHE_LINE];
1732345153Sdim#endif
1733345153Sdim} kmp_disp_t;
1734345153Sdim
1735345153Sdim/* ------------------------------------------------------------------------ */
1736345153Sdim/* Barrier stuff */
1737345153Sdim
1738345153Sdim/* constants for barrier state update */
1739345153Sdim#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
1740345153Sdim#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
1741345153Sdim#define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state
1742345153Sdim#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
1743345153Sdim
1744345153Sdim#define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT)
1745345153Sdim#define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT)
1746345153Sdim#define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT)
1747345153Sdim
1748345153Sdim#if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
1749345153Sdim#error "Barrier sleep bit must be smaller than barrier bump bit"
1750345153Sdim#endif
1751345153Sdim#if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
1752345153Sdim#error "Barrier unused bit must be smaller than barrier bump bit"
1753345153Sdim#endif
1754345153Sdim
1755345153Sdim// Constants for release barrier wait state: currently, hierarchical only
1756345153Sdim#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
1757345153Sdim#define KMP_BARRIER_OWN_FLAG                                                   \
1758345153Sdim  1 // Normal state; worker waiting on own b_go flag in release
1759345153Sdim#define KMP_BARRIER_PARENT_FLAG                                                \
1760345153Sdim  2 // Special state; worker waiting on parent's b_go flag in release
1761345153Sdim#define KMP_BARRIER_SWITCH_TO_OWN_FLAG                                         \
1762345153Sdim  3 // Special state; tells worker to shift from parent to own b_go
1763345153Sdim#define KMP_BARRIER_SWITCHING                                                  \
1764345153Sdim  4 // Special state; worker resets appropriate flag on wake-up
1765345153Sdim
1766345153Sdim#define KMP_NOT_SAFE_TO_REAP                                                   \
1767345153Sdim  0 // Thread th_reap_state: not safe to reap (tasking)
1768345153Sdim#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)
1769345153Sdim
1770345153Sdimenum barrier_type {
1771345153Sdim  bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
1772345153Sdim                           barriers if enabled) */
1773345153Sdim  bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
1774345153Sdim#if KMP_FAST_REDUCTION_BARRIER
1775345153Sdim  bs_reduction_barrier, /* 2, All barriers that are used in reduction */
1776345153Sdim#endif // KMP_FAST_REDUCTION_BARRIER
1777345153Sdim  bs_last_barrier /* Just a placeholder to mark the end */
1778345153Sdim};
1779345153Sdim
1780345153Sdim// to work with reduction barriers just like with plain barriers
1781345153Sdim#if !KMP_FAST_REDUCTION_BARRIER
1782345153Sdim#define bs_reduction_barrier bs_plain_barrier
1783345153Sdim#endif // KMP_FAST_REDUCTION_BARRIER
1784345153Sdim
1785345153Sdimtypedef enum kmp_bar_pat { /* Barrier communication patterns */
1786345153Sdim                           bp_linear_bar =
1787345153Sdim                               0, /* Single level (degenerate) tree */
1788345153Sdim                           bp_tree_bar =
1789345153Sdim                               1, /* Balanced tree with branching factor 2^n */
1790345153Sdim                           bp_hyper_bar =
1791345153Sdim                               2, /* Hypercube-embedded tree with min branching
1792345153Sdim                                     factor 2^n */
1793345153Sdim                           bp_hierarchical_bar = 3, /* Machine hierarchy tree */
1794345153Sdim                           bp_last_bar /* Placeholder to mark the end */
1795345153Sdim} kmp_bar_pat_e;
1796345153Sdim
1797345153Sdim#define KMP_BARRIER_ICV_PUSH 1
1798345153Sdim
1799345153Sdim/* Record for holding the values of the internal controls stack records */
1800345153Sdimtypedef struct kmp_internal_control {
1801345153Sdim  int serial_nesting_level; /* corresponds to the value of the
1802345153Sdim                               th_team_serialized field */
1803345153Sdim  kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per
1804345153Sdim                       thread) */
1805345153Sdim  kmp_int8
1806345153Sdim      bt_set; /* internal control for whether blocktime is explicitly set */
1807345153Sdim  int blocktime; /* internal control for blocktime */
1808345153Sdim#if KMP_USE_MONITOR
1809345153Sdim  int bt_intervals; /* internal control for blocktime intervals */
1810345153Sdim#endif
1811345153Sdim  int nproc; /* internal control for #threads for next parallel region (per
1812345153Sdim                thread) */
1813353358Sdim  int thread_limit; /* internal control for thread-limit-var */
1814345153Sdim  int max_active_levels; /* internal control for max_active_levels */
1815345153Sdim  kmp_r_sched_t
1816345153Sdim      sched; /* internal control for runtime schedule {sched,chunk} pair */
1817345153Sdim  kmp_proc_bind_t proc_bind; /* internal control for affinity  */
1818345153Sdim  kmp_int32 default_device; /* internal control for default device */
1819345153Sdim  struct kmp_internal_control *next;
1820345153Sdim} kmp_internal_control_t;
1821345153Sdim
1822345153Sdimstatic inline void copy_icvs(kmp_internal_control_t *dst,
1823345153Sdim                             kmp_internal_control_t *src) {
1824345153Sdim  *dst = *src;
1825345153Sdim}
1826345153Sdim
1827345153Sdim/* Thread barrier needs volatile barrier fields */
1828345153Sdimtypedef struct KMP_ALIGN_CACHE kmp_bstate {
1829345153Sdim  // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all
1830345153Sdim  // uses of it). It is not explicitly aligned below, because we *don't* want
1831345153Sdim  // it to be padded -- instead, we fit b_go into the same cache line with
1832345153Sdim  // th_fixed_icvs, enabling NGO cache lines stores in the hierarchical barrier.
1833345153Sdim  kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
1834345153Sdim  // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with
1835345153Sdim  // same NGO store
1836345153Sdim  volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
1837345153Sdim  KMP_ALIGN_CACHE volatile kmp_uint64
1838345153Sdim      b_arrived; // STATE => task reached synch point.
1839345153Sdim  kmp_uint32 *skip_per_level;
1840345153Sdim  kmp_uint32 my_level;
1841345153Sdim  kmp_int32 parent_tid;
1842345153Sdim  kmp_int32 old_tid;
1843345153Sdim  kmp_uint32 depth;
1844345153Sdim  struct kmp_bstate *parent_bar;
1845345153Sdim  kmp_team_t *team;
1846345153Sdim  kmp_uint64 leaf_state;
1847345153Sdim  kmp_uint32 nproc;
1848345153Sdim  kmp_uint8 base_leaf_kids;
1849345153Sdim  kmp_uint8 leaf_kids;
1850345153Sdim  kmp_uint8 offset;
1851345153Sdim  kmp_uint8 wait_flag;
1852345153Sdim  kmp_uint8 use_oncore_barrier;
1853345153Sdim#if USE_DEBUGGER
1854345153Sdim  // The following field is intended for the debugger solely. Only the worker
1855345153Sdim  // thread itself accesses this field: the worker increases it by 1 when it
1856345153Sdim  // arrives to a barrier.
1857345153Sdim  KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
1858345153Sdim#endif /* USE_DEBUGGER */
1859345153Sdim} kmp_bstate_t;
1860345153Sdim
1861345153Sdimunion KMP_ALIGN_CACHE kmp_barrier_union {
1862345153Sdim  double b_align; /* use worst case alignment */
1863345153Sdim  char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)];
1864345153Sdim  kmp_bstate_t bb;
1865345153Sdim};
1866345153Sdim
1867345153Sdimtypedef union kmp_barrier_union kmp_balign_t;
1868345153Sdim
1869345153Sdim/* Team barrier needs only non-volatile arrived counter */
1870345153Sdimunion KMP_ALIGN_CACHE kmp_barrier_team_union {
1871345153Sdim  double b_align; /* use worst case alignment */
1872345153Sdim  char b_pad[CACHE_LINE];
1873345153Sdim  struct {
1874345153Sdim    kmp_uint64 b_arrived; /* STATE => task reached synch point. */
1875345153Sdim#if USE_DEBUGGER
1876345153Sdim    // The following two fields are indended for the debugger solely. Only
1877345153Sdim    // master of the team accesses these fields: the first one is increased by
1878345153Sdim    // 1 when master arrives to a barrier, the second one is increased by one
1879345153Sdim    // when all the threads arrived.
1880345153Sdim    kmp_uint b_master_arrived;
1881345153Sdim    kmp_uint b_team_arrived;
1882345153Sdim#endif
1883345153Sdim  };
1884345153Sdim};
1885345153Sdim
1886345153Sdimtypedef union kmp_barrier_team_union kmp_balign_team_t;
1887345153Sdim
1888345153Sdim/* Padding for Linux* OS pthreads condition variables and mutexes used to signal
1889345153Sdim   threads when a condition changes.  This is to workaround an NPTL bug where
1890345153Sdim   padding was added to pthread_cond_t which caused the initialization routine
1891345153Sdim   to write outside of the structure if compiled on pre-NPTL threads.  */
1892345153Sdim#if KMP_OS_WINDOWS
1893345153Sdimtypedef struct kmp_win32_mutex {
1894345153Sdim  /* The Lock */
1895345153Sdim  CRITICAL_SECTION cs;
1896345153Sdim} kmp_win32_mutex_t;
1897345153Sdim
1898345153Sdimtypedef struct kmp_win32_cond {
1899345153Sdim  /* Count of the number of waiters. */
1900345153Sdim  int waiters_count_;
1901345153Sdim
1902345153Sdim  /* Serialize access to <waiters_count_> */
1903345153Sdim  kmp_win32_mutex_t waiters_count_lock_;
1904345153Sdim
1905345153Sdim  /* Number of threads to release via a <cond_broadcast> or a <cond_signal> */
1906345153Sdim  int release_count_;
1907345153Sdim
1908345153Sdim  /* Keeps track of the current "generation" so that we don't allow */
1909345153Sdim  /* one thread to steal all the "releases" from the broadcast. */
1910345153Sdim  int wait_generation_count_;
1911345153Sdim
1912345153Sdim  /* A manual-reset event that's used to block and release waiting threads. */
1913345153Sdim  HANDLE event_;
1914345153Sdim} kmp_win32_cond_t;
1915345153Sdim#endif
1916345153Sdim
1917345153Sdim#if KMP_OS_UNIX
1918345153Sdim
1919345153Sdimunion KMP_ALIGN_CACHE kmp_cond_union {
1920345153Sdim  double c_align;
1921345153Sdim  char c_pad[CACHE_LINE];
1922345153Sdim  pthread_cond_t c_cond;
1923345153Sdim};
1924345153Sdim
1925345153Sdimtypedef union kmp_cond_union kmp_cond_align_t;
1926345153Sdim
1927345153Sdimunion KMP_ALIGN_CACHE kmp_mutex_union {
1928345153Sdim  double m_align;
1929345153Sdim  char m_pad[CACHE_LINE];
1930345153Sdim  pthread_mutex_t m_mutex;
1931345153Sdim};
1932345153Sdim
1933345153Sdimtypedef union kmp_mutex_union kmp_mutex_align_t;
1934345153Sdim
1935345153Sdim#endif /* KMP_OS_UNIX */
1936345153Sdim
1937345153Sdimtypedef struct kmp_desc_base {
1938345153Sdim  void *ds_stackbase;
1939345153Sdim  size_t ds_stacksize;
1940345153Sdim  int ds_stackgrow;
1941345153Sdim  kmp_thread_t ds_thread;
1942345153Sdim  volatile int ds_tid;
1943345153Sdim  int ds_gtid;
1944345153Sdim#if KMP_OS_WINDOWS
1945345153Sdim  volatile int ds_alive;
1946345153Sdim  DWORD ds_thread_id;
1947345153Sdim/* ds_thread keeps thread handle on Windows* OS. It is enough for RTL purposes.
1948345153Sdim   However, debugger support (libomp_db) cannot work with handles, because they
1949345153Sdim   uncomparable. For example, debugger requests info about thread with handle h.
1950345153Sdim   h is valid within debugger process, and meaningless within debugee process.
1951345153Sdim   Even if h is duped by call to DuplicateHandle(), so the result h' is valid
1952345153Sdim   within debugee process, but it is a *new* handle which does *not* equal to
1953345153Sdim   any other handle in debugee... The only way to compare handles is convert
1954345153Sdim   them to system-wide ids. GetThreadId() function is available only in
1955345153Sdim   Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is available
1956345153Sdim   on all Windows* OS flavours (including Windows* 95). Thus, we have to get
1957345153Sdim   thread id by call to GetCurrentThreadId() from within the thread and save it
1958345153Sdim   to let libomp_db identify threads.  */
1959345153Sdim#endif /* KMP_OS_WINDOWS */
1960345153Sdim} kmp_desc_base_t;
1961345153Sdim
1962345153Sdimtypedef union KMP_ALIGN_CACHE kmp_desc {
1963345153Sdim  double ds_align; /* use worst case alignment */
1964345153Sdim  char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)];
1965345153Sdim  kmp_desc_base_t ds;
1966345153Sdim} kmp_desc_t;
1967345153Sdim
/* Thread-private bookkeeping: single-construct counter, reduction scratch
   data, and (optionally) the BGET fast-memory allocator state. */
typedef struct kmp_local {
  volatile int this_construct; /* count of single's encountered by thread */
  void *reduce_data; /* scratch pointer used by the reduction implementation */
#if KMP_USE_BGET
  void *bget_data; /* BGET allocator per-thread state */
  void *bget_list; /* BGET free list */
#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
  kmp_lock_t bget_lock; /* Lock for accessing bget free list */
#else
  kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be
// bootstrap lock so we can use it at library
// shutdown.
#endif /* USE_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
#endif /* KMP_USE_BGET */

  PACKED_REDUCTION_METHOD_T
  packed_reduction_method; /* stored by __kmpc_reduce*(), used by
                              __kmpc_end_reduce*() */

} kmp_local_t;
1990345153Sdim
// Conditionally update (a) to (b): skip the store when the values already
// match, avoiding needless dirtying of shared cache lines on hot paths.
// Wrapped in do { } while (0) so each macro expands to a single statement
// and is safe inside unbraced if/else bodies: the previous bare-`if` form
// would silently capture a caller's `else` (dangling-else hazard).
#define KMP_CHECK_UPDATE(a, b)                                                 \
  do {                                                                         \
    if ((a) != (b))                                                            \
      (a) = (b);                                                               \
  } while (0)
// Same as KMP_CHECK_UPDATE, but performs the store through TCW_SYNC_PTR so
// the write gets the runtime's synchronized-store semantics.
#define KMP_CHECK_UPDATE_SYNC(a, b)                                            \
  do {                                                                         \
    if ((a) != (b))                                                            \
      TCW_SYNC_PTR((a), (b));                                                  \
  } while (0)
1997345153Sdim
/* Accessors for per-task internal control variables (ICVs).  The (xteam,
   xtid) forms reach the ICVs through a team's thread array; the (xthread)
   forms take a thread pointer directly.  All of them read/write the ICV
   block of the thread's current task (th_current_task->td_icvs). */
#define get__blocktime(xteam, xtid)                                            \
  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
#define get__bt_set(xteam, xtid)                                               \
  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
#if KMP_USE_MONITOR
#define get__bt_intervals(xteam, xtid)                                         \
  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
#endif

#define get__dynamic_2(xteam, xtid)                                            \
  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
#define get__nproc_2(xteam, xtid)                                              \
  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
#define get__sched_2(xteam, xtid)                                              \
  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)

/* Setters (team/tid addressed) */
#define set__blocktime_team(xteam, xtid, xval)                                 \
  (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) =     \
       (xval))

#if KMP_USE_MONITOR
#define set__bt_intervals_team(xteam, xtid, xval)                              \
  (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) =  \
       (xval))
#endif

#define set__bt_set_team(xteam, xtid, xval)                                    \
  (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval))

/* Setters/getters (thread addressed) */
#define set__dynamic(xthread, xval)                                            \
  (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval))
/* get__dynamic normalizes the stored value to the Fortran TRUE/FALSE tokens */
#define get__dynamic(xthread)                                                  \
  (((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE) : (FTN_FALSE))

#define set__nproc(xthread, xval)                                              \
  (((xthread)->th.th_current_task->td_icvs.nproc) = (xval))

#define set__thread_limit(xthread, xval)                                       \
  (((xthread)->th.th_current_task->td_icvs.thread_limit) = (xval))

#define set__max_active_levels(xthread, xval)                                  \
  (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval))

#define get__max_active_levels(xthread)                                        \
  ((xthread)->th.th_current_task->td_icvs.max_active_levels)

#define set__sched(xthread, xval)                                              \
  (((xthread)->th.th_current_task->td_icvs.sched) = (xval))

#define set__proc_bind(xthread, xval)                                          \
  (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval))
#define get__proc_bind(xthread)                                                \
  ((xthread)->th.th_current_task->td_icvs.proc_bind)
2051345153Sdim
// OpenMP tasking data structures

/* Global tasking execution mode. */
typedef enum kmp_tasking_mode {
  tskm_immediate_exec = 0, /* execute tasks immediately when encountered */
  tskm_extra_barrier = 1, /* NOTE(review): legacy mode; exact semantics not
                             visible in this header */
  tskm_task_teams = 2, /* defer tasks through per-team task teams */
  tskm_max = 2
} kmp_tasking_mode_t;

extern kmp_tasking_mode_t
    __kmp_tasking_mode; /* determines how/when to execute tasks */
extern int __kmp_task_stealing_constraint; /* enforce TSC for tied tasks */
extern int __kmp_enable_task_throttling; /* allow limiting deque growth */
extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
// specified, defaults to 0 otherwise
// Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise
extern kmp_int32 __kmp_max_task_priority;
// Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise
extern kmp_uint64 __kmp_taskloop_min_tasks;
2071345153Sdim
/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with
   taskdata first */
/* Convert between the two halves of that single allocation by pointer
   arithmetic: the kmp_taskdata_t header immediately precedes the kmp_task_t. */
#define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *)task) - 1)
#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *)(taskdata + 1)

// The tt_found_tasks flag is a signal to all threads in the team that tasks
// were spawned and queued since the previous barrier release.
#define KMP_TASKING_ENABLED(task_team)                                         \
  (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE)
/*!
@ingroup BASIC_TYPES
@{
*/

/*!
 * Entry point for an explicit task.  The first argument is presumably the
 * global thread id and the second the task pointer — confirm against the
 * __kmpc_omp_task* entry points.
 */
typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);

/* Compiler-provided auxiliary task data; which member is meaningful
   presumably follows the task's priority_specified / destructors_thunk
   flags (see kmp_tasking_flags_t). */
typedef union kmp_cmplrdata {
  kmp_int32 priority; /**< priority specified by user for the task */
  kmp_routine_entry_t
      destructors; /* pointer to function to invoke deconstructors of
                      firstprivate C++ objects */
  /* future data */
} kmp_cmplrdata_t;
2097345153Sdim
/*  sizeof_kmp_task_t passed as arg to kmpc_omp_task call  */
/*!
 * Compiler-visible part of a task.  The runtime's kmp_taskdata_t header is
 * allocated immediately before this struct in the same block (see
 * KMP_TASK_TO_TASKDATA above).
 */
typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
  void *shareds; /**< pointer to block of pointers to shared vars   */
  kmp_routine_entry_t
      routine; /**< pointer to routine to call for executing task */
  kmp_int32 part_id; /**< part id for the task                          */
  kmp_cmplrdata_t
      data1; /* Two known optional additions: destructors and priority */
  kmp_cmplrdata_t data2; /* Process destructors first, priority second */
  /* future data */
  /*  private vars  */
} kmp_task_t;

/*!
@}
*/
2116345153Sdim
/* Per-taskgroup state; taskgroups nest, linked through `parent`. */
typedef struct kmp_taskgroup {
  std::atomic<kmp_int32> count; // number of allocated and incomplete tasks
  std::atomic<kmp_int32>
      cancel_request; // request for cancellation of this taskgroup
  struct kmp_taskgroup *parent; // parent taskgroup
  // Block of data to perform task reduction
  void *reduce_data; // reduction related info
  kmp_int32 reduce_num_data; // number of data items to reduce
} kmp_taskgroup_t;
2126345153Sdim
// forward declarations
typedef union kmp_depnode kmp_depnode_t;
typedef struct kmp_depnode_list kmp_depnode_list_t;
typedef struct kmp_dephash_entry kmp_dephash_entry_t;

// Compiler sends us this info:
/* One task dependence as emitted by the compiler: an address range plus
   in / out / mutexinoutset flags. */
typedef struct kmp_depend_info {
  kmp_intptr_t base_addr; /* start address of the depend range */
  size_t len; /* length of the depend range in bytes */
  struct {
    bool in : 1; /* dependence reads the range */
    bool out : 1; /* dependence writes the range */
    bool mtx : 1; /* mutexinoutset dependence */
  } flags;
} kmp_depend_info_t;

// Internal structures to work with task dependencies:
/* Singly-linked list of dependence-graph nodes. */
struct kmp_depnode_list {
  kmp_depnode_t *node;
  kmp_depnode_list_t *next;
};
2148345153Sdim
// Max number of mutexinoutset dependencies per node
#define MAX_MTX_DEPS 4

/* A node in the task dependence graph. */
typedef struct kmp_base_depnode {
  kmp_depnode_list_t *successors; /* used under lock */
  kmp_task_t *task; /* non-NULL if depnode is active, used under lock */
  kmp_lock_t *mtx_locks[MAX_MTX_DEPS]; /* lock mutexinoutset dependent tasks */
  kmp_int32 mtx_num_locks; /* number of locks in mtx_locks array */
  kmp_lock_t lock; /* guards shared fields: task, successors */
#if KMP_SUPPORT_GRAPH_OUTPUT
  kmp_uint32 id; /* node id used when emitting the dependence graph */
#endif
  std::atomic<kmp_int32> npredecessors; /* unfinished predecessor count */
  std::atomic<kmp_int32> nrefs; /* reference count for node lifetime */
} kmp_base_depnode_t;

/* Cache-aligned, cache-line-padded wrapper around kmp_base_depnode_t. */
union KMP_ALIGN_CACHE kmp_depnode {
  double dn_align; /* use worst case alignment */
  char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)];
  kmp_base_depnode_t dn;
};
2170345153Sdim
/* One bucket-chain entry of the dependence hash: tracks the last tasks that
   touched a given address so new dependences can be linked to them. */
struct kmp_dephash_entry {
  kmp_intptr_t addr; /* hashed address */
  kmp_depnode_t *last_out; /* last task with an out dependence on addr */
  kmp_depnode_list_t *last_ins; /* in dependences since last_out */
  kmp_depnode_list_t *last_mtxs; /* mutexinoutset dependences since last_out */
  kmp_int32 last_flag; /* kind of the most recent dependence recorded */
  kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
  kmp_dephash_entry_t *next_in_bucket;
};

/* Chained hash table from address to kmp_dephash_entry. */
typedef struct kmp_dephash {
  kmp_dephash_entry_t **buckets;
  size_t size; /* number of buckets */
  size_t generation; /* bumped when the table is resized/rebuilt */
  kmp_uint32 nelements; /* number of entries stored */
  kmp_uint32 nconflicts; /* number of bucket collisions observed */
} kmp_dephash_t;

/* Task affinity hint from the compiler: an address range plus flags.
   NOTE(review): flag1/flag2 semantics are not visible in this header. */
typedef struct kmp_task_affinity_info {
  kmp_intptr_t base_addr;
  size_t len;
  struct {
    bool flag1 : 1;
    bool flag2 : 1;
    kmp_int32 reserved : 30;
  } flags;
} kmp_task_affinity_info_t;
2198345153Sdim
/* State of a task completion event (used for detachable tasks, see the
   td_allow_completion_event field of kmp_taskdata). */
typedef enum kmp_event_type_t {
  KMP_EVENT_UNINITIALIZED = 0,
  KMP_EVENT_ALLOW_COMPLETION = 1
} kmp_event_type_t;

/* Completion event object; `lock` serializes fulfillment vs. task finish. */
typedef struct {
  kmp_event_type_t type;
  kmp_tas_lock_t lock;
  union {
    kmp_task_t *task; /* task associated with the event */
  } ed;
} kmp_event_t;
2211353358Sdim
#ifdef BUILD_TIED_TASK_STACK

/* Tied Task stack definitions */
/* One fixed-size block of stack entries; blocks are chained both ways so the
   stack can grow and shrink without reallocating. */
typedef struct kmp_stack_block {
  kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE];
  struct kmp_stack_block *sb_next;
  struct kmp_stack_block *sb_prev;
} kmp_stack_block_t;

/* Stack of suspended tied tasks, used for the task scheduling constraint. */
typedef struct kmp_task_stack {
  kmp_stack_block_t ts_first_block; // first block of stack entries
  kmp_taskdata_t **ts_top; // pointer to the top of stack
  kmp_int32 ts_entries; // number of entries on the stack
} kmp_task_stack_t;

#endif // BUILD_TIED_TASK_STACK
2228345153Sdim
typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
  /* Compiler flags */ /* Total compiler flags must be 16 bits */
  unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
  unsigned final : 1; /* task is final(1) so execute immediately */
  unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
                              code path */
  unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
                                     invoke destructors from the runtime */
  unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
                         context of the RTL) */
  unsigned priority_specified : 1; /* set if the compiler provides priority
                                      setting for the task */
  unsigned detachable : 1; /* 1 == can detach */
  unsigned reserved : 9; /* reserved for compiler use */ /* 7 + 9 = 16 bits */

  /* Library flags */ /* Total library flags must be 16 bits */
  unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
  unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
  unsigned tasking_ser : 1; // all tasks in team are either executed immediately
  // (1) or may be deferred (0)
  unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
  // (0) [>= 2 threads]
  /* If either team_serial or tasking_ser is set, task team may be NULL */
  /* Task State Flags: */
  unsigned started : 1; /* 1==started, 0==not started     */
  unsigned executing : 1; /* 1==executing, 0==not executing */
  unsigned complete : 1; /* 1==complete, 0==not complete   */
  unsigned freed : 1; /* 1==freed, 0==allocated        */
  unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
  unsigned reserved31 : 7; /* reserved for library use */ /* 9 + 7 = 16 bits */

} kmp_tasking_flags_t;
2261345153Sdim
/* Runtime-internal task header; allocated immediately before the
   compiler-visible kmp_task_t in a single block (see KMP_TASK_TO_TASKDATA). */
struct kmp_taskdata { /* aligned during dynamic allocation       */
  kmp_int32 td_task_id; /* id, assigned by debugger                */
  kmp_tasking_flags_t td_flags; /* task flags                              */
  kmp_team_t *td_team; /* team for this task                      */
  kmp_info_p *td_alloc_thread; /* thread that allocated data structures   */
  /* Currently not used except for perhaps IDB */
  kmp_taskdata_t *td_parent; /* parent task                             */
  kmp_int32 td_level; /* task nesting level                      */
  std::atomic<kmp_int32> td_untied_count; // untied task active parts counter
  ident_t *td_ident; /* task identifier                         */
  // Taskwait data.
  ident_t *td_taskwait_ident;
  kmp_uint32 td_taskwait_counter;
  kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
  KMP_ALIGN_CACHE kmp_internal_control_t
      td_icvs; /* Internal control variables for the task */
  KMP_ALIGN_CACHE std::atomic<kmp_int32>
      td_allocated_child_tasks; /* Child tasks (+ current task) not yet
                                   deallocated */
  std::atomic<kmp_int32>
      td_incomplete_child_tasks; /* Child tasks not yet complete */
  kmp_taskgroup_t
      *td_taskgroup; // Each task keeps pointer to its current taskgroup
  kmp_dephash_t
      *td_dephash; // Dependencies for children tasks are tracked from here
  kmp_depnode_t
      *td_depnode; // Pointer to graph node if this task has dependencies
  kmp_task_team_t *td_task_team;
  kmp_int32 td_size_alloc; // The size of task structure, including shareds etc.
#if defined(KMP_GOMP_COMPAT)
  // 4 or 8 byte integers for the loop bounds in GOMP_taskloop
  kmp_int32 td_size_loop_bounds;
#endif
  kmp_taskdata_t *td_last_tied; // keep tied task for task scheduling constraint
#if defined(KMP_GOMP_COMPAT)
  // GOMP sends in a copy function for copy constructors
  void (*td_copy_func)(void *, void *);
#endif
  kmp_event_t td_allow_completion_event; // completion event for detachable task
#if OMPT_SUPPORT
  ompt_task_info_t ompt_task_info;
#endif
}; // struct kmp_taskdata

// Make sure padding above worked
KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0);
2308345153Sdim
// Data for task team but per thread
typedef struct kmp_base_thread_data {
  kmp_info_p *td_thr; // Pointer back to thread info
  // Used only in __kmp_execute_tasks_template, maybe not avail until task is
  // queued?
  kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
  kmp_taskdata_t *
      *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
  kmp_int32 td_deque_size; // Size of deque (a power of 2; see TASK_DEQUE_MASK)
  kmp_uint32 td_deque_head; // Head of deque (will wrap)
  kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
  kmp_int32 td_deque_ntasks; // Number of tasks in deque
  // GEH: shouldn't this be volatile since used in while-spin?
  kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
#ifdef BUILD_TIED_TASK_STACK
  kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task
// scheduling constraint
#endif // BUILD_TIED_TASK_STACK
} kmp_base_thread_data_t;

#define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
#define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS)

/* The mask form of wrap-around indexing requires td_deque_size to be a
   power of two. */
#define TASK_DEQUE_SIZE(td) ((td).td_deque_size)
#define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)

/* Cache-aligned, cache-line-padded wrapper around kmp_base_thread_data_t. */
typedef union KMP_ALIGN_CACHE kmp_thread_data {
  kmp_base_thread_data_t td;
  double td_align; /* use worst case alignment */
  char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)];
} kmp_thread_data_t;
2340345153Sdim
// Data for task teams which are used when tasking is enabled for the team
typedef struct kmp_base_task_team {
  kmp_bootstrap_lock_t
      tt_threads_lock; /* Lock used to allocate per-thread part of task team */
  /* must be bootstrap lock since used at library shutdown*/
  kmp_task_team_t *tt_next; /* For linking the task team free list */
  kmp_thread_data_t
      *tt_threads_data; /* Array of per-thread structures for task team */
  /* Data survives task team deallocation */
  kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while
                               executing this team? */
  /* TRUE means tt_threads_data is set up and initialized */
  kmp_int32 tt_nproc; /* #threads in team           */
  kmp_int32 tt_max_threads; // # entries allocated for threads_data array
  kmp_int32 tt_found_proxy_tasks; // found proxy tasks since last barrier
  kmp_int32 tt_untied_task_encountered; // nonzero once an untied task is seen

  /* Cache-line separated: updated by every thread at barriers. */
  KMP_ALIGN_CACHE
  std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */

  KMP_ALIGN_CACHE
  volatile kmp_uint32
      tt_active; /* is the team still actively executing tasks */
} kmp_base_task_team_t;

/* Cache-aligned, cache-line-padded wrapper around kmp_base_task_team_t. */
union KMP_ALIGN_CACHE kmp_task_team {
  kmp_base_task_team_t tt;
  double tt_align; /* use worst case alignment */
  char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];
};
2371345153Sdim
#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
// Free lists keep same-size free memory slots for fast memory allocation
// routines
typedef struct kmp_free_list {
  void *th_free_list_self; // Self-allocated tasks free list
  void *th_free_list_sync; // Self-allocated tasks stolen/returned by other
  // threads
  void *th_free_list_other; // Non-self free list (to be returned to owner's
  // sync list)
} kmp_free_list_t;
#endif
#if KMP_NESTED_HOT_TEAMS
// Hot teams array keeps hot teams and their sizes for given thread. Hot teams
// are not put in teams pool, and they don't put threads in threads pool.
typedef struct kmp_hot_team_ptr {
  kmp_team_p *hot_team; // pointer to hot_team of given nesting level
  kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
} kmp_hot_team_ptr_t;
#endif
/* Requested shape of a league created by the teams construct. */
typedef struct kmp_teams_size {
  kmp_int32 nteams; // number of teams in a league
  kmp_int32 nth; // number of threads in each team of the league
} kmp_teams_size_t;
2395345153Sdim
// This struct stores a thread that acts as a "root" for a contention
// group. Contention groups are rooted at kmp_root threads, but also at
// each master thread of each team created in the teams construct.
// This struct therefore also stores a thread_limit associated with
// that contention group, and a counter to track the number of threads
// active in that contention group. Each thread has a list of these: CG
// root threads have an entry in their list in which cg_root refers to
// the thread itself, whereas other workers in the CG will have a
// single entry where cg_root is same as the entry containing their CG
// root. When a thread encounters a teams construct, it will add a new
// entry to the front of its list, because it now roots a new CG.
typedef struct kmp_cg_root {
  kmp_info_p *cg_root; // "root" thread for a contention group
  // The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or
  // thread_limit clause for teams masters
  kmp_int32 cg_thread_limit;
  kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root
  struct kmp_cg_root *up; // pointer to higher level CG root in list
} kmp_cg_root_t;
2415353358Sdim
2416345153Sdim// OpenMP thread data structures
2417345153Sdim
2418345153Sdimtypedef struct KMP_ALIGN_CACHE kmp_base_info {
2419345153Sdim  /* Start with the readonly data which is cache aligned and padded. This is
2420345153Sdim     written before the thread starts working by the master. Uber masters may
2421345153Sdim     update themselves later. Usage does not consider serialized regions.  */
2422345153Sdim  kmp_desc_t th_info;
2423345153Sdim  kmp_team_p *th_team; /* team we belong to */
2424345153Sdim  kmp_root_p *th_root; /* pointer to root of task hierarchy */
2425345153Sdim  kmp_info_p *th_next_pool; /* next available thread in the pool */
2426345153Sdim  kmp_disp_t *th_dispatch; /* thread's dispatch data */
2427345153Sdim  int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
2428345153Sdim
2429345153Sdim  /* The following are cached from the team info structure */
2430345153Sdim  /* TODO use these in more places as determined to be needed via profiling */
2431345153Sdim  int th_team_nproc; /* number of threads in a team */
2432345153Sdim  kmp_info_p *th_team_master; /* the team's master thread */
2433345153Sdim  int th_team_serialized; /* team is serialized */
2434345153Sdim  microtask_t th_teams_microtask; /* save entry address for teams construct */
2435345153Sdim  int th_teams_level; /* save initial level of teams construct */
2436345153Sdim/* it is 0 on device but may be any on host */
2437345153Sdim
2438345153Sdim/* The blocktime info is copied from the team struct to the thread sruct */
2439345153Sdim/* at the start of a barrier, and the values stored in the team are used */
2440345153Sdim/* at points in the code where the team struct is no longer guaranteed   */
2441345153Sdim/* to exist (from the POV of worker threads).                            */
2442345153Sdim#if KMP_USE_MONITOR
2443345153Sdim  int th_team_bt_intervals;
2444345153Sdim  int th_team_bt_set;
2445345153Sdim#else
2446345153Sdim  kmp_uint64 th_team_bt_intervals;
2447345153Sdim#endif
2448345153Sdim
2449345153Sdim#if KMP_AFFINITY_SUPPORTED
2450345153Sdim  kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
2451345153Sdim#endif
2452353358Sdim  omp_allocator_handle_t th_def_allocator; /* default allocator */
2453345153Sdim  /* The data set by the master at reinit, then R/W by the worker */
2454345153Sdim  KMP_ALIGN_CACHE int
2455345153Sdim      th_set_nproc; /* if > 0, then only use this request for the next fork */
2456345153Sdim#if KMP_NESTED_HOT_TEAMS
2457345153Sdim  kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
2458345153Sdim#endif
2459345153Sdim  kmp_proc_bind_t
2460345153Sdim      th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
2461345153Sdim  kmp_teams_size_t
2462345153Sdim      th_teams_size; /* number of teams/threads in teams construct */
2463345153Sdim#if KMP_AFFINITY_SUPPORTED
2464345153Sdim  int th_current_place; /* place currently bound to */
2465345153Sdim  int th_new_place; /* place to bind to in par reg */
2466345153Sdim  int th_first_place; /* first place in partition */
2467345153Sdim  int th_last_place; /* last place in partition */
2468345153Sdim#endif
2469345153Sdim  int th_prev_level; /* previous level for affinity format */
2470345153Sdim  int th_prev_num_threads; /* previous num_threads for affinity format */
2471345153Sdim#if USE_ITT_BUILD
2472345153Sdim  kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
2473345153Sdim  kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
2474345153Sdim  kmp_uint64 th_frame_time; /* frame timestamp */
2475345153Sdim#endif /* USE_ITT_BUILD */
2476345153Sdim  kmp_local_t th_local;
2477345153Sdim  struct private_common *th_pri_head;
2478345153Sdim
2479345153Sdim  /* Now the data only used by the worker (after initial allocation) */
2480345153Sdim  /* TODO the first serial team should actually be stored in the info_t
2481345153Sdim     structure.  this will help reduce initial allocation overhead */
2482345153Sdim  KMP_ALIGN_CACHE kmp_team_p
2483345153Sdim      *th_serial_team; /*serialized team held in reserve*/
2484345153Sdim
2485345153Sdim#if OMPT_SUPPORT
2486345153Sdim  ompt_thread_info_t ompt_thread_info;
2487345153Sdim#endif
2488345153Sdim
2489345153Sdim  /* The following are also read by the master during reinit */
2490345153Sdim  struct common_table *th_pri_common;
2491345153Sdim
2492345153Sdim  volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
2493345153Sdim  /* while awaiting queuing lock acquire */
2494345153Sdim
2495345153Sdim  volatile void *th_sleep_loc; // this points at a kmp_flag<T>
2496345153Sdim
2497345153Sdim  ident_t *th_ident;
2498345153Sdim  unsigned th_x; // Random number generator data
2499345153Sdim  unsigned th_a; // Random number generator data
2500345153Sdim
2501345153Sdim  /* Tasking-related data for the thread */
2502345153Sdim  kmp_task_team_t *th_task_team; // Task team struct
2503345153Sdim  kmp_taskdata_t *th_current_task; // Innermost Task being executed
2504345153Sdim  kmp_uint8 th_task_state; // alternating 0/1 for task team identification
2505345153Sdim  kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
2506345153Sdim  // at nested levels
2507345153Sdim  kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
2508345153Sdim  kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
2509345153Sdim  kmp_uint32 th_reap_state; // Non-zero indicates thread is not
2510345153Sdim  // tasking, thus safe to reap
2511345153Sdim
2512345153Sdim  /* More stuff for keeping track of active/sleeping threads (this part is
2513345153Sdim     written by the worker thread) */
2514345153Sdim  kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
2515345153Sdim  int th_active; // ! sleeping; 32 bits for TCR/TCW
2516345153Sdim  struct cons_header *th_cons; // used for consistency check
2517345153Sdim#if KMP_USE_HIER_SCHED
2518345153Sdim  // used for hierarchical scheduling
2519345153Sdim  kmp_hier_private_bdata_t *th_hier_bar_data;
2520345153Sdim#endif
2521345153Sdim
2522345153Sdim  /* Add the syncronizing data which is cache aligned and padded. */
2523345153Sdim  KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier];
2524345153Sdim
2525345153Sdim  KMP_ALIGN_CACHE volatile kmp_int32
2526345153Sdim      th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
2527345153Sdim
2528345153Sdim#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
2529345153Sdim#define NUM_LISTS 4
2530345153Sdim  kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory
2531345153Sdim// allocation routines
2532345153Sdim#endif
2533345153Sdim
2534345153Sdim#if KMP_OS_WINDOWS
2535345153Sdim  kmp_win32_cond_t th_suspend_cv;
2536345153Sdim  kmp_win32_mutex_t th_suspend_mx;
2537353358Sdim  std::atomic<int> th_suspend_init;
2538345153Sdim#endif
2539345153Sdim#if KMP_OS_UNIX
2540345153Sdim  kmp_cond_align_t th_suspend_cv;
2541345153Sdim  kmp_mutex_align_t th_suspend_mx;
2542353358Sdim  std::atomic<int> th_suspend_init_count;
2543345153Sdim#endif
2544345153Sdim
2545345153Sdim#if USE_ITT_BUILD
2546345153Sdim  kmp_itt_mark_t th_itt_mark_single;
2547345153Sdim// alignment ???
2548345153Sdim#endif /* USE_ITT_BUILD */
2549345153Sdim#if KMP_STATS_ENABLED
2550345153Sdim  kmp_stats_list *th_stats;
2551345153Sdim#endif
2552345153Sdim#if KMP_OS_UNIX
2553345153Sdim  std::atomic<bool> th_blocking;
2554345153Sdim#endif
2555353358Sdim  kmp_cg_root_t *th_cg_roots; // list of cg_roots associated with this thread
2556345153Sdim} kmp_base_info_t;
2557345153Sdim
// Per-thread descriptor. The union pads kmp_base_info_t out to a whole
// number of cache lines so adjacent kmp_info_t objects never share a line
// (avoids false sharing between threads).
typedef union KMP_ALIGN_CACHE kmp_info {
  double th_align; /* use worst case alignment */
  char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)];
  kmp_base_info_t th; // the actual payload; access as thr->th.<field>
} kmp_info_t;
2563345153Sdim
2564345153Sdim// OpenMP thread team data structures
2565345153Sdim
// Smallest shared team datum: a single volatile 32-bit value (wrapped in
// cache-line-padded unions below).
typedef struct kmp_base_data { volatile kmp_uint32 t_value; } kmp_base_data_t;
2567345153Sdim
// kmp_base_data_t padded to a full cache line for the team sleep datum.
typedef union KMP_ALIGN_CACHE kmp_sleep_team {
  double dt_align; /* use worst case alignment */
  char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
  kmp_base_data_t dt;
} kmp_sleep_team_t;
2573345153Sdim
// kmp_base_data_t padded to a full cache line; holds the team's "ordered"
// construct state (see t_ordered in kmp_base_team_t).
typedef union KMP_ALIGN_CACHE kmp_ordered_team {
  double dt_align; /* use worst case alignment */
  char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
  kmp_base_data_t dt;
} kmp_ordered_team_t;
2579345153Sdim
// Signature of the per-thread routine that launches a team's microtask.
typedef int (*launch_t)(int gtid);

/* Minimum number of ARGV entries to malloc if necessary */
#define KMP_MIN_MALLOC_ARGV_ENTRIES 100

// Set up how many argv pointers will fit in cache lines containing
// t_inline_argv. Historically, we have supported at least 96 bytes. Using a
// larger value for more space between the master write/worker read section and
// read/write by all section seems to buy more performance on EPCC PARALLEL.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// NOTE(review): the subtracted term appears to mirror the sizes of the
// fields that precede t_inline_argv in kmp_base_team_t (3 pointers, 2 ints,
// 2 kmp_int8, kmp_int16, kmp_uint32) so the array ends on a cache-line
// boundary — confirm it stays in sync if that struct changes.
#define KMP_INLINE_ARGV_BYTES                                                  \
  (4 * CACHE_LINE -                                                            \
   ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) +               \
     sizeof(kmp_int16) + sizeof(kmp_uint32)) %                                 \
    CACHE_LINE))
#else
#define KMP_INLINE_ARGV_BYTES                                                  \
  (2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE))
#endif
// Number of void* slots that fit in KMP_INLINE_ARGV_BYTES.
#define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP)
2600345153Sdim
// Team descriptor: state shared by one parallel team. Fields are grouped by
// access pattern (synchronization / master-only / master-write-worker-read /
// read-write-by-all) with KMP_ALIGN_CACHE at group boundaries to limit
// false sharing between groups. Do not reorder fields casually — layout is
// performance-sensitive (see dummy_padding note below).
typedef struct KMP_ALIGN_CACHE kmp_base_team {
  // Synchronization Data
  // ---------------------------------------------------------------------------
  KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered; // "ordered" construct state
  kmp_balign_team_t t_bar[bs_last_barrier]; // per-barrier-type team state
  std::atomic<int> t_construct; // count of single directive encountered by team
  char pad[sizeof(kmp_lock_t)]; // padding to maintain performance on big iron

  // [0] - parallel / [1] - worksharing task reduction data shared by taskgroups
  std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier
  std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions

  // Master only
  // ---------------------------------------------------------------------------
  KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
  int t_master_this_cons; // "this_construct" single counter of master in parent
  // team
  ident_t *t_ident; // if volatile, have to change too much other crud to
  // volatile too
  kmp_team_p *t_parent; // parent team
  kmp_team_p *t_next_pool; // next free team in the team pool
  kmp_disp_t *t_dispatch; // thread's dispatch data
  kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
  kmp_proc_bind_t t_proc_bind; // bind type for par region
#if USE_ITT_BUILD
  kmp_uint64 t_region_time; // region begin timestamp
#endif /* USE_ITT_BUILD */

  // Master write, workers read
  // --------------------------------------------------------------------------
  KMP_ALIGN_CACHE void **t_argv; // microtask argument vector
  int t_argc; // number of entries in t_argv
  int t_nproc; // number of threads in team
  microtask_t t_pkfn; // the outlined parallel-region function (microtask)
  launch_t t_invoke; // procedure to launch the microtask

#if OMPT_SUPPORT
  ompt_team_info_t ompt_team_info; // OMPT tool-facing team info
  ompt_lw_taskteam_t *ompt_serialized_team_info;
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  kmp_int8 t_fp_control_saved; // nonzero if FP control regs were captured
  kmp_int8 t_pad2b;
  kmp_int16 t_x87_fpu_control_word; // FP control regs
  kmp_uint32 t_mxcsr;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  // Inline storage for small argv lists, avoiding a malloc (size chosen so
  // the array ends on a cache-line boundary — see KMP_INLINE_ARGV_BYTES).
  void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES];

  KMP_ALIGN_CACHE kmp_info_t **t_threads; // descriptors of member threads
  kmp_taskdata_t
      *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
  int t_level; // nested parallel level

  KMP_ALIGN_CACHE int t_max_argc; // current capacity of t_argv
  int t_max_nproc; // max threads this team can handle (dynamically expandable)
  int t_serialized; // levels deep of serialized teams
  dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
  int t_id; // team's id, assigned by debugger.
  int t_active_level; // nested active parallel level
  kmp_r_sched_t t_sched; // run-time schedule for the team
#if KMP_AFFINITY_SUPPORTED
  int t_first_place; // first & last place in parent thread's partition.
  int t_last_place; // Restore these values to master after par region.
#endif // KMP_AFFINITY_SUPPORTED
  int t_display_affinity;
  int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
  // omp_set_num_threads() call
  omp_allocator_handle_t t_def_allocator; /* default allocator */

// Read/write by workers as well
#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
  // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf
  // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra
  // padding serves to fix the performance of epcc 'parallel' and 'barrier' when
  // CACHE_LINE=64. TODO: investigate more and get rid if this padding.
  char dummy_padding[1024];
#endif
  // Internal control stack for additional nested teams.
  KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top;
  // for SERIALIZED teams nested 2 or more levels deep
  // typed flag to store request state of cancellation
  std::atomic<kmp_int32> t_cancel_request;
  int t_master_active; // save on fork, restore on join
  void *t_copypriv_data; // team specific pointer to copyprivate data array
#if KMP_OS_WINDOWS
  std::atomic<kmp_uint32> t_copyin_counter;
#endif
#if USE_ITT_BUILD
  void *t_stack_id; // team specific stack stitching id (for ittnotify)
#endif /* USE_ITT_BUILD */
} kmp_base_team_t;
2694345153Sdim
// Team object: kmp_base_team_t padded to a cache-line multiple; access the
// payload as team->t.<field>.
union KMP_ALIGN_CACHE kmp_team {
  kmp_base_team_t t;
  double t_align; /* use worst case alignment */
  char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];
};
2700345153Sdim
// Cache-line-padded holder for the global time datum (g_time in
// kmp_base_global_t).
typedef union KMP_ALIGN_CACHE kmp_time_global {
  double dt_align; /* use worst case alignment */
  char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
  kmp_base_data_t dt;
} kmp_time_global_t;
2706345153Sdim
// Process-wide runtime status flags (one instance: __kmp_global).
typedef struct kmp_base_global {
  /* cache-aligned */
  kmp_time_global_t g_time;

  /* non cache-aligned */
  volatile int g_abort; // NOTE(review): presumably set when the runtime is
  // aborting — confirm at the __kmp_abort* call sites
  volatile int g_done; // NOTE(review): presumably set on shutdown — confirm

  int g_dynamic; // dynamic adjustment of thread count enabled?
  enum dynamic_mode g_dynamic_mode; // which dynamic-adjustment policy is used
} kmp_base_global_t;
2718345153Sdim
// kmp_base_global_t padded to a cache-line multiple; access as __kmp_global.g.
typedef union KMP_ALIGN_CACHE kmp_global {
  kmp_base_global_t g;
  double g_align; /* use worst case alignment */
  char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)];
} kmp_global_t;
2724345153Sdim
// Per-root bookkeeping: one of these exists for each root (uber) thread,
// i.e. each initial/foreign thread that heads an OpenMP thread tree.
typedef struct kmp_base_root {
  // TODO: GEH - combine r_active with r_in_parallel then r_active ==
  // (r_in_parallel>= 0)
  // TODO: GEH - then replace r_active with t_active_levels if we can to reduce
  // the synch overhead or keeping r_active
  volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
  // keeps a count of active parallel regions per root
  std::atomic<int> r_in_parallel;
  // GEH: This is misnamed, should be r_active_levels
  kmp_team_t *r_root_team; // team at the root of this thread tree
  kmp_team_t *r_hot_team; // NOTE(review): presumably the cached team reused
  // across parallel regions — confirm against kmp_runtime usage
  kmp_info_t *r_uber_thread; // the root thread's own descriptor
  kmp_lock_t r_begin_lock; // NOTE(review): appears to guard r_begin — confirm
  volatile int r_begin;
  int r_blocktime; /* blocktime for this root and descendants */
} kmp_base_root_t;
2741345153Sdim
// kmp_base_root_t padded to a cache-line multiple; access as root->r.<field>.
typedef union KMP_ALIGN_CACHE kmp_root {
  kmp_base_root_t r;
  double r_align; /* use worst case alignment */
  char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)];
} kmp_root_t;
2747345153Sdim
// Holder for a 32-bit index value; NOTE(review): the name suggests it is
// used by the Fortran interface layer — confirm at use sites.
struct fortran_inx_info {
  kmp_int32 data;
};
2751345153Sdim
2752345153Sdim/* ------------------------------------------------------------------------ */
2753345153Sdim
2754345153Sdimextern int __kmp_settings;
2755345153Sdimextern int __kmp_duplicate_library_ok;
2756345153Sdim#if USE_ITT_BUILD
2757345153Sdimextern int __kmp_forkjoin_frames;
2758345153Sdimextern int __kmp_forkjoin_frames_mode;
2759345153Sdim#endif
2760345153Sdimextern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
2761345153Sdimextern int __kmp_determ_red;
2762345153Sdim
2763345153Sdim#ifdef KMP_DEBUG
2764345153Sdimextern int kmp_a_debug;
2765345153Sdimextern int kmp_b_debug;
2766345153Sdimextern int kmp_c_debug;
2767345153Sdimextern int kmp_d_debug;
2768345153Sdimextern int kmp_e_debug;
2769345153Sdimextern int kmp_f_debug;
2770345153Sdim#endif /* KMP_DEBUG */
2771345153Sdim
2772345153Sdim/* For debug information logging using rotating buffer */
2773345153Sdim#define KMP_DEBUG_BUF_LINES_INIT 512
2774345153Sdim#define KMP_DEBUG_BUF_LINES_MIN 1
2775345153Sdim
2776345153Sdim#define KMP_DEBUG_BUF_CHARS_INIT 128
2777345153Sdim#define KMP_DEBUG_BUF_CHARS_MIN 2
2778345153Sdim
2779345153Sdimextern int
2780345153Sdim    __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
2781345153Sdimextern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
2782345153Sdimextern int
2783345153Sdim    __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
2784345153Sdimextern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer
2785345153Sdim                                      entry pointer */
2786345153Sdim
2787345153Sdimextern char *__kmp_debug_buffer; /* Debug buffer itself */
2788345153Sdimextern std::atomic<int> __kmp_debug_count; /* Counter for number of lines
2789345153Sdim                                              printed in buffer so far */
2790345153Sdimextern int __kmp_debug_buf_warn_chars; /* Keep track of char increase
2791345153Sdim                                          recommended in warnings */
2792345153Sdim/* end rotating debug buffer */
2793345153Sdim
2794345153Sdim#ifdef KMP_DEBUG
2795345153Sdimextern int __kmp_par_range; /* +1 => only go par for constructs in range */
2796345153Sdim
2797345153Sdim#define KMP_PAR_RANGE_ROUTINE_LEN 1024
2798345153Sdimextern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
2799345153Sdim#define KMP_PAR_RANGE_FILENAME_LEN 1024
2800345153Sdimextern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
2801345153Sdimextern int __kmp_par_range_lb;
2802345153Sdimextern int __kmp_par_range_ub;
2803345153Sdim#endif
2804345153Sdim
2805345153Sdim/* For printing out dynamic storage map for threads and teams */
2806345153Sdimextern int
2807345153Sdim    __kmp_storage_map; /* True means print storage map for threads and teams */
2808345153Sdimextern int __kmp_storage_map_verbose; /* True means storage map includes
2809345153Sdim                                         placement info */
2810345153Sdimextern int __kmp_storage_map_verbose_specified;
2811345153Sdim
2812345153Sdim#if KMP_ARCH_X86 || KMP_ARCH_X86_64
2813345153Sdimextern kmp_cpuinfo_t __kmp_cpuinfo;
2814345153Sdim#endif
2815345153Sdim
2816345153Sdimextern volatile int __kmp_init_serial;
2817345153Sdimextern volatile int __kmp_init_gtid;
2818345153Sdimextern volatile int __kmp_init_common;
2819345153Sdimextern volatile int __kmp_init_middle;
2820345153Sdimextern volatile int __kmp_init_parallel;
2821345153Sdim#if KMP_USE_MONITOR
2822345153Sdimextern volatile int __kmp_init_monitor;
2823345153Sdim#endif
2824345153Sdimextern volatile int __kmp_init_user_locks;
2825345153Sdimextern int __kmp_init_counter;
2826345153Sdimextern int __kmp_root_counter;
2827345153Sdimextern int __kmp_version;
2828345153Sdim
2829345153Sdim/* list of address of allocated caches for commons */
2830345153Sdimextern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
2831345153Sdim
2832345153Sdim/* Barrier algorithm types and options */
2833345153Sdimextern kmp_uint32 __kmp_barrier_gather_bb_dflt;
2834345153Sdimextern kmp_uint32 __kmp_barrier_release_bb_dflt;
2835345153Sdimextern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
2836345153Sdimextern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
2837345153Sdimextern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier];
2838345153Sdimextern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier];
2839345153Sdimextern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier];
2840345153Sdimextern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier];
2841345153Sdimextern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier];
2842345153Sdimextern char const *__kmp_barrier_pattern_env_name[bs_last_barrier];
2843345153Sdimextern char const *__kmp_barrier_type_name[bs_last_barrier];
2844345153Sdimextern char const *__kmp_barrier_pattern_name[bp_last_bar];
2845345153Sdim
2846345153Sdim/* Global Locks */
2847345153Sdimextern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
2848345153Sdimextern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
2849345153Sdimextern kmp_bootstrap_lock_t __kmp_task_team_lock;
2850345153Sdimextern kmp_bootstrap_lock_t
2851345153Sdim    __kmp_exit_lock; /* exit() is not always thread-safe */
2852345153Sdim#if KMP_USE_MONITOR
2853345153Sdimextern kmp_bootstrap_lock_t
2854345153Sdim    __kmp_monitor_lock; /* control monitor thread creation */
2855345153Sdim#endif
2856345153Sdimextern kmp_bootstrap_lock_t
2857345153Sdim    __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and
2858345153Sdim                             __kmp_threads expansion to co-exist */
2859345153Sdim
2860345153Sdimextern kmp_lock_t __kmp_global_lock; /* control OS/global access  */
2861345153Sdimextern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access  */
2862345153Sdimextern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
2863345153Sdim
2864345153Sdimextern enum library_type __kmp_library;
2865345153Sdim
2866345153Sdimextern enum sched_type __kmp_sched; /* default runtime scheduling */
2867345153Sdimextern enum sched_type __kmp_static; /* default static scheduling method */
2868345153Sdimextern enum sched_type __kmp_guided; /* default guided scheduling method */
2869345153Sdimextern enum sched_type __kmp_auto; /* default auto scheduling method */
2870345153Sdimextern int __kmp_chunk; /* default runtime chunk size */
2871345153Sdim
2872345153Sdimextern size_t __kmp_stksize; /* stack size per thread         */
2873345153Sdim#if KMP_USE_MONITOR
2874345153Sdimextern size_t __kmp_monitor_stksize; /* stack size for monitor thread */
2875345153Sdim#endif
2876345153Sdimextern size_t __kmp_stkoffset; /* stack offset per thread       */
2877345153Sdimextern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
2878345153Sdim
2879345153Sdimextern size_t
2880345153Sdim    __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
2881345153Sdimextern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
2882345153Sdimextern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
2883345153Sdimextern int __kmp_env_checks; /* was KMP_CHECKS specified?    */
2884345153Sdimextern int __kmp_env_consistency_check; // was KMP_CONSISTENCY_CHECK specified?
2885345153Sdimextern int __kmp_generate_warnings; /* should we issue warnings? */
2886345153Sdimextern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
2887345153Sdim
2888345153Sdim#ifdef DEBUG_SUSPEND
2889345153Sdimextern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
2890345153Sdim#endif
2891345153Sdim
2892353358Sdimextern kmp_int32 __kmp_use_yield;
2893353358Sdimextern kmp_int32 __kmp_use_yield_exp_set;
2894345153Sdimextern kmp_uint32 __kmp_yield_init;
2895345153Sdimextern kmp_uint32 __kmp_yield_next;
2896345153Sdim
2897345153Sdim/* ------------------------------------------------------------------------- */
2898345153Sdimextern int __kmp_allThreadsSpecified;
2899345153Sdim
2900345153Sdimextern size_t __kmp_align_alloc;
2901345153Sdim/* following data protected by initialization routines */
2902345153Sdimextern int __kmp_xproc; /* number of processors in the system */
2903345153Sdimextern int __kmp_avail_proc; /* number of processors available to the process */
2904345153Sdimextern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
2905345153Sdimextern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
2906345153Sdim// maximum total number of concurrently-existing threads on device
2907345153Sdimextern int __kmp_max_nth;
2908345153Sdim// maximum total number of concurrently-existing threads in a contention group
2909345153Sdimextern int __kmp_cg_max_nth;
2910345153Sdimextern int __kmp_teams_max_nth; // max threads used in a teams construct
2911345153Sdimextern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
2912345153Sdim                                      __kmp_root */
2913345153Sdimextern int __kmp_dflt_team_nth; /* default number of threads in a parallel
2914345153Sdim                                   region a la OMP_NUM_THREADS */
2915345153Sdimextern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial
2916345153Sdim                                      initialization */
2917345153Sdimextern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is
2918345153Sdim                                 used (fixed) */
2919345153Sdimextern int __kmp_tp_cached; /* whether threadprivate cache has been created
2920345153Sdim                               (__kmpc_threadprivate_cached()) */
2921345153Sdimextern int __kmp_dflt_blocktime; /* number of milliseconds to wait before
2922345153Sdim                                    blocking (env setting) */
2923345153Sdim#if KMP_USE_MONITOR
2924345153Sdimextern int
2925345153Sdim    __kmp_monitor_wakeups; /* number of times monitor wakes up per second */
2926345153Sdimextern int __kmp_bt_intervals; /* number of monitor timestamp intervals before
2927345153Sdim                                  blocking */
2928345153Sdim#endif
2929345153Sdim#ifdef KMP_ADJUST_BLOCKTIME
2930345153Sdimextern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
2931345153Sdim#endif /* KMP_ADJUST_BLOCKTIME */
2932345153Sdim#ifdef KMP_DFLT_NTH_CORES
2933345153Sdimextern int __kmp_ncores; /* Total number of cores for threads placement */
2934345153Sdim#endif
2935345153Sdim/* Number of millisecs to delay on abort for Intel(R) VTune(TM) tools */
2936345153Sdimextern int __kmp_abort_delay;
2937345153Sdim
2938345153Sdimextern int __kmp_need_register_atfork_specified;
2939345153Sdimextern int
2940345153Sdim    __kmp_need_register_atfork; /* At initialization, call pthread_atfork to
2941345153Sdim                                   install fork handler */
2942345153Sdimextern int __kmp_gtid_mode; /* Method of getting gtid, values:
2943345153Sdim                               0 - not set, will be set at runtime
2944345153Sdim                               1 - using stack search
2945345153Sdim                               2 - dynamic TLS (pthread_getspecific(Linux* OS/OS
2946345153Sdim                                   X*) or TlsGetValue(Windows* OS))
2947345153Sdim                               3 - static TLS (__declspec(thread) __kmp_gtid),
2948345153Sdim                                   Linux* OS .so only.  */
2949345153Sdimextern int
2950345153Sdim    __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
2951345153Sdim#ifdef KMP_TDATA_GTID
2952345153Sdimextern KMP_THREAD_LOCAL int __kmp_gtid;
2953345153Sdim#endif
2954345153Sdimextern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
2955345153Sdimextern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread
2956345153Sdim#if KMP_ARCH_X86 || KMP_ARCH_X86_64
2957345153Sdimextern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork
2958345153Sdimextern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg
2959345153Sdimextern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */
2960345153Sdim#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
2961345153Sdim
2962353358Sdim// max_active_levels for nested parallelism enabled by default via
2963353358Sdim// OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND
2964353358Sdimextern int __kmp_dflt_max_active_levels;
2965353358Sdim// Indicates whether value of __kmp_dflt_max_active_levels was already
2966353358Sdim// explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false
2967353358Sdimextern bool __kmp_dflt_max_active_levels_set;
2968345153Sdimextern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in
2969345153Sdim                                          concurrent execution per team */
2970345153Sdim#if KMP_NESTED_HOT_TEAMS
2971345153Sdimextern int __kmp_hot_teams_mode;
2972345153Sdimextern int __kmp_hot_teams_max_level;
2973345153Sdim#endif
2974345153Sdim
2975345153Sdim#if KMP_OS_LINUX
2976345153Sdimextern enum clock_function_type __kmp_clock_function;
2977345153Sdimextern int __kmp_clock_function_param;
2978345153Sdim#endif /* KMP_OS_LINUX */
2979345153Sdim
2980345153Sdim#if KMP_MIC_SUPPORTED
2981345153Sdimextern enum mic_type __kmp_mic_type;
2982345153Sdim#endif
2983345153Sdim
2984345153Sdim#ifdef USE_LOAD_BALANCE
2985345153Sdimextern double __kmp_load_balance_interval; // load balance algorithm interval
2986345153Sdim#endif /* USE_LOAD_BALANCE */
2987345153Sdim
2988345153Sdim// OpenMP 3.1 - Nested num threads array
// OpenMP 3.1 - Nested num threads array (per-level values from
// OMP_NUM_THREADS as a comma-separated list).
typedef struct kmp_nested_nthreads_t {
  int *nth; // array of per-nesting-level thread counts
  int size; // allocated capacity of nth
  int used; // number of valid entries in nth
} kmp_nested_nthreads_t;
2994345153Sdim
2995345153Sdimextern kmp_nested_nthreads_t __kmp_nested_nth;
2996345153Sdim
2997345153Sdim#if KMP_USE_ADAPTIVE_LOCKS
2998345153Sdim
2999345153Sdim// Parameters for the speculative lock backoff system.
// Parameters for the speculative (adaptive) lock backoff system.
struct kmp_adaptive_backoff_params_t {
  // Number of soft retries before it counts as a hard retry.
  kmp_uint32 max_soft_retries;
  // Badness is a bit mask : 0,1,3,7,15,... on each hard failure we move one to
  // the right
  kmp_uint32 max_badness;
};
3007345153Sdim
3008345153Sdimextern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
3009345153Sdim
3010345153Sdim#if KMP_DEBUG_ADAPTIVE_LOCKS
3011345153Sdimextern const char *__kmp_speculative_statsfile;
3012345153Sdim#endif
3013345153Sdim
3014345153Sdim#endif // KMP_USE_ADAPTIVE_LOCKS
3015345153Sdim
3016345153Sdimextern int __kmp_display_env; /* TRUE or FALSE */
3017345153Sdimextern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
3018345153Sdimextern int __kmp_omp_cancellation; /* TRUE or FALSE */
3019345153Sdim
3020345153Sdim/* ------------------------------------------------------------------------- */
3021345153Sdim
3022345153Sdim/* the following are protected by the fork/join lock */
3023345153Sdim/* write: lock  read: anytime */
3024345153Sdimextern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
3025345153Sdim/* read/write: lock */
3026345153Sdimextern volatile kmp_team_t *__kmp_team_pool;
3027345153Sdimextern volatile kmp_info_t *__kmp_thread_pool;
3028345153Sdimextern kmp_info_t *__kmp_thread_pool_insert_pt;
3029345153Sdim
3030345153Sdim// total num threads reachable from some root thread including all root threads
3031345153Sdimextern volatile int __kmp_nth;
3032345153Sdim/* total number of threads reachable from some root thread including all root
3033345153Sdim   threads, and those in the thread pool */
3034345153Sdimextern volatile int __kmp_all_nth;
3035345153Sdimextern std::atomic<int> __kmp_thread_pool_active_nth;
3036345153Sdim
3037345153Sdimextern kmp_root_t **__kmp_root; /* root of thread hierarchy */
3038345153Sdim/* end data protected by fork/join lock */
3039345153Sdim/* ------------------------------------------------------------------------- */
3040345153Sdim
/* Shorthand accessors for the calling thread's ids and structures.
   __kmp_entry_gtid() uses the "_reg" lookup variant (see
   __kmp_get_global_thread_id_reg below); __kmp_get_gtid() does not. */
#define __kmp_get_gtid() __kmp_get_global_thread_id()
#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
// Team-local id (tid) of the calling thread.
#define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid()))
// Current team / descriptor of the calling thread.
#define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team)
#define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid()))

// AT: Which way is correct?
// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
#define __kmp_get_team_num_threads(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_team->t.t_nproc)
3052345153Sdim
3053345153Sdimstatic inline bool KMP_UBER_GTID(int gtid) {
3054345153Sdim  KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);
3055345153Sdim  KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);
3056345153Sdim  return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&
3057345153Sdim          __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);
3058345153Sdim}
3059345153Sdim
3060345153Sdimstatic inline int __kmp_tid_from_gtid(int gtid) {
3061345153Sdim  KMP_DEBUG_ASSERT(gtid >= 0);
3062345153Sdim  return __kmp_threads[gtid]->th.th_info.ds.ds_tid;
3063345153Sdim}
3064345153Sdim
3065345153Sdimstatic inline int __kmp_gtid_from_tid(int tid, const kmp_team_t *team) {
3066345153Sdim  KMP_DEBUG_ASSERT(tid >= 0 && team);
3067345153Sdim  return team->t.t_threads[tid]->th.th_info.ds.ds_gtid;
3068345153Sdim}
3069345153Sdim
3070345153Sdimstatic inline int __kmp_gtid_from_thread(const kmp_info_t *thr) {
3071345153Sdim  KMP_DEBUG_ASSERT(thr);
3072345153Sdim  return thr->th.th_info.ds.ds_gtid;
3073345153Sdim}
3074345153Sdim
3075345153Sdimstatic inline kmp_info_t *__kmp_thread_from_gtid(int gtid) {
3076345153Sdim  KMP_DEBUG_ASSERT(gtid >= 0);
3077345153Sdim  return __kmp_threads[gtid];
3078345153Sdim}
3079345153Sdim
3080345153Sdimstatic inline kmp_team_t *__kmp_team_from_gtid(int gtid) {
3081345153Sdim  KMP_DEBUG_ASSERT(gtid >= 0);
3082345153Sdim  return __kmp_threads[gtid]->th.th_team;
3083345153Sdim}
3084345153Sdim
3085345153Sdim/* ------------------------------------------------------------------------- */
3086345153Sdim
3087345153Sdimextern kmp_global_t __kmp_global; /* global status */
3088345153Sdim
3089345153Sdimextern kmp_info_t __kmp_monitor;
3090345153Sdim// For Debugging Support Library
3091345153Sdimextern std::atomic<kmp_int32> __kmp_team_counter;
3092345153Sdim// For Debugging Support Library
3093345153Sdimextern std::atomic<kmp_int32> __kmp_task_counter;
3094345153Sdim
#if USE_DEBUGGER
// Generate a unique id via an atomic counter, but only when a debugger is
// attached (__kmp_debugging); otherwise ids are unneeded and ~0 is returned.
#define _KMP_GEN_ID(counter)                                                   \
  (__kmp_debugging ? KMP_ATOMIC_INC(&counter) + 1 : ~0)
#else
#define _KMP_GEN_ID(counter) (~0)
#endif /* USE_DEBUGGER */

// Debugger-visible ids for tasks and teams.
#define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter)
#define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter)
3104345153Sdim
3105345153Sdim/* ------------------------------------------------------------------------ */
3106345153Sdim
3107345153Sdimextern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2,
3108345153Sdim                                         size_t size, char const *format, ...);
3109345153Sdim
3110345153Sdimextern void __kmp_serial_initialize(void);
3111345153Sdimextern void __kmp_middle_initialize(void);
3112345153Sdimextern void __kmp_parallel_initialize(void);
3113345153Sdim
3114345153Sdimextern void __kmp_internal_begin(void);
3115345153Sdimextern void __kmp_internal_end_library(int gtid);
3116345153Sdimextern void __kmp_internal_end_thread(int gtid);
3117345153Sdimextern void __kmp_internal_end_atexit(void);
3118345153Sdimextern void __kmp_internal_end_dtor(void);
3119345153Sdimextern void __kmp_internal_end_dest(void *);
3120345153Sdim
3121345153Sdimextern int __kmp_register_root(int initial_thread);
3122345153Sdimextern void __kmp_unregister_root(int gtid);
3123345153Sdim
3124345153Sdimextern int __kmp_ignore_mppbeg(void);
3125345153Sdimextern int __kmp_ignore_mppend(void);
3126345153Sdim
3127345153Sdimextern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws);
3128345153Sdimextern void __kmp_exit_single(int gtid);
3129345153Sdim
3130345153Sdimextern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3131345153Sdimextern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3132345153Sdim
3133345153Sdim#ifdef USE_LOAD_BALANCE
3134345153Sdimextern int __kmp_get_load_balance(int);
3135345153Sdim#endif
3136345153Sdim
3137345153Sdimextern int __kmp_get_global_thread_id(void);
3138345153Sdimextern int __kmp_get_global_thread_id_reg(void);
3139345153Sdimextern void __kmp_exit_thread(int exit_status);
3140345153Sdimextern void __kmp_abort(char const *format, ...);
3141345153Sdimextern void __kmp_abort_thread(void);
3142345153SdimKMP_NORETURN extern void __kmp_abort_process(void);
3143345153Sdimextern void __kmp_warn(char const *format, ...);
3144345153Sdim
3145345153Sdimextern void __kmp_set_num_threads(int new_nth, int gtid);
3146345153Sdim
3147345153Sdim// Returns current thread (pointer to kmp_info_t). Current thread *must* be
3148345153Sdim// registered.
3149345153Sdimstatic inline kmp_info_t *__kmp_entry_thread() {
3150345153Sdim  int gtid = __kmp_entry_gtid();
3151345153Sdim
3152345153Sdim  return __kmp_threads[gtid];
3153345153Sdim}
3154345153Sdim
3155345153Sdimextern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels);
3156345153Sdimextern int __kmp_get_max_active_levels(int gtid);
3157345153Sdimextern int __kmp_get_ancestor_thread_num(int gtid, int level);
3158345153Sdimextern int __kmp_get_team_size(int gtid, int level);
3159345153Sdimextern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk);
3160345153Sdimextern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk);
3161345153Sdim
3162345153Sdimextern unsigned short __kmp_get_random(kmp_info_t *thread);
3163345153Sdimextern void __kmp_init_random(kmp_info_t *thread);
3164345153Sdim
3165345153Sdimextern kmp_r_sched_t __kmp_get_schedule_global(void);
3166345153Sdimextern void __kmp_adjust_num_threads(int new_nproc);
3167353358Sdimextern void __kmp_check_stksize(size_t *val);
3168345153Sdim
// Runtime-internal heap allocators. The KMP_SRC_LOC_DECL / KMP_SRC_LOC_CURR
// macros presumably expand to caller file/line arguments in debug builds and
// to nothing otherwise — TODO confirm against their definitions in the
// runtime's OS layer header.
extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);
extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL);
// Wrappers that supply the caller's source location automatically; use these
// rather than the triple-underscore functions directly.
#define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR)
#define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR)
#define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR)
3175345153Sdim
3176345153Sdim#if USE_FAST_MEMORY
3177345153Sdimextern void *___kmp_fast_allocate(kmp_info_t *this_thr,
3178345153Sdim                                  size_t size KMP_SRC_LOC_DECL);
3179345153Sdimextern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL);
3180345153Sdimextern void __kmp_free_fast_memory(kmp_info_t *this_thr);
3181345153Sdimextern void __kmp_initialize_fast_memory(kmp_info_t *this_thr);
3182345153Sdim#define __kmp_fast_allocate(this_thr, size)                                    \
3183345153Sdim  ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR)
3184345153Sdim#define __kmp_fast_free(this_thr, ptr)                                         \
3185345153Sdim  ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR)
3186345153Sdim#endif
3187345153Sdim
3188345153Sdimextern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL);
3189345153Sdimextern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
3190345153Sdim                                  size_t elsize KMP_SRC_LOC_DECL);
3191345153Sdimextern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
3192345153Sdim                                   size_t size KMP_SRC_LOC_DECL);
3193345153Sdimextern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
3194345153Sdim#define __kmp_thread_malloc(th, size)                                          \
3195345153Sdim  ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR)
3196345153Sdim#define __kmp_thread_calloc(th, nelem, elsize)                                 \
3197345153Sdim  ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR)
3198345153Sdim#define __kmp_thread_realloc(th, ptr, size)                                    \
3199345153Sdim  ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR)
3200345153Sdim#define __kmp_thread_free(th, ptr)                                             \
3201345153Sdim  ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)
3202345153Sdim
// Thin aliases over the C library allocator, for runtime-internal
// allocations that bypass the source-location-tracking __kmp_allocate path.
#define KMP_INTERNAL_MALLOC(sz) malloc(sz)
#define KMP_INTERNAL_FREE(p) free(p)
#define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz))
#define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz))
3207345153Sdim
3208345153Sdimextern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);
3209345153Sdim
3210345153Sdimextern void __kmp_push_proc_bind(ident_t *loc, int gtid,
3211345153Sdim                                 kmp_proc_bind_t proc_bind);
3212345153Sdimextern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
3213345153Sdim                                 int num_threads);
3214345153Sdim
3215353358Sdimextern void __kmp_yield();
3216345153Sdim
3217345153Sdimextern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3218345153Sdim                                   enum sched_type schedule, kmp_int32 lb,
3219345153Sdim                                   kmp_int32 ub, kmp_int32 st, kmp_int32 chunk);
3220345153Sdimextern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3221345153Sdim                                    enum sched_type schedule, kmp_uint32 lb,
3222345153Sdim                                    kmp_uint32 ub, kmp_int32 st,
3223345153Sdim                                    kmp_int32 chunk);
3224345153Sdimextern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3225345153Sdim                                   enum sched_type schedule, kmp_int64 lb,
3226345153Sdim                                   kmp_int64 ub, kmp_int64 st, kmp_int64 chunk);
3227345153Sdimextern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3228345153Sdim                                    enum sched_type schedule, kmp_uint64 lb,
3229345153Sdim                                    kmp_uint64 ub, kmp_int64 st,
3230345153Sdim                                    kmp_int64 chunk);
3231345153Sdim
3232345153Sdimextern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
3233345153Sdim                                  kmp_int32 *p_last, kmp_int32 *p_lb,
3234345153Sdim                                  kmp_int32 *p_ub, kmp_int32 *p_st);
3235345153Sdimextern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,
3236345153Sdim                                   kmp_int32 *p_last, kmp_uint32 *p_lb,
3237345153Sdim                                   kmp_uint32 *p_ub, kmp_int32 *p_st);
3238345153Sdimextern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid,
3239345153Sdim                                  kmp_int32 *p_last, kmp_int64 *p_lb,
3240345153Sdim                                  kmp_int64 *p_ub, kmp_int64 *p_st);
3241345153Sdimextern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,
3242345153Sdim                                   kmp_int32 *p_last, kmp_uint64 *p_lb,
3243345153Sdim                                   kmp_uint64 *p_ub, kmp_int64 *p_st);
3244345153Sdim
3245345153Sdimextern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid);
3246345153Sdimextern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid);
3247345153Sdimextern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid);
3248345153Sdimextern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid);
3249345153Sdim
3250345153Sdim#ifdef KMP_GOMP_COMPAT
3251345153Sdim
3252345153Sdimextern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3253345153Sdim                                      enum sched_type schedule, kmp_int32 lb,
3254345153Sdim                                      kmp_int32 ub, kmp_int32 st,
3255345153Sdim                                      kmp_int32 chunk, int push_ws);
3256345153Sdimextern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3257345153Sdim                                       enum sched_type schedule, kmp_uint32 lb,
3258345153Sdim                                       kmp_uint32 ub, kmp_int32 st,
3259345153Sdim                                       kmp_int32 chunk, int push_ws);
3260345153Sdimextern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3261345153Sdim                                      enum sched_type schedule, kmp_int64 lb,
3262345153Sdim                                      kmp_int64 ub, kmp_int64 st,
3263345153Sdim                                      kmp_int64 chunk, int push_ws);
3264345153Sdimextern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3265345153Sdim                                       enum sched_type schedule, kmp_uint64 lb,
3266345153Sdim                                       kmp_uint64 ub, kmp_int64 st,
3267345153Sdim                                       kmp_int64 chunk, int push_ws);
3268345153Sdimextern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid);
3269345153Sdimextern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);
3270345153Sdimextern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);
3271345153Sdimextern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);
3272345153Sdim
3273345153Sdim#endif /* KMP_GOMP_COMPAT */
3274345153Sdim
3275345153Sdimextern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);
3276345153Sdimextern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
3277345153Sdimextern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
3278345153Sdimextern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
3279345153Sdimextern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
3280353358Sdimextern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
3281353358Sdim                               kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
3282353358Sdim                               void *obj);
3283353358Sdimextern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
3284353358Sdim                             kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
3285345153Sdim
3286345153Sdimclass kmp_flag_32;
3287345153Sdimclass kmp_flag_64;
3288345153Sdimclass kmp_flag_oncore;
3289345153Sdimextern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
3290345153Sdim                          int final_spin
3291345153Sdim#if USE_ITT_BUILD
3292345153Sdim                          ,
3293345153Sdim                          void *itt_sync_obj
3294345153Sdim#endif
3295345153Sdim                          );
3296345153Sdimextern void __kmp_release_64(kmp_flag_64 *flag);
3297345153Sdim
3298345153Sdimextern void __kmp_infinite_loop(void);
3299345153Sdim
3300345153Sdimextern void __kmp_cleanup(void);
3301345153Sdim
3302345153Sdim#if KMP_HANDLE_SIGNALS
3303345153Sdimextern int __kmp_handle_signals;
3304345153Sdimextern void __kmp_install_signals(int parallel_init);
3305345153Sdimextern void __kmp_remove_signals(void);
3306345153Sdim#endif
3307345153Sdim
3308345153Sdimextern void __kmp_clear_system_time(void);
3309345153Sdimextern void __kmp_read_system_time(double *delta);
3310345153Sdim
3311345153Sdimextern void __kmp_check_stack_overlap(kmp_info_t *thr);
3312345153Sdim
3313345153Sdimextern void __kmp_expand_host_name(char *buffer, size_t size);
3314345153Sdimextern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);
3315345153Sdim
3316345153Sdim#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3317345153Sdimextern void
3318345153Sdim__kmp_initialize_system_tick(void); /* Initialize timer tick value */
3319345153Sdim#endif
3320345153Sdim
3321345153Sdimextern void
3322345153Sdim__kmp_runtime_initialize(void); /* machine specific initialization */
3323345153Sdimextern void __kmp_runtime_destroy(void);
3324345153Sdim
3325345153Sdim#if KMP_AFFINITY_SUPPORTED
3326345153Sdimextern char *__kmp_affinity_print_mask(char *buf, int buf_len,
3327345153Sdim                                       kmp_affin_mask_t *mask);
3328345153Sdimextern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
3329345153Sdim                                                  kmp_affin_mask_t *mask);
3330345153Sdimextern void __kmp_affinity_initialize(void);
3331345153Sdimextern void __kmp_affinity_uninitialize(void);
3332345153Sdimextern void __kmp_affinity_set_init_mask(
3333345153Sdim    int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
3334345153Sdimextern void __kmp_affinity_set_place(int gtid);
3335345153Sdimextern void __kmp_affinity_determine_capable(const char *env_var);
3336345153Sdimextern int __kmp_aux_set_affinity(void **mask);
3337345153Sdimextern int __kmp_aux_get_affinity(void **mask);
3338345153Sdimextern int __kmp_aux_get_affinity_max_proc();
3339345153Sdimextern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
3340345153Sdimextern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
3341345153Sdimextern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
3342345153Sdimextern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
3343360784Sdim#if KMP_OS_LINUX || KMP_OS_FREEBSD
3344345153Sdimextern int kmp_set_thread_affinity_mask_initial(void);
3345345153Sdim#endif
3346345153Sdim#endif /* KMP_AFFINITY_SUPPORTED */
3347345153Sdim// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
3348345153Sdim// format string is for affinity, so platforms that do not support
3349345153Sdim// affinity can still use the other fields, e.g., %n for num_threads
3350345153Sdimextern size_t __kmp_aux_capture_affinity(int gtid, const char *format,
3351345153Sdim                                         kmp_str_buf_t *buffer);
3352345153Sdimextern void __kmp_aux_display_affinity(int gtid, const char *format);
3353345153Sdim
3354345153Sdimextern void __kmp_cleanup_hierarchy();
3355345153Sdimextern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
3356345153Sdim
3357345153Sdim#if KMP_USE_FUTEX
3358345153Sdim
3359345153Sdimextern int __kmp_futex_determine_capable(void);
3360345153Sdim
3361345153Sdim#endif // KMP_USE_FUTEX
3362345153Sdim
3363345153Sdimextern void __kmp_gtid_set_specific(int gtid);
3364345153Sdimextern int __kmp_gtid_get_specific(void);
3365345153Sdim
3366345153Sdimextern double __kmp_read_cpu_time(void);
3367345153Sdim
3368345153Sdimextern int __kmp_read_system_info(struct kmp_sys_info *info);
3369345153Sdim
3370345153Sdim#if KMP_USE_MONITOR
3371345153Sdimextern void __kmp_create_monitor(kmp_info_t *th);
3372345153Sdim#endif
3373345153Sdim
3374345153Sdimextern void *__kmp_launch_thread(kmp_info_t *thr);
3375345153Sdim
3376345153Sdimextern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size);
3377345153Sdim
3378345153Sdim#if KMP_OS_WINDOWS
3379345153Sdimextern int __kmp_still_running(kmp_info_t *th);
3380345153Sdimextern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val);
3381345153Sdimextern void __kmp_free_handle(kmp_thread_t tHandle);
3382345153Sdim#endif
3383345153Sdim
3384345153Sdim#if KMP_USE_MONITOR
3385345153Sdimextern void __kmp_reap_monitor(kmp_info_t *th);
3386345153Sdim#endif
3387345153Sdimextern void __kmp_reap_worker(kmp_info_t *th);
3388345153Sdimextern void __kmp_terminate_thread(int gtid);
3389345153Sdim
3390353358Sdimextern int __kmp_try_suspend_mx(kmp_info_t *th);
3391353358Sdimextern void __kmp_lock_suspend_mx(kmp_info_t *th);
3392353358Sdimextern void __kmp_unlock_suspend_mx(kmp_info_t *th);
3393353358Sdim
3394345153Sdimextern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag);
3395345153Sdimextern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag);
3396345153Sdimextern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
3397345153Sdimextern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag);
3398345153Sdimextern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag);
3399345153Sdimextern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
3400345153Sdim
3401345153Sdimextern void __kmp_elapsed(double *);
3402345153Sdimextern void __kmp_elapsed_tick(double *);
3403345153Sdim
3404345153Sdimextern void __kmp_enable(int old_state);
3405345153Sdimextern void __kmp_disable(int *old_state);
3406345153Sdim
3407345153Sdimextern void __kmp_thread_sleep(int millis);
3408345153Sdim
3409345153Sdimextern void __kmp_common_initialize(void);
3410345153Sdimextern void __kmp_common_destroy(void);
3411345153Sdimextern void __kmp_common_destroy_gtid(int gtid);
3412345153Sdim
3413345153Sdim#if KMP_OS_UNIX
3414345153Sdimextern void __kmp_register_atfork(void);
3415345153Sdim#endif
3416345153Sdimextern void __kmp_suspend_initialize(void);
3417353358Sdimextern void __kmp_suspend_initialize_thread(kmp_info_t *th);
3418345153Sdimextern void __kmp_suspend_uninitialize_thread(kmp_info_t *th);
3419345153Sdim
3420345153Sdimextern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
3421345153Sdim                                         int tid);
3422345153Sdimextern kmp_team_t *
3423345153Sdim__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
3424345153Sdim#if OMPT_SUPPORT
3425345153Sdim                    ompt_data_t ompt_parallel_data,
3426345153Sdim#endif
3427345153Sdim                    kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,
3428345153Sdim                    int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
3429345153Sdimextern void __kmp_free_thread(kmp_info_t *);
3430345153Sdimextern void __kmp_free_team(kmp_root_t *,
3431345153Sdim                            kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *));
3432345153Sdimextern kmp_team_t *__kmp_reap_team(kmp_team_t *);
3433345153Sdim
3434345153Sdim/* ------------------------------------------------------------------------ */
3435345153Sdim
3436345153Sdimextern void __kmp_initialize_bget(kmp_info_t *th);
3437345153Sdimextern void __kmp_finalize_bget(kmp_info_t *th);
3438345153Sdim
3439345153SdimKMP_EXPORT void *kmpc_malloc(size_t size);
3440345153SdimKMP_EXPORT void *kmpc_aligned_malloc(size_t size, size_t alignment);
3441345153SdimKMP_EXPORT void *kmpc_calloc(size_t nelem, size_t elsize);
3442345153SdimKMP_EXPORT void *kmpc_realloc(void *ptr, size_t size);
3443345153SdimKMP_EXPORT void kmpc_free(void *ptr);
3444345153Sdim
3445345153Sdim/* declarations for internal use */
3446345153Sdim
3447345153Sdimextern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
3448345153Sdim                         size_t reduce_size, void *reduce_data,
3449345153Sdim                         void (*reduce)(void *, void *));
3450345153Sdimextern void __kmp_end_split_barrier(enum barrier_type bt, int gtid);
3451353358Sdimextern int __kmp_barrier_gomp_cancel(int gtid);
3452345153Sdim
/*!
 * Tell the fork call which compiler generated the fork call, and therefore how
 * to deal with the call.
 */
enum fork_context_e {
  fork_context_gnu, /**< Called from GNU generated code, so must not invoke the
                       microtask internally. */
  fork_context_intel, /**< Called from Intel generated code.  */
  fork_context_last /**< Sentinel: number of valid fork-context values. */
};
3463345153Sdimextern int __kmp_fork_call(ident_t *loc, int gtid,
3464345153Sdim                           enum fork_context_e fork_context, kmp_int32 argc,
3465345153Sdim                           microtask_t microtask, launch_t invoker,
3466365427Sdim                           kmp_va_list ap);
3467345153Sdim
3468345153Sdimextern void __kmp_join_call(ident_t *loc, int gtid
3469345153Sdim#if OMPT_SUPPORT
3470345153Sdim                            ,
3471345153Sdim                            enum fork_context_e fork_context
3472345153Sdim#endif
3473345153Sdim                            ,
3474353358Sdim                            int exit_teams = 0);
3475345153Sdim
3476345153Sdimextern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
3477345153Sdimextern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team);
3478345153Sdimextern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team);
3479345153Sdimextern int __kmp_invoke_task_func(int gtid);
3480345153Sdimextern void __kmp_run_before_invoked_task(int gtid, int tid,
3481345153Sdim                                          kmp_info_t *this_thr,
3482345153Sdim                                          kmp_team_t *team);
3483345153Sdimextern void __kmp_run_after_invoked_task(int gtid, int tid,
3484345153Sdim                                         kmp_info_t *this_thr,
3485345153Sdim                                         kmp_team_t *team);
3486345153Sdim
3487345153Sdim// should never have been exported
3488345153SdimKMP_EXPORT int __kmpc_invoke_task_func(int gtid);
3489345153Sdimextern int __kmp_invoke_teams_master(int gtid);
3490345153Sdimextern void __kmp_teams_master(int gtid);
3491345153Sdimextern int __kmp_aux_get_team_num();
3492345153Sdimextern int __kmp_aux_get_num_teams();
3493345153Sdimextern void __kmp_save_internal_controls(kmp_info_t *thread);
3494345153Sdimextern void __kmp_user_set_library(enum library_type arg);
3495345153Sdimextern void __kmp_aux_set_library(enum library_type arg);
3496345153Sdimextern void __kmp_aux_set_stacksize(size_t arg);
3497345153Sdimextern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid);
3498345153Sdimextern void __kmp_aux_set_defaults(char const *str, int len);
3499345153Sdim
3500345153Sdim/* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */
3501345153Sdimvoid kmpc_set_blocktime(int arg);
3502345153Sdimvoid ompc_set_nested(int flag);
3503345153Sdimvoid ompc_set_dynamic(int flag);
3504345153Sdimvoid ompc_set_num_threads(int arg);
3505345153Sdim
3506345153Sdimextern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr,
3507345153Sdim                                              kmp_team_t *team, int tid);
3508345153Sdimextern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr);
3509345153Sdimextern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
3510345153Sdim                                    kmp_tasking_flags_t *flags,
3511345153Sdim                                    size_t sizeof_kmp_task_t,
3512345153Sdim                                    size_t sizeof_shareds,
3513345153Sdim                                    kmp_routine_entry_t task_entry);
3514345153Sdimextern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
3515345153Sdim                                     kmp_team_t *team, int tid,
3516345153Sdim                                     int set_curr_task);
3517345153Sdimextern void __kmp_finish_implicit_task(kmp_info_t *this_thr);
3518345153Sdimextern void __kmp_free_implicit_task(kmp_info_t *this_thr);
3519353358Sdim
3520353358Sdimextern kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3521353358Sdim                                                       int gtid,
3522353358Sdim                                                       kmp_task_t *task);
3523353358Sdimextern void __kmp_fulfill_event(kmp_event_t *event);
3524353358Sdim
3525345153Sdimint __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
3526345153Sdim                           kmp_flag_32 *flag, int final_spin,
3527345153Sdim                           int *thread_finished,
3528345153Sdim#if USE_ITT_BUILD
3529345153Sdim                           void *itt_sync_obj,
3530345153Sdim#endif /* USE_ITT_BUILD */
3531345153Sdim                           kmp_int32 is_constrained);
3532345153Sdimint __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
3533345153Sdim                           kmp_flag_64 *flag, int final_spin,
3534345153Sdim                           int *thread_finished,
3535345153Sdim#if USE_ITT_BUILD
3536345153Sdim                           void *itt_sync_obj,
3537345153Sdim#endif /* USE_ITT_BUILD */
3538345153Sdim                           kmp_int32 is_constrained);
3539345153Sdimint __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
3540345153Sdim                               kmp_flag_oncore *flag, int final_spin,
3541345153Sdim                               int *thread_finished,
3542345153Sdim#if USE_ITT_BUILD
3543345153Sdim                               void *itt_sync_obj,
3544345153Sdim#endif /* USE_ITT_BUILD */
3545345153Sdim                               kmp_int32 is_constrained);
3546345153Sdim
3547345153Sdimextern void __kmp_free_task_team(kmp_info_t *thread,
3548345153Sdim                                 kmp_task_team_t *task_team);
3549345153Sdimextern void __kmp_reap_task_teams(void);
3550345153Sdimextern void __kmp_wait_to_unref_task_teams(void);
3551345153Sdimextern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
3552345153Sdim                                  int always);
3553345153Sdimextern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team);
3554345153Sdimextern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
3555345153Sdim#if USE_ITT_BUILD
3556345153Sdim                                 ,
3557345153Sdim                                 void *itt_sync_obj
3558345153Sdim#endif /* USE_ITT_BUILD */
3559345153Sdim                                 ,
3560345153Sdim                                 int wait = 1);
3561345153Sdimextern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
3562345153Sdim                                  int gtid);
3563345153Sdim
3564345153Sdimextern int __kmp_is_address_mapped(void *addr);
3565345153Sdimextern kmp_uint64 __kmp_hardware_timestamp(void);
3566345153Sdim
3567345153Sdim#if KMP_OS_UNIX
3568345153Sdimextern int __kmp_read_from_file(char const *path, char const *format, ...);
3569345153Sdim#endif
3570345153Sdim
3571345153Sdim/* ------------------------------------------------------------------------ */
3572345153Sdim//
3573345153Sdim// Assembly routines that have no compiler intrinsic replacement
3574345153Sdim//
3575345153Sdim
3576345153Sdimextern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc,
3577345153Sdim                                  void *argv[]
3578345153Sdim#if OMPT_SUPPORT
3579345153Sdim                                  ,
3580345153Sdim                                  void **exit_frame_ptr
3581345153Sdim#endif
3582345153Sdim                                  );
3583345153Sdim
3584345153Sdim/* ------------------------------------------------------------------------ */
3585345153Sdim
3586345153SdimKMP_EXPORT void __kmpc_begin(ident_t *, kmp_int32 flags);
3587345153SdimKMP_EXPORT void __kmpc_end(ident_t *);
3588345153Sdim
3589345153SdimKMP_EXPORT void __kmpc_threadprivate_register_vec(ident_t *, void *data,
3590345153Sdim                                                  kmpc_ctor_vec ctor,
3591345153Sdim                                                  kmpc_cctor_vec cctor,
3592345153Sdim                                                  kmpc_dtor_vec dtor,
3593345153Sdim                                                  size_t vector_length);
3594345153SdimKMP_EXPORT void __kmpc_threadprivate_register(ident_t *, void *data,
3595345153Sdim                                              kmpc_ctor ctor, kmpc_cctor cctor,
3596345153Sdim                                              kmpc_dtor dtor);
3597345153SdimKMP_EXPORT void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid,
3598345153Sdim                                      void *data, size_t size);
3599345153Sdim
3600345153SdimKMP_EXPORT kmp_int32 __kmpc_global_thread_num(ident_t *);
3601345153SdimKMP_EXPORT kmp_int32 __kmpc_global_num_threads(ident_t *);
3602345153SdimKMP_EXPORT kmp_int32 __kmpc_bound_thread_num(ident_t *);
3603345153SdimKMP_EXPORT kmp_int32 __kmpc_bound_num_threads(ident_t *);
3604345153Sdim
3605345153SdimKMP_EXPORT kmp_int32 __kmpc_ok_to_fork(ident_t *);
3606345153SdimKMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs,
3607345153Sdim                                 kmpc_micro microtask, ...);
3608345153Sdim
3609345153SdimKMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid);
3610345153SdimKMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid);
3611345153Sdim
3612345153SdimKMP_EXPORT void __kmpc_flush(ident_t *);
3613345153SdimKMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid);
3614345153SdimKMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
3615345153SdimKMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
3616345153SdimKMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid);
3617345153SdimKMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid);
3618345153SdimKMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid,
3619345153Sdim                                kmp_critical_name *);
3620345153SdimKMP_EXPORT void __kmpc_end_critical(ident_t *, kmp_int32 global_tid,
3621345153Sdim                                    kmp_critical_name *);
3622345153SdimKMP_EXPORT void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid,
3623345153Sdim                                          kmp_critical_name *, uint32_t hint);
3624345153Sdim
3625345153SdimKMP_EXPORT kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid);
3626345153SdimKMP_EXPORT void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid);
3627345153Sdim
3628345153SdimKMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
3629345153Sdim                                                  kmp_int32 global_tid);
3630345153Sdim
3631345153SdimKMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
3632345153SdimKMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
3633345153Sdim
3634345153SdimKMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
3635345153Sdim                                     kmp_int32 schedtype, kmp_int32 *plastiter,
3636345153Sdim                                     kmp_int *plower, kmp_int *pupper,
3637345153Sdim                                     kmp_int *pstride, kmp_int incr,
3638345153Sdim                                     kmp_int chunk);
3639345153Sdim
3640345153SdimKMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
3641345153Sdim
3642345153SdimKMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
3643345153Sdim                                   size_t cpy_size, void *cpy_data,
3644345153Sdim                                   void (*cpy_func)(void *, void *),
3645345153Sdim                                   kmp_int32 didit);
3646345153Sdim
3647345153Sdimextern void KMPC_SET_NUM_THREADS(int arg);
3648345153Sdimextern void KMPC_SET_DYNAMIC(int flag);
3649345153Sdimextern void KMPC_SET_NESTED(int flag);
3650345153Sdim
/* OMP 3.0 tasking interface routines */

// Schedule new_task for execution (may run immediately or be deferred).
KMP_EXPORT kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
                                     kmp_task_t *new_task);
// Allocate a task descriptor of sizeof_kmp_task_t bytes plus sizeof_shareds
// bytes for data shared with the encountering task.
KMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                             kmp_int32 flags,
                                             size_t sizeof_kmp_task_t,
                                             size_t sizeof_shareds,
                                             kmp_routine_entry_t task_entry);
// Target-task variant of task_alloc; device_id names the offload device.
KMP_EXPORT kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                                    kmp_int32 flags,
                                                    size_t sizeof_kmp_task_t,
                                                    size_t sizeof_shareds,
                                                    kmp_routine_entry_t task_entry,
                                                    kmp_int64 device_id);
// Begin/complete bracketing for undeferred ("if(0)") tasks that the
// encountering thread executes inline.
KMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                                          kmp_task_t *task);
KMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
                                             kmp_task_t *task);
KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *new_task);
// Wait for child tasks of the current task to complete.
KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid);

KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid,
                                          int end_part);

#if TASK_UNUSED
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task);
void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
                              kmp_task_t *task);
#endif // TASK_UNUSED
3681345153Sdim
/* ------------------------------------------------------------------------ */

// Taskgroup region begin/end.
KMP_EXPORT void __kmpc_taskgroup(ident_t *loc, int gtid);
KMP_EXPORT void __kmpc_end_taskgroup(ident_t *loc, int gtid);

// Schedule a task with dependences. The noalias list carries dependences the
// compiler determined do not alias each other (presumably to skip overlap
// checks — confirm against kmp_taskdeps.cpp).
KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps(
    ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
    kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
    kmp_depend_info_t *noalias_dep_list);
// Block until the listed dependences are satisfied (stand-alone depend wait).
KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
                                     kmp_int32 ndeps,
                                     kmp_depend_info_t *dep_list,
                                     kmp_int32 ndeps_noalias,
                                     kmp_depend_info_t *noalias_dep_list);
// Internal (non-KMP_EXPORT) task-scheduling helper.
extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                                bool serialize_immediate);

// Cancellation: request, polling point, cancellable barrier, and status query.
KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid,
                                   kmp_int32 cncl_kind);
KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
                                              kmp_int32 cncl_kind);
KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid);
KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);

// Proxy-task completion notification (ooo = from outside the team).
KMP_EXPORT void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask);
KMP_EXPORT void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask);
// Taskloop: split iterations [lb, ub] (stride st) into tasks; task_dup
// duplicates the task for each chunk.
KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
                                kmp_int32 if_val, kmp_uint64 *lb,
                                kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
                                kmp_int32 sched, kmp_uint64 grainsize,
                                void *task_dup);
// Task reduction support (taskgroup-scoped and worksharing-scoped variants;
// the *_taskred_* names are the newer spellings).
KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
KMP_EXPORT void *__kmpc_taskred_init(int gtid, int num_data, void *data);
KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
KMP_EXPORT void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid,
                                                     int is_ws, int num,
                                                     void *data);
KMP_EXPORT void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws,
                                              int num, void *data);
KMP_EXPORT void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
                                                    int is_ws);
// Register task affinity hints for new_task.
KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity(
    ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
    kmp_task_affinity_info_t *affin_list);
/* Lock interface routines (fast versions with gtid passed in).
   user_lock is the address of the user's lock variable; the runtime may
   store a pointer to its own lock object through it. */
KMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid,
                                 void **user_lock);
KMP_EXPORT void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid,
                                      void **user_lock);
KMP_EXPORT void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid,
                                    void **user_lock);
KMP_EXPORT void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid,
                                         void **user_lock);
KMP_EXPORT void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
KMP_EXPORT void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock);
KMP_EXPORT void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid,
                                  void **user_lock);
KMP_EXPORT void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid,
                                       void **user_lock);
// Non-blocking acquire attempts; return nonzero on success.
KMP_EXPORT int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
KMP_EXPORT int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock);

// Hinted variants (omp_init_lock_with_hint): hint suggests a lock
// implementation, e.g. speculative vs. queuing.
KMP_EXPORT void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                           void **user_lock, uintptr_t hint);
KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                                void **user_lock,
                                                uintptr_t hint);
3752345153Sdim
/* Interface to fast scalable reduce methods routines */

// Reduction entry/exit pairs: the *_nowait forms omit the trailing barrier.
// reduce_func combines rhs_data into lhs_data; lck serializes the fallback
// critical-section method.
KMP_EXPORT kmp_int32 __kmpc_reduce_nowait(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck);
KMP_EXPORT void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                                         kmp_critical_name *lck);
KMP_EXPORT kmp_int32 __kmpc_reduce(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck);
KMP_EXPORT void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                                  kmp_critical_name *lck);

/* Internal fast reduction routines */

// Pick a reduction method (critical, atomic, tree, ...) for the given
// reduction; result is a packed method descriptor.
extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck);

// this function is for testing set/get/determine reduce method
KMP_EXPORT kmp_int32 __kmp_get_reduce_method(void);

// Task identity queries (note: unprototyped () — callers pass no arguments).
KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
3780345153Sdim
// C++ port
// missing 'extern "C"' declarations

KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc);
KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid);
// Set the thread count for the next parallel region only.
KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                                        kmp_int32 num_threads);

KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                                      int proc_bind);
// Teams construct: set team/thread counts, then fork with the outlined body.
KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                                      kmp_int32 num_teams,
                                      kmp_int32 num_threads);
KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
                                  kmpc_micro microtask, ...);
// Per-dimension loop bounds info, cast to kmp_int64. Consumed by
// __kmpc_doacross_init below (one entry per doacross loop dimension).
struct kmp_dim {
  kmp_int64 lo; // lower
  kmp_int64 up; // upper
  kmp_int64 st; // stride
};
// Doacross (ordered depend) loop support: init with per-dimension bounds,
// wait on / post an iteration vector, then fini.
KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
                                     kmp_int32 num_dims,
                                     const struct kmp_dim *dims);
KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid,
                                     const kmp_int64 *vec);
KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid,
                                     const kmp_int64 *vec);
KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);

// Threadprivate with per-translation-unit cache: returns this thread's copy
// of data (size bytes), creating/caching it via *cache as needed.
KMP_EXPORT void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid,
                                             void *data, size_t size,
                                             void ***cache);
3813345153Sdim
// Symbols for MS mutual detection: linking two OpenMP runtimes defines these
// twice, turning a silent double-link into a link error.
extern int _You_must_link_with_exactly_one_OpenMP_library;
extern int _You_must_link_with_Intel_OpenMP_library;
#if KMP_OS_WINDOWS && (KMP_VERSION_MAJOR > 4)
extern int _You_must_link_with_Microsoft_OpenMP_library;
#endif

// The routines below are not exported.
// Consider making them 'static' in corresponding source files.
void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size);
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size);
void __kmp_threadprivate_resize_cache(int newCapacity);
void __kmp_cleanup_threadprivate_caches();

// ompc_, kmpc_ entries moved from omp.h.
// KMPC_CONVENTION matches omp.h's calling convention on each platform.
#if KMP_OS_WINDOWS
#define KMPC_CONVENTION __cdecl
#else
#define KMPC_CONVENTION
#endif
3837345153Sdim
// Mirror omp.h's public types when omp.h has not been included; values must
// stay in sync with the omp_sched_t definition there.
#ifndef __OMP_H
typedef enum omp_sched_t {
  omp_sched_static = 1,
  omp_sched_dynamic = 2,
  omp_sched_guided = 3,
  omp_sched_auto = 4
} omp_sched_t;
typedef void *kmp_affinity_mask_t;
#endif
3847345153Sdim
// Compiler-visible counterparts of omp_* / kmp_* user entry points
// (declared here rather than in omp.h).
KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);
KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);
KMP_EXPORT int KMPC_CONVENTION
kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
KMP_EXPORT int KMPC_CONVENTION
kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
KMP_EXPORT int KMPC_CONVENTION
kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);

KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int);
3864345153Sdim
// Target-offload policy, per the OMP_TARGET_OFFLOAD environment variable
// (DISABLED / DEFAULT / MANDATORY).
enum kmp_target_offload_kind {
  tgt_disabled = 0,
  tgt_default = 1,
  tgt_mandatory = 2
};
typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;
// Set via OMP_TARGET_OFFLOAD if specified, defaults to tgt_default otherwise
extern kmp_target_offload_kind_t __kmp_target_offload;
extern int __kmpc_get_target_offload();

// Constants used in libomptarget
#define KMP_DEVICE_DEFAULT -1 // This is libomptarget's default device.
#define KMP_HOST_DEVICE -10 // This is what it is in libomptarget, go figure.
#define KMP_DEVICE_ALL -11 // This is libomptarget's "all devices".
3879345153Sdim
// OMP Pause Resource

// The following enum is used both to set the status in __kmp_pause_status, and
// as the internal equivalent of the externally-visible omp_pause_resource_t.
typedef enum kmp_pause_status_t {
  kmp_not_paused = 0, // status is not paused, or, requesting resume
  kmp_soft_paused = 1, // status is soft-paused, or, requesting soft pause
  kmp_hard_paused = 2 // status is hard-paused, or, requesting hard pause
} kmp_pause_status_t;

// This stores the pause state of the runtime
extern kmp_pause_status_t __kmp_pause_status;
// __kmpc_pause_resource is the compiler-facing entry; __kmp_pause_resource
// is the internal implementation. Both return nonzero on failure.
extern int __kmpc_pause_resource(kmp_pause_status_t level);
extern int __kmp_pause_resource(kmp_pause_status_t level);
// Soft resume sets __kmp_pause_status, and wakes up all threads.
extern void __kmp_resume_if_soft_paused();
3899353358Sdimstatic inline void __kmp_resume_if_hard_paused() {
3900353358Sdim  if (__kmp_pause_status == kmp_hard_paused) {
3901353358Sdim    __kmp_pause_status = kmp_not_paused;
3902353358Sdim  }
3903353358Sdim}
3904353358Sdim
3905345153Sdim#ifdef __cplusplus
3906345153Sdim}
3907345153Sdim#endif
3908345153Sdim
3909345153Sdim#endif /* KMP_H */
3910