1345153Sdim/*! \file */ 2345153Sdim/* 3345153Sdim * kmp.h -- KPTS runtime header file. 4345153Sdim */ 5345153Sdim 6345153Sdim//===----------------------------------------------------------------------===// 7345153Sdim// 8353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 9353358Sdim// See https://llvm.org/LICENSE.txt for license information. 10353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 11345153Sdim// 12345153Sdim//===----------------------------------------------------------------------===// 13345153Sdim 14345153Sdim#ifndef KMP_H 15345153Sdim#define KMP_H 16345153Sdim 17345153Sdim#include "kmp_config.h" 18345153Sdim 19345153Sdim/* #define BUILD_PARALLEL_ORDERED 1 */ 20345153Sdim 21345153Sdim/* This fix replaces gettimeofday with clock_gettime for better scalability on 22345153Sdim the Altix. Requires user code to be linked with -lrt. */ 23345153Sdim//#define FIX_SGI_CLOCK 24345153Sdim 25345153Sdim/* Defines for OpenMP 3.0 tasking and auto scheduling */ 26345153Sdim 27345153Sdim#ifndef KMP_STATIC_STEAL_ENABLED 28345153Sdim#define KMP_STATIC_STEAL_ENABLED 1 29345153Sdim#endif 30345153Sdim 31345153Sdim#define TASK_CURRENT_NOT_QUEUED 0 32345153Sdim#define TASK_CURRENT_QUEUED 1 33345153Sdim 34345153Sdim#ifdef BUILD_TIED_TASK_STACK 35345153Sdim#define TASK_STACK_EMPTY 0 // entries when the stack is empty 36345153Sdim#define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK 37345153Sdim// Number of entries in each task stack array 38345153Sdim#define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS) 39345153Sdim// Mask for determining index into stack block 40345153Sdim#define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1) 41345153Sdim#endif // BUILD_TIED_TASK_STACK 42345153Sdim 43345153Sdim#define TASK_NOT_PUSHED 1 44345153Sdim#define TASK_SUCCESSFULLY_PUSHED 0 45345153Sdim#define TASK_TIED 1 46345153Sdim#define TASK_UNTIED 0 47345153Sdim#define TASK_EXPLICIT 1 48345153Sdim#define 
TASK_IMPLICIT 0 49345153Sdim#define TASK_PROXY 1 50345153Sdim#define TASK_FULL 0 51353358Sdim#define TASK_DETACHABLE 1 52353358Sdim#define TASK_UNDETACHABLE 0 53345153Sdim 54345153Sdim#define KMP_CANCEL_THREADS 55345153Sdim#define KMP_THREAD_ATTR 56345153Sdim 57345153Sdim// Android does not have pthread_cancel. Undefine KMP_CANCEL_THREADS if being 58345153Sdim// built on Android 59345153Sdim#if defined(__ANDROID__) 60345153Sdim#undef KMP_CANCEL_THREADS 61345153Sdim#endif 62345153Sdim 63345153Sdim#include <signal.h> 64345153Sdim#include <stdarg.h> 65345153Sdim#include <stddef.h> 66345153Sdim#include <stdio.h> 67345153Sdim#include <stdlib.h> 68345153Sdim#include <string.h> 69345153Sdim/* include <ctype.h> don't use; problems with /MD on Windows* OS NT due to bad 70345153Sdim Microsoft library. Some macros provided below to replace these functions */ 71345153Sdim#ifndef __ABSOFT_WIN 72345153Sdim#include <sys/types.h> 73345153Sdim#endif 74345153Sdim#include <limits.h> 75345153Sdim#include <time.h> 76345153Sdim 77345153Sdim#include <errno.h> 78345153Sdim 79345153Sdim#include "kmp_os.h" 80345153Sdim 81345153Sdim#include "kmp_safe_c_api.h" 82345153Sdim 83345153Sdim#if KMP_STATS_ENABLED 84345153Sdimclass kmp_stats_list; 85345153Sdim#endif 86345153Sdim 87345153Sdim#if KMP_USE_HIER_SCHED 88345153Sdim// Only include hierarchical scheduling if affinity is supported 89345153Sdim#undef KMP_USE_HIER_SCHED 90345153Sdim#define KMP_USE_HIER_SCHED KMP_AFFINITY_SUPPORTED 91345153Sdim#endif 92345153Sdim 93345153Sdim#if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED 94345153Sdim#include "hwloc.h" 95345153Sdim#ifndef HWLOC_OBJ_NUMANODE 96345153Sdim#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE 97345153Sdim#endif 98345153Sdim#ifndef HWLOC_OBJ_PACKAGE 99345153Sdim#define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET 100345153Sdim#endif 101353358Sdim#if HWLOC_API_VERSION >= 0x00020000 102353358Sdim// hwloc 2.0 changed type of depth of object from unsigned to int 103353358Sdimtypedef int kmp_hwloc_depth_t; 
104353358Sdim#else 105353358Sdimtypedef unsigned int kmp_hwloc_depth_t; 106345153Sdim#endif 107353358Sdim#endif 108345153Sdim 109345153Sdim#if KMP_ARCH_X86 || KMP_ARCH_X86_64 110345153Sdim#include <xmmintrin.h> 111345153Sdim#endif 112345153Sdim 113345153Sdim#include "kmp_debug.h" 114345153Sdim#include "kmp_lock.h" 115345153Sdim#include "kmp_version.h" 116345153Sdim#if USE_DEBUGGER 117345153Sdim#include "kmp_debugger.h" 118345153Sdim#endif 119345153Sdim#include "kmp_i18n.h" 120345153Sdim 121345153Sdim#define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS) 122345153Sdim 123345153Sdim#include "kmp_wrapper_malloc.h" 124345153Sdim#if KMP_OS_UNIX 125345153Sdim#include <unistd.h> 126345153Sdim#if !defined NSIG && defined _NSIG 127345153Sdim#define NSIG _NSIG 128345153Sdim#endif 129345153Sdim#endif 130345153Sdim 131345153Sdim#if KMP_OS_LINUX 132345153Sdim#pragma weak clock_gettime 133345153Sdim#endif 134345153Sdim 135345153Sdim#if OMPT_SUPPORT 136345153Sdim#include "ompt-internal.h" 137345153Sdim#endif 138345153Sdim 139345153Sdim// Affinity format function 140345153Sdim#include "kmp_str.h" 141345153Sdim 142345153Sdim// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64. 143345153Sdim// 3 - fast allocation using sync, non-sync free lists of any size, non-self 144345153Sdim// free lists of limited size. 145345153Sdim#ifndef USE_FAST_MEMORY 146345153Sdim#define USE_FAST_MEMORY 3 147345153Sdim#endif 148345153Sdim 149345153Sdim#ifndef KMP_NESTED_HOT_TEAMS 150345153Sdim#define KMP_NESTED_HOT_TEAMS 0 151345153Sdim#define USE_NESTED_HOT_ARG(x) 152345153Sdim#else 153345153Sdim#if KMP_NESTED_HOT_TEAMS 154345153Sdim#define USE_NESTED_HOT_ARG(x) , x 155345153Sdim#else 156345153Sdim#define USE_NESTED_HOT_ARG(x) 157345153Sdim#endif 158345153Sdim#endif 159345153Sdim 160345153Sdim// Assume using BGET compare_exchange instruction instead of lock by default. 
161345153Sdim#ifndef USE_CMP_XCHG_FOR_BGET 162345153Sdim#define USE_CMP_XCHG_FOR_BGET 1 163345153Sdim#endif 164345153Sdim 165345153Sdim// Test to see if queuing lock is better than bootstrap lock for bget 166345153Sdim// #ifndef USE_QUEUING_LOCK_FOR_BGET 167345153Sdim// #define USE_QUEUING_LOCK_FOR_BGET 168345153Sdim// #endif 169345153Sdim 170345153Sdim#define KMP_NSEC_PER_SEC 1000000000L 171345153Sdim#define KMP_USEC_PER_SEC 1000000L 172345153Sdim 173345153Sdim/*! 174345153Sdim@ingroup BASIC_TYPES 175345153Sdim@{ 176345153Sdim*/ 177345153Sdim 178345153Sdim/*! 179345153SdimValues for bit flags used in the ident_t to describe the fields. 180345153Sdim*/ 181345153Sdimenum { 182345153Sdim /*! Use trampoline for internal microtasks */ 183345153Sdim KMP_IDENT_IMB = 0x01, 184345153Sdim /*! Use c-style ident structure */ 185345153Sdim KMP_IDENT_KMPC = 0x02, 186345153Sdim /* 0x04 is no longer used */ 187345153Sdim /*! Entry point generated by auto-parallelization */ 188345153Sdim KMP_IDENT_AUTOPAR = 0x08, 189345153Sdim /*! Compiler generates atomic reduction option for kmpc_reduce* */ 190345153Sdim KMP_IDENT_ATOMIC_REDUCE = 0x10, 191345153Sdim /*! To mark a 'barrier' directive in user code */ 192345153Sdim KMP_IDENT_BARRIER_EXPL = 0x20, 193345153Sdim /*! To Mark implicit barriers. */ 194345153Sdim KMP_IDENT_BARRIER_IMPL = 0x0040, 195345153Sdim KMP_IDENT_BARRIER_IMPL_MASK = 0x01C0, 196345153Sdim KMP_IDENT_BARRIER_IMPL_FOR = 0x0040, 197345153Sdim KMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0, 198345153Sdim 199345153Sdim KMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140, 200345153Sdim KMP_IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0, 201345153Sdim 202345153Sdim /*! To mark a static loop in OMPT callbacks */ 203345153Sdim KMP_IDENT_WORK_LOOP = 0x200, 204345153Sdim /*! To mark a sections directive in OMPT callbacks */ 205345153Sdim KMP_IDENT_WORK_SECTIONS = 0x400, 206360784Sdim /*! To mark a distribute construct in OMPT callbacks */ 207345153Sdim KMP_IDENT_WORK_DISTRIBUTE = 0x800, 208345153Sdim /*! 
Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and 209345153Sdim not currently used. If one day we need more bits, then we can use 210345153Sdim an invalid combination of hints to mean that another, larger field 211345153Sdim should be used in a different flag. */ 212345153Sdim KMP_IDENT_ATOMIC_HINT_MASK = 0xFF0000, 213345153Sdim KMP_IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000, 214345153Sdim KMP_IDENT_ATOMIC_HINT_CONTENDED = 0x020000, 215345153Sdim KMP_IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000, 216345153Sdim KMP_IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000, 217345153Sdim}; 218345153Sdim 219345153Sdim/*! 220345153Sdim * The ident structure that describes a source location. 221345153Sdim */ 222345153Sdimtypedef struct ident { 223345153Sdim kmp_int32 reserved_1; /**< might be used in Fortran; see above */ 224345153Sdim kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC 225345153Sdim identifies this union member */ 226345153Sdim kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */ 227345153Sdim#if USE_ITT_BUILD 228345153Sdim/* but currently used for storing region-specific ITT */ 229345153Sdim/* contextual information. */ 230345153Sdim#endif /* USE_ITT_BUILD */ 231345153Sdim kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */ 232345153Sdim char const *psource; /**< String describing the source location. 233345153Sdim The string is composed of semi-colon separated fields 234345153Sdim which describe the source file, the function and a pair 235345153Sdim of line numbers that delimit the construct. */ 236345153Sdim} ident_t; 237345153Sdim/*! 238345153Sdim@} 239345153Sdim*/ 240345153Sdim 241345153Sdim// Some forward declarations. 
242345153Sdimtypedef union kmp_team kmp_team_t; 243345153Sdimtypedef struct kmp_taskdata kmp_taskdata_t; 244345153Sdimtypedef union kmp_task_team kmp_task_team_t; 245345153Sdimtypedef union kmp_team kmp_team_p; 246345153Sdimtypedef union kmp_info kmp_info_p; 247345153Sdimtypedef union kmp_root kmp_root_p; 248345153Sdim 249345153Sdim#ifdef __cplusplus 250345153Sdimextern "C" { 251345153Sdim#endif 252345153Sdim 253345153Sdim/* ------------------------------------------------------------------------ */ 254345153Sdim 255345153Sdim/* Pack two 32-bit signed integers into a 64-bit signed integer */ 256345153Sdim/* ToDo: Fix word ordering for big-endian machines. */ 257345153Sdim#define KMP_PACK_64(HIGH_32, LOW_32) \ 258345153Sdim ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32))) 259345153Sdim 260345153Sdim// Generic string manipulation macros. Assume that _x is of type char * 261345153Sdim#define SKIP_WS(_x) \ 262345153Sdim { \ 263345153Sdim while (*(_x) == ' ' || *(_x) == '\t') \ 264345153Sdim (_x)++; \ 265345153Sdim } 266345153Sdim#define SKIP_DIGITS(_x) \ 267345153Sdim { \ 268345153Sdim while (*(_x) >= '0' && *(_x) <= '9') \ 269345153Sdim (_x)++; \ 270345153Sdim } 271345153Sdim#define SKIP_TOKEN(_x) \ 272345153Sdim { \ 273345153Sdim while ((*(_x) >= '0' && *(_x) <= '9') || (*(_x) >= 'a' && *(_x) <= 'z') || \ 274345153Sdim (*(_x) >= 'A' && *(_x) <= 'Z') || *(_x) == '_') \ 275345153Sdim (_x)++; \ 276345153Sdim } 277345153Sdim#define SKIP_TO(_x, _c) \ 278345153Sdim { \ 279345153Sdim while (*(_x) != '\0' && *(_x) != (_c)) \ 280345153Sdim (_x)++; \ 281345153Sdim } 282345153Sdim 283345153Sdim/* ------------------------------------------------------------------------ */ 284345153Sdim 285345153Sdim#define KMP_MAX(x, y) ((x) > (y) ? (x) : (y)) 286345153Sdim#define KMP_MIN(x, y) ((x) < (y) ? 
(x) : (y)) 287345153Sdim 288345153Sdim/* ------------------------------------------------------------------------ */ 289345153Sdim/* Enumeration types */ 290345153Sdim 291345153Sdimenum kmp_state_timer { 292345153Sdim ts_stop, 293345153Sdim ts_start, 294345153Sdim ts_pause, 295345153Sdim 296345153Sdim ts_last_state 297345153Sdim}; 298345153Sdim 299345153Sdimenum dynamic_mode { 300345153Sdim dynamic_default, 301345153Sdim#ifdef USE_LOAD_BALANCE 302345153Sdim dynamic_load_balance, 303345153Sdim#endif /* USE_LOAD_BALANCE */ 304345153Sdim dynamic_random, 305345153Sdim dynamic_thread_limit, 306345153Sdim dynamic_max 307345153Sdim}; 308345153Sdim 309345153Sdim/* external schedule constants, duplicate enum omp_sched in omp.h in order to 310345153Sdim * not include it here */ 311345153Sdim#ifndef KMP_SCHED_TYPE_DEFINED 312345153Sdim#define KMP_SCHED_TYPE_DEFINED 313345153Sdimtypedef enum kmp_sched { 314345153Sdim kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check 315345153Sdim // Note: need to adjust __kmp_sch_map global array in case enum is changed 316345153Sdim kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33) 317345153Sdim kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35) 318345153Sdim kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36) 319345153Sdim kmp_sched_auto = 4, // mapped to kmp_sch_auto (38) 320345153Sdim kmp_sched_upper_std = 5, // upper bound for standard schedules 321345153Sdim kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules 322345153Sdim kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39) 323345153Sdim#if KMP_STATIC_STEAL_ENABLED 324345153Sdim kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44) 325345153Sdim#endif 326345153Sdim kmp_sched_upper, 327353358Sdim kmp_sched_default = kmp_sched_static, // default scheduling 328353358Sdim kmp_sched_monotonic = 0x80000000 329345153Sdim} kmp_sched_t; 330345153Sdim#endif 331345153Sdim 
332345153Sdim/*! 333345153Sdim @ingroup WORK_SHARING 334345153Sdim * Describes the loop schedule to be used for a parallel for loop. 335345153Sdim */ 336345153Sdimenum sched_type : kmp_int32 { 337345153Sdim kmp_sch_lower = 32, /**< lower bound for unordered values */ 338345153Sdim kmp_sch_static_chunked = 33, 339345153Sdim kmp_sch_static = 34, /**< static unspecialized */ 340345153Sdim kmp_sch_dynamic_chunked = 35, 341345153Sdim kmp_sch_guided_chunked = 36, /**< guided unspecialized */ 342345153Sdim kmp_sch_runtime = 37, 343345153Sdim kmp_sch_auto = 38, /**< auto */ 344345153Sdim kmp_sch_trapezoidal = 39, 345345153Sdim 346345153Sdim /* accessible only through KMP_SCHEDULE environment variable */ 347345153Sdim kmp_sch_static_greedy = 40, 348345153Sdim kmp_sch_static_balanced = 41, 349345153Sdim /* accessible only through KMP_SCHEDULE environment variable */ 350345153Sdim kmp_sch_guided_iterative_chunked = 42, 351345153Sdim kmp_sch_guided_analytical_chunked = 43, 352345153Sdim /* accessible only through KMP_SCHEDULE environment variable */ 353345153Sdim kmp_sch_static_steal = 44, 354345153Sdim 355345153Sdim /* static with chunk adjustment (e.g., simd) */ 356345153Sdim kmp_sch_static_balanced_chunked = 45, 357345153Sdim kmp_sch_guided_simd = 46, /**< guided with chunk adjustment */ 358345153Sdim kmp_sch_runtime_simd = 47, /**< runtime with chunk adjustment */ 359345153Sdim 360345153Sdim /* accessible only through KMP_SCHEDULE environment variable */ 361345153Sdim kmp_sch_upper, /**< upper bound for unordered values */ 362345153Sdim 363345153Sdim kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */ 364345153Sdim kmp_ord_static_chunked = 65, 365345153Sdim kmp_ord_static = 66, /**< ordered static unspecialized */ 366345153Sdim kmp_ord_dynamic_chunked = 67, 367345153Sdim kmp_ord_guided_chunked = 68, 368345153Sdim kmp_ord_runtime = 69, 369345153Sdim kmp_ord_auto = 70, /**< ordered auto */ 370345153Sdim kmp_ord_trapezoidal = 71, 371345153Sdim 
kmp_ord_upper, /**< upper bound for ordered values */ 372345153Sdim 373345153Sdim /* Schedules for Distribute construct */ 374345153Sdim kmp_distribute_static_chunked = 91, /**< distribute static chunked */ 375345153Sdim kmp_distribute_static = 92, /**< distribute static unspecialized */ 376345153Sdim 377345153Sdim /* For the "nomerge" versions, kmp_dispatch_next*() will always return a 378345153Sdim single iteration/chunk, even if the loop is serialized. For the schedule 379345153Sdim types listed above, the entire iteration vector is returned if the loop is 380345153Sdim serialized. This doesn't work for gcc/gcomp sections. */ 381345153Sdim kmp_nm_lower = 160, /**< lower bound for nomerge values */ 382345153Sdim 383345153Sdim kmp_nm_static_chunked = 384345153Sdim (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower), 385345153Sdim kmp_nm_static = 162, /**< static unspecialized */ 386345153Sdim kmp_nm_dynamic_chunked = 163, 387345153Sdim kmp_nm_guided_chunked = 164, /**< guided unspecialized */ 388345153Sdim kmp_nm_runtime = 165, 389345153Sdim kmp_nm_auto = 166, /**< auto */ 390345153Sdim kmp_nm_trapezoidal = 167, 391345153Sdim 392345153Sdim /* accessible only through KMP_SCHEDULE environment variable */ 393345153Sdim kmp_nm_static_greedy = 168, 394345153Sdim kmp_nm_static_balanced = 169, 395345153Sdim /* accessible only through KMP_SCHEDULE environment variable */ 396345153Sdim kmp_nm_guided_iterative_chunked = 170, 397345153Sdim kmp_nm_guided_analytical_chunked = 171, 398345153Sdim kmp_nm_static_steal = 399345153Sdim 172, /* accessible only through OMP_SCHEDULE environment variable */ 400345153Sdim 401345153Sdim kmp_nm_ord_static_chunked = 193, 402345153Sdim kmp_nm_ord_static = 194, /**< ordered static unspecialized */ 403345153Sdim kmp_nm_ord_dynamic_chunked = 195, 404345153Sdim kmp_nm_ord_guided_chunked = 196, 405345153Sdim kmp_nm_ord_runtime = 197, 406345153Sdim kmp_nm_ord_auto = 198, /**< auto */ 407345153Sdim kmp_nm_ord_trapezoidal = 199, 408345153Sdim 
kmp_nm_upper, /**< upper bound for nomerge values */ 409345153Sdim 410345153Sdim /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. Since 411345153Sdim we need to distinguish the three possible cases (no modifier, monotonic 412345153Sdim modifier, nonmonotonic modifier), we need separate bits for each modifier. 413345153Sdim The absence of monotonic does not imply nonmonotonic, especially since 4.5 414345153Sdim says that the behaviour of the "no modifier" case is implementation defined 415345153Sdim in 4.5, but will become "nonmonotonic" in 5.0. 416345153Sdim 417345153Sdim Since we're passing a full 32 bit value, we can use a couple of high bits 418345153Sdim for these flags; out of paranoia we avoid the sign bit. 419345153Sdim 420345153Sdim These modifiers can be or-ed into non-static schedules by the compiler to 421345153Sdim pass the additional information. They will be stripped early in the 422345153Sdim processing in __kmp_dispatch_init when setting up schedules, so most of the 423345153Sdim code won't ever see schedules with these bits set. 
*/ 424345153Sdim kmp_sch_modifier_monotonic = 425345153Sdim (1 << 29), /**< Set if the monotonic schedule modifier was present */ 426345153Sdim kmp_sch_modifier_nonmonotonic = 427345153Sdim (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */ 428345153Sdim 429345153Sdim#define SCHEDULE_WITHOUT_MODIFIERS(s) \ 430345153Sdim (enum sched_type)( \ 431345153Sdim (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) 432345153Sdim#define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0) 433345153Sdim#define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0) 434345153Sdim#define SCHEDULE_HAS_NO_MODIFIERS(s) \ 435345153Sdim (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0) 436353358Sdim#define SCHEDULE_GET_MODIFIERS(s) \ 437353358Sdim ((enum sched_type)( \ 438353358Sdim (s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))) 439353358Sdim#define SCHEDULE_SET_MODIFIERS(s, m) \ 440353358Sdim (s = (enum sched_type)((kmp_int32)s | (kmp_int32)m)) 441353358Sdim#define SCHEDULE_NONMONOTONIC 0 442353358Sdim#define SCHEDULE_MONOTONIC 1 443345153Sdim 444345153Sdim kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */ 445345153Sdim}; 446345153Sdim 447353358Sdim// Apply modifiers on internal kind to standard kind 448353358Sdimstatic inline void 449353358Sdim__kmp_sched_apply_mods_stdkind(kmp_sched_t *kind, 450353358Sdim enum sched_type internal_kind) { 451353358Sdim if (SCHEDULE_HAS_MONOTONIC(internal_kind)) { 452353358Sdim *kind = (kmp_sched_t)((int)*kind | (int)kmp_sched_monotonic); 453353358Sdim } 454353358Sdim} 455353358Sdim 456353358Sdim// Apply modifiers on standard kind to internal kind 457353358Sdimstatic inline void 458353358Sdim__kmp_sched_apply_mods_intkind(kmp_sched_t kind, 459353358Sdim enum sched_type *internal_kind) { 460353358Sdim if ((int)kind & (int)kmp_sched_monotonic) { 461353358Sdim *internal_kind = (enum sched_type)((int)*internal_kind | 
462353358Sdim (int)kmp_sch_modifier_monotonic); 463353358Sdim } 464353358Sdim} 465353358Sdim 466353358Sdim// Get standard schedule without modifiers 467353358Sdimstatic inline kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind) { 468353358Sdim return (kmp_sched_t)((int)kind & ~((int)kmp_sched_monotonic)); 469353358Sdim} 470353358Sdim 471345153Sdim/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */ 472345153Sdimtypedef union kmp_r_sched { 473345153Sdim struct { 474345153Sdim enum sched_type r_sched_type; 475345153Sdim int chunk; 476345153Sdim }; 477345153Sdim kmp_int64 sched; 478345153Sdim} kmp_r_sched_t; 479345153Sdim 480345153Sdimextern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our 481345153Sdim// internal schedule types 482345153Sdim 483345153Sdimenum library_type { 484345153Sdim library_none, 485345153Sdim library_serial, 486345153Sdim library_turnaround, 487345153Sdim library_throughput 488345153Sdim}; 489345153Sdim 490345153Sdim#if KMP_OS_LINUX 491345153Sdimenum clock_function_type { 492345153Sdim clock_function_gettimeofday, 493345153Sdim clock_function_clock_gettime 494345153Sdim}; 495345153Sdim#endif /* KMP_OS_LINUX */ 496345153Sdim 497345153Sdim#if KMP_MIC_SUPPORTED 498345153Sdimenum mic_type { non_mic, mic1, mic2, mic3, dummy }; 499345153Sdim#endif 500345153Sdim 501345153Sdim/* -- fast reduction stuff ------------------------------------------------ */ 502345153Sdim 503345153Sdim#undef KMP_FAST_REDUCTION_BARRIER 504345153Sdim#define KMP_FAST_REDUCTION_BARRIER 1 505345153Sdim 506345153Sdim#undef KMP_FAST_REDUCTION_CORE_DUO 507345153Sdim#if KMP_ARCH_X86 || KMP_ARCH_X86_64 508345153Sdim#define KMP_FAST_REDUCTION_CORE_DUO 1 509345153Sdim#endif 510345153Sdim 511345153Sdimenum _reduction_method { 512345153Sdim reduction_method_not_defined = 0, 513345153Sdim critical_reduce_block = (1 << 8), 514345153Sdim atomic_reduce_block = (2 << 8), 515345153Sdim tree_reduce_block = (3 << 8), 516345153Sdim 
empty_reduce_block = (4 << 8) 517345153Sdim}; 518345153Sdim 519345153Sdim// Description of the packed_reduction_method variable: 520345153Sdim// The packed_reduction_method variable consists of two enum types variables 521345153Sdim// that are packed together into 0-th byte and 1-st byte: 522345153Sdim// 0: (packed_reduction_method & 0x000000FF) is a 'enum barrier_type' value of 523345153Sdim// barrier that will be used in fast reduction: bs_plain_barrier or 524345153Sdim// bs_reduction_barrier 525345153Sdim// 1: (packed_reduction_method & 0x0000FF00) is a reduction method that will 526345153Sdim// be used in fast reduction; 527345153Sdim// Reduction method is of 'enum _reduction_method' type and it's defined the way 528345153Sdim// so that the bits of 0-th byte are empty, so no need to execute a shift 529345153Sdim// instruction while packing/unpacking 530345153Sdim 531345153Sdim#if KMP_FAST_REDUCTION_BARRIER 532345153Sdim#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \ 533345153Sdim ((reduction_method) | (barrier_type)) 534345153Sdim 535345153Sdim#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ 536345153Sdim ((enum _reduction_method)((packed_reduction_method) & (0x0000FF00))) 537345153Sdim 538345153Sdim#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \ 539345153Sdim ((enum barrier_type)((packed_reduction_method) & (0x000000FF))) 540345153Sdim#else 541345153Sdim#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \ 542345153Sdim (reduction_method) 543345153Sdim 544345153Sdim#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ 545345153Sdim (packed_reduction_method) 546345153Sdim 547345153Sdim#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) (bs_plain_barrier) 548345153Sdim#endif 549345153Sdim 550345153Sdim#define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block) \ 551345153Sdim ((UNPACK_REDUCTION_METHOD(packed_reduction_method)) == \ 552345153Sdim 
(which_reduction_block)) 553345153Sdim 554345153Sdim#if KMP_FAST_REDUCTION_BARRIER 555345153Sdim#define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \ 556345153Sdim (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier)) 557345153Sdim 558345153Sdim#define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \ 559345153Sdim (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier)) 560345153Sdim#endif 561345153Sdim 562345153Sdimtypedef int PACKED_REDUCTION_METHOD_T; 563345153Sdim 564345153Sdim/* -- end of fast reduction stuff ----------------------------------------- */ 565345153Sdim 566345153Sdim#if KMP_OS_WINDOWS 567345153Sdim#define USE_CBLKDATA 568345153Sdim#if KMP_MSVC_COMPAT 569345153Sdim#pragma warning(push) 570345153Sdim#pragma warning(disable : 271 310) 571345153Sdim#endif 572345153Sdim#include <windows.h> 573345153Sdim#if KMP_MSVC_COMPAT 574345153Sdim#pragma warning(pop) 575345153Sdim#endif 576345153Sdim#endif 577345153Sdim 578345153Sdim#if KMP_OS_UNIX 579345153Sdim#include <dlfcn.h> 580345153Sdim#include <pthread.h> 581345153Sdim#endif 582345153Sdim 583345153Sdim/* Only Linux* OS and Windows* OS support thread affinity. */ 584345153Sdim#if KMP_AFFINITY_SUPPORTED 585345153Sdim 586345153Sdim// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later). 
587345153Sdim#if KMP_OS_WINDOWS 588345153Sdim#if _MSC_VER < 1600 && KMP_MSVC_COMPAT 589345153Sdimtypedef struct GROUP_AFFINITY { 590345153Sdim KAFFINITY Mask; 591345153Sdim WORD Group; 592345153Sdim WORD Reserved[3]; 593345153Sdim} GROUP_AFFINITY; 594345153Sdim#endif /* _MSC_VER < 1600 */ 595345153Sdim#if KMP_GROUP_AFFINITY 596345153Sdimextern int __kmp_num_proc_groups; 597345153Sdim#else 598345153Sdimstatic const int __kmp_num_proc_groups = 1; 599345153Sdim#endif /* KMP_GROUP_AFFINITY */ 600345153Sdimtypedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD); 601345153Sdimextern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount; 602345153Sdim 603345153Sdimtypedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void); 604345153Sdimextern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount; 605345153Sdim 606345153Sdimtypedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *); 607345153Sdimextern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity; 608345153Sdim 609345153Sdimtypedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, 610345153Sdim GROUP_AFFINITY *); 611345153Sdimextern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity; 612345153Sdim#endif /* KMP_OS_WINDOWS */ 613345153Sdim 614345153Sdim#if KMP_USE_HWLOC 615345153Sdimextern hwloc_topology_t __kmp_hwloc_topology; 616345153Sdimextern int __kmp_hwloc_error; 617345153Sdimextern int __kmp_numa_detected; 618345153Sdimextern int __kmp_tile_depth; 619345153Sdim#endif 620345153Sdim 621345153Sdimextern size_t __kmp_affin_mask_size; 622345153Sdim#define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0) 623345153Sdim#define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0) 624345153Sdim#define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size) 625345153Sdim#define KMP_CPU_SET_ITERATE(i, mask) \ 626345153Sdim for (i = (mask)->begin(); (int)i != (mask)->end(); i = (mask)->next(i)) 627345153Sdim#define KMP_CPU_SET(i, mask) (mask)->set(i) 
628345153Sdim#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i) 629345153Sdim#define KMP_CPU_CLR(i, mask) (mask)->clear(i) 630345153Sdim#define KMP_CPU_ZERO(mask) (mask)->zero() 631345153Sdim#define KMP_CPU_COPY(dest, src) (dest)->copy(src) 632345153Sdim#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src) 633345153Sdim#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not() 634345153Sdim#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src) 635345153Sdim#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask()) 636345153Sdim#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr) 637345153Sdim#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr) 638345153Sdim#define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr) 639345153Sdim#define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr) 640345153Sdim#define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr) 641345153Sdim#define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i) 642345153Sdim#define KMP_CPU_ALLOC_ARRAY(arr, n) \ 643345153Sdim (arr = __kmp_affinity_dispatch->allocate_mask_array(n)) 644345153Sdim#define KMP_CPU_FREE_ARRAY(arr, n) \ 645345153Sdim __kmp_affinity_dispatch->deallocate_mask_array(arr) 646345153Sdim#define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n) 647345153Sdim#define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n) 648345153Sdim#define __kmp_get_system_affinity(mask, abort_bool) \ 649345153Sdim (mask)->get_system_affinity(abort_bool) 650345153Sdim#define __kmp_set_system_affinity(mask, abort_bool) \ 651345153Sdim (mask)->set_system_affinity(abort_bool) 652345153Sdim#define __kmp_get_proc_group(mask) (mask)->get_proc_group() 653345153Sdim 654345153Sdimclass KMPAffinity { 655345153Sdimpublic: 656345153Sdim class Mask { 657345153Sdim public: 658345153Sdim void *operator new(size_t n); 659345153Sdim void operator delete(void *p); 660345153Sdim void *operator new[](size_t n); 661345153Sdim void 
operator delete[](void *p); 662345153Sdim virtual ~Mask() {} 663345153Sdim // Set bit i to 1 664345153Sdim virtual void set(int i) {} 665345153Sdim // Return bit i 666345153Sdim virtual bool is_set(int i) const { return false; } 667345153Sdim // Set bit i to 0 668345153Sdim virtual void clear(int i) {} 669345153Sdim // Zero out entire mask 670345153Sdim virtual void zero() {} 671345153Sdim // Copy src into this mask 672345153Sdim virtual void copy(const Mask *src) {} 673345153Sdim // this &= rhs 674345153Sdim virtual void bitwise_and(const Mask *rhs) {} 675345153Sdim // this |= rhs 676345153Sdim virtual void bitwise_or(const Mask *rhs) {} 677345153Sdim // this = ~this 678345153Sdim virtual void bitwise_not() {} 679345153Sdim // API for iterating over an affinity mask 680345153Sdim // for (int i = mask->begin(); i != mask->end(); i = mask->next(i)) 681345153Sdim virtual int begin() const { return 0; } 682345153Sdim virtual int end() const { return 0; } 683345153Sdim virtual int next(int previous) const { return 0; } 684345153Sdim // Set the system's affinity to this affinity mask's value 685345153Sdim virtual int set_system_affinity(bool abort_on_error) const { return -1; } 686345153Sdim // Set this affinity mask to the current system affinity 687345153Sdim virtual int get_system_affinity(bool abort_on_error) { return -1; } 688345153Sdim // Only 1 DWORD in the mask should have any procs set. 689345153Sdim // Return the appropriate index, or -1 for an invalid mask. 
690345153Sdim virtual int get_proc_group() const { return -1; } 691345153Sdim }; 692345153Sdim void *operator new(size_t n); 693345153Sdim void operator delete(void *p); 694345153Sdim // Need virtual destructor 695345153Sdim virtual ~KMPAffinity() = default; 696345153Sdim // Determine if affinity is capable 697345153Sdim virtual void determine_capable(const char *env_var) {} 698345153Sdim // Bind the current thread to os proc 699345153Sdim virtual void bind_thread(int proc) {} 700345153Sdim // Factory functions to allocate/deallocate a mask 701345153Sdim virtual Mask *allocate_mask() { return nullptr; } 702345153Sdim virtual void deallocate_mask(Mask *m) {} 703345153Sdim virtual Mask *allocate_mask_array(int num) { return nullptr; } 704345153Sdim virtual void deallocate_mask_array(Mask *m) {} 705345153Sdim virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; } 706345153Sdim static void pick_api(); 707345153Sdim static void destroy_api(); 708345153Sdim enum api_type { 709345153Sdim NATIVE_OS 710345153Sdim#if KMP_USE_HWLOC 711345153Sdim , 712345153Sdim HWLOC 713345153Sdim#endif 714345153Sdim }; 715345153Sdim virtual api_type get_api_type() const { 716345153Sdim KMP_ASSERT(0); 717345153Sdim return NATIVE_OS; 718345153Sdim } 719345153Sdim 720345153Sdimprivate: 721345153Sdim static bool picked_api; 722345153Sdim}; 723345153Sdim 724345153Sdimtypedef KMPAffinity::Mask kmp_affin_mask_t; 725345153Sdimextern KMPAffinity *__kmp_affinity_dispatch; 726345153Sdim 727345153Sdim// Declare local char buffers with this size for printing debug and info 728345153Sdim// messages, using __kmp_affinity_print_mask(). 
729345153Sdim#define KMP_AFFIN_MASK_PRINT_LEN 1024 730345153Sdim 731345153Sdimenum affinity_type { 732345153Sdim affinity_none = 0, 733345153Sdim affinity_physical, 734345153Sdim affinity_logical, 735345153Sdim affinity_compact, 736345153Sdim affinity_scatter, 737345153Sdim affinity_explicit, 738345153Sdim affinity_balanced, 739345153Sdim affinity_disabled, // not used outsize the env var parser 740345153Sdim affinity_default 741345153Sdim}; 742345153Sdim 743345153Sdimenum affinity_gran { 744345153Sdim affinity_gran_fine = 0, 745345153Sdim affinity_gran_thread, 746345153Sdim affinity_gran_core, 747345153Sdim affinity_gran_tile, 748345153Sdim affinity_gran_numa, 749345153Sdim affinity_gran_package, 750345153Sdim affinity_gran_node, 751345153Sdim#if KMP_GROUP_AFFINITY 752345153Sdim // The "group" granularity isn't necesssarily coarser than all of the 753345153Sdim // other levels, but we put it last in the enum. 754345153Sdim affinity_gran_group, 755345153Sdim#endif /* KMP_GROUP_AFFINITY */ 756345153Sdim affinity_gran_default 757345153Sdim}; 758345153Sdim 759345153Sdimenum affinity_top_method { 760345153Sdim affinity_top_method_all = 0, // try all (supported) methods, in order 761345153Sdim#if KMP_ARCH_X86 || KMP_ARCH_X86_64 762345153Sdim affinity_top_method_apicid, 763345153Sdim affinity_top_method_x2apicid, 764345153Sdim#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 765345153Sdim affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too 766345153Sdim#if KMP_GROUP_AFFINITY 767345153Sdim affinity_top_method_group, 768345153Sdim#endif /* KMP_GROUP_AFFINITY */ 769345153Sdim affinity_top_method_flat, 770345153Sdim#if KMP_USE_HWLOC 771345153Sdim affinity_top_method_hwloc, 772345153Sdim#endif 773345153Sdim affinity_top_method_default 774345153Sdim}; 775345153Sdim 776345153Sdim#define affinity_respect_mask_default (-1) 777345153Sdim 778345153Sdimextern enum affinity_type __kmp_affinity_type; /* Affinity type */ 779345153Sdimextern enum affinity_gran 
__kmp_affinity_gran; /* Affinity granularity */ 780345153Sdimextern int __kmp_affinity_gran_levels; /* corresponding int value */ 781345153Sdimextern int __kmp_affinity_dups; /* Affinity duplicate masks */ 782345153Sdimextern enum affinity_top_method __kmp_affinity_top_method; 783345153Sdimextern int __kmp_affinity_compact; /* Affinity 'compact' value */ 784345153Sdimextern int __kmp_affinity_offset; /* Affinity offset value */ 785345153Sdimextern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */ 786345153Sdimextern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */ 787345153Sdimextern int __kmp_affinity_respect_mask; // Respect process' init affinity mask? 788345153Sdimextern char *__kmp_affinity_proclist; /* proc ID list */ 789345153Sdimextern kmp_affin_mask_t *__kmp_affinity_masks; 790345153Sdimextern unsigned __kmp_affinity_num_masks; 791345153Sdimextern void __kmp_affinity_bind_thread(int which); 792345153Sdim 793345153Sdimextern kmp_affin_mask_t *__kmp_affin_fullMask; 794345153Sdimextern char *__kmp_cpuinfo_file; 795345153Sdim 796345153Sdim#endif /* KMP_AFFINITY_SUPPORTED */ 797345153Sdim 798345153Sdim// This needs to be kept in sync with the values in omp.h !!! 
typedef enum kmp_proc_bind_t {
  proc_bind_false = 0,
  proc_bind_true,
  proc_bind_master,
  proc_bind_close,
  proc_bind_spread,
  proc_bind_intel, // use KMP_AFFINITY interface
  proc_bind_default
} kmp_proc_bind_t;

typedef struct kmp_nested_proc_bind_t {
  kmp_proc_bind_t *bind_types;
  int size;
  int used;
} kmp_nested_proc_bind_t;

extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;

extern int __kmp_display_affinity;
extern char *__kmp_affinity_format;
static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;

#if KMP_AFFINITY_SUPPORTED
#define KMP_PLACE_ALL (-1)
#define KMP_PLACE_UNDEFINED (-2)
// Is KMP_AFFINITY being used instead of OMP_PROC_BIND/OMP_PLACES?
#define KMP_AFFINITY_NON_PROC_BIND                                             \
  ((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false ||                 \
    __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) &&                \
   (__kmp_affinity_num_masks > 0 || __kmp_affinity_type == affinity_balanced))
#endif /* KMP_AFFINITY_SUPPORTED */

extern int __kmp_affinity_num_places;

typedef enum kmp_cancel_kind_t {
  cancel_noreq = 0,
  cancel_parallel = 1,
  cancel_loop = 2,
  cancel_sections = 3,
  cancel_taskgroup = 4
} kmp_cancel_kind_t;

// KMP_HW_SUBSET support:
typedef struct kmp_hws_item {
  int num;
  int offset;
} kmp_hws_item_t;

extern kmp_hws_item_t __kmp_hws_socket;
extern kmp_hws_item_t __kmp_hws_node;
extern kmp_hws_item_t __kmp_hws_tile;
extern kmp_hws_item_t __kmp_hws_core;
extern kmp_hws_item_t __kmp_hws_proc;
extern int __kmp_hws_requested;
extern int __kmp_hws_abs_flag; // absolute or per-item number requested

/* ------------------------------------------------------------------------ */

// Round sizeof(type) up to the next multiple of sz.
#define KMP_PAD(type, sz)                                                      \
  (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))

// We need to avoid using -1 as a GTID as +1 is added to the gtid
// when storing it in a lock, and the value 0 is reserved.
862345153Sdim#define KMP_GTID_DNE (-2) /* Does not exist */ 863345153Sdim#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */ 864345153Sdim#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */ 865345153Sdim#define KMP_GTID_UNKNOWN (-5) /* Is not known */ 866345153Sdim#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */ 867345153Sdim 868345153Sdim/* OpenMP 5.0 Memory Management support */ 869353358Sdim 870353358Sdim#ifndef __OMP_H 871360784Sdim// Duplicate type definitions from omp.h 872353358Sdimtypedef uintptr_t omp_uintptr_t; 873353358Sdim 874353358Sdimtypedef enum { 875353358Sdim OMP_ATK_THREADMODEL = 1, 876353358Sdim OMP_ATK_ALIGNMENT = 2, 877353358Sdim OMP_ATK_ACCESS = 3, 878353358Sdim OMP_ATK_POOL_SIZE = 4, 879353358Sdim OMP_ATK_FALLBACK = 5, 880353358Sdim OMP_ATK_FB_DATA = 6, 881353358Sdim OMP_ATK_PINNED = 7, 882353358Sdim OMP_ATK_PARTITION = 8 883353358Sdim} omp_alloctrait_key_t; 884353358Sdim 885353358Sdimtypedef enum { 886353358Sdim OMP_ATV_FALSE = 0, 887353358Sdim OMP_ATV_TRUE = 1, 888353358Sdim OMP_ATV_DEFAULT = 2, 889353358Sdim OMP_ATV_CONTENDED = 3, 890353358Sdim OMP_ATV_UNCONTENDED = 4, 891353358Sdim OMP_ATV_SEQUENTIAL = 5, 892353358Sdim OMP_ATV_PRIVATE = 6, 893353358Sdim OMP_ATV_ALL = 7, 894353358Sdim OMP_ATV_THREAD = 8, 895353358Sdim OMP_ATV_PTEAM = 9, 896353358Sdim OMP_ATV_CGROUP = 10, 897353358Sdim OMP_ATV_DEFAULT_MEM_FB = 11, 898353358Sdim OMP_ATV_NULL_FB = 12, 899353358Sdim OMP_ATV_ABORT_FB = 13, 900353358Sdim OMP_ATV_ALLOCATOR_FB = 14, 901353358Sdim OMP_ATV_ENVIRONMENT = 15, 902353358Sdim OMP_ATV_NEAREST = 16, 903353358Sdim OMP_ATV_BLOCKED = 17, 904353358Sdim OMP_ATV_INTERLEAVED = 18 905353358Sdim} omp_alloctrait_value_t; 906353358Sdim 907353358Sdimtypedef void *omp_memspace_handle_t; 908353358Sdimextern omp_memspace_handle_t const omp_default_mem_space; 909353358Sdimextern omp_memspace_handle_t const omp_large_cap_mem_space; 910353358Sdimextern omp_memspace_handle_t const omp_const_mem_space; 911353358Sdimextern 
omp_memspace_handle_t const omp_high_bw_mem_space; 912353358Sdimextern omp_memspace_handle_t const omp_low_lat_mem_space; 913353358Sdim 914353358Sdimtypedef struct { 915353358Sdim omp_alloctrait_key_t key; 916353358Sdim omp_uintptr_t value; 917353358Sdim} omp_alloctrait_t; 918353358Sdim 919353358Sdimtypedef void *omp_allocator_handle_t; 920353358Sdimextern omp_allocator_handle_t const omp_null_allocator; 921353358Sdimextern omp_allocator_handle_t const omp_default_mem_alloc; 922353358Sdimextern omp_allocator_handle_t const omp_large_cap_mem_alloc; 923353358Sdimextern omp_allocator_handle_t const omp_const_mem_alloc; 924353358Sdimextern omp_allocator_handle_t const omp_high_bw_mem_alloc; 925353358Sdimextern omp_allocator_handle_t const omp_low_lat_mem_alloc; 926353358Sdimextern omp_allocator_handle_t const omp_cgroup_mem_alloc; 927353358Sdimextern omp_allocator_handle_t const omp_pteam_mem_alloc; 928353358Sdimextern omp_allocator_handle_t const omp_thread_mem_alloc; 929353358Sdimextern omp_allocator_handle_t const kmp_max_mem_alloc; 930353358Sdimextern omp_allocator_handle_t __kmp_def_allocator; 931353358Sdim 932360784Sdim// end of duplicate type definitions from omp.h 933353358Sdim#endif 934353358Sdim 935345153Sdimextern int __kmp_memkind_available; 936345153Sdim 937353358Sdimtypedef omp_memspace_handle_t kmp_memspace_t; // placeholder 938345153Sdim 939353358Sdimtypedef struct kmp_allocator_t { 940353358Sdim omp_memspace_handle_t memspace; 941353358Sdim void **memkind; // pointer to memkind 942353358Sdim int alignment; 943353358Sdim omp_alloctrait_value_t fb; 944353358Sdim kmp_allocator_t *fb_data; 945353358Sdim kmp_uint64 pool_size; 946353358Sdim kmp_uint64 pool_used; 947353358Sdim} kmp_allocator_t; 948353358Sdim 949353358Sdimextern omp_allocator_handle_t __kmpc_init_allocator(int gtid, 950353358Sdim omp_memspace_handle_t, 951353358Sdim int ntraits, 952353358Sdim omp_alloctrait_t traits[]); 953353358Sdimextern void __kmpc_destroy_allocator(int gtid, 
omp_allocator_handle_t al); 954353358Sdimextern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al); 955353358Sdimextern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid); 956353358Sdimextern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); 957353358Sdimextern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); 958353358Sdim 959345153Sdimextern void __kmp_init_memkind(); 960345153Sdimextern void __kmp_fini_memkind(); 961345153Sdim 962345153Sdim/* ------------------------------------------------------------------------ */ 963345153Sdim 964345153Sdim#define KMP_UINT64_MAX \ 965345153Sdim (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1))) 966345153Sdim 967345153Sdim#define KMP_MIN_NTH 1 968345153Sdim 969345153Sdim#ifndef KMP_MAX_NTH 970345153Sdim#if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX 971345153Sdim#define KMP_MAX_NTH PTHREAD_THREADS_MAX 972345153Sdim#else 973345153Sdim#define KMP_MAX_NTH INT_MAX 974345153Sdim#endif 975345153Sdim#endif /* KMP_MAX_NTH */ 976345153Sdim 977345153Sdim#ifdef PTHREAD_STACK_MIN 978345153Sdim#define KMP_MIN_STKSIZE PTHREAD_STACK_MIN 979345153Sdim#else 980345153Sdim#define KMP_MIN_STKSIZE ((size_t)(32 * 1024)) 981345153Sdim#endif 982345153Sdim 983345153Sdim#define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1))) 984345153Sdim 985345153Sdim#if KMP_ARCH_X86 986345153Sdim#define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024)) 987345153Sdim#elif KMP_ARCH_X86_64 988345153Sdim#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024)) 989345153Sdim#define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024)) 990345153Sdim#else 991345153Sdim#define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024)) 992345153Sdim#endif 993345153Sdim 994345153Sdim#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024)) 995345153Sdim#define KMP_MIN_MALLOC_POOL_INCR ((size_t)(4 * 1024)) 996345153Sdim#define KMP_MAX_MALLOC_POOL_INCR \ 997345153Sdim (~((size_t)1 << 
((sizeof(size_t) * (1 << 3)) - 1))) 998345153Sdim 999345153Sdim#define KMP_MIN_STKOFFSET (0) 1000345153Sdim#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE 1001345153Sdim#if KMP_OS_DARWIN 1002345153Sdim#define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET 1003345153Sdim#else 1004345153Sdim#define KMP_DEFAULT_STKOFFSET CACHE_LINE 1005345153Sdim#endif 1006345153Sdim 1007345153Sdim#define KMP_MIN_STKPADDING (0) 1008345153Sdim#define KMP_MAX_STKPADDING (2 * 1024 * 1024) 1009345153Sdim 1010345153Sdim#define KMP_BLOCKTIME_MULTIPLIER \ 1011345153Sdim (1000) /* number of blocktime units per second */ 1012345153Sdim#define KMP_MIN_BLOCKTIME (0) 1013345153Sdim#define KMP_MAX_BLOCKTIME \ 1014345153Sdim (INT_MAX) /* Must be this for "infinite" setting the work */ 1015345153Sdim#define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */ 1016345153Sdim 1017345153Sdim#if KMP_USE_MONITOR 1018345153Sdim#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024)) 1019345153Sdim#define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second 1020345153Sdim#define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec 1021345153Sdim 1022345153Sdim/* Calculate new number of monitor wakeups for a specific block time based on 1023345153Sdim previous monitor_wakeups. Only allow increasing number of wakeups */ 1024345153Sdim#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ 1025345153Sdim (((blocktime) == KMP_MAX_BLOCKTIME) \ 1026345153Sdim ? (monitor_wakeups) \ 1027345153Sdim : ((blocktime) == KMP_MIN_BLOCKTIME) \ 1028345153Sdim ? KMP_MAX_MONITOR_WAKEUPS \ 1029345153Sdim : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) \ 1030345153Sdim ? 
(monitor_wakeups) \ 1031345153Sdim : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime)) 1032345153Sdim 1033345153Sdim/* Calculate number of intervals for a specific block time based on 1034345153Sdim monitor_wakeups */ 1035345153Sdim#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ 1036345153Sdim (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) / \ 1037345153Sdim (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups))) 1038345153Sdim#else 1039345153Sdim#define KMP_BLOCKTIME(team, tid) \ 1040345153Sdim (get__bt_set(team, tid) ? get__blocktime(team, tid) : __kmp_dflt_blocktime) 1041345153Sdim#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) 1042345153Sdim// HW TSC is used to reduce overhead (clock tick instead of nanosecond). 1043345153Sdimextern kmp_uint64 __kmp_ticks_per_msec; 1044345153Sdim#if KMP_COMPILER_ICC 1045345153Sdim#define KMP_NOW() ((kmp_uint64)_rdtsc()) 1046345153Sdim#else 1047345153Sdim#define KMP_NOW() __kmp_hardware_timestamp() 1048345153Sdim#endif 1049345153Sdim#define KMP_NOW_MSEC() (KMP_NOW() / __kmp_ticks_per_msec) 1050345153Sdim#define KMP_BLOCKTIME_INTERVAL(team, tid) \ 1051345153Sdim (KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_msec) 1052345153Sdim#define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW()) 1053345153Sdim#else 1054345153Sdim// System time is retrieved sporadically while blocking. 
1055345153Sdimextern kmp_uint64 __kmp_now_nsec(); 1056345153Sdim#define KMP_NOW() __kmp_now_nsec() 1057345153Sdim#define KMP_NOW_MSEC() (KMP_NOW() / KMP_USEC_PER_SEC) 1058345153Sdim#define KMP_BLOCKTIME_INTERVAL(team, tid) \ 1059345153Sdim (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC) 1060345153Sdim#define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW()) 1061345153Sdim#endif 1062345153Sdim#endif // KMP_USE_MONITOR 1063345153Sdim 1064345153Sdim#define KMP_MIN_STATSCOLS 40 1065345153Sdim#define KMP_MAX_STATSCOLS 4096 1066345153Sdim#define KMP_DEFAULT_STATSCOLS 80 1067345153Sdim 1068345153Sdim#define KMP_MIN_INTERVAL 0 1069345153Sdim#define KMP_MAX_INTERVAL (INT_MAX - 1) 1070345153Sdim#define KMP_DEFAULT_INTERVAL 0 1071345153Sdim 1072345153Sdim#define KMP_MIN_CHUNK 1 1073345153Sdim#define KMP_MAX_CHUNK (INT_MAX - 1) 1074345153Sdim#define KMP_DEFAULT_CHUNK 1 1075345153Sdim 1076345153Sdim#define KMP_DFLT_DISP_NUM_BUFF 7 1077345153Sdim#define KMP_MAX_ORDERED 8 1078345153Sdim 1079345153Sdim#define KMP_MAX_FIELDS 32 1080345153Sdim 1081345153Sdim#define KMP_MAX_BRANCH_BITS 31 1082345153Sdim 1083345153Sdim#define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX 1084345153Sdim 1085345153Sdim#define KMP_MAX_DEFAULT_DEVICE_LIMIT INT_MAX 1086345153Sdim 1087345153Sdim#define KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX 1088345153Sdim 1089345153Sdim/* Minimum number of threads before switch to TLS gtid (experimentally 1090345153Sdim determined) */ 1091345153Sdim/* josh TODO: what about OS X* tuning? 
*/ 1092345153Sdim#if KMP_ARCH_X86 || KMP_ARCH_X86_64 1093345153Sdim#define KMP_TLS_GTID_MIN 5 1094345153Sdim#else 1095345153Sdim#define KMP_TLS_GTID_MIN INT_MAX 1096345153Sdim#endif 1097345153Sdim 1098345153Sdim#define KMP_MASTER_TID(tid) ((tid) == 0) 1099345153Sdim#define KMP_WORKER_TID(tid) ((tid) != 0) 1100345153Sdim 1101345153Sdim#define KMP_MASTER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) == 0) 1102345153Sdim#define KMP_WORKER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) != 0) 1103345153Sdim#define KMP_INITIAL_GTID(gtid) ((gtid) == 0) 1104345153Sdim 1105345153Sdim#ifndef TRUE 1106345153Sdim#define FALSE 0 1107345153Sdim#define TRUE (!FALSE) 1108345153Sdim#endif 1109345153Sdim 1110345153Sdim/* NOTE: all of the following constants must be even */ 1111345153Sdim 1112345153Sdim#if KMP_OS_WINDOWS 1113345153Sdim#define KMP_INIT_WAIT 64U /* initial number of spin-tests */ 1114345153Sdim#define KMP_NEXT_WAIT 32U /* susequent number of spin-tests */ 1115345153Sdim#elif KMP_OS_CNK 1116345153Sdim#define KMP_INIT_WAIT 16U /* initial number of spin-tests */ 1117345153Sdim#define KMP_NEXT_WAIT 8U /* susequent number of spin-tests */ 1118345153Sdim#elif KMP_OS_LINUX 1119345153Sdim#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1120345153Sdim#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1121345153Sdim#elif KMP_OS_DARWIN 1122345153Sdim/* TODO: tune for KMP_OS_DARWIN */ 1123345153Sdim#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1124345153Sdim#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1125345153Sdim#elif KMP_OS_DRAGONFLY 1126345153Sdim/* TODO: tune for KMP_OS_DRAGONFLY */ 1127345153Sdim#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1128345153Sdim#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1129345153Sdim#elif KMP_OS_FREEBSD 1130345153Sdim/* TODO: tune for KMP_OS_FREEBSD */ 1131345153Sdim#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1132345153Sdim#define 
KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1133345153Sdim#elif KMP_OS_NETBSD 1134345153Sdim/* TODO: tune for KMP_OS_NETBSD */ 1135345153Sdim#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1136345153Sdim#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1137345153Sdim#elif KMP_OS_HURD 1138345153Sdim/* TODO: tune for KMP_OS_HURD */ 1139345153Sdim#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1140345153Sdim#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1141345153Sdim#elif KMP_OS_OPENBSD 1142345153Sdim/* TODO: tune for KMP_OS_OPENBSD */ 1143345153Sdim#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 1144345153Sdim#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 1145345153Sdim#endif 1146345153Sdim 1147345153Sdim#if KMP_ARCH_X86 || KMP_ARCH_X86_64 1148345153Sdimtypedef struct kmp_cpuid { 1149345153Sdim kmp_uint32 eax; 1150345153Sdim kmp_uint32 ebx; 1151345153Sdim kmp_uint32 ecx; 1152345153Sdim kmp_uint32 edx; 1153345153Sdim} kmp_cpuid_t; 1154353358Sdim 1155353358Sdimtypedef struct kmp_cpuinfo { 1156353358Sdim int initialized; // If 0, other fields are not initialized. 1157353358Sdim int signature; // CPUID(1).EAX 1158353358Sdim int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family) 1159353358Sdim int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended 1160353358Sdim // Model << 4 ) + Model) 1161353358Sdim int stepping; // CPUID(1).EAX[3:0] ( Stepping ) 1162353358Sdim int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise. 1163353358Sdim int rtm; // 0 if RTM instructions are not supported, 1 otherwise. 1164353358Sdim int cpu_stackoffset; 1165353358Sdim int apic_id; 1166353358Sdim int physical_id; 1167353358Sdim int logical_id; 1168353358Sdim kmp_uint64 frequency; // Nominal CPU frequency in Hz. 
1169353358Sdim char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004) 1170353358Sdim} kmp_cpuinfo_t; 1171353358Sdim 1172353358Sdimextern void __kmp_query_cpuid(kmp_cpuinfo_t *p); 1173353358Sdim 1174353358Sdim#if KMP_OS_UNIX 1175353358Sdim// subleaf is only needed for cache and topology discovery and can be set to 1176353358Sdim// zero in most cases 1177353358Sdimstatic inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) { 1178353358Sdim __asm__ __volatile__("cpuid" 1179353358Sdim : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx) 1180353358Sdim : "a"(leaf), "c"(subleaf)); 1181353358Sdim} 1182353358Sdim// Load p into FPU control word 1183353358Sdimstatic inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) { 1184353358Sdim __asm__ __volatile__("fldcw %0" : : "m"(*p)); 1185353358Sdim} 1186353358Sdim// Store FPU control word into p 1187353358Sdimstatic inline void __kmp_store_x87_fpu_control_word(kmp_int16 *p) { 1188353358Sdim __asm__ __volatile__("fstcw %0" : "=m"(*p)); 1189353358Sdim} 1190353358Sdimstatic inline void __kmp_clear_x87_fpu_status_word() { 1191353358Sdim#if KMP_MIC 1192353358Sdim // 32-bit protected mode x87 FPU state 1193353358Sdim struct x87_fpu_state { 1194353358Sdim unsigned cw; 1195353358Sdim unsigned sw; 1196353358Sdim unsigned tw; 1197353358Sdim unsigned fip; 1198353358Sdim unsigned fips; 1199353358Sdim unsigned fdp; 1200353358Sdim unsigned fds; 1201353358Sdim }; 1202353358Sdim struct x87_fpu_state fpu_state = {0, 0, 0, 0, 0, 0, 0}; 1203353358Sdim __asm__ __volatile__("fstenv %0\n\t" // store FP env 1204353358Sdim "andw $0x7f00, %1\n\t" // clear 0-7,15 bits of FP SW 1205353358Sdim "fldenv %0\n\t" // load FP env back 1206353358Sdim : "+m"(fpu_state), "+m"(fpu_state.sw)); 1207353358Sdim#else 1208353358Sdim __asm__ __volatile__("fnclex"); 1209353358Sdim#endif // KMP_MIC 1210353358Sdim} 1211353358Sdim#if __SSE__ 1212353358Sdimstatic inline void __kmp_load_mxcsr(const kmp_uint32 *p) { 
_mm_setcsr(*p); } 1213353358Sdimstatic inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); } 1214353358Sdim#else 1215353358Sdimstatic inline void __kmp_load_mxcsr(const kmp_uint32 *p) {} 1216353358Sdimstatic inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = 0; } 1217353358Sdim#endif 1218353358Sdim#else 1219353358Sdim// Windows still has these as external functions in assembly file 1220345153Sdimextern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p); 1221353358Sdimextern void __kmp_load_x87_fpu_control_word(const kmp_int16 *p); 1222353358Sdimextern void __kmp_store_x87_fpu_control_word(kmp_int16 *p); 1223353358Sdimextern void __kmp_clear_x87_fpu_status_word(); 1224353358Sdimstatic inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); } 1225353358Sdimstatic inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); } 1226353358Sdim#endif // KMP_OS_UNIX 1227353358Sdim 1228353358Sdim#define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */ 1229353358Sdim 1230345153Sdim#if KMP_ARCH_X86 1231345153Sdimextern void __kmp_x86_pause(void); 1232345153Sdim#elif KMP_MIC 1233345153Sdim// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed 1234353358Sdim// regression after removal of extra PAUSE from spin loops. Changing 1235345153Sdim// the delay from 100 to 300 showed even better performance than double PAUSE 1236345153Sdim// on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC. 
1237345153Sdimstatic inline void __kmp_x86_pause(void) { _mm_delay_32(300); } 1238345153Sdim#else 1239345153Sdimstatic inline void __kmp_x86_pause(void) { _mm_pause(); } 1240345153Sdim#endif 1241345153Sdim#define KMP_CPU_PAUSE() __kmp_x86_pause() 1242345153Sdim#elif KMP_ARCH_PPC64 1243345153Sdim#define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1") 1244345153Sdim#define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2") 1245345153Sdim#define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory") 1246345153Sdim#define KMP_CPU_PAUSE() \ 1247345153Sdim do { \ 1248345153Sdim KMP_PPC64_PRI_LOW(); \ 1249345153Sdim KMP_PPC64_PRI_MED(); \ 1250345153Sdim KMP_PPC64_PRI_LOC_MB(); \ 1251345153Sdim } while (0) 1252345153Sdim#else 1253345153Sdim#define KMP_CPU_PAUSE() /* nothing to do */ 1254345153Sdim#endif 1255345153Sdim 1256345153Sdim#define KMP_INIT_YIELD(count) \ 1257345153Sdim { (count) = __kmp_yield_init; } 1258345153Sdim 1259353358Sdim#define KMP_OVERSUBSCRIBED \ 1260353358Sdim (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) 1261353358Sdim 1262353358Sdim#define KMP_TRY_YIELD \ 1263353358Sdim ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED))) 1264353358Sdim 1265353358Sdim#define KMP_TRY_YIELD_OVERSUB \ 1266353358Sdim ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED)) 1267353358Sdim 1268345153Sdim#define KMP_YIELD(cond) \ 1269345153Sdim { \ 1270345153Sdim KMP_CPU_PAUSE(); \ 1271353358Sdim if ((cond) && (KMP_TRY_YIELD)) \ 1272353358Sdim __kmp_yield(); \ 1273345153Sdim } 1274345153Sdim 1275353358Sdim#define KMP_YIELD_OVERSUB() \ 1276353358Sdim { \ 1277353358Sdim KMP_CPU_PAUSE(); \ 1278353358Sdim if ((KMP_TRY_YIELD_OVERSUB)) \ 1279353358Sdim __kmp_yield(); \ 1280353358Sdim } 1281353358Sdim 1282345153Sdim// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround, 1283345153Sdim// there should be no yielding since initial value from KMP_INIT_YIELD() is odd. 
1284353358Sdim#define KMP_YIELD_SPIN(count) \ 1285345153Sdim { \ 1286345153Sdim KMP_CPU_PAUSE(); \ 1287353358Sdim if (KMP_TRY_YIELD) { \ 1288353358Sdim (count) -= 2; \ 1289353358Sdim if (!(count)) { \ 1290353358Sdim __kmp_yield(); \ 1291353358Sdim (count) = __kmp_yield_next; \ 1292353358Sdim } \ 1293345153Sdim } \ 1294345153Sdim } 1295353358Sdim 1296353358Sdim#define KMP_YIELD_OVERSUB_ELSE_SPIN(count) \ 1297345153Sdim { \ 1298345153Sdim KMP_CPU_PAUSE(); \ 1299353358Sdim if ((KMP_TRY_YIELD_OVERSUB)) \ 1300353358Sdim __kmp_yield(); \ 1301353358Sdim else if (__kmp_use_yield == 1) { \ 1302353358Sdim (count) -= 2; \ 1303353358Sdim if (!(count)) { \ 1304353358Sdim __kmp_yield(); \ 1305353358Sdim (count) = __kmp_yield_next; \ 1306353358Sdim } \ 1307345153Sdim } \ 1308345153Sdim } 1309345153Sdim 1310345153Sdim/* ------------------------------------------------------------------------ */ 1311345153Sdim/* Support datatypes for the orphaned construct nesting checks. */ 1312345153Sdim/* ------------------------------------------------------------------------ */ 1313345153Sdim 1314345153Sdimenum cons_type { 1315345153Sdim ct_none, 1316345153Sdim ct_parallel, 1317345153Sdim ct_pdo, 1318345153Sdim ct_pdo_ordered, 1319345153Sdim ct_psections, 1320345153Sdim ct_psingle, 1321345153Sdim ct_critical, 1322345153Sdim ct_ordered_in_parallel, 1323345153Sdim ct_ordered_in_pdo, 1324345153Sdim ct_master, 1325345153Sdim ct_reduce, 1326345153Sdim ct_barrier 1327345153Sdim}; 1328345153Sdim 1329353358Sdim#define IS_CONS_TYPE_ORDERED(ct) ((ct) == ct_pdo_ordered) 1330345153Sdim 1331345153Sdimstruct cons_data { 1332345153Sdim ident_t const *ident; 1333345153Sdim enum cons_type type; 1334345153Sdim int prev; 1335345153Sdim kmp_user_lock_p 1336345153Sdim name; /* address exclusively for critical section name comparison */ 1337345153Sdim}; 1338345153Sdim 1339345153Sdimstruct cons_header { 1340345153Sdim int p_top, w_top, s_top; 1341345153Sdim int stack_size, stack_top; 1342345153Sdim struct cons_data 
*stack_data; 1343345153Sdim}; 1344345153Sdim 1345345153Sdimstruct kmp_region_info { 1346345153Sdim char *text; 1347345153Sdim int offset[KMP_MAX_FIELDS]; 1348345153Sdim int length[KMP_MAX_FIELDS]; 1349345153Sdim}; 1350345153Sdim 1351345153Sdim/* ---------------------------------------------------------------------- */ 1352345153Sdim/* ---------------------------------------------------------------------- */ 1353345153Sdim 1354345153Sdim#if KMP_OS_WINDOWS 1355345153Sdimtypedef HANDLE kmp_thread_t; 1356345153Sdimtypedef DWORD kmp_key_t; 1357345153Sdim#endif /* KMP_OS_WINDOWS */ 1358345153Sdim 1359345153Sdim#if KMP_OS_UNIX 1360345153Sdimtypedef pthread_t kmp_thread_t; 1361345153Sdimtypedef pthread_key_t kmp_key_t; 1362345153Sdim#endif 1363345153Sdim 1364345153Sdimextern kmp_key_t __kmp_gtid_threadprivate_key; 1365345153Sdim 1366345153Sdimtypedef struct kmp_sys_info { 1367345153Sdim long maxrss; /* the maximum resident set size utilized (in kilobytes) */ 1368345153Sdim long minflt; /* the number of page faults serviced without any I/O */ 1369345153Sdim long majflt; /* the number of page faults serviced that required I/O */ 1370345153Sdim long nswap; /* the number of times a process was "swapped" out of memory */ 1371345153Sdim long inblock; /* the number of times the file system had to perform input */ 1372345153Sdim long oublock; /* the number of times the file system had to perform output */ 1373345153Sdim long nvcsw; /* the number of times a context switch was voluntarily */ 1374345153Sdim long nivcsw; /* the number of times a context switch was forced */ 1375345153Sdim} kmp_sys_info_t; 1376345153Sdim 1377345153Sdim#if USE_ITT_BUILD 1378345153Sdim// We cannot include "kmp_itt.h" due to circular dependency. Declare the only 1379345153Sdim// required type here. Later we will check the type meets requirements. 
1380345153Sdimtypedef int kmp_itt_mark_t; 1381345153Sdim#define KMP_ITT_DEBUG 0 1382345153Sdim#endif /* USE_ITT_BUILD */ 1383345153Sdim 1384345153Sdimtypedef kmp_int32 kmp_critical_name[8]; 1385345153Sdim 1386345153Sdim/*! 1387345153Sdim@ingroup PARALLEL 1388345153SdimThe type for a microtask which gets passed to @ref __kmpc_fork_call(). 1389345153SdimThe arguments to the outlined function are 1390345153Sdim@param global_tid the global thread identity of the thread executing the 1391345153Sdimfunction. 1392360784Sdim@param bound_tid the local identity of the thread executing the function 1393345153Sdim@param ... pointers to shared variables accessed by the function. 1394345153Sdim*/ 1395345153Sdimtypedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...); 1396345153Sdimtypedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth, 1397345153Sdim ...); 1398345153Sdim 1399345153Sdim/*! 1400345153Sdim@ingroup THREADPRIVATE 1401345153Sdim@{ 1402345153Sdim*/ 1403345153Sdim/* --------------------------------------------------------------------------- 1404345153Sdim */ 1405345153Sdim/* Threadprivate initialization/finalization function declarations */ 1406345153Sdim 1407345153Sdim/* for non-array objects: __kmpc_threadprivate_register() */ 1408345153Sdim 1409345153Sdim/*! 1410345153Sdim Pointer to the constructor function. 1411345153Sdim The first argument is the <tt>this</tt> pointer 1412345153Sdim*/ 1413345153Sdimtypedef void *(*kmpc_ctor)(void *); 1414345153Sdim 1415345153Sdim/*! 1416345153Sdim Pointer to the destructor function. 1417345153Sdim The first argument is the <tt>this</tt> pointer 1418345153Sdim*/ 1419345153Sdimtypedef void (*kmpc_dtor)( 1420345153Sdim void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel 1421345153Sdim compiler */ 1422345153Sdim/*! 1423345153Sdim Pointer to an alternate constructor. 1424345153Sdim The first argument is the <tt>this</tt> pointer. 
1425345153Sdim*/ 1426345153Sdimtypedef void *(*kmpc_cctor)(void *, void *); 1427345153Sdim 1428345153Sdim/* for array objects: __kmpc_threadprivate_register_vec() */ 1429345153Sdim/* First arg: "this" pointer */ 1430345153Sdim/* Last arg: number of array elements */ 1431345153Sdim/*! 1432345153Sdim Array constructor. 1433345153Sdim First argument is the <tt>this</tt> pointer 1434345153Sdim Second argument the number of array elements. 1435345153Sdim*/ 1436345153Sdimtypedef void *(*kmpc_ctor_vec)(void *, size_t); 1437345153Sdim/*! 1438345153Sdim Pointer to the array destructor function. 1439345153Sdim The first argument is the <tt>this</tt> pointer 1440345153Sdim Second argument the number of array elements. 1441345153Sdim*/ 1442345153Sdimtypedef void (*kmpc_dtor_vec)(void *, size_t); 1443345153Sdim/*! 1444345153Sdim Array constructor. 1445345153Sdim First argument is the <tt>this</tt> pointer 1446345153Sdim Third argument the number of array elements. 1447345153Sdim*/ 1448345153Sdimtypedef void *(*kmpc_cctor_vec)(void *, void *, 1449345153Sdim size_t); /* function unused by compiler */ 1450345153Sdim 1451345153Sdim/*! 
1452345153Sdim@} 1453345153Sdim*/ 1454345153Sdim 1455345153Sdim/* keeps tracked of threadprivate cache allocations for cleanup later */ 1456345153Sdimtypedef struct kmp_cached_addr { 1457345153Sdim void **addr; /* address of allocated cache */ 1458345153Sdim void ***compiler_cache; /* pointer to compiler's cache */ 1459345153Sdim void *data; /* pointer to global data */ 1460345153Sdim struct kmp_cached_addr *next; /* pointer to next cached address */ 1461345153Sdim} kmp_cached_addr_t; 1462345153Sdim 1463345153Sdimstruct private_data { 1464345153Sdim struct private_data *next; /* The next descriptor in the list */ 1465345153Sdim void *data; /* The data buffer for this descriptor */ 1466345153Sdim int more; /* The repeat count for this descriptor */ 1467345153Sdim size_t size; /* The data size for this descriptor */ 1468345153Sdim}; 1469345153Sdim 1470345153Sdimstruct private_common { 1471345153Sdim struct private_common *next; 1472345153Sdim struct private_common *link; 1473345153Sdim void *gbl_addr; 1474345153Sdim void *par_addr; /* par_addr == gbl_addr for MASTER thread */ 1475345153Sdim size_t cmn_size; 1476345153Sdim}; 1477345153Sdim 1478345153Sdimstruct shared_common { 1479345153Sdim struct shared_common *next; 1480345153Sdim struct private_data *pod_init; 1481345153Sdim void *obj_init; 1482345153Sdim void *gbl_addr; 1483345153Sdim union { 1484345153Sdim kmpc_ctor ctor; 1485345153Sdim kmpc_ctor_vec ctorv; 1486345153Sdim } ct; 1487345153Sdim union { 1488345153Sdim kmpc_cctor cctor; 1489345153Sdim kmpc_cctor_vec cctorv; 1490345153Sdim } cct; 1491345153Sdim union { 1492345153Sdim kmpc_dtor dtor; 1493345153Sdim kmpc_dtor_vec dtorv; 1494345153Sdim } dt; 1495345153Sdim size_t vec_len; 1496345153Sdim int is_vec; 1497345153Sdim size_t cmn_size; 1498345153Sdim}; 1499345153Sdim 1500345153Sdim#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */ 1501345153Sdim#define KMP_HASH_TABLE_SIZE \ 1502345153Sdim (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */ 
1503345153Sdim#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */ 1504345153Sdim#define KMP_HASH(x) \ 1505345153Sdim ((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1)) 1506345153Sdim 1507345153Sdimstruct common_table { 1508345153Sdim struct private_common *data[KMP_HASH_TABLE_SIZE]; 1509345153Sdim}; 1510345153Sdim 1511345153Sdimstruct shared_table { 1512345153Sdim struct shared_common *data[KMP_HASH_TABLE_SIZE]; 1513345153Sdim}; 1514345153Sdim 1515345153Sdim/* ------------------------------------------------------------------------ */ 1516345153Sdim 1517345153Sdim#if KMP_USE_HIER_SCHED 1518345153Sdim// Shared barrier data that exists inside a single unit of the scheduling 1519345153Sdim// hierarchy 1520345153Sdimtypedef struct kmp_hier_private_bdata_t { 1521345153Sdim kmp_int32 num_active; 1522345153Sdim kmp_uint64 index; 1523345153Sdim kmp_uint64 wait_val[2]; 1524345153Sdim} kmp_hier_private_bdata_t; 1525345153Sdim#endif 1526345153Sdim 1527345153Sdimtypedef struct kmp_sched_flags { 1528345153Sdim unsigned ordered : 1; 1529345153Sdim unsigned nomerge : 1; 1530345153Sdim unsigned contains_last : 1; 1531345153Sdim#if KMP_USE_HIER_SCHED 1532345153Sdim unsigned use_hier : 1; 1533345153Sdim unsigned unused : 28; 1534345153Sdim#else 1535345153Sdim unsigned unused : 29; 1536345153Sdim#endif 1537345153Sdim} kmp_sched_flags_t; 1538345153Sdim 1539345153SdimKMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4); 1540345153Sdim 1541345153Sdim#if KMP_STATIC_STEAL_ENABLED 1542345153Sdimtypedef struct KMP_ALIGN_CACHE dispatch_private_info32 { 1543345153Sdim kmp_int32 count; 1544345153Sdim kmp_int32 ub; 1545345153Sdim /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ 1546345153Sdim kmp_int32 lb; 1547345153Sdim kmp_int32 st; 1548345153Sdim kmp_int32 tc; 1549345153Sdim kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put 1550345153Sdim after ub */ 1551345153Sdim 1552345153Sdim // KMP_ALIGN( 16 ) 
ensures ( if the KMP_ALIGN macro is turned on ) 1553345153Sdim // a) parm3 is properly aligned and 1554345153Sdim // b) all parm1-4 are in the same cache line. 1555345153Sdim // Because of parm1-4 are used together, performance seems to be better 1556345153Sdim // if they are in the same line (not measured though). 1557345153Sdim 1558345153Sdim struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template 1559345153Sdim kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should 1560345153Sdim kmp_int32 parm2; // make no real change at least while padding is off. 1561345153Sdim kmp_int32 parm3; 1562345153Sdim kmp_int32 parm4; 1563345153Sdim }; 1564345153Sdim 1565345153Sdim kmp_uint32 ordered_lower; 1566345153Sdim kmp_uint32 ordered_upper; 1567345153Sdim#if KMP_OS_WINDOWS 1568345153Sdim // This var can be placed in the hole between 'tc' and 'parm1', instead of 1569345153Sdim // 'static_steal_counter'. It would be nice to measure execution times. 1570345153Sdim // Conditional if/endif can be removed at all. 
1571345153Sdim kmp_int32 last_upper; 1572345153Sdim#endif /* KMP_OS_WINDOWS */ 1573345153Sdim} dispatch_private_info32_t; 1574345153Sdim 1575345153Sdimtypedef struct KMP_ALIGN_CACHE dispatch_private_info64 { 1576345153Sdim kmp_int64 count; // current chunk number for static & static-steal scheduling 1577345153Sdim kmp_int64 ub; /* upper-bound */ 1578345153Sdim /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ 1579345153Sdim kmp_int64 lb; /* lower-bound */ 1580345153Sdim kmp_int64 st; /* stride */ 1581345153Sdim kmp_int64 tc; /* trip count (number of iterations) */ 1582345153Sdim kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put 1583345153Sdim after ub */ 1584345153Sdim 1585345153Sdim /* parm[1-4] are used in different ways by different scheduling algorithms */ 1586345153Sdim 1587345153Sdim // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on ) 1588345153Sdim // a) parm3 is properly aligned and 1589345153Sdim // b) all parm1-4 are in the same cache line. 1590345153Sdim // Because of parm1-4 are used together, performance seems to be better 1591345153Sdim // if they are in the same line (not measured though). 1592345153Sdim 1593345153Sdim struct KMP_ALIGN(32) { 1594345153Sdim kmp_int64 parm1; 1595345153Sdim kmp_int64 parm2; 1596345153Sdim kmp_int64 parm3; 1597345153Sdim kmp_int64 parm4; 1598345153Sdim }; 1599345153Sdim 1600345153Sdim kmp_uint64 ordered_lower; 1601345153Sdim kmp_uint64 ordered_upper; 1602345153Sdim#if KMP_OS_WINDOWS 1603345153Sdim // This var can be placed in the hole between 'tc' and 'parm1', instead of 1604345153Sdim // 'static_steal_counter'. It would be nice to measure execution times. 1605345153Sdim // Conditional if/endif can be removed at all. 
1606345153Sdim kmp_int64 last_upper; 1607345153Sdim#endif /* KMP_OS_WINDOWS */ 1608345153Sdim} dispatch_private_info64_t; 1609345153Sdim#else /* KMP_STATIC_STEAL_ENABLED */ 1610345153Sdimtypedef struct KMP_ALIGN_CACHE dispatch_private_info32 { 1611345153Sdim kmp_int32 lb; 1612345153Sdim kmp_int32 ub; 1613345153Sdim kmp_int32 st; 1614345153Sdim kmp_int32 tc; 1615345153Sdim 1616345153Sdim kmp_int32 parm1; 1617345153Sdim kmp_int32 parm2; 1618345153Sdim kmp_int32 parm3; 1619345153Sdim kmp_int32 parm4; 1620345153Sdim 1621345153Sdim kmp_int32 count; 1622345153Sdim 1623345153Sdim kmp_uint32 ordered_lower; 1624345153Sdim kmp_uint32 ordered_upper; 1625345153Sdim#if KMP_OS_WINDOWS 1626345153Sdim kmp_int32 last_upper; 1627345153Sdim#endif /* KMP_OS_WINDOWS */ 1628345153Sdim} dispatch_private_info32_t; 1629345153Sdim 1630345153Sdimtypedef struct KMP_ALIGN_CACHE dispatch_private_info64 { 1631345153Sdim kmp_int64 lb; /* lower-bound */ 1632345153Sdim kmp_int64 ub; /* upper-bound */ 1633345153Sdim kmp_int64 st; /* stride */ 1634345153Sdim kmp_int64 tc; /* trip count (number of iterations) */ 1635345153Sdim 1636345153Sdim /* parm[1-4] are used in different ways by different scheduling algorithms */ 1637345153Sdim kmp_int64 parm1; 1638345153Sdim kmp_int64 parm2; 1639345153Sdim kmp_int64 parm3; 1640345153Sdim kmp_int64 parm4; 1641345153Sdim 1642345153Sdim kmp_int64 count; /* current chunk number for static scheduling */ 1643345153Sdim 1644345153Sdim kmp_uint64 ordered_lower; 1645345153Sdim kmp_uint64 ordered_upper; 1646345153Sdim#if KMP_OS_WINDOWS 1647345153Sdim kmp_int64 last_upper; 1648345153Sdim#endif /* KMP_OS_WINDOWS */ 1649345153Sdim} dispatch_private_info64_t; 1650345153Sdim#endif /* KMP_STATIC_STEAL_ENABLED */ 1651345153Sdim 1652345153Sdimtypedef struct KMP_ALIGN_CACHE dispatch_private_info { 1653345153Sdim union private_info { 1654345153Sdim dispatch_private_info32_t p32; 1655345153Sdim dispatch_private_info64_t p64; 1656345153Sdim } u; 1657345153Sdim enum sched_type 
schedule; /* scheduling algorithm */ 1658345153Sdim kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */ 1659345153Sdim kmp_int32 ordered_bumped; 1660345153Sdim // To retain the structure size after making ordered_iteration scalar 1661345153Sdim kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3]; 1662345153Sdim // Stack of buffers for nest of serial regions 1663345153Sdim struct dispatch_private_info *next; 1664345153Sdim kmp_int32 type_size; /* the size of types in private_info */ 1665345153Sdim#if KMP_USE_HIER_SCHED 1666345153Sdim kmp_int32 hier_id; 1667345153Sdim void *parent; /* hierarchical scheduling parent pointer */ 1668345153Sdim#endif 1669345153Sdim enum cons_type pushed_ws; 1670345153Sdim} dispatch_private_info_t; 1671345153Sdim 1672345153Sdimtypedef struct dispatch_shared_info32 { 1673345153Sdim /* chunk index under dynamic, number of idle threads under static-steal; 1674345153Sdim iteration index otherwise */ 1675345153Sdim volatile kmp_uint32 iteration; 1676345153Sdim volatile kmp_uint32 num_done; 1677345153Sdim volatile kmp_uint32 ordered_iteration; 1678345153Sdim // Dummy to retain the structure size after making ordered_iteration scalar 1679345153Sdim kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1]; 1680345153Sdim} dispatch_shared_info32_t; 1681345153Sdim 1682345153Sdimtypedef struct dispatch_shared_info64 { 1683345153Sdim /* chunk index under dynamic, number of idle threads under static-steal; 1684345153Sdim iteration index otherwise */ 1685345153Sdim volatile kmp_uint64 iteration; 1686345153Sdim volatile kmp_uint64 num_done; 1687345153Sdim volatile kmp_uint64 ordered_iteration; 1688345153Sdim // Dummy to retain the structure size after making ordered_iteration scalar 1689345153Sdim kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3]; 1690345153Sdim} dispatch_shared_info64_t; 1691345153Sdim 1692345153Sdimtypedef struct dispatch_shared_info { 1693345153Sdim union shared_info { 1694345153Sdim dispatch_shared_info32_t s32; 1695345153Sdim 
dispatch_shared_info64_t s64; 1696345153Sdim } u; 1697345153Sdim volatile kmp_uint32 buffer_index; 1698345153Sdim volatile kmp_int32 doacross_buf_idx; // teamwise index 1699345153Sdim volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1) 1700345153Sdim kmp_int32 doacross_num_done; // count finished threads 1701345153Sdim#if KMP_USE_HIER_SCHED 1702345153Sdim void *hier; 1703345153Sdim#endif 1704345153Sdim#if KMP_USE_HWLOC 1705345153Sdim // When linking with libhwloc, the ORDERED EPCC test slows down on big 1706345153Sdim // machines (> 48 cores). Performance analysis showed that a cache thrash 1707345153Sdim // was occurring and this padding helps alleviate the problem. 1708345153Sdim char padding[64]; 1709345153Sdim#endif 1710345153Sdim} dispatch_shared_info_t; 1711345153Sdim 1712345153Sdimtypedef struct kmp_disp { 1713345153Sdim /* Vector for ORDERED SECTION */ 1714345153Sdim void (*th_deo_fcn)(int *gtid, int *cid, ident_t *); 1715345153Sdim /* Vector for END ORDERED SECTION */ 1716345153Sdim void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *); 1717345153Sdim 1718345153Sdim dispatch_shared_info_t *th_dispatch_sh_current; 1719345153Sdim dispatch_private_info_t *th_dispatch_pr_current; 1720345153Sdim 1721345153Sdim dispatch_private_info_t *th_disp_buffer; 1722345153Sdim kmp_int32 th_disp_index; 1723345153Sdim kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index 1724345153Sdim volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags 1725345153Sdim union { // we can use union here because doacross cannot be used in 1726345153Sdim // nonmonotonic loops 1727345153Sdim kmp_int64 *th_doacross_info; // info on loop bounds 1728345153Sdim kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable) 1729345153Sdim }; 1730345153Sdim#if KMP_USE_INTERNODE_ALIGNMENT 1731345153Sdim char more_padding[INTERNODE_CACHE_LINE]; 1732345153Sdim#endif 1733345153Sdim} kmp_disp_t; 1734345153Sdim 1735345153Sdim/* 
------------------------------------------------------------------------ */ 1736345153Sdim/* Barrier stuff */ 1737345153Sdim 1738345153Sdim/* constants for barrier state update */ 1739345153Sdim#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */ 1740345153Sdim#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */ 1741345153Sdim#define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state 1742345153Sdim#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */ 1743345153Sdim 1744345153Sdim#define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT) 1745345153Sdim#define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT) 1746345153Sdim#define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT) 1747345153Sdim 1748345153Sdim#if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT) 1749345153Sdim#error "Barrier sleep bit must be smaller than barrier bump bit" 1750345153Sdim#endif 1751345153Sdim#if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT) 1752345153Sdim#error "Barrier unused bit must be smaller than barrier bump bit" 1753345153Sdim#endif 1754345153Sdim 1755345153Sdim// Constants for release barrier wait state: currently, hierarchical only 1756345153Sdim#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep 1757345153Sdim#define KMP_BARRIER_OWN_FLAG \ 1758345153Sdim 1 // Normal state; worker waiting on own b_go flag in release 1759345153Sdim#define KMP_BARRIER_PARENT_FLAG \ 1760345153Sdim 2 // Special state; worker waiting on parent's b_go flag in release 1761345153Sdim#define KMP_BARRIER_SWITCH_TO_OWN_FLAG \ 1762345153Sdim 3 // Special state; tells worker to shift from parent to own b_go 1763345153Sdim#define KMP_BARRIER_SWITCHING \ 1764345153Sdim 4 // Special state; worker resets appropriate flag on wake-up 1765345153Sdim 1766345153Sdim#define KMP_NOT_SAFE_TO_REAP \ 1767345153Sdim 0 // Thread th_reap_state: not safe to reap (tasking) 1768345153Sdim#define 
KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking) 1769345153Sdim 1770345153Sdimenum barrier_type { 1771345153Sdim bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction 1772345153Sdim barriers if enabled) */ 1773345153Sdim bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */ 1774345153Sdim#if KMP_FAST_REDUCTION_BARRIER 1775345153Sdim bs_reduction_barrier, /* 2, All barriers that are used in reduction */ 1776345153Sdim#endif // KMP_FAST_REDUCTION_BARRIER 1777345153Sdim bs_last_barrier /* Just a placeholder to mark the end */ 1778345153Sdim}; 1779345153Sdim 1780345153Sdim// to work with reduction barriers just like with plain barriers 1781345153Sdim#if !KMP_FAST_REDUCTION_BARRIER 1782345153Sdim#define bs_reduction_barrier bs_plain_barrier 1783345153Sdim#endif // KMP_FAST_REDUCTION_BARRIER 1784345153Sdim 1785345153Sdimtypedef enum kmp_bar_pat { /* Barrier communication patterns */ 1786345153Sdim bp_linear_bar = 1787345153Sdim 0, /* Single level (degenerate) tree */ 1788345153Sdim bp_tree_bar = 1789345153Sdim 1, /* Balanced tree with branching factor 2^n */ 1790345153Sdim bp_hyper_bar = 1791345153Sdim 2, /* Hypercube-embedded tree with min branching 1792345153Sdim factor 2^n */ 1793345153Sdim bp_hierarchical_bar = 3, /* Machine hierarchy tree */ 1794345153Sdim bp_last_bar /* Placeholder to mark the end */ 1795345153Sdim} kmp_bar_pat_e; 1796345153Sdim 1797345153Sdim#define KMP_BARRIER_ICV_PUSH 1 1798345153Sdim 1799345153Sdim/* Record for holding the values of the internal controls stack records */ 1800345153Sdimtypedef struct kmp_internal_control { 1801345153Sdim int serial_nesting_level; /* corresponds to the value of the 1802345153Sdim th_team_serialized field */ 1803345153Sdim kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per 1804345153Sdim thread) */ 1805345153Sdim kmp_int8 1806345153Sdim bt_set; /* internal control for whether blocktime is explicitly set */ 1807345153Sdim int 
blocktime; /* internal control for blocktime */ 1808345153Sdim#if KMP_USE_MONITOR 1809345153Sdim int bt_intervals; /* internal control for blocktime intervals */ 1810345153Sdim#endif 1811345153Sdim int nproc; /* internal control for #threads for next parallel region (per 1812345153Sdim thread) */ 1813353358Sdim int thread_limit; /* internal control for thread-limit-var */ 1814345153Sdim int max_active_levels; /* internal control for max_active_levels */ 1815345153Sdim kmp_r_sched_t 1816345153Sdim sched; /* internal control for runtime schedule {sched,chunk} pair */ 1817345153Sdim kmp_proc_bind_t proc_bind; /* internal control for affinity */ 1818345153Sdim kmp_int32 default_device; /* internal control for default device */ 1819345153Sdim struct kmp_internal_control *next; 1820345153Sdim} kmp_internal_control_t; 1821345153Sdim 1822345153Sdimstatic inline void copy_icvs(kmp_internal_control_t *dst, 1823345153Sdim kmp_internal_control_t *src) { 1824345153Sdim *dst = *src; 1825345153Sdim} 1826345153Sdim 1827345153Sdim/* Thread barrier needs volatile barrier fields */ 1828345153Sdimtypedef struct KMP_ALIGN_CACHE kmp_bstate { 1829345153Sdim // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all 1830345153Sdim // uses of it). It is not explicitly aligned below, because we *don't* want 1831345153Sdim // it to be padded -- instead, we fit b_go into the same cache line with 1832345153Sdim // th_fixed_icvs, enabling NGO cache lines stores in the hierarchical barrier. 1833345153Sdim kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread 1834345153Sdim // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with 1835345153Sdim // same NGO store 1836345153Sdim volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical) 1837345153Sdim KMP_ALIGN_CACHE volatile kmp_uint64 1838345153Sdim b_arrived; // STATE => task reached synch point. 
1839345153Sdim kmp_uint32 *skip_per_level; 1840345153Sdim kmp_uint32 my_level; 1841345153Sdim kmp_int32 parent_tid; 1842345153Sdim kmp_int32 old_tid; 1843345153Sdim kmp_uint32 depth; 1844345153Sdim struct kmp_bstate *parent_bar; 1845345153Sdim kmp_team_t *team; 1846345153Sdim kmp_uint64 leaf_state; 1847345153Sdim kmp_uint32 nproc; 1848345153Sdim kmp_uint8 base_leaf_kids; 1849345153Sdim kmp_uint8 leaf_kids; 1850345153Sdim kmp_uint8 offset; 1851345153Sdim kmp_uint8 wait_flag; 1852345153Sdim kmp_uint8 use_oncore_barrier; 1853345153Sdim#if USE_DEBUGGER 1854345153Sdim // The following field is intended for the debugger solely. Only the worker 1855345153Sdim // thread itself accesses this field: the worker increases it by 1 when it 1856345153Sdim // arrives to a barrier. 1857345153Sdim KMP_ALIGN_CACHE kmp_uint b_worker_arrived; 1858345153Sdim#endif /* USE_DEBUGGER */ 1859345153Sdim} kmp_bstate_t; 1860345153Sdim 1861345153Sdimunion KMP_ALIGN_CACHE kmp_barrier_union { 1862345153Sdim double b_align; /* use worst case alignment */ 1863345153Sdim char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)]; 1864345153Sdim kmp_bstate_t bb; 1865345153Sdim}; 1866345153Sdim 1867345153Sdimtypedef union kmp_barrier_union kmp_balign_t; 1868345153Sdim 1869345153Sdim/* Team barrier needs only non-volatile arrived counter */ 1870345153Sdimunion KMP_ALIGN_CACHE kmp_barrier_team_union { 1871345153Sdim double b_align; /* use worst case alignment */ 1872345153Sdim char b_pad[CACHE_LINE]; 1873345153Sdim struct { 1874345153Sdim kmp_uint64 b_arrived; /* STATE => task reached synch point. */ 1875345153Sdim#if USE_DEBUGGER 1876345153Sdim // The following two fields are indended for the debugger solely. Only 1877345153Sdim // master of the team accesses these fields: the first one is increased by 1878345153Sdim // 1 when master arrives to a barrier, the second one is increased by one 1879345153Sdim // when all the threads arrived. 
1880345153Sdim kmp_uint b_master_arrived; 1881345153Sdim kmp_uint b_team_arrived; 1882345153Sdim#endif 1883345153Sdim }; 1884345153Sdim}; 1885345153Sdim 1886345153Sdimtypedef union kmp_barrier_team_union kmp_balign_team_t; 1887345153Sdim 1888345153Sdim/* Padding for Linux* OS pthreads condition variables and mutexes used to signal 1889345153Sdim threads when a condition changes. This is to workaround an NPTL bug where 1890345153Sdim padding was added to pthread_cond_t which caused the initialization routine 1891345153Sdim to write outside of the structure if compiled on pre-NPTL threads. */ 1892345153Sdim#if KMP_OS_WINDOWS 1893345153Sdimtypedef struct kmp_win32_mutex { 1894345153Sdim /* The Lock */ 1895345153Sdim CRITICAL_SECTION cs; 1896345153Sdim} kmp_win32_mutex_t; 1897345153Sdim 1898345153Sdimtypedef struct kmp_win32_cond { 1899345153Sdim /* Count of the number of waiters. */ 1900345153Sdim int waiters_count_; 1901345153Sdim 1902345153Sdim /* Serialize access to <waiters_count_> */ 1903345153Sdim kmp_win32_mutex_t waiters_count_lock_; 1904345153Sdim 1905345153Sdim /* Number of threads to release via a <cond_broadcast> or a <cond_signal> */ 1906345153Sdim int release_count_; 1907345153Sdim 1908345153Sdim /* Keeps track of the current "generation" so that we don't allow */ 1909345153Sdim /* one thread to steal all the "releases" from the broadcast. */ 1910345153Sdim int wait_generation_count_; 1911345153Sdim 1912345153Sdim /* A manual-reset event that's used to block and release waiting threads. 
*/ 1913345153Sdim HANDLE event_; 1914345153Sdim} kmp_win32_cond_t; 1915345153Sdim#endif 1916345153Sdim 1917345153Sdim#if KMP_OS_UNIX 1918345153Sdim 1919345153Sdimunion KMP_ALIGN_CACHE kmp_cond_union { 1920345153Sdim double c_align; 1921345153Sdim char c_pad[CACHE_LINE]; 1922345153Sdim pthread_cond_t c_cond; 1923345153Sdim}; 1924345153Sdim 1925345153Sdimtypedef union kmp_cond_union kmp_cond_align_t; 1926345153Sdim 1927345153Sdimunion KMP_ALIGN_CACHE kmp_mutex_union { 1928345153Sdim double m_align; 1929345153Sdim char m_pad[CACHE_LINE]; 1930345153Sdim pthread_mutex_t m_mutex; 1931345153Sdim}; 1932345153Sdim 1933345153Sdimtypedef union kmp_mutex_union kmp_mutex_align_t; 1934345153Sdim 1935345153Sdim#endif /* KMP_OS_UNIX */ 1936345153Sdim 1937345153Sdimtypedef struct kmp_desc_base { 1938345153Sdim void *ds_stackbase; 1939345153Sdim size_t ds_stacksize; 1940345153Sdim int ds_stackgrow; 1941345153Sdim kmp_thread_t ds_thread; 1942345153Sdim volatile int ds_tid; 1943345153Sdim int ds_gtid; 1944345153Sdim#if KMP_OS_WINDOWS 1945345153Sdim volatile int ds_alive; 1946345153Sdim DWORD ds_thread_id; 1947345153Sdim/* ds_thread keeps thread handle on Windows* OS. It is enough for RTL purposes. 1948345153Sdim However, debugger support (libomp_db) cannot work with handles, because they 1949345153Sdim uncomparable. For example, debugger requests info about thread with handle h. 1950345153Sdim h is valid within debugger process, and meaningless within debugee process. 1951345153Sdim Even if h is duped by call to DuplicateHandle(), so the result h' is valid 1952345153Sdim within debugee process, but it is a *new* handle which does *not* equal to 1953345153Sdim any other handle in debugee... The only way to compare handles is convert 1954345153Sdim them to system-wide ids. GetThreadId() function is available only in 1955345153Sdim Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is available 1956345153Sdim on all Windows* OS flavours (including Windows* 95). 
Thus, we have to get 1957345153Sdim thread id by call to GetCurrentThreadId() from within the thread and save it 1958345153Sdim to let libomp_db identify threads. */ 1959345153Sdim#endif /* KMP_OS_WINDOWS */ 1960345153Sdim} kmp_desc_base_t; 1961345153Sdim 1962345153Sdimtypedef union KMP_ALIGN_CACHE kmp_desc { 1963345153Sdim double ds_align; /* use worst case alignment */ 1964345153Sdim char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)]; 1965345153Sdim kmp_desc_base_t ds; 1966345153Sdim} kmp_desc_t; 1967345153Sdim 1968345153Sdimtypedef struct kmp_local { 1969345153Sdim volatile int this_construct; /* count of single's encountered by thread */ 1970345153Sdim void *reduce_data; 1971345153Sdim#if KMP_USE_BGET 1972345153Sdim void *bget_data; 1973345153Sdim void *bget_list; 1974345153Sdim#if !USE_CMP_XCHG_FOR_BGET 1975345153Sdim#ifdef USE_QUEUING_LOCK_FOR_BGET 1976345153Sdim kmp_lock_t bget_lock; /* Lock for accessing bget free list */ 1977345153Sdim#else 1978345153Sdim kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be 1979345153Sdim// bootstrap lock so we can use it at library 1980345153Sdim// shutdown. 1981345153Sdim#endif /* USE_LOCK_FOR_BGET */ 1982345153Sdim#endif /* ! 
USE_CMP_XCHG_FOR_BGET */ 1983345153Sdim#endif /* KMP_USE_BGET */ 1984345153Sdim 1985345153Sdim PACKED_REDUCTION_METHOD_T 1986345153Sdim packed_reduction_method; /* stored by __kmpc_reduce*(), used by 1987345153Sdim __kmpc_end_reduce*() */ 1988345153Sdim 1989345153Sdim} kmp_local_t; 1990345153Sdim 1991345153Sdim#define KMP_CHECK_UPDATE(a, b) \ 1992345153Sdim if ((a) != (b)) \ 1993345153Sdim (a) = (b) 1994345153Sdim#define KMP_CHECK_UPDATE_SYNC(a, b) \ 1995345153Sdim if ((a) != (b)) \ 1996345153Sdim TCW_SYNC_PTR((a), (b)) 1997345153Sdim 1998345153Sdim#define get__blocktime(xteam, xtid) \ 1999345153Sdim ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) 2000345153Sdim#define get__bt_set(xteam, xtid) \ 2001345153Sdim ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) 2002345153Sdim#if KMP_USE_MONITOR 2003345153Sdim#define get__bt_intervals(xteam, xtid) \ 2004345153Sdim ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) 2005345153Sdim#endif 2006345153Sdim 2007345153Sdim#define get__dynamic_2(xteam, xtid) \ 2008345153Sdim ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic) 2009345153Sdim#define get__nproc_2(xteam, xtid) \ 2010345153Sdim ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc) 2011345153Sdim#define get__sched_2(xteam, xtid) \ 2012345153Sdim ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched) 2013345153Sdim 2014345153Sdim#define set__blocktime_team(xteam, xtid, xval) \ 2015345153Sdim (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) = \ 2016345153Sdim (xval)) 2017345153Sdim 2018345153Sdim#if KMP_USE_MONITOR 2019345153Sdim#define set__bt_intervals_team(xteam, xtid, xval) \ 2020345153Sdim (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) = \ 2021345153Sdim (xval)) 2022345153Sdim#endif 2023345153Sdim 2024345153Sdim#define set__bt_set_team(xteam, xtid, xval) \ 2025345153Sdim 
(((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval)) 2026345153Sdim 2027345153Sdim#define set__dynamic(xthread, xval) \ 2028345153Sdim (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval)) 2029345153Sdim#define get__dynamic(xthread) \ 2030345153Sdim (((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE) : (FTN_FALSE)) 2031345153Sdim 2032345153Sdim#define set__nproc(xthread, xval) \ 2033345153Sdim (((xthread)->th.th_current_task->td_icvs.nproc) = (xval)) 2034345153Sdim 2035353358Sdim#define set__thread_limit(xthread, xval) \ 2036353358Sdim (((xthread)->th.th_current_task->td_icvs.thread_limit) = (xval)) 2037353358Sdim 2038345153Sdim#define set__max_active_levels(xthread, xval) \ 2039345153Sdim (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval)) 2040345153Sdim 2041353358Sdim#define get__max_active_levels(xthread) \ 2042353358Sdim ((xthread)->th.th_current_task->td_icvs.max_active_levels) 2043353358Sdim 2044345153Sdim#define set__sched(xthread, xval) \ 2045345153Sdim (((xthread)->th.th_current_task->td_icvs.sched) = (xval)) 2046345153Sdim 2047345153Sdim#define set__proc_bind(xthread, xval) \ 2048345153Sdim (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval)) 2049345153Sdim#define get__proc_bind(xthread) \ 2050345153Sdim ((xthread)->th.th_current_task->td_icvs.proc_bind) 2051345153Sdim 2052345153Sdim// OpenMP tasking data structures 2053345153Sdim 2054345153Sdimtypedef enum kmp_tasking_mode { 2055345153Sdim tskm_immediate_exec = 0, 2056345153Sdim tskm_extra_barrier = 1, 2057345153Sdim tskm_task_teams = 2, 2058345153Sdim tskm_max = 2 2059345153Sdim} kmp_tasking_mode_t; 2060345153Sdim 2061345153Sdimextern kmp_tasking_mode_t 2062345153Sdim __kmp_tasking_mode; /* determines how/when to execute tasks */ 2063345153Sdimextern int __kmp_task_stealing_constraint; 2064353358Sdimextern int __kmp_enable_task_throttling; 2065345153Sdimextern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if 
2066345153Sdim// specified, defaults to 0 otherwise 2067345153Sdim// Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise 2068345153Sdimextern kmp_int32 __kmp_max_task_priority; 2069345153Sdim// Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise 2070345153Sdimextern kmp_uint64 __kmp_taskloop_min_tasks; 2071345153Sdim 2072345153Sdim/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with 2073345153Sdim taskdata first */ 2074345153Sdim#define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *)task) - 1) 2075345153Sdim#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *)(taskdata + 1) 2076345153Sdim 2077345153Sdim// The tt_found_tasks flag is a signal to all threads in the team that tasks 2078345153Sdim// were spawned and queued since the previous barrier release. 2079345153Sdim#define KMP_TASKING_ENABLED(task_team) \ 2080345153Sdim (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE) 2081345153Sdim/*! 2082345153Sdim@ingroup BASIC_TYPES 2083345153Sdim@{ 2084345153Sdim*/ 2085345153Sdim 2086345153Sdim/*! 2087345153Sdim */ 2088345153Sdimtypedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *); 2089345153Sdim 2090345153Sdimtypedef union kmp_cmplrdata { 2091345153Sdim kmp_int32 priority; /**< priority specified by user for the task */ 2092345153Sdim kmp_routine_entry_t 2093345153Sdim destructors; /* pointer to function to invoke deconstructors of 2094345153Sdim firstprivate C++ objects */ 2095345153Sdim /* future data */ 2096345153Sdim} kmp_cmplrdata_t; 2097345153Sdim 2098345153Sdim/* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */ 2099345153Sdim/*! 2100345153Sdim */ 2101345153Sdimtypedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? 
*/ 2102345153Sdim void *shareds; /**< pointer to block of pointers to shared vars */ 2103345153Sdim kmp_routine_entry_t 2104345153Sdim routine; /**< pointer to routine to call for executing task */ 2105345153Sdim kmp_int32 part_id; /**< part id for the task */ 2106345153Sdim kmp_cmplrdata_t 2107345153Sdim data1; /* Two known optional additions: destructors and priority */ 2108345153Sdim kmp_cmplrdata_t data2; /* Process destructors first, priority second */ 2109353358Sdim /* future data */ 2110345153Sdim /* private vars */ 2111345153Sdim} kmp_task_t; 2112345153Sdim 2113345153Sdim/*! 2114345153Sdim@} 2115345153Sdim*/ 2116345153Sdim 2117345153Sdimtypedef struct kmp_taskgroup { 2118345153Sdim std::atomic<kmp_int32> count; // number of allocated and incomplete tasks 2119345153Sdim std::atomic<kmp_int32> 2120345153Sdim cancel_request; // request for cancellation of this taskgroup 2121345153Sdim struct kmp_taskgroup *parent; // parent taskgroup 2122345153Sdim // Block of data to perform task reduction 2123345153Sdim void *reduce_data; // reduction related info 2124345153Sdim kmp_int32 reduce_num_data; // number of data items to reduce 2125345153Sdim} kmp_taskgroup_t; 2126345153Sdim 2127345153Sdim// forward declarations 2128345153Sdimtypedef union kmp_depnode kmp_depnode_t; 2129345153Sdimtypedef struct kmp_depnode_list kmp_depnode_list_t; 2130345153Sdimtypedef struct kmp_dephash_entry kmp_dephash_entry_t; 2131345153Sdim 2132345153Sdim// Compiler sends us this info: 2133345153Sdimtypedef struct kmp_depend_info { 2134345153Sdim kmp_intptr_t base_addr; 2135345153Sdim size_t len; 2136345153Sdim struct { 2137345153Sdim bool in : 1; 2138345153Sdim bool out : 1; 2139345153Sdim bool mtx : 1; 2140345153Sdim } flags; 2141345153Sdim} kmp_depend_info_t; 2142345153Sdim 2143345153Sdim// Internal structures to work with task dependencies: 2144345153Sdimstruct kmp_depnode_list { 2145345153Sdim kmp_depnode_t *node; 2146345153Sdim kmp_depnode_list_t *next; 2147345153Sdim}; 2148345153Sdim 
// Max number of mutexinoutset dependencies per node
#define MAX_MTX_DEPS 4

// One node in the task dependence graph; wrapped in the cache-aligned
// kmp_depnode union below.
typedef struct kmp_base_depnode {
  kmp_depnode_list_t *successors; /* used under lock */
  kmp_task_t *task; /* non-NULL if depnode is active, used under lock */
  kmp_lock_t *mtx_locks[MAX_MTX_DEPS]; /* lock mutexinoutset dependent tasks */
  kmp_int32 mtx_num_locks; /* number of locks in mtx_locks array */
  kmp_lock_t lock; /* guards shared fields: task, successors */
#if KMP_SUPPORT_GRAPH_OUTPUT
  kmp_uint32 id;
#endif
  std::atomic<kmp_int32> npredecessors;
  std::atomic<kmp_int32> nrefs; // reference count for the node itself
} kmp_base_depnode_t;

union KMP_ALIGN_CACHE kmp_depnode {
  double dn_align; /* use worst case alignment */
  char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)];
  kmp_base_depnode_t dn;
};

// Hash-bucket entry mapping a dependence address to the last tasks that
// touched it (last writer, last readers, last mutexinoutset tasks).
struct kmp_dephash_entry {
  kmp_intptr_t addr;
  kmp_depnode_t *last_out;
  kmp_depnode_list_t *last_ins;
  kmp_depnode_list_t *last_mtxs;
  kmp_int32 last_flag;
  kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
  kmp_dephash_entry_t *next_in_bucket;
};

// Per-task hash table tracking dependencies of this task's children.
typedef struct kmp_dephash {
  kmp_dephash_entry_t **buckets;
  size_t size;
  size_t generation;
  kmp_uint32 nelements;
  kmp_uint32 nconflicts;
} kmp_dephash_t;

// Record passed in for task affinity hints (address range plus flag bits).
typedef struct kmp_task_affinity_info {
  kmp_intptr_t base_addr;
  size_t len;
  struct {
    bool flag1 : 1;
    bool flag2 : 1;
    kmp_int32 reserved : 30;
  } flags;
} kmp_task_affinity_info_t;

// Event used for OpenMP detachable tasks (task completion events).
typedef enum kmp_event_type_t {
  KMP_EVENT_UNINITIALIZED = 0,
  KMP_EVENT_ALLOW_COMPLETION = 1
} kmp_event_type_t;

typedef struct {
  kmp_event_type_t type;
  kmp_tas_lock_t lock; // guards the event state
  union {
    kmp_task_t *task; // task whose completion this event allows
  } ed;
} kmp_event_t;

#ifdef BUILD_TIED_TASK_STACK

/* Tied Task stack definitions */
// Linked blocks of taskdata pointers forming a growable stack.
typedef struct kmp_stack_block {
  kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE];
  struct kmp_stack_block *sb_next;
  struct kmp_stack_block *sb_prev;
} kmp_stack_block_t;

typedef struct kmp_task_stack {
  kmp_stack_block_t ts_first_block; // first block of stack entries
  kmp_taskdata_t **ts_top; // pointer to the top of stack
  kmp_int32 ts_entries; // number of entries on the stack
} kmp_task_stack_t;

#endif // BUILD_TIED_TASK_STACK

typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
  /* Compiler flags */ /* Total compiler flags must be 16 bits */
  unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
  unsigned final : 1; /* task is final(1) so execute immediately */
  unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
                              code path */
  unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
                                     invoke destructors from the runtime */
  unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
                         context of the RTL) */
  unsigned priority_specified : 1; /* set if the compiler provides priority
                                      setting for the task */
  unsigned detachable : 1; /* 1 == can detach */
  unsigned reserved : 9; /* reserved for compiler use */

  /* Library flags */ /* Total library flags must be 16 bits */
  unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
  unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
  unsigned tasking_ser : 1; // all tasks in team are either executed immediately
  // (1) or may be deferred (0)
  unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
  // (0) [>= 2 threads]
  /* If either team_serial or tasking_ser is set, task team may be NULL */
  /* Task State Flags: */
  unsigned started : 1; /* 1==started, 0==not started */
  unsigned executing : 1; /* 1==executing, 0==not executing */
  unsigned complete : 1; /* 1==complete, 0==not complete */
  unsigned freed : 1; /* 1==freed, 0==allocated */
  unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
  unsigned reserved31 : 7; /* reserved for library use */

} kmp_tasking_flags_t;

// Runtime-internal half of a task; lives immediately before the kmp_task_t in
// the same allocation (see KMP_TASK_TO_TASKDATA).
struct kmp_taskdata { /* aligned during dynamic allocation */
  kmp_int32 td_task_id; /* id, assigned by debugger */
  kmp_tasking_flags_t td_flags; /* task flags */
  kmp_team_t *td_team; /* team for this task */
  kmp_info_p *td_alloc_thread; /* thread that allocated data structures */
  /* Currently not used except for perhaps IDB */
  kmp_taskdata_t *td_parent; /* parent task */
  kmp_int32 td_level; /* task nesting level */
  std::atomic<kmp_int32> td_untied_count; // untied task active parts counter
  ident_t *td_ident; /* task identifier */
  // Taskwait data.
  ident_t *td_taskwait_ident;
  kmp_uint32 td_taskwait_counter;
  kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
  KMP_ALIGN_CACHE kmp_internal_control_t
      td_icvs; /* Internal control variables for the task */
  KMP_ALIGN_CACHE std::atomic<kmp_int32>
      td_allocated_child_tasks; /* Child tasks (+ current task) not yet
                                   deallocated */
  std::atomic<kmp_int32>
      td_incomplete_child_tasks; /* Child tasks not yet complete */
  kmp_taskgroup_t
      *td_taskgroup; // Each task keeps pointer to its current taskgroup
  kmp_dephash_t
      *td_dephash; // Dependencies for children tasks are tracked from here
  kmp_depnode_t
      *td_depnode; // Pointer to graph node if this task has dependencies
  kmp_task_team_t *td_task_team;
  kmp_int32 td_size_alloc; // The size of task structure, including shareds etc.
#if defined(KMP_GOMP_COMPAT)
  // 4 or 8 byte integers for the loop bounds in GOMP_taskloop
  kmp_int32 td_size_loop_bounds;
#endif
  kmp_taskdata_t *td_last_tied; // keep tied task for task scheduling constraint
#if defined(KMP_GOMP_COMPAT)
  // GOMP sends in a copy function for copy constructors
  void (*td_copy_func)(void *, void *);
#endif
  kmp_event_t td_allow_completion_event; // completion event for detachable task
#if OMPT_SUPPORT
  ompt_task_info_t ompt_task_info;
#endif
}; // struct kmp_taskdata

// Make sure padding above worked
KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0);

// Data for task team but per thread
typedef struct kmp_base_thread_data {
  kmp_info_p *td_thr; // Pointer back to thread info
  // Used only in __kmp_execute_tasks_template, maybe not avail until task is
  // queued?
  kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
  kmp_taskdata_t *
      *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
  kmp_int32 td_deque_size; // Size of deque
  kmp_uint32 td_deque_head; // Head of deque (will wrap)
  kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
  kmp_int32 td_deque_ntasks; // Number of tasks in deque
  // GEH: shouldn't this be volatile since used in while-spin?
  kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
#ifdef BUILD_TIED_TASK_STACK
  kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task
// scheduling constraint
#endif // BUILD_TIED_TASK_STACK
} kmp_base_thread_data_t;

#define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
#define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS)

#define TASK_DEQUE_SIZE(td) ((td).td_deque_size)
#define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)

typedef union KMP_ALIGN_CACHE kmp_thread_data {
  kmp_base_thread_data_t td;
  double td_align; /* use worst case alignment */
  char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)];
} kmp_thread_data_t;

// Data for task teams which are used when tasking is enabled for the team
typedef struct kmp_base_task_team {
  kmp_bootstrap_lock_t
      tt_threads_lock; /* Lock used to allocate per-thread part of task team */
  /* must be bootstrap lock since used at library shutdown */
  kmp_task_team_t *tt_next; /* For linking the task team free list */
  kmp_thread_data_t
      *tt_threads_data; /* Array of per-thread structures for task team */
  /* Data survives task team deallocation */
  kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while
                               executing this team? */
  /* TRUE means tt_threads_data is set up and initialized */
  kmp_int32 tt_nproc; /* #threads in team */
  kmp_int32 tt_max_threads; // # entries allocated for threads_data array
  kmp_int32 tt_found_proxy_tasks; // found proxy tasks since last barrier
  kmp_int32 tt_untied_task_encountered;

  KMP_ALIGN_CACHE
  std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */

  KMP_ALIGN_CACHE
  volatile kmp_uint32
      tt_active; /* is the team still actively executing tasks */
} kmp_base_task_team_t;

union KMP_ALIGN_CACHE kmp_task_team {
  kmp_base_task_team_t tt;
  double tt_align; /* use worst case alignment */
  char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];
};

#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
// Free lists keep same-size free memory slots for fast memory allocation
// routines
typedef struct kmp_free_list {
  void *th_free_list_self; // Self-allocated tasks free list
  void *th_free_list_sync; // Self-allocated tasks stolen/returned by other
  // threads
  void *th_free_list_other; // Non-self free list (to be returned to owner's
  // sync list)
} kmp_free_list_t;
#endif
#if KMP_NESTED_HOT_TEAMS
// Hot teams array keeps hot teams and their sizes for given thread. Hot teams
// are not put in teams pool, and they don't put threads in threads pool.
typedef struct kmp_hot_team_ptr {
  kmp_team_p *hot_team; // pointer to hot_team of given nesting level
  kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
} kmp_hot_team_ptr_t;
#endif
typedef struct kmp_teams_size {
  kmp_int32 nteams; // number of teams in a league
  kmp_int32 nth; // number of threads in each team of the league
} kmp_teams_size_t;

// This struct stores a thread that acts as a "root" for a contention
// group. Contention groups are rooted at kmp_root threads, but also at
// each master thread of each team created in the teams construct.
// This struct therefore also stores a thread_limit associated with
// that contention group, and a counter to track the number of threads
// active in that contention group. Each thread has a list of these: CG
// root threads have an entry in their list in which cg_root refers to
// the thread itself, whereas other workers in the CG will have a
// single entry where cg_root is same as the entry containing their CG
// root. When a thread encounters a teams construct, it will add a new
// entry to the front of its list, because it now roots a new CG.
typedef struct kmp_cg_root {
  kmp_info_p *cg_root; // "root" thread for a contention group
  // The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or
  // thread_limit clause for teams masters
  kmp_int32 cg_thread_limit;
  kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root
  struct kmp_cg_root *up; // pointer to higher level CG root in list
} kmp_cg_root_t;

// OpenMP thread data structures

// Per-thread descriptor; wrapped in the cache-padded kmp_info union below.
typedef struct KMP_ALIGN_CACHE kmp_base_info {
  /* Start with the readonly data which is cache aligned and padded. This is
     written before the thread starts working by the master. Uber masters may
     update themselves later. Usage does not consider serialized regions. */
  kmp_desc_t th_info;
  kmp_team_p *th_team; /* team we belong to */
  kmp_root_p *th_root; /* pointer to root of task hierarchy */
  kmp_info_p *th_next_pool; /* next available thread in the pool */
  kmp_disp_t *th_dispatch; /* thread's dispatch data */
  int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */

  /* The following are cached from the team info structure */
  /* TODO use these in more places as determined to be needed via profiling */
  int th_team_nproc; /* number of threads in a team */
  kmp_info_p *th_team_master; /* the team's master thread */
  int th_team_serialized; /* team is serialized */
  microtask_t th_teams_microtask; /* save entry address for teams construct */
  int th_teams_level; /* save initial level of teams construct */
/* it is 0 on device but may be any on host */

/* The blocktime info is copied from the team struct to the thread struct */
/* at the start of a barrier, and the values stored in the team are used */
/* at points in the code where the team struct is no longer guaranteed */
/* to exist (from the POV of worker threads). */
#if KMP_USE_MONITOR
  int th_team_bt_intervals;
  int th_team_bt_set;
#else
  kmp_uint64 th_team_bt_intervals;
#endif

#if KMP_AFFINITY_SUPPORTED
  kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
#endif
  omp_allocator_handle_t th_def_allocator; /* default allocator */
  /* The data set by the master at reinit, then R/W by the worker */
  KMP_ALIGN_CACHE int
      th_set_nproc; /* if > 0, then only use this request for the next fork */
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
#endif
  kmp_proc_bind_t
      th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
  kmp_teams_size_t
      th_teams_size; /* number of teams/threads in teams construct */
#if KMP_AFFINITY_SUPPORTED
  int th_current_place; /* place currently bound to */
  int th_new_place; /* place to bind to in par reg */
  int th_first_place; /* first place in partition */
  int th_last_place; /* last place in partition */
#endif
  int th_prev_level; /* previous level for affinity format */
  int th_prev_num_threads; /* previous num_threads for affinity format */
#if USE_ITT_BUILD
  kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
  kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
  kmp_uint64 th_frame_time; /* frame timestamp */
#endif /* USE_ITT_BUILD */
  kmp_local_t th_local;
  struct private_common *th_pri_head;

  /* Now the data only used by the worker (after initial allocation) */
  /* TODO the first serial team should actually be stored in the info_t
     structure. this will help reduce initial allocation overhead */
  KMP_ALIGN_CACHE kmp_team_p
      *th_serial_team; /* serialized team held in reserve */

#if OMPT_SUPPORT
  ompt_thread_info_t ompt_thread_info;
#endif

  /* The following are also read by the master during reinit */
  struct common_table *th_pri_common;

  volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
  /* while awaiting queuing lock acquire */

  volatile void *th_sleep_loc; // this points at a kmp_flag<T>

  ident_t *th_ident;
  unsigned th_x; // Random number generator data
  unsigned th_a; // Random number generator data

  /* Tasking-related data for the thread */
  kmp_task_team_t *th_task_team; // Task team struct
  kmp_taskdata_t *th_current_task; // Innermost Task being executed
  kmp_uint8 th_task_state; // alternating 0/1 for task team identification
  kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
  // at nested levels
  kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
  kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
  kmp_uint32 th_reap_state; // Non-zero indicates thread is not
  // tasking, thus safe to reap

  /* More stuff for keeping track of active/sleeping threads (this part is
     written by the worker thread) */
  kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
  int th_active; // ! sleeping; 32 bits for TCR/TCW
  struct cons_header *th_cons; // used for consistency check
#if KMP_USE_HIER_SCHED
  // used for hierarchical scheduling
  kmp_hier_private_bdata_t *th_hier_bar_data;
#endif

  /* Add the synchronizing data which is cache aligned and padded. */
  KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier];

  KMP_ALIGN_CACHE volatile kmp_int32
      th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */

#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
#define NUM_LISTS 4
  kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory
// allocation routines
#endif

#if KMP_OS_WINDOWS
  kmp_win32_cond_t th_suspend_cv;
  kmp_win32_mutex_t th_suspend_mx;
  std::atomic<int> th_suspend_init;
#endif
#if KMP_OS_UNIX
  kmp_cond_align_t th_suspend_cv;
  kmp_mutex_align_t th_suspend_mx;
  std::atomic<int> th_suspend_init_count;
#endif

#if USE_ITT_BUILD
  kmp_itt_mark_t th_itt_mark_single;
// alignment ???
#endif /* USE_ITT_BUILD */
#if KMP_STATS_ENABLED
  kmp_stats_list *th_stats;
#endif
#if KMP_OS_UNIX
  std::atomic<bool> th_blocking;
#endif
  kmp_cg_root_t *th_cg_roots; // list of cg_roots associated with this thread
} kmp_base_info_t;

typedef union KMP_ALIGN_CACHE kmp_info {
  double th_align; /* use worst case alignment */
  char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)];
  kmp_base_info_t th;
} kmp_info_t;

// OpenMP thread team data structures

typedef struct kmp_base_data { volatile kmp_uint32 t_value; } kmp_base_data_t;

typedef union KMP_ALIGN_CACHE kmp_sleep_team {
  double dt_align; /* use worst case alignment */
  char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
  kmp_base_data_t dt;
} kmp_sleep_team_t;

typedef union KMP_ALIGN_CACHE kmp_ordered_team {
  double dt_align; /* use worst case alignment */
  char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
  kmp_base_data_t dt;
} kmp_ordered_team_t;

typedef int (*launch_t)(int gtid);

/* Minimum number of ARGV entries to malloc if necessary */
#define KMP_MIN_MALLOC_ARGV_ENTRIES 100

// Set up how many argv pointers will fit in cache lines containing
// t_inline_argv. Historically, we have supported at least 96 bytes. Using a
// larger value for more space between the master write/worker read section and
// read/write by all section seems to buy more performance on EPCC PARALLEL.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_INLINE_ARGV_BYTES                                                  \
  (4 * CACHE_LINE -                                                            \
   ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) +               \
     sizeof(kmp_int16) + sizeof(kmp_uint32)) %                                 \
    CACHE_LINE))
#else
#define KMP_INLINE_ARGV_BYTES                                                  \
  (2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE))
#endif
#define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP)

// Per-team descriptor; wrapped in the cache-padded kmp_team union below.
typedef struct KMP_ALIGN_CACHE kmp_base_team {
  // Synchronization Data
  // ---------------------------------------------------------------------------
  KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
  kmp_balign_team_t t_bar[bs_last_barrier];
  std::atomic<int> t_construct; // count of single directive encountered by team
  char pad[sizeof(kmp_lock_t)]; // padding to maintain performance on big iron

  // [0] - parallel / [1] - worksharing task reduction data shared by taskgroups
  std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier
  std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions

  // Master only
  // ---------------------------------------------------------------------------
  KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
  int t_master_this_cons; // "this_construct" single counter of master in parent
  // team
  ident_t *t_ident; // if volatile, have to change too much other crud to
  // volatile too
  kmp_team_p *t_parent; // parent team
  kmp_team_p *t_next_pool; // next free team in the team pool
  kmp_disp_t *t_dispatch; // thread's dispatch data
  kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
  kmp_proc_bind_t t_proc_bind; // bind type for par region
#if USE_ITT_BUILD
  kmp_uint64 t_region_time; // region begin timestamp
#endif /* USE_ITT_BUILD */

  // Master write, workers read
  // --------------------------------------------------------------------------
  KMP_ALIGN_CACHE void **t_argv;
  int t_argc;
  int t_nproc; // number of threads in team
  microtask_t t_pkfn;
  launch_t t_invoke; // procedure to launch the microtask

#if OMPT_SUPPORT
  ompt_team_info_t ompt_team_info;
  ompt_lw_taskteam_t *ompt_serialized_team_info;
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  kmp_int8 t_fp_control_saved;
  kmp_int8 t_pad2b;
  kmp_int16 t_x87_fpu_control_word; // FP control regs
  kmp_uint32 t_mxcsr;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES];

  KMP_ALIGN_CACHE kmp_info_t **t_threads;
  kmp_taskdata_t
      *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
  int t_level; // nested parallel level

  KMP_ALIGN_CACHE int t_max_argc;
  int t_max_nproc; // max threads this team can handle (dynamically expandable)
  int t_serialized; // levels deep of serialized teams
  dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
  int t_id; // team's id, assigned by debugger.
  int t_active_level; // nested active parallel level
  kmp_r_sched_t t_sched; // run-time schedule for the team
#if KMP_AFFINITY_SUPPORTED
  int t_first_place; // first & last place in parent thread's partition.
  int t_last_place; // Restore these values to master after par region.
#endif // KMP_AFFINITY_SUPPORTED
  int t_display_affinity;
  int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
  // omp_set_num_threads() call
  omp_allocator_handle_t t_def_allocator; /* default allocator */

// Read/write by workers as well
#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
  // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf
  // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra
  // padding serves to fix the performance of epcc 'parallel' and 'barrier' when
  // CACHE_LINE=64. TODO: investigate more and get rid of this padding.
  char dummy_padding[1024];
#endif
  // Internal control stack for additional nested teams.
  KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top;
  // for SERIALIZED teams nested 2 or more levels deep
  // typed flag to store request state of cancellation
  std::atomic<kmp_int32> t_cancel_request;
  int t_master_active; // save on fork, restore on join
  void *t_copypriv_data; // team specific pointer to copyprivate data array
#if KMP_OS_WINDOWS
  std::atomic<kmp_uint32> t_copyin_counter;
#endif
#if USE_ITT_BUILD
  void *t_stack_id; // team specific stack stitching id (for ittnotify)
#endif /* USE_ITT_BUILD */
} kmp_base_team_t;

union KMP_ALIGN_CACHE kmp_team {
  kmp_base_team_t t;
  double t_align; /* use worst case alignment */
  char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];
};

typedef union KMP_ALIGN_CACHE kmp_time_global {
  double dt_align; /* use worst case alignment */
  char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
  kmp_base_data_t dt;
} kmp_time_global_t;

// Library-wide global state (wrapped in cache-padded kmp_global below).
typedef struct kmp_base_global {
  /* cache-aligned */
  kmp_time_global_t g_time;

  /* non cache-aligned */
  volatile int g_abort;
  volatile int g_done;

  int g_dynamic;
  enum dynamic_mode g_dynamic_mode;
} kmp_base_global_t;

typedef union KMP_ALIGN_CACHE kmp_global {
  kmp_base_global_t g;
  double g_align; /* use worst case alignment */
  char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)];
} kmp_global_t;

// Root of a thread tree (one per initial/uber thread).
typedef struct kmp_base_root {
  // TODO: GEH - combine r_active with r_in_parallel then r_active ==
  // (r_in_parallel>= 0)
  // TODO: GEH - then replace r_active with t_active_levels if we can to reduce
  // the synch overhead or keeping r_active
  volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
  // keeps a count of active parallel regions per root
  std::atomic<int> r_in_parallel;
  // GEH: This is misnamed, should be r_active_levels
  kmp_team_t *r_root_team;
  kmp_team_t *r_hot_team;
  kmp_info_t *r_uber_thread;
  kmp_lock_t r_begin_lock;
  volatile int r_begin;
  int r_blocktime; /* blocktime for this root and descendants */
} kmp_base_root_t;

typedef union KMP_ALIGN_CACHE kmp_root {
  kmp_base_root_t r;
  double r_align; /* use worst case alignment */
  char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)];
} kmp_root_t;

struct fortran_inx_info {
  kmp_int32 data;
};

/* ------------------------------------------------------------------------ */

extern int __kmp_settings;
extern int __kmp_duplicate_library_ok;
#if USE_ITT_BUILD
extern int __kmp_forkjoin_frames;
extern int __kmp_forkjoin_frames_mode;
#endif
extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
extern int __kmp_determ_red;

#ifdef KMP_DEBUG
extern int kmp_a_debug;
extern int kmp_b_debug;
extern int kmp_c_debug;
extern int kmp_d_debug;
extern int kmp_e_debug;
extern int kmp_f_debug;
#endif /* KMP_DEBUG */
/* For debug information logging using rotating buffer */
// INIT values are the defaults; MIN values are the smallest accepted settings.
#define KMP_DEBUG_BUF_LINES_INIT 512
#define KMP_DEBUG_BUF_LINES_MIN 1

#define KMP_DEBUG_BUF_CHARS_INIT 128
#define KMP_DEBUG_BUF_CHARS_MIN 2

extern int
    __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
extern int
    __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer
                                      entry pointer */

extern char *__kmp_debug_buffer; /* Debug buffer itself */
extern std::atomic<int> __kmp_debug_count; /* Counter for number of lines
                                              printed in buffer so far */
extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase
                                          recommended in warnings */
/* end rotating debug buffer */

#ifdef KMP_DEBUG
extern int __kmp_par_range; /* +1 => only go par for constructs in range */

// Routine-name and file-name filters used together with the lb/ub line range
// below to restrict which constructs actually go parallel (debug builds only).
#define KMP_PAR_RANGE_ROUTINE_LEN 1024
extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
#define KMP_PAR_RANGE_FILENAME_LEN 1024
extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
extern int __kmp_par_range_lb;
extern int __kmp_par_range_ub;
#endif

/* For printing out dynamic storage map for threads and teams */
extern int
    __kmp_storage_map; /* True means print storage map for threads and teams */
extern int __kmp_storage_map_verbose; /* True means storage map includes
                                         placement info */
extern int __kmp_storage_map_verbose_specified;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
extern kmp_cpuinfo_t __kmp_cpuinfo;
#endif

// Library initialization status flags, one per initialization phase.
extern volatile int __kmp_init_serial;
extern volatile int __kmp_init_gtid;
extern volatile int __kmp_init_common;
extern volatile int __kmp_init_middle;
extern volatile int __kmp_init_parallel;
#if KMP_USE_MONITOR
extern volatile int __kmp_init_monitor;
#endif
extern volatile int __kmp_init_user_locks;
extern int __kmp_init_counter;
extern int __kmp_root_counter;
extern int __kmp_version;

/* list of address of allocated caches for commons */
extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;

/* Barrier algorithm types and options */
extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
extern kmp_uint32 __kmp_barrier_release_bb_dflt;
extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier];
extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier];
extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier];
extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier];
extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier];
extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier];
extern char const *__kmp_barrier_type_name[bs_last_barrier];
extern char const *__kmp_barrier_pattern_name[bp_last_bar];

/* Global Locks */
extern kmp_bootstrap_lock_t
    __kmp_initz_lock; /* control initialization */
extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
extern kmp_bootstrap_lock_t __kmp_task_team_lock;
extern kmp_bootstrap_lock_t
    __kmp_exit_lock; /* exit() is not always thread-safe */
#if KMP_USE_MONITOR
extern kmp_bootstrap_lock_t
    __kmp_monitor_lock; /* control monitor thread creation */
#endif
extern kmp_bootstrap_lock_t
    __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and
                             __kmp_threads expansion to co-exist */

extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */

extern enum library_type __kmp_library;

extern enum sched_type __kmp_sched; /* default runtime scheduling */
extern enum sched_type __kmp_static; /* default static scheduling method */
extern enum sched_type __kmp_guided; /* default guided scheduling method */
extern enum sched_type __kmp_auto; /* default auto scheduling method */
extern int __kmp_chunk; /* default runtime chunk size */

extern size_t __kmp_stksize; /* stack size per thread */
#if KMP_USE_MONITOR
extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */
#endif
extern size_t __kmp_stkoffset; /* stack offset per thread */
extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */

extern size_t
    __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
extern int
    __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
extern int __kmp_env_consistency_check; // was KMP_CONSISTENCY_CHECK specified?
extern int __kmp_generate_warnings; /* should we issue warnings? */
extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */

#ifdef DEBUG_SUSPEND
extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
#endif

extern kmp_int32 __kmp_use_yield;
extern kmp_int32 __kmp_use_yield_exp_set;
extern kmp_uint32 __kmp_yield_init;
extern kmp_uint32 __kmp_yield_next;

/* ------------------------------------------------------------------------- */
extern int __kmp_allThreadsSpecified;

extern size_t __kmp_align_alloc;
/* following data protected by initialization routines */
extern int __kmp_xproc; /* number of processors in the system */
extern int __kmp_avail_proc; /* number of processors available to the process */
extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
// maximum total number of concurrently-existing threads on device
extern int __kmp_max_nth;
// maximum total number of concurrently-existing threads in a contention group
extern int __kmp_cg_max_nth;
extern int __kmp_teams_max_nth; // max threads used in a teams construct
extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
                                      __kmp_root */
extern int
    __kmp_dflt_team_nth; /* default number of threads in a parallel
                            region a la OMP_NUM_THREADS */
extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial
                                      initialization */
extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is
                                 used (fixed) */
extern int __kmp_tp_cached; /* whether threadprivate cache has been created
                               (__kmpc_threadprivate_cached()) */
extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before
                                    blocking (env setting) */
#if KMP_USE_MONITOR
extern int
    __kmp_monitor_wakeups; /* number of times monitor wakes up per second */
extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before
                                  blocking */
#endif
#ifdef KMP_ADJUST_BLOCKTIME
extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
#endif /* KMP_ADJUST_BLOCKTIME */
#ifdef KMP_DFLT_NTH_CORES
extern int __kmp_ncores; /* Total number of cores for threads placement */
#endif
/* Number of millisecs to delay on abort for Intel(R) VTune(TM) tools */
extern int __kmp_abort_delay;

extern int __kmp_need_register_atfork_specified;
extern int
    __kmp_need_register_atfork; /* At initialization, call pthread_atfork to
                                   install fork handler */
extern int __kmp_gtid_mode; /* Method of getting gtid, values:
                               0 - not set, will be set at runtime
                               1 - using stack search
                               2 - dynamic TLS (pthread_getspecific(Linux* OS/OS
                                   X*) or TlsGetValue(Windows* OS))
                               3 - static TLS (__declspec(thread) __kmp_gtid),
                                   Linux* OS .so only.
                             */
extern int
    __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
#ifdef KMP_TDATA_GTID
extern KMP_THREAD_LOCAL int __kmp_gtid;
#endif
extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork
extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg
extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// max_active_levels for nested parallelism enabled by default via
// OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND
extern int __kmp_dflt_max_active_levels;
// Indicates whether value of __kmp_dflt_max_active_levels was already
// explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false
extern bool __kmp_dflt_max_active_levels_set;
extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in
                                          concurrent execution per team */
#if KMP_NESTED_HOT_TEAMS
extern int __kmp_hot_teams_mode;
extern int __kmp_hot_teams_max_level;
#endif

#if KMP_OS_LINUX
extern enum clock_function_type __kmp_clock_function;
extern int __kmp_clock_function_param;
#endif /* KMP_OS_LINUX */

#if KMP_MIC_SUPPORTED
extern enum mic_type __kmp_mic_type;
#endif

#ifdef USE_LOAD_BALANCE
extern double __kmp_load_balance_interval; // load balance algorithm interval
#endif /* USE_LOAD_BALANCE */

// OpenMP 3.1 - Nested num threads array
typedef struct kmp_nested_nthreads_t {
  int *nth; // array of thread counts, one entry per nesting level
  int size; // allocated capacity of nth
  int used; // number of entries in nth currently in use
} kmp_nested_nthreads_t;

extern kmp_nested_nthreads_t __kmp_nested_nth;

#if KMP_USE_ADAPTIVE_LOCKS

// Parameters for the speculative lock backoff system.
struct kmp_adaptive_backoff_params_t {
  // Number of soft retries before it counts as a hard retry.
  kmp_uint32 max_soft_retries;
  // Badness is a bit mask : 0,1,3,7,15,... on each hard failure we move one to
  // the right
  kmp_uint32 max_badness;
};

extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;

#if KMP_DEBUG_ADAPTIVE_LOCKS
extern const char *__kmp_speculative_statsfile;
#endif

#endif // KMP_USE_ADAPTIVE_LOCKS

extern int __kmp_display_env; /* TRUE or FALSE */
extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
extern int __kmp_omp_cancellation; /* TRUE or FALSE */

/* ------------------------------------------------------------------------- */

/* the following are protected by the fork/join lock */
/* write: lock read: anytime */
extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
/* read/write: lock */
extern volatile kmp_team_t *__kmp_team_pool;
extern volatile kmp_info_t *__kmp_thread_pool;
extern kmp_info_t *__kmp_thread_pool_insert_pt;

// total num threads reachable from some root thread including all root threads
extern volatile int __kmp_nth;
/* total number of threads reachable from some root thread including all root
   threads, and those in the thread pool */
extern volatile int __kmp_all_nth;
extern std::atomic<int> __kmp_thread_pool_active_nth;

extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
/* end data protected by fork/join lock */
/* ------------------------------------------------------------------------- */

// Convenience wrappers for identifying the calling thread and its team.
#define __kmp_get_gtid() __kmp_get_global_thread_id()
#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
#define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid()))
#define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team)
#define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid()))

// AT: Which way is correct?
// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
#define __kmp_get_team_num_threads(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_team->t.t_nproc)

// TRUE iff gtid identifies the uber (root master) thread of its root, i.e.
// the registered thread equals the root's r_uber_thread.
static inline bool KMP_UBER_GTID(int gtid) {
  KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);
  KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);
  return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&
          __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);
}

// Return the team-local thread id (ds_tid) recorded for global thread gtid.
static inline int __kmp_tid_from_gtid(int gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);
  return __kmp_threads[gtid]->th.th_info.ds.ds_tid;
}

// Return the global thread id of the thread at index tid within team.
static inline int __kmp_gtid_from_tid(int tid, const kmp_team_t *team) {
  KMP_DEBUG_ASSERT(tid >= 0 && team);
  return team->t.t_threads[tid]->th.th_info.ds.ds_gtid;
}

// Return the global thread id stored in a thread descriptor.
static inline int __kmp_gtid_from_thread(const kmp_info_t *thr) {
  KMP_DEBUG_ASSERT(thr);
  return thr->th.th_info.ds.ds_gtid;
}

// Return the thread descriptor for global thread gtid.
static inline kmp_info_t *__kmp_thread_from_gtid(int gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);
  return __kmp_threads[gtid];
}

// Return the team currently associated with global thread gtid.
static inline kmp_team_t *__kmp_team_from_gtid(int gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);
  return __kmp_threads[gtid]->th.th_team;
}

/* ------------------------------------------------------------------------- */

extern kmp_global_t __kmp_global; /* global status */

extern kmp_info_t __kmp_monitor;
// For Debugging Support Library
extern std::atomic<kmp_int32> __kmp_team_counter;
// For Debugging Support Library
extern std::atomic<kmp_int32> __kmp_task_counter;

// Generate a unique id (team/task) when the debugger library is active;
// otherwise the id degenerates to ~0.
#if USE_DEBUGGER
#define _KMP_GEN_ID(counter)                                                   \
  (__kmp_debugging ? KMP_ATOMIC_INC(&counter) + 1 : ~0)
#else
#define _KMP_GEN_ID(counter) (~0)
#endif /* USE_DEBUGGER */

#define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter)
#define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter)

/* ------------------------------------------------------------------------ */

extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2,
                                         size_t size, char const *format, ...);

// Staged library initialization entry points (serial -> middle -> parallel).
extern void __kmp_serial_initialize(void);
extern void __kmp_middle_initialize(void);
extern void __kmp_parallel_initialize(void);

// Library shutdown entry points for the various termination contexts
// (explicit end, atexit handler, TLS destructor, destination routine).
extern void __kmp_internal_begin(void);
extern void __kmp_internal_end_library(int gtid);
extern void __kmp_internal_end_thread(int gtid);
extern void __kmp_internal_end_atexit(void);
extern void __kmp_internal_end_dtor(void);
extern void __kmp_internal_end_dest(void *);

extern int __kmp_register_root(int initial_thread);
extern void __kmp_unregister_root(int gtid);

extern int __kmp_ignore_mppbeg(void);
extern int __kmp_ignore_mppend(void);

extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws);
extern void __kmp_exit_single(int gtid);

extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);

#ifdef USE_LOAD_BALANCE
extern int __kmp_get_load_balance(int);
#endif

extern int __kmp_get_global_thread_id(void);
extern int __kmp_get_global_thread_id_reg(void);
extern void __kmp_exit_thread(int exit_status);
extern void __kmp_abort(char const *format, ...);
extern void __kmp_abort_thread(void);
KMP_NORETURN extern void __kmp_abort_process(void);
extern void __kmp_warn(char const *format, ...);

extern void __kmp_set_num_threads(int new_nth, int gtid);

// Returns current thread (pointer to kmp_info_t). Current thread *must* be
// registered.
static inline kmp_info_t *__kmp_entry_thread() {
  int gtid = __kmp_entry_gtid();

  return __kmp_threads[gtid];
}

extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels);
extern int __kmp_get_max_active_levels(int gtid);
extern int __kmp_get_ancestor_thread_num(int gtid, int level);
extern int __kmp_get_team_size(int gtid, int level);
extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk);
extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk);

extern unsigned short __kmp_get_random(kmp_info_t *thread);
extern void __kmp_init_random(kmp_info_t *thread);

extern kmp_r_sched_t __kmp_get_schedule_global(void);
extern void __kmp_adjust_num_threads(int new_nproc);
extern void __kmp_check_stksize(size_t *val);

// Internal allocators; the KMP_SRC_LOC_* machinery threads caller source
// location through in debug builds.
extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);
extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL);
#define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR)
#define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR)
#define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR)

#if USE_FAST_MEMORY
extern void *___kmp_fast_allocate(kmp_info_t *this_thr,
                                  size_t size KMP_SRC_LOC_DECL);
extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL);
extern void __kmp_free_fast_memory(kmp_info_t *this_thr);
extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr);
#define __kmp_fast_allocate(this_thr, size)                                    \
  ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR)
#define __kmp_fast_free(this_thr, ptr)                                         \
  ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR)
#endif

extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL);
extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
                                  size_t elsize KMP_SRC_LOC_DECL);
extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
                                   size_t size KMP_SRC_LOC_DECL);
extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
#define __kmp_thread_malloc(th, size)                                          \
  ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR)
#define __kmp_thread_calloc(th, nelem, elsize)                                 \
  ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR)
#define __kmp_thread_realloc(th, ptr, size)                                    \
  ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR)
#define __kmp_thread_free(th, ptr)                                             \
  ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)

#define KMP_INTERNAL_MALLOC(sz) malloc(sz)
#define KMP_INTERNAL_FREE(p) free(p)
#define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz))
#define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz))

extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);

extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
                                 kmp_proc_bind_t proc_bind);
extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
                                 int num_threads);

extern void __kmp_yield();

// Dynamic-loop dispatch initializers; one variant per iteration type/width
// (_4 = 32-bit signed, _4u = 32-bit unsigned, _8/_8u = 64-bit).
extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                                   enum sched_type schedule, kmp_int32 lb,
                                   kmp_int32 ub, kmp_int32 st, kmp_int32 chunk);
extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                    enum sched_type schedule, kmp_uint32 lb,
                                    kmp_uint32 ub, kmp_int32 st,
                                    kmp_int32 chunk);
extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                                   enum sched_type schedule, kmp_int64 lb,
                                   kmp_int64 ub, kmp_int64 st, kmp_int64 chunk);
extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                    enum sched_type schedule, kmp_uint64 lb,
                                    kmp_uint64 ub, kmp_int64 st,
                                    kmp_int64 chunk);

// Fetch the next chunk of iterations; bounds are returned through
// p_lb/p_ub/p_st, and p_last flags the final chunk.
extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
                                  kmp_int32 *p_last, kmp_int32 *p_lb,
                                  kmp_int32 *p_ub, kmp_int32 *p_st);
extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, kmp_uint32 *p_lb,
                                   kmp_uint32 *p_ub, kmp_int32 *p_st);
extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid,
                                  kmp_int32 *p_last, kmp_int64 *p_lb,
                                  kmp_int64 *p_ub, kmp_int64 *p_st);
extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, kmp_uint64 *p_lb,
                                   kmp_uint64 *p_ub, kmp_int64 *p_st);

extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid);
extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid);
extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid);
extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid);

#ifdef KMP_GOMP_COMPAT

extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                                      enum sched_type schedule, kmp_int32 lb,
                                      kmp_int32 ub, kmp_int32 st,
                                      kmp_int32 chunk, int push_ws);
extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                       enum sched_type schedule, kmp_uint32 lb,
                                       kmp_uint32 ub, kmp_int32 st,
                                       kmp_int32 chunk, int push_ws);
extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                                      enum sched_type schedule, kmp_int64 lb,
                                      kmp_int64 ub, kmp_int64 st,
                                      kmp_int64 chunk, int push_ws);
extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                       enum sched_type schedule, kmp_uint64 lb,
                                       kmp_uint64 ub, kmp_int64 st,
                                       kmp_int64 chunk, int push_ws);
extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid);
extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);
extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);
extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);

#endif /* KMP_GOMP_COMPAT */

// 32-bit comparison predicates, used as the pred argument to __kmp_wait_4.
extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);
extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
                               kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
                               void *obj);
extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
                             kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);

class kmp_flag_32;
class kmp_flag_64;
class kmp_flag_oncore;
extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
                          int final_spin
#if USE_ITT_BUILD
                          ,
                          void *itt_sync_obj
#endif
                          );
extern void __kmp_release_64(kmp_flag_64 *flag);

extern void __kmp_infinite_loop(void);

extern void __kmp_cleanup(void);

#if KMP_HANDLE_SIGNALS
extern int __kmp_handle_signals;
extern void __kmp_install_signals(int parallel_init);
extern void __kmp_remove_signals(void);
#endif

extern void __kmp_clear_system_time(void);
extern void __kmp_read_system_time(double *delta);

extern void __kmp_check_stack_overlap(kmp_info_t *thr);

extern void __kmp_expand_host_name(char *buffer, size_t size);
extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
extern void
__kmp_initialize_system_tick(void); /* Initialize timer tick value */
#endif

extern void
__kmp_runtime_initialize(void); /* machine specific initialization */
extern void __kmp_runtime_destroy(void);

#if KMP_AFFINITY_SUPPORTED
extern char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                       kmp_affin_mask_t *mask);
extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
                                                  kmp_affin_mask_t *mask);
extern void __kmp_affinity_initialize(void);
extern void __kmp_affinity_uninitialize(void);
extern void __kmp_affinity_set_init_mask(
    int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
extern void __kmp_affinity_set_place(int gtid);
extern void __kmp_affinity_determine_capable(const char *env_var);
extern int __kmp_aux_set_affinity(void **mask);
extern int __kmp_aux_get_affinity(void **mask);
extern int __kmp_aux_get_affinity_max_proc();
extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
#if KMP_OS_LINUX || KMP_OS_FREEBSD
extern int kmp_set_thread_affinity_mask_initial(void);
#endif
#endif /* KMP_AFFINITY_SUPPORTED */
// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
// format string is for affinity, so platforms that do not support
// affinity can still use the other fields, e.g., %n for num_threads
extern size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                         kmp_str_buf_t *buffer);
extern void __kmp_aux_display_affinity(int gtid, const char *format);

extern void __kmp_cleanup_hierarchy();
extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);

#if KMP_USE_FUTEX

extern int __kmp_futex_determine_capable(void);

#endif // KMP_USE_FUTEX

extern void __kmp_gtid_set_specific(int gtid);
extern int __kmp_gtid_get_specific(void);

extern double __kmp_read_cpu_time(void);

extern int __kmp_read_system_info(struct kmp_sys_info *info);

#if KMP_USE_MONITOR
extern void __kmp_create_monitor(kmp_info_t *th);
#endif

extern void *__kmp_launch_thread(kmp_info_t *thr);

extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size);

#if KMP_OS_WINDOWS
extern int __kmp_still_running(kmp_info_t *th);
extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val);
extern void __kmp_free_handle(kmp_thread_t tHandle);
#endif

#if KMP_USE_MONITOR
extern void __kmp_reap_monitor(kmp_info_t *th);
#endif
extern void __kmp_reap_worker(kmp_info_t *th);
extern void __kmp_terminate_thread(int gtid);

extern int __kmp_try_suspend_mx(kmp_info_t *th);
extern void __kmp_lock_suspend_mx(kmp_info_t *th);
extern void __kmp_unlock_suspend_mx(kmp_info_t *th);

// Suspend/resume a thread waiting on a 32-bit, 64-bit, or oncore flag.
extern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag);
extern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag);
extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
extern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag);
extern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag);
extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);

extern void __kmp_elapsed(double *);
extern void __kmp_elapsed_tick(double *);

extern void __kmp_enable(int old_state);
extern void __kmp_disable(int *old_state);

extern void __kmp_thread_sleep(int millis);

extern void __kmp_common_initialize(void);
extern void __kmp_common_destroy(void);
extern void __kmp_common_destroy_gtid(int gtid);

#if KMP_OS_UNIX
extern void __kmp_register_atfork(void);
#endif
extern void __kmp_suspend_initialize(void);
extern void __kmp_suspend_initialize_thread(kmp_info_t *th);
extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th);

extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                         int tid);
extern kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data,
#endif
                    kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
extern void __kmp_free_thread(kmp_info_t *);
extern void __kmp_free_team(kmp_root_t *,
                            kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *));
extern kmp_team_t *__kmp_reap_team(kmp_team_t *);

/* ------------------------------------------------------------------------ */
3436345153Sdimextern void __kmp_initialize_bget(kmp_info_t *th); 3437345153Sdimextern void __kmp_finalize_bget(kmp_info_t *th); 3438345153Sdim 3439345153SdimKMP_EXPORT void *kmpc_malloc(size_t size); 3440345153SdimKMP_EXPORT void *kmpc_aligned_malloc(size_t size, size_t alignment); 3441345153SdimKMP_EXPORT void *kmpc_calloc(size_t nelem, size_t elsize); 3442345153SdimKMP_EXPORT void *kmpc_realloc(void *ptr, size_t size); 3443345153SdimKMP_EXPORT void kmpc_free(void *ptr); 3444345153Sdim 3445345153Sdim/* declarations for internal use */ 3446345153Sdim 3447345153Sdimextern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split, 3448345153Sdim size_t reduce_size, void *reduce_data, 3449345153Sdim void (*reduce)(void *, void *)); 3450345153Sdimextern void __kmp_end_split_barrier(enum barrier_type bt, int gtid); 3451353358Sdimextern int __kmp_barrier_gomp_cancel(int gtid); 3452345153Sdim 3453345153Sdim/*! 3454345153Sdim * Tell the fork call which compiler generated the fork call, and therefore how 3455345153Sdim * to deal with the call. 3456345153Sdim */ 3457345153Sdimenum fork_context_e { 3458345153Sdim fork_context_gnu, /**< Called from GNU generated code, so must not invoke the 3459345153Sdim microtask internally. */ 3460345153Sdim fork_context_intel, /**< Called from Intel generated code. 
*/ 3461345153Sdim fork_context_last 3462345153Sdim}; 3463345153Sdimextern int __kmp_fork_call(ident_t *loc, int gtid, 3464345153Sdim enum fork_context_e fork_context, kmp_int32 argc, 3465345153Sdim microtask_t microtask, launch_t invoker, 3466365427Sdim kmp_va_list ap); 3467345153Sdim 3468345153Sdimextern void __kmp_join_call(ident_t *loc, int gtid 3469345153Sdim#if OMPT_SUPPORT 3470345153Sdim , 3471345153Sdim enum fork_context_e fork_context 3472345153Sdim#endif 3473345153Sdim , 3474353358Sdim int exit_teams = 0); 3475345153Sdim 3476345153Sdimextern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid); 3477345153Sdimextern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team); 3478345153Sdimextern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team); 3479345153Sdimextern int __kmp_invoke_task_func(int gtid); 3480345153Sdimextern void __kmp_run_before_invoked_task(int gtid, int tid, 3481345153Sdim kmp_info_t *this_thr, 3482345153Sdim kmp_team_t *team); 3483345153Sdimextern void __kmp_run_after_invoked_task(int gtid, int tid, 3484345153Sdim kmp_info_t *this_thr, 3485345153Sdim kmp_team_t *team); 3486345153Sdim 3487345153Sdim// should never have been exported 3488345153SdimKMP_EXPORT int __kmpc_invoke_task_func(int gtid); 3489345153Sdimextern int __kmp_invoke_teams_master(int gtid); 3490345153Sdimextern void __kmp_teams_master(int gtid); 3491345153Sdimextern int __kmp_aux_get_team_num(); 3492345153Sdimextern int __kmp_aux_get_num_teams(); 3493345153Sdimextern void __kmp_save_internal_controls(kmp_info_t *thread); 3494345153Sdimextern void __kmp_user_set_library(enum library_type arg); 3495345153Sdimextern void __kmp_aux_set_library(enum library_type arg); 3496345153Sdimextern void __kmp_aux_set_stacksize(size_t arg); 3497345153Sdimextern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid); 3498345153Sdimextern void __kmp_aux_set_defaults(char const *str, int len); 3499345153Sdim 3500345153Sdim/* Functions called from 
__kmp_aux_env_initialize() in kmp_settings.cpp */ 3501345153Sdimvoid kmpc_set_blocktime(int arg); 3502345153Sdimvoid ompc_set_nested(int flag); 3503345153Sdimvoid ompc_set_dynamic(int flag); 3504345153Sdimvoid ompc_set_num_threads(int arg); 3505345153Sdim 3506345153Sdimextern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, 3507345153Sdim kmp_team_t *team, int tid); 3508345153Sdimextern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr); 3509345153Sdimextern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, 3510345153Sdim kmp_tasking_flags_t *flags, 3511345153Sdim size_t sizeof_kmp_task_t, 3512345153Sdim size_t sizeof_shareds, 3513345153Sdim kmp_routine_entry_t task_entry); 3514345153Sdimextern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, 3515345153Sdim kmp_team_t *team, int tid, 3516345153Sdim int set_curr_task); 3517345153Sdimextern void __kmp_finish_implicit_task(kmp_info_t *this_thr); 3518345153Sdimextern void __kmp_free_implicit_task(kmp_info_t *this_thr); 3519353358Sdim 3520353358Sdimextern kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 3521353358Sdim int gtid, 3522353358Sdim kmp_task_t *task); 3523353358Sdimextern void __kmp_fulfill_event(kmp_event_t *event); 3524353358Sdim 3525345153Sdimint __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, 3526345153Sdim kmp_flag_32 *flag, int final_spin, 3527345153Sdim int *thread_finished, 3528345153Sdim#if USE_ITT_BUILD 3529345153Sdim void *itt_sync_obj, 3530345153Sdim#endif /* USE_ITT_BUILD */ 3531345153Sdim kmp_int32 is_constrained); 3532345153Sdimint __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, 3533345153Sdim kmp_flag_64 *flag, int final_spin, 3534345153Sdim int *thread_finished, 3535345153Sdim#if USE_ITT_BUILD 3536345153Sdim void *itt_sync_obj, 3537345153Sdim#endif /* USE_ITT_BUILD */ 3538345153Sdim kmp_int32 is_constrained); 3539345153Sdimint __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, 
3540345153Sdim kmp_flag_oncore *flag, int final_spin, 3541345153Sdim int *thread_finished, 3542345153Sdim#if USE_ITT_BUILD 3543345153Sdim void *itt_sync_obj, 3544345153Sdim#endif /* USE_ITT_BUILD */ 3545345153Sdim kmp_int32 is_constrained); 3546345153Sdim 3547345153Sdimextern void __kmp_free_task_team(kmp_info_t *thread, 3548345153Sdim kmp_task_team_t *task_team); 3549345153Sdimextern void __kmp_reap_task_teams(void); 3550345153Sdimextern void __kmp_wait_to_unref_task_teams(void); 3551345153Sdimextern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, 3552345153Sdim int always); 3553345153Sdimextern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team); 3554345153Sdimextern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team 3555345153Sdim#if USE_ITT_BUILD 3556345153Sdim , 3557345153Sdim void *itt_sync_obj 3558345153Sdim#endif /* USE_ITT_BUILD */ 3559345153Sdim , 3560345153Sdim int wait = 1); 3561345153Sdimextern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, 3562345153Sdim int gtid); 3563345153Sdim 3564345153Sdimextern int __kmp_is_address_mapped(void *addr); 3565345153Sdimextern kmp_uint64 __kmp_hardware_timestamp(void); 3566345153Sdim 3567345153Sdim#if KMP_OS_UNIX 3568345153Sdimextern int __kmp_read_from_file(char const *path, char const *format, ...); 3569345153Sdim#endif 3570345153Sdim 3571345153Sdim/* ------------------------------------------------------------------------ */ 3572345153Sdim// 3573345153Sdim// Assembly routines that have no compiler intrinsic replacement 3574345153Sdim// 3575345153Sdim 3576345153Sdimextern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc, 3577345153Sdim void *argv[] 3578345153Sdim#if OMPT_SUPPORT 3579345153Sdim , 3580345153Sdim void **exit_frame_ptr 3581345153Sdim#endif 3582345153Sdim ); 3583345153Sdim 3584345153Sdim/* ------------------------------------------------------------------------ */ 3585345153Sdim 3586345153SdimKMP_EXPORT void 
__kmpc_begin(ident_t *, kmp_int32 flags); 3587345153SdimKMP_EXPORT void __kmpc_end(ident_t *); 3588345153Sdim 3589345153SdimKMP_EXPORT void __kmpc_threadprivate_register_vec(ident_t *, void *data, 3590345153Sdim kmpc_ctor_vec ctor, 3591345153Sdim kmpc_cctor_vec cctor, 3592345153Sdim kmpc_dtor_vec dtor, 3593345153Sdim size_t vector_length); 3594345153SdimKMP_EXPORT void __kmpc_threadprivate_register(ident_t *, void *data, 3595345153Sdim kmpc_ctor ctor, kmpc_cctor cctor, 3596345153Sdim kmpc_dtor dtor); 3597345153SdimKMP_EXPORT void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid, 3598345153Sdim void *data, size_t size); 3599345153Sdim 3600345153SdimKMP_EXPORT kmp_int32 __kmpc_global_thread_num(ident_t *); 3601345153SdimKMP_EXPORT kmp_int32 __kmpc_global_num_threads(ident_t *); 3602345153SdimKMP_EXPORT kmp_int32 __kmpc_bound_thread_num(ident_t *); 3603345153SdimKMP_EXPORT kmp_int32 __kmpc_bound_num_threads(ident_t *); 3604345153Sdim 3605345153SdimKMP_EXPORT kmp_int32 __kmpc_ok_to_fork(ident_t *); 3606345153SdimKMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs, 3607345153Sdim kmpc_micro microtask, ...); 3608345153Sdim 3609345153SdimKMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid); 3610345153SdimKMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid); 3611345153Sdim 3612345153SdimKMP_EXPORT void __kmpc_flush(ident_t *); 3613345153SdimKMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid); 3614345153SdimKMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); 3615345153SdimKMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 3616345153SdimKMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid); 3617345153SdimKMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid); 3618345153SdimKMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid, 3619345153Sdim kmp_critical_name *); 3620345153SdimKMP_EXPORT void __kmpc_end_critical(ident_t *, kmp_int32 
global_tid, 3621345153Sdim kmp_critical_name *); 3622345153SdimKMP_EXPORT void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid, 3623345153Sdim kmp_critical_name *, uint32_t hint); 3624345153Sdim 3625345153SdimKMP_EXPORT kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid); 3626345153SdimKMP_EXPORT void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid); 3627345153Sdim 3628345153SdimKMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *, 3629345153Sdim kmp_int32 global_tid); 3630345153Sdim 3631345153SdimKMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 3632345153SdimKMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 3633345153Sdim 3634345153SdimKMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid, 3635345153Sdim kmp_int32 schedtype, kmp_int32 *plastiter, 3636345153Sdim kmp_int *plower, kmp_int *pupper, 3637345153Sdim kmp_int *pstride, kmp_int incr, 3638345153Sdim kmp_int chunk); 3639345153Sdim 3640345153SdimKMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 3641345153Sdim 3642345153SdimKMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 3643345153Sdim size_t cpy_size, void *cpy_data, 3644345153Sdim void (*cpy_func)(void *, void *), 3645345153Sdim kmp_int32 didit); 3646345153Sdim 3647345153Sdimextern void KMPC_SET_NUM_THREADS(int arg); 3648345153Sdimextern void KMPC_SET_DYNAMIC(int flag); 3649345153Sdimextern void KMPC_SET_NESTED(int flag); 3650345153Sdim 3651345153Sdim/* OMP 3.0 tasking interface routines */ 3652345153SdimKMP_EXPORT kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, 3653345153Sdim kmp_task_t *new_task); 3654345153SdimKMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, 3655345153Sdim kmp_int32 flags, 3656345153Sdim size_t sizeof_kmp_task_t, 3657345153Sdim size_t sizeof_shareds, 3658345153Sdim kmp_routine_entry_t task_entry); 3659353358SdimKMP_EXPORT kmp_task_t 
*__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid, 3660353358Sdim kmp_int32 flags, 3661353358Sdim size_t sizeof_kmp_task_t, 3662353358Sdim size_t sizeof_shareds, 3663353358Sdim kmp_routine_entry_t task_entry, 3664353358Sdim kmp_int64 device_id); 3665345153SdimKMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid, 3666345153Sdim kmp_task_t *task); 3667345153SdimKMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid, 3668345153Sdim kmp_task_t *task); 3669345153SdimKMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid, 3670345153Sdim kmp_task_t *new_task); 3671345153SdimKMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid); 3672345153Sdim 3673345153SdimKMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, 3674345153Sdim int end_part); 3675345153Sdim 3676345153Sdim#if TASK_UNUSED 3677345153Sdimvoid __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task); 3678345153Sdimvoid __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid, 3679345153Sdim kmp_task_t *task); 3680345153Sdim#endif // TASK_UNUSED 3681345153Sdim 3682345153Sdim/* ------------------------------------------------------------------------ */ 3683345153Sdim 3684345153SdimKMP_EXPORT void __kmpc_taskgroup(ident_t *loc, int gtid); 3685345153SdimKMP_EXPORT void __kmpc_end_taskgroup(ident_t *loc, int gtid); 3686345153Sdim 3687345153SdimKMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps( 3688345153Sdim ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, 3689345153Sdim kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 3690345153Sdim kmp_depend_info_t *noalias_dep_list); 3691345153SdimKMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, 3692345153Sdim kmp_int32 ndeps, 3693345153Sdim kmp_depend_info_t *dep_list, 3694345153Sdim kmp_int32 ndeps_noalias, 3695345153Sdim kmp_depend_info_t *noalias_dep_list); 3696345153Sdimextern kmp_int32 
__kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, 3697345153Sdim bool serialize_immediate); 3698345153Sdim 3699345153SdimKMP_EXPORT kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, 3700345153Sdim kmp_int32 cncl_kind); 3701345153SdimKMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid, 3702345153Sdim kmp_int32 cncl_kind); 3703345153SdimKMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid); 3704345153SdimKMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind); 3705345153Sdim 3706345153SdimKMP_EXPORT void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask); 3707345153SdimKMP_EXPORT void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask); 3708345153SdimKMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task, 3709345153Sdim kmp_int32 if_val, kmp_uint64 *lb, 3710345153Sdim kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup, 3711345153Sdim kmp_int32 sched, kmp_uint64 grainsize, 3712345153Sdim void *task_dup); 3713345153SdimKMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data); 3714353358SdimKMP_EXPORT void *__kmpc_taskred_init(int gtid, int num_data, void *data); 3715345153SdimKMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d); 3716353358SdimKMP_EXPORT void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, 3717353358Sdim int is_ws, int num, 3718353358Sdim void *data); 3719353358SdimKMP_EXPORT void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, 3720353358Sdim int num, void *data); 3721353358SdimKMP_EXPORT void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, 3722353358Sdim int is_ws); 3723345153SdimKMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity( 3724345153Sdim ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins, 3725345153Sdim kmp_task_affinity_info_t *affin_list); 3726345153Sdim 3727345153Sdim/* Lock interface routines (fast versions with gtid passed 
in) */ 3728345153SdimKMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, 3729345153Sdim void **user_lock); 3730345153SdimKMP_EXPORT void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, 3731345153Sdim void **user_lock); 3732345153SdimKMP_EXPORT void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, 3733345153Sdim void **user_lock); 3734345153SdimKMP_EXPORT void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, 3735345153Sdim void **user_lock); 3736345153SdimKMP_EXPORT void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock); 3737345153SdimKMP_EXPORT void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, 3738345153Sdim void **user_lock); 3739345153SdimKMP_EXPORT void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, 3740345153Sdim void **user_lock); 3741345153SdimKMP_EXPORT void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, 3742345153Sdim void **user_lock); 3743345153SdimKMP_EXPORT int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock); 3744345153SdimKMP_EXPORT int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, 3745345153Sdim void **user_lock); 3746345153Sdim 3747345153SdimKMP_EXPORT void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, 3748345153Sdim void **user_lock, uintptr_t hint); 3749345153SdimKMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 3750345153Sdim void **user_lock, 3751345153Sdim uintptr_t hint); 3752345153Sdim 3753345153Sdim/* Interface to fast scalable reduce methods routines */ 3754345153Sdim 3755345153SdimKMP_EXPORT kmp_int32 __kmpc_reduce_nowait( 3756345153Sdim ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, 3757345153Sdim void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), 3758345153Sdim kmp_critical_name *lck); 3759345153SdimKMP_EXPORT void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3760345153Sdim kmp_critical_name *lck); 3761345153SdimKMP_EXPORT kmp_int32 __kmpc_reduce( 3762345153Sdim 
ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, 3763345153Sdim void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), 3764345153Sdim kmp_critical_name *lck); 3765345153SdimKMP_EXPORT void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3766345153Sdim kmp_critical_name *lck); 3767345153Sdim 3768345153Sdim/* Internal fast reduction routines */ 3769345153Sdim 3770345153Sdimextern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method( 3771345153Sdim ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, 3772345153Sdim void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), 3773345153Sdim kmp_critical_name *lck); 3774345153Sdim 3775345153Sdim// this function is for testing set/get/determine reduce method 3776345153SdimKMP_EXPORT kmp_int32 __kmp_get_reduce_method(void); 3777345153Sdim 3778345153SdimKMP_EXPORT kmp_uint64 __kmpc_get_taskid(); 3779345153SdimKMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid(); 3780345153Sdim 3781345153Sdim// C++ port 3782345153Sdim// missing 'extern "C"' declarations 3783345153Sdim 3784345153SdimKMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc); 3785345153SdimKMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid); 3786345153SdimKMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 3787345153Sdim kmp_int32 num_threads); 3788345153Sdim 3789345153SdimKMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 3790345153Sdim int proc_bind); 3791345153SdimKMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 3792345153Sdim kmp_int32 num_teams, 3793345153Sdim kmp_int32 num_threads); 3794345153SdimKMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, 3795345153Sdim kmpc_micro microtask, ...); 3796345153Sdimstruct kmp_dim { // loop bounds info casted to kmp_int64 3797345153Sdim kmp_int64 lo; // lower 3798345153Sdim kmp_int64 up; // upper 3799345153Sdim kmp_int64 st; // 
stride 3800345153Sdim}; 3801345153SdimKMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 3802345153Sdim kmp_int32 num_dims, 3803345153Sdim const struct kmp_dim *dims); 3804345153SdimKMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, 3805345153Sdim const kmp_int64 *vec); 3806345153SdimKMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, 3807345153Sdim const kmp_int64 *vec); 3808345153SdimKMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 3809345153Sdim 3810345153SdimKMP_EXPORT void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid, 3811345153Sdim void *data, size_t size, 3812345153Sdim void ***cache); 3813345153Sdim 3814345153Sdim// Symbols for MS mutual detection. 3815345153Sdimextern int _You_must_link_with_exactly_one_OpenMP_library; 3816345153Sdimextern int _You_must_link_with_Intel_OpenMP_library; 3817345153Sdim#if KMP_OS_WINDOWS && (KMP_VERSION_MAJOR > 4) 3818345153Sdimextern int _You_must_link_with_Microsoft_OpenMP_library; 3819345153Sdim#endif 3820345153Sdim 3821345153Sdim// The routines below are not exported. 3822345153Sdim// Consider making them 'static' in corresponding source files. 3823345153Sdimvoid kmp_threadprivate_insert_private_data(int gtid, void *pc_addr, 3824345153Sdim void *data_addr, size_t pc_size); 3825345153Sdimstruct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr, 3826345153Sdim void *data_addr, 3827345153Sdim size_t pc_size); 3828345153Sdimvoid __kmp_threadprivate_resize_cache(int newCapacity); 3829345153Sdimvoid __kmp_cleanup_threadprivate_caches(); 3830345153Sdim 3831345153Sdim// ompc_, kmpc_ entries moved from omp.h. 
3832345153Sdim#if KMP_OS_WINDOWS 3833345153Sdim#define KMPC_CONVENTION __cdecl 3834345153Sdim#else 3835345153Sdim#define KMPC_CONVENTION 3836345153Sdim#endif 3837345153Sdim 3838345153Sdim#ifndef __OMP_H 3839345153Sdimtypedef enum omp_sched_t { 3840345153Sdim omp_sched_static = 1, 3841345153Sdim omp_sched_dynamic = 2, 3842345153Sdim omp_sched_guided = 3, 3843345153Sdim omp_sched_auto = 4 3844345153Sdim} omp_sched_t; 3845345153Sdimtypedef void *kmp_affinity_mask_t; 3846345153Sdim#endif 3847345153Sdim 3848345153SdimKMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int); 3849345153SdimKMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int); 3850345153SdimKMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int); 3851345153SdimKMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int); 3852345153SdimKMP_EXPORT int KMPC_CONVENTION 3853345153Sdimkmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *); 3854345153SdimKMP_EXPORT int KMPC_CONVENTION 3855345153Sdimkmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *); 3856345153SdimKMP_EXPORT int KMPC_CONVENTION 3857345153Sdimkmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *); 3858345153Sdim 3859345153SdimKMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int); 3860345153SdimKMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t); 3861345153SdimKMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int); 3862345153SdimKMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *); 3863345153SdimKMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int); 3864345153Sdim 3865345153Sdimenum kmp_target_offload_kind { 3866345153Sdim tgt_disabled = 0, 3867345153Sdim tgt_default = 1, 3868345153Sdim tgt_mandatory = 2 3869345153Sdim}; 3870345153Sdimtypedef enum kmp_target_offload_kind kmp_target_offload_kind_t; 3871345153Sdim// Set via OMP_TARGET_OFFLOAD if specified, defaults to tgt_default otherwise 3872345153Sdimextern kmp_target_offload_kind_t __kmp_target_offload; 3873345153Sdimextern int 
__kmpc_get_target_offload(); 3874345153Sdim 3875345153Sdim// Constants used in libomptarget 3876345153Sdim#define KMP_DEVICE_DEFAULT -1 // This is libomptarget's default device. 3877345153Sdim#define KMP_HOST_DEVICE -10 // This is what it is in libomptarget, go figure. 3878345153Sdim#define KMP_DEVICE_ALL -11 // This is libomptarget's "all devices". 3879345153Sdim 3880353358Sdim// OMP Pause Resource 3881353358Sdim 3882353358Sdim// The following enum is used both to set the status in __kmp_pause_status, and 3883353358Sdim// as the internal equivalent of the externally-visible omp_pause_resource_t. 3884353358Sdimtypedef enum kmp_pause_status_t { 3885353358Sdim kmp_not_paused = 0, // status is not paused, or, requesting resume 3886353358Sdim kmp_soft_paused = 1, // status is soft-paused, or, requesting soft pause 3887353358Sdim kmp_hard_paused = 2 // status is hard-paused, or, requesting hard pause 3888353358Sdim} kmp_pause_status_t; 3889353358Sdim 3890353358Sdim// This stores the pause state of the runtime 3891353358Sdimextern kmp_pause_status_t __kmp_pause_status; 3892353358Sdimextern int __kmpc_pause_resource(kmp_pause_status_t level); 3893353358Sdimextern int __kmp_pause_resource(kmp_pause_status_t level); 3894353358Sdim// Soft resume sets __kmp_pause_status, and wakes up all threads. 3895353358Sdimextern void __kmp_resume_if_soft_paused(); 3896353358Sdim// Hard resume simply resets the status to not paused. Library will appear to 3897353358Sdim// be uninitialized after hard pause. Let OMP constructs trigger required 3898353358Sdim// initializations. 3899353358Sdimstatic inline void __kmp_resume_if_hard_paused() { 3900353358Sdim if (__kmp_pause_status == kmp_hard_paused) { 3901353358Sdim __kmp_pause_status = kmp_not_paused; 3902353358Sdim } 3903353358Sdim} 3904353358Sdim 3905345153Sdim#ifdef __cplusplus 3906345153Sdim} 3907345153Sdim#endif 3908345153Sdim 3909345153Sdim#endif /* KMP_H */ 3910