/*
 * kmp_lock.cpp -- lock-related functions
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

13#include <stddef.h>
14#include <atomic>
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_io.h"
19#include "kmp_itt.h"
20#include "kmp_lock.h"
21#include "kmp_wait_release.h"
22#include "kmp_wrapper_getpid.h"
23
#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
// We should really include <futex.h>, but that causes compatibility problems
// on different Linux* OS distributions that either require that you include,
// or break when you try to include, <pci/types.h>. Since all we need is the
// two macros below (which are part of the kernel ABI, so can't change), we
// just define the constants here and don't include <futex.h>.
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif
39
/* Implement spin locks for internal library use.             */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

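// For orientation only: the bakery/ticket scheme used by the ticket locks
// further down in this file boils down to the stand-alone sketch below. It is
// illustrative C++11 code, deliberately kept out of the build; the real
// implementation (__kmp_acquire_ticket_lock and friends) follows the same
// next_ticket / now_serving pattern but spins through KMP_WAIT_PTR.
#if 0
#include <atomic>
struct simple_ticket_lock {
  std::atomic<unsigned> next_ticket{0};
  std::atomic<unsigned> now_serving{0};
  void lock() {
    // Take the next ticket, then spin until that ticket is being served.
    unsigned my = next_ticket.fetch_add(1, std::memory_order_relaxed);
    while (now_serving.load(std::memory_order_acquire) != my)
      ;
  }
  void unlock() {
    // Hand the lock to the next ticket holder, in FIFO order.
    now_serving.fetch_add(1, std::memory_order_release);
  }
};
#endif
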
43void __kmp_validate_locks(void) {
44  int i;
45  kmp_uint32 x, y;
46
  /* Check to make sure unsigned arithmetic wraps around properly. */
48  x = ~((kmp_uint32)0) - 2;
49  y = x - 2;
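  /* x starts two below the 32-bit wrap point and y two below x, so the loop
     below carries both values across the 2^32 boundary while their modular
     difference must remain exactly 2. */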
50
51  for (i = 0; i < 8; ++i, ++x, ++y) {
52    kmp_uint32 z = (x - y);
53    KMP_ASSERT(z == 2);
54  }
55
56  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
57}
58
59/* ------------------------------------------------------------------------ */
60/* test and set locks */
61
// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
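//
// Note that the *_with_checks routines below only inspect lk.depth_locked
// (via __kmp_is_tas_lock_nestable) when sizeof(kmp_tas_lock_t) <=
// OMP_LOCK_T_SIZE, i.e. presumably only when that field is guaranteed to lie
// within the space described above.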
69
70static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
71  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
72}
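// The poll word holds KMP_LOCK_FREE(tas) while the lock is free and
// KMP_LOCK_BUSY(gtid + 1, tas) while it is held, so the owner's gtid is
// recovered above as KMP_LOCK_STRIP(poll) - 1.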
73
74static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
75  return lck->lk.depth_locked != -1;
76}
77
78__forceinline static int
79__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
80  KMP_MB();
81
82#ifdef USE_LOCK_PROFILE
83  kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
84  if ((curr != 0) && (curr != gtid + 1))
85    __kmp_printf("LOCK CONTENTION: %p\n", lck);
86/* else __kmp_printf( "." );*/
87#endif /* USE_LOCK_PROFILE */
88
89  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
90  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
91
92  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
93      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
94    KMP_FSYNC_ACQUIRED(lck);
95    return KMP_LOCK_ACQUIRED_FIRST;
96  }
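  // Fast path failed: another thread holds the lock. Spin with bounded
  // exponential backoff (or rely on tpause when UMWAIT is enabled), yielding
  // when oversubscribed, and retry the CAS until the lock is observed free
  // and successfully claimed.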
97
98  kmp_uint32 spins;
99  kmp_uint64 time;
100  KMP_FSYNC_PREPARE(lck);
101  KMP_INIT_YIELD(spins);
102  KMP_INIT_BACKOFF(time);
103  kmp_backoff_t backoff = __kmp_spin_backoff_params;
104  do {
105#if !KMP_HAVE_UMWAIT
106    __kmp_spin_backoff(&backoff);
107#else
108    if (!__kmp_tpause_enabled)
109      __kmp_spin_backoff(&backoff);
110#endif
111    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
112  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
113           !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
114  KMP_FSYNC_ACQUIRED(lck);
115  return KMP_LOCK_ACQUIRED_FIRST;
116}
117
118int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
119  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
120  return retval;
121}
122
123static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
124                                              kmp_int32 gtid) {
125  char const *const func = "omp_set_lock";
126  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
127      __kmp_is_tas_lock_nestable(lck)) {
128    KMP_FATAL(LockNestableUsedAsSimple, func);
129  }
130  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
131    KMP_FATAL(LockIsAlreadyOwned, func);
132  }
133  return __kmp_acquire_tas_lock(lck, gtid);
134}
135
136int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
137  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
138  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
139  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
140      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
141    KMP_FSYNC_ACQUIRED(lck);
142    return TRUE;
143  }
144  return FALSE;
145}
146
147static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
148                                           kmp_int32 gtid) {
149  char const *const func = "omp_test_lock";
150  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
151      __kmp_is_tas_lock_nestable(lck)) {
152    KMP_FATAL(LockNestableUsedAsSimple, func);
153  }
154  return __kmp_test_tas_lock(lck, gtid);
155}
156
157int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
158  KMP_MB(); /* Flush all pending memory write invalidates.  */
159
160  KMP_FSYNC_RELEASING(lck);
161  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
162  KMP_MB(); /* Flush all pending memory write invalidates.  */
163
164  KMP_YIELD_OVERSUB();
165  return KMP_LOCK_RELEASED;
166}
167
168static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
169                                              kmp_int32 gtid) {
170  char const *const func = "omp_unset_lock";
171  KMP_MB(); /* in case another processor initialized lock */
172  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
173      __kmp_is_tas_lock_nestable(lck)) {
174    KMP_FATAL(LockNestableUsedAsSimple, func);
175  }
176  if (__kmp_get_tas_lock_owner(lck) == -1) {
177    KMP_FATAL(LockUnsettingFree, func);
178  }
179  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
180      (__kmp_get_tas_lock_owner(lck) != gtid)) {
181    KMP_FATAL(LockUnsettingSetByAnother, func);
182  }
183  return __kmp_release_tas_lock(lck, gtid);
184}
185
186void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
187  lck->lk.poll = KMP_LOCK_FREE(tas);
188}
189
190void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }
191
192static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
193  char const *const func = "omp_destroy_lock";
194  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
195      __kmp_is_tas_lock_nestable(lck)) {
196    KMP_FATAL(LockNestableUsedAsSimple, func);
197  }
198  if (__kmp_get_tas_lock_owner(lck) != -1) {
199    KMP_FATAL(LockStillOwned, func);
200  }
201  __kmp_destroy_tas_lock(lck);
202}
203
204// nested test and set locks
205
206int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
207  KMP_DEBUG_ASSERT(gtid >= 0);
208
209  if (__kmp_get_tas_lock_owner(lck) == gtid) {
210    lck->lk.depth_locked += 1;
211    return KMP_LOCK_ACQUIRED_NEXT;
212  } else {
213    __kmp_acquire_tas_lock_timed_template(lck, gtid);
214    lck->lk.depth_locked = 1;
215    return KMP_LOCK_ACQUIRED_FIRST;
216  }
217}
218
219static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
220                                                     kmp_int32 gtid) {
221  char const *const func = "omp_set_nest_lock";
222  if (!__kmp_is_tas_lock_nestable(lck)) {
223    KMP_FATAL(LockSimpleUsedAsNestable, func);
224  }
225  return __kmp_acquire_nested_tas_lock(lck, gtid);
226}
227
228int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
229  int retval;
230
231  KMP_DEBUG_ASSERT(gtid >= 0);
232
233  if (__kmp_get_tas_lock_owner(lck) == gtid) {
234    retval = ++lck->lk.depth_locked;
235  } else if (!__kmp_test_tas_lock(lck, gtid)) {
236    retval = 0;
237  } else {
238    KMP_MB();
239    retval = lck->lk.depth_locked = 1;
240  }
241  return retval;
242}
243
244static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
245                                                  kmp_int32 gtid) {
246  char const *const func = "omp_test_nest_lock";
247  if (!__kmp_is_tas_lock_nestable(lck)) {
248    KMP_FATAL(LockSimpleUsedAsNestable, func);
249  }
250  return __kmp_test_nested_tas_lock(lck, gtid);
251}
252
253int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
254  KMP_DEBUG_ASSERT(gtid >= 0);
255
256  KMP_MB();
257  if (--(lck->lk.depth_locked) == 0) {
258    __kmp_release_tas_lock(lck, gtid);
259    return KMP_LOCK_RELEASED;
260  }
261  return KMP_LOCK_STILL_HELD;
262}
263
264static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
265                                                     kmp_int32 gtid) {
266  char const *const func = "omp_unset_nest_lock";
267  KMP_MB(); /* in case another processor initialized lock */
268  if (!__kmp_is_tas_lock_nestable(lck)) {
269    KMP_FATAL(LockSimpleUsedAsNestable, func);
270  }
271  if (__kmp_get_tas_lock_owner(lck) == -1) {
272    KMP_FATAL(LockUnsettingFree, func);
273  }
274  if (__kmp_get_tas_lock_owner(lck) != gtid) {
275    KMP_FATAL(LockUnsettingSetByAnother, func);
276  }
277  return __kmp_release_nested_tas_lock(lck, gtid);
278}
279
280void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
281  __kmp_init_tas_lock(lck);
282  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
283}
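// Illustrative call sequence for the nested variants above (owner == gtid):
//   __kmp_acquire_nested_tas_lock(lck, gtid);  // depth_locked == 1
//   __kmp_acquire_nested_tas_lock(lck, gtid);  // depth_locked == 2 (recursive)
//   __kmp_release_nested_tas_lock(lck, gtid);  // KMP_LOCK_STILL_HELD, depth 1
//   __kmp_release_nested_tas_lock(lck, gtid);  // KMP_LOCK_RELEASED, lock freed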
284
285void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
286  __kmp_destroy_tas_lock(lck);
287  lck->lk.depth_locked = 0;
288}
289
290static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
291  char const *const func = "omp_destroy_nest_lock";
292  if (!__kmp_is_tas_lock_nestable(lck)) {
293    KMP_FATAL(LockSimpleUsedAsNestable, func);
294  }
295  if (__kmp_get_tas_lock_owner(lck) != -1) {
296    KMP_FATAL(LockStillOwned, func);
297  }
298  __kmp_destroy_nested_tas_lock(lck);
299}
300
301#if KMP_USE_FUTEX
302
/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention.  They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).

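// Layout of lk.poll for a futex lock (as used throughout this section):
// KMP_LOCK_FREE(futex) when the lock is free, and
// KMP_LOCK_BUSY((gtid + 1) << 1, futex) when held, with the low bit of the
// stripped value set once a waiter may be blocked in the kernel.  A thread
// that fails to acquire the lock sets that bit and sleeps via
//   syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0);
// and the owner, on seeing the bit at release time, wakes a waiter via
//   syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
//           NULL, NULL, 0);
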
311static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
312  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
313}
314
315static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
316  return lck->lk.depth_locked != -1;
317}
318
319__forceinline static int
320__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
321  kmp_int32 gtid_code = (gtid + 1) << 1;
322
323  KMP_MB();
324
325#ifdef USE_LOCK_PROFILE
326  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
327  if ((curr != 0) && (curr != gtid_code))
328    __kmp_printf("LOCK CONTENTION: %p\n", lck);
329/* else __kmp_printf( "." );*/
330#endif /* USE_LOCK_PROFILE */
331
332  KMP_FSYNC_PREPARE(lck);
333  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
334                  lck, lck->lk.poll, gtid));
335
336  kmp_int32 poll_val;
337
338  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
339              &(lck->lk.poll), KMP_LOCK_FREE(futex),
340              KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {
341
342    kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
343    KA_TRACE(
344        1000,
345        ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
346         lck, gtid, poll_val, cond));
347
    // NOTE: don't write this branch condition as
    //
    // if ( poll_val & 1 == 0 )
    //
    // because == binds tighter than &, so that expression parses as
    // poll_val & (1 == 0), which is always 0, and the block below would
    // always be skipped regardless of the value of the LSB of poll_val.
354    if (!cond) {
355      // Try to set the lsb in the poll to indicate to the owner
356      // thread that they need to wake this thread up.
357      if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
358                                       poll_val | KMP_LOCK_BUSY(1, futex))) {
359        KA_TRACE(
360            1000,
361            ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
362             lck, lck->lk.poll, gtid));
363        continue;
364      }
365      poll_val |= KMP_LOCK_BUSY(1, futex);
366
367      KA_TRACE(1000,
368               ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
369                lck->lk.poll, gtid));
370    }
371
372    KA_TRACE(
373        1000,
374        ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
375         lck, gtid, poll_val));
376
377    long rc;
378    if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
379                      NULL, 0)) != 0) {
380      KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
381                      "failed (rc=%ld errno=%d)\n",
382                      lck, gtid, poll_val, rc, errno));
383      continue;
384    }
385
386    KA_TRACE(1000,
387             ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
388              lck, gtid, poll_val));
    // This thread has now done a successful futex wait call and has been
    // placed on the OS futex queue.  We must therefore perform a futex wake
    // call when releasing the lock, as we have no idea how many other threads
    // are in the queue.
392    gtid_code |= 1;
393  }
394
395  KMP_FSYNC_ACQUIRED(lck);
396  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
397                  lck->lk.poll, gtid));
398  return KMP_LOCK_ACQUIRED_FIRST;
399}
400
401int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
402  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
403  return retval;
404}
405
406static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
407                                                kmp_int32 gtid) {
408  char const *const func = "omp_set_lock";
409  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
410      __kmp_is_futex_lock_nestable(lck)) {
411    KMP_FATAL(LockNestableUsedAsSimple, func);
412  }
413  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
414    KMP_FATAL(LockIsAlreadyOwned, func);
415  }
416  return __kmp_acquire_futex_lock(lck, gtid);
417}
418
419int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
420  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
421                                  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
422    KMP_FSYNC_ACQUIRED(lck);
423    return TRUE;
424  }
425  return FALSE;
426}
427
428static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
429                                             kmp_int32 gtid) {
430  char const *const func = "omp_test_lock";
431  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
432      __kmp_is_futex_lock_nestable(lck)) {
433    KMP_FATAL(LockNestableUsedAsSimple, func);
434  }
435  return __kmp_test_futex_lock(lck, gtid);
436}
437
438int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
439  KMP_MB(); /* Flush all pending memory write invalidates.  */
440
441  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
442                  lck, lck->lk.poll, gtid));
443
444  KMP_FSYNC_RELEASING(lck);
445
446  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));
447
448  KA_TRACE(1000,
449           ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
450            lck, gtid, poll_val));
451
452  if (KMP_LOCK_STRIP(poll_val) & 1) {
453    KA_TRACE(1000,
454             ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
455              lck, gtid));
456    syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
457            NULL, NULL, 0);
458  }
459
460  KMP_MB(); /* Flush all pending memory write invalidates.  */
461
462  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
463                  lck->lk.poll, gtid));
464
465  KMP_YIELD_OVERSUB();
466  return KMP_LOCK_RELEASED;
467}
468
469static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
470                                                kmp_int32 gtid) {
471  char const *const func = "omp_unset_lock";
472  KMP_MB(); /* in case another processor initialized lock */
473  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
474      __kmp_is_futex_lock_nestable(lck)) {
475    KMP_FATAL(LockNestableUsedAsSimple, func);
476  }
477  if (__kmp_get_futex_lock_owner(lck) == -1) {
478    KMP_FATAL(LockUnsettingFree, func);
479  }
480  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
481      (__kmp_get_futex_lock_owner(lck) != gtid)) {
482    KMP_FATAL(LockUnsettingSetByAnother, func);
483  }
484  return __kmp_release_futex_lock(lck, gtid);
485}
486
487void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
488  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
489}
490
491void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
492
493static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
494  char const *const func = "omp_destroy_lock";
495  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
496      __kmp_is_futex_lock_nestable(lck)) {
497    KMP_FATAL(LockNestableUsedAsSimple, func);
498  }
499  if (__kmp_get_futex_lock_owner(lck) != -1) {
500    KMP_FATAL(LockStillOwned, func);
501  }
502  __kmp_destroy_futex_lock(lck);
503}
504
505// nested futex locks
506
507int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
508  KMP_DEBUG_ASSERT(gtid >= 0);
509
510  if (__kmp_get_futex_lock_owner(lck) == gtid) {
511    lck->lk.depth_locked += 1;
512    return KMP_LOCK_ACQUIRED_NEXT;
513  } else {
514    __kmp_acquire_futex_lock_timed_template(lck, gtid);
515    lck->lk.depth_locked = 1;
516    return KMP_LOCK_ACQUIRED_FIRST;
517  }
518}
519
520static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
521                                                       kmp_int32 gtid) {
522  char const *const func = "omp_set_nest_lock";
523  if (!__kmp_is_futex_lock_nestable(lck)) {
524    KMP_FATAL(LockSimpleUsedAsNestable, func);
525  }
526  return __kmp_acquire_nested_futex_lock(lck, gtid);
527}
528
529int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
530  int retval;
531
532  KMP_DEBUG_ASSERT(gtid >= 0);
533
534  if (__kmp_get_futex_lock_owner(lck) == gtid) {
535    retval = ++lck->lk.depth_locked;
536  } else if (!__kmp_test_futex_lock(lck, gtid)) {
537    retval = 0;
538  } else {
539    KMP_MB();
540    retval = lck->lk.depth_locked = 1;
541  }
542  return retval;
543}
544
545static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
546                                                    kmp_int32 gtid) {
547  char const *const func = "omp_test_nest_lock";
548  if (!__kmp_is_futex_lock_nestable(lck)) {
549    KMP_FATAL(LockSimpleUsedAsNestable, func);
550  }
551  return __kmp_test_nested_futex_lock(lck, gtid);
552}
553
554int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
555  KMP_DEBUG_ASSERT(gtid >= 0);
556
557  KMP_MB();
558  if (--(lck->lk.depth_locked) == 0) {
559    __kmp_release_futex_lock(lck, gtid);
560    return KMP_LOCK_RELEASED;
561  }
562  return KMP_LOCK_STILL_HELD;
563}
564
565static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
566                                                       kmp_int32 gtid) {
567  char const *const func = "omp_unset_nest_lock";
568  KMP_MB(); /* in case another processor initialized lock */
569  if (!__kmp_is_futex_lock_nestable(lck)) {
570    KMP_FATAL(LockSimpleUsedAsNestable, func);
571  }
572  if (__kmp_get_futex_lock_owner(lck) == -1) {
573    KMP_FATAL(LockUnsettingFree, func);
574  }
575  if (__kmp_get_futex_lock_owner(lck) != gtid) {
576    KMP_FATAL(LockUnsettingSetByAnother, func);
577  }
578  return __kmp_release_nested_futex_lock(lck, gtid);
579}
580
581void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
582  __kmp_init_futex_lock(lck);
583  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
584}
585
586void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
587  __kmp_destroy_futex_lock(lck);
588  lck->lk.depth_locked = 0;
589}
590
591static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
592  char const *const func = "omp_destroy_nest_lock";
593  if (!__kmp_is_futex_lock_nestable(lck)) {
594    KMP_FATAL(LockSimpleUsedAsNestable, func);
595  }
596  if (__kmp_get_futex_lock_owner(lck) != -1) {
597    KMP_FATAL(LockStillOwned, func);
598  }
599  __kmp_destroy_nested_futex_lock(lck);
600}
601
602#endif // KMP_USE_FUTEX
603
604/* ------------------------------------------------------------------------ */
605/* ticket (bakery) locks */
606
607static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
608  return std::atomic_load_explicit(&lck->lk.owner_id,
609                                   std::memory_order_relaxed) -
610         1;
611}
612
613static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
614  return std::atomic_load_explicit(&lck->lk.depth_locked,
615                                   std::memory_order_relaxed) != -1;
616}
617
618static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
619  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
620                                   std::memory_order_acquire) == my_ticket;
621}
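// __kmp_bakery_check is the spin predicate handed to KMP_WAIT_PTR in the
// acquire path below: the waiter is released once now_serving reaches the
// ticket it drew.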
622
623__forceinline static int
624__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
625                                         kmp_int32 gtid) {
626  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
627      &lck->lk.next_ticket, 1U, std::memory_order_relaxed);
628
629#ifdef USE_LOCK_PROFILE
630  if (std::atomic_load_explicit(&lck->lk.now_serving,
631                                std::memory_order_relaxed) != my_ticket)
632    __kmp_printf("LOCK CONTENTION: %p\n", lck);
633/* else __kmp_printf( "." );*/
634#endif /* USE_LOCK_PROFILE */
635
636  if (std::atomic_load_explicit(&lck->lk.now_serving,
637                                std::memory_order_acquire) == my_ticket) {
638    return KMP_LOCK_ACQUIRED_FIRST;
639  }
640  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
641  return KMP_LOCK_ACQUIRED_FIRST;
642}
643
644int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
645  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
646  return retval;
647}
648
649static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
650                                                 kmp_int32 gtid) {
651  char const *const func = "omp_set_lock";
652
653  if (!std::atomic_load_explicit(&lck->lk.initialized,
654                                 std::memory_order_relaxed)) {
655    KMP_FATAL(LockIsUninitialized, func);
656  }
657  if (lck->lk.self != lck) {
658    KMP_FATAL(LockIsUninitialized, func);
659  }
660  if (__kmp_is_ticket_lock_nestable(lck)) {
661    KMP_FATAL(LockNestableUsedAsSimple, func);
662  }
663  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
664    KMP_FATAL(LockIsAlreadyOwned, func);
665  }
666
667  __kmp_acquire_ticket_lock(lck, gtid);
668
669  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
670                             std::memory_order_relaxed);
671  return KMP_LOCK_ACQUIRED_FIRST;
672}
673
674int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
675  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
676                                                   std::memory_order_relaxed);
677
678  if (std::atomic_load_explicit(&lck->lk.now_serving,
679                                std::memory_order_relaxed) == my_ticket) {
680    kmp_uint32 next_ticket = my_ticket + 1;
681    if (std::atomic_compare_exchange_strong_explicit(
682            &lck->lk.next_ticket, &my_ticket, next_ticket,
683            std::memory_order_acquire, std::memory_order_acquire)) {
684      return TRUE;
685    }
686  }
687  return FALSE;
688}
689
690static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
691                                              kmp_int32 gtid) {
692  char const *const func = "omp_test_lock";
693
694  if (!std::atomic_load_explicit(&lck->lk.initialized,
695                                 std::memory_order_relaxed)) {
696    KMP_FATAL(LockIsUninitialized, func);
697  }
698  if (lck->lk.self != lck) {
699    KMP_FATAL(LockIsUninitialized, func);
700  }
701  if (__kmp_is_ticket_lock_nestable(lck)) {
702    KMP_FATAL(LockNestableUsedAsSimple, func);
703  }
704
705  int retval = __kmp_test_ticket_lock(lck, gtid);
706
707  if (retval) {
708    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
709                               std::memory_order_relaxed);
710  }
711  return retval;
712}
713
714int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
715  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
716                                                  std::memory_order_relaxed) -
717                        std::atomic_load_explicit(&lck->lk.now_serving,
718                                                  std::memory_order_relaxed);
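  // distance is the number of tickets handed out but not yet served, i.e.
  // roughly how many threads are queued on the lock at release time; it is
  // used below to decide whether releasing should also yield the processor.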
719
720  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
721                                 std::memory_order_release);
722
723  KMP_YIELD(distance >
724            (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
725  return KMP_LOCK_RELEASED;
726}
727
728static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
729                                                 kmp_int32 gtid) {
730  char const *const func = "omp_unset_lock";
731
732  if (!std::atomic_load_explicit(&lck->lk.initialized,
733                                 std::memory_order_relaxed)) {
734    KMP_FATAL(LockIsUninitialized, func);
735  }
736  if (lck->lk.self != lck) {
737    KMP_FATAL(LockIsUninitialized, func);
738  }
739  if (__kmp_is_ticket_lock_nestable(lck)) {
740    KMP_FATAL(LockNestableUsedAsSimple, func);
741  }
742  if (__kmp_get_ticket_lock_owner(lck) == -1) {
743    KMP_FATAL(LockUnsettingFree, func);
744  }
745  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
746      (__kmp_get_ticket_lock_owner(lck) != gtid)) {
747    KMP_FATAL(LockUnsettingSetByAnother, func);
748  }
749  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
750  return __kmp_release_ticket_lock(lck, gtid);
751}
752
753void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
754  lck->lk.location = NULL;
755  lck->lk.self = lck;
756  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
757                             std::memory_order_relaxed);
758  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
759                             std::memory_order_relaxed);
760  std::atomic_store_explicit(
761      &lck->lk.owner_id, 0,
762      std::memory_order_relaxed); // no thread owns the lock.
763  std::atomic_store_explicit(
764      &lck->lk.depth_locked, -1,
765      std::memory_order_relaxed); // -1 => not a nested lock.
766  std::atomic_store_explicit(&lck->lk.initialized, true,
767                             std::memory_order_release);
768}
769
770void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
771  std::atomic_store_explicit(&lck->lk.initialized, false,
772                             std::memory_order_release);
773  lck->lk.self = NULL;
774  lck->lk.location = NULL;
775  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
776                             std::memory_order_relaxed);
777  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
778                             std::memory_order_relaxed);
779  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
780  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
781                             std::memory_order_relaxed);
782}
783
784static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
785  char const *const func = "omp_destroy_lock";
786
787  if (!std::atomic_load_explicit(&lck->lk.initialized,
788                                 std::memory_order_relaxed)) {
789    KMP_FATAL(LockIsUninitialized, func);
790  }
791  if (lck->lk.self != lck) {
792    KMP_FATAL(LockIsUninitialized, func);
793  }
794  if (__kmp_is_ticket_lock_nestable(lck)) {
795    KMP_FATAL(LockNestableUsedAsSimple, func);
796  }
797  if (__kmp_get_ticket_lock_owner(lck) != -1) {
798    KMP_FATAL(LockStillOwned, func);
799  }
800  __kmp_destroy_ticket_lock(lck);
801}
802
803// nested ticket locks
804
805int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
806  KMP_DEBUG_ASSERT(gtid >= 0);
807
808  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
809    std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
810                                   std::memory_order_relaxed);
811    return KMP_LOCK_ACQUIRED_NEXT;
812  } else {
813    __kmp_acquire_ticket_lock_timed_template(lck, gtid);
814    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
815                               std::memory_order_relaxed);
816    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
817                               std::memory_order_relaxed);
818    return KMP_LOCK_ACQUIRED_FIRST;
819  }
820}
821
822static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
823                                                        kmp_int32 gtid) {
824  char const *const func = "omp_set_nest_lock";
825
826  if (!std::atomic_load_explicit(&lck->lk.initialized,
827                                 std::memory_order_relaxed)) {
828    KMP_FATAL(LockIsUninitialized, func);
829  }
830  if (lck->lk.self != lck) {
831    KMP_FATAL(LockIsUninitialized, func);
832  }
833  if (!__kmp_is_ticket_lock_nestable(lck)) {
834    KMP_FATAL(LockSimpleUsedAsNestable, func);
835  }
836  return __kmp_acquire_nested_ticket_lock(lck, gtid);
837}
838
839int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
840  int retval;
841
842  KMP_DEBUG_ASSERT(gtid >= 0);
843
844  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
845    retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
846                                            std::memory_order_relaxed) +
847             1;
848  } else if (!__kmp_test_ticket_lock(lck, gtid)) {
849    retval = 0;
850  } else {
851    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
852                               std::memory_order_relaxed);
853    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
854                               std::memory_order_relaxed);
855    retval = 1;
856  }
857  return retval;
858}
859
860static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
861                                                     kmp_int32 gtid) {
862  char const *const func = "omp_test_nest_lock";
863
864  if (!std::atomic_load_explicit(&lck->lk.initialized,
865                                 std::memory_order_relaxed)) {
866    KMP_FATAL(LockIsUninitialized, func);
867  }
868  if (lck->lk.self != lck) {
869    KMP_FATAL(LockIsUninitialized, func);
870  }
871  if (!__kmp_is_ticket_lock_nestable(lck)) {
872    KMP_FATAL(LockSimpleUsedAsNestable, func);
873  }
874  return __kmp_test_nested_ticket_lock(lck, gtid);
875}
876
877int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
878  KMP_DEBUG_ASSERT(gtid >= 0);
879
880  if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
881                                      std::memory_order_relaxed) -
882       1) == 0) {
883    std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
884    __kmp_release_ticket_lock(lck, gtid);
885    return KMP_LOCK_RELEASED;
886  }
887  return KMP_LOCK_STILL_HELD;
888}
889
890static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
891                                                        kmp_int32 gtid) {
892  char const *const func = "omp_unset_nest_lock";
893
894  if (!std::atomic_load_explicit(&lck->lk.initialized,
895                                 std::memory_order_relaxed)) {
896    KMP_FATAL(LockIsUninitialized, func);
897  }
898  if (lck->lk.self != lck) {
899    KMP_FATAL(LockIsUninitialized, func);
900  }
901  if (!__kmp_is_ticket_lock_nestable(lck)) {
902    KMP_FATAL(LockSimpleUsedAsNestable, func);
903  }
904  if (__kmp_get_ticket_lock_owner(lck) == -1) {
905    KMP_FATAL(LockUnsettingFree, func);
906  }
907  if (__kmp_get_ticket_lock_owner(lck) != gtid) {
908    KMP_FATAL(LockUnsettingSetByAnother, func);
909  }
910  return __kmp_release_nested_ticket_lock(lck, gtid);
911}
912
913void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
914  __kmp_init_ticket_lock(lck);
915  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
916                             std::memory_order_relaxed);
917  // >= 0 for nestable locks, -1 for simple locks
918}
919
920void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
921  __kmp_destroy_ticket_lock(lck);
922  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
923                             std::memory_order_relaxed);
924}
925
926static void
927__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
928  char const *const func = "omp_destroy_nest_lock";
929
930  if (!std::atomic_load_explicit(&lck->lk.initialized,
931                                 std::memory_order_relaxed)) {
932    KMP_FATAL(LockIsUninitialized, func);
933  }
934  if (lck->lk.self != lck) {
935    KMP_FATAL(LockIsUninitialized, func);
936  }
937  if (!__kmp_is_ticket_lock_nestable(lck)) {
938    KMP_FATAL(LockSimpleUsedAsNestable, func);
939  }
940  if (__kmp_get_ticket_lock_owner(lck) != -1) {
941    KMP_FATAL(LockStillOwned, func);
942  }
943  __kmp_destroy_nested_ticket_lock(lck);
944}
945
946// access functions to fields which don't exist for all lock kinds.
947
948static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
949  return lck->lk.location;
950}
951
952static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
953                                           const ident_t *loc) {
954  lck->lk.location = loc;
955}
956
957static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
958  return lck->lk.flags;
959}
960
961static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
962                                        kmp_lock_flags_t flags) {
963  lck->lk.flags = flags;
964}
965
966/* ------------------------------------------------------------------------ */
967/* queuing locks */
968
/* First the states
   (head,tail) =              0, 0  means lock is unheld, nobody on queue
                 UINT_MAX or -1, 0  means lock is held, nobody on queue
                              h, h  means lock held or about to transition,
                                    1 element on queue
                              h, t  h <> t, means lock is held or about to
                                    transition, >1 elements on queue

   Now the transitions
      Acquire(0,0)  = -1 ,0
      Release(0,0)  = Error
      Acquire(-1,0) =  h ,h    h > 0
      Release(-1,0) =  0 ,0
      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
      Release(h,h)  = -1 ,0    h > 0
      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t

   And pictorially

           +-----+
           | 0, 0|------- release -------> Error
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           |-1, 0|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, h|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, t|----- acquire, release loopback ---+
           +-----+                                   |
                ^                                    |
                |                                    |
                +------------------------------------+
 */
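
/* Worked example (threads A, B, C; a, b, c denote their gtid+1 values):
     A acquires an idle lock:   (0,0)  -> (-1,0)   A owns, queue empty
     B arrives while A owns:    (-1,0) -> (b,b)    B is the only waiter
     C arrives while B waits:   (b,b)  -> (b,c)    queue is B then C
     A releases:                (b,c)  -> (c,c)    B now owns, C still queued
     B releases:                (c,c)  -> (-1,0)   C now owns, queue empty
     C releases:                (-1,0) -> (0,0)    lock idle again */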
1020
1021#ifdef DEBUG_QUEUING_LOCKS
1022
1023/* Stuff for circular trace buffer */
1024#define TRACE_BUF_ELE 1024
1025static char traces[TRACE_BUF_ELE][128] = {0};
1026static int tc = 0;
1027#define TRACE_LOCK(X, Y)                                                       \
1028  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1029#define TRACE_LOCK_T(X, Y, Z)                                                  \
1030  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1031#define TRACE_LOCK_HT(X, Y, Z, Q)                                              \
1032  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y,   \
1033               Z, Q);
1034
1035static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
1036                                    kmp_queuing_lock_t *lck, kmp_int32 head_id,
1037                                    kmp_int32 tail_id) {
1038  kmp_int32 t, i;
1039
1040  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1041
1042  i = tc % TRACE_BUF_ELE;
1043  __kmp_printf_no_lock("%s\n", traces[i]);
1044  i = (i + 1) % TRACE_BUF_ELE;
1045  while (i != (tc % TRACE_BUF_ELE)) {
1046    __kmp_printf_no_lock("%s", traces[i]);
1047    i = (i + 1) % TRACE_BUF_ELE;
1048  }
1049  __kmp_printf_no_lock("\n");
1050
1051  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1052                       "next_wait:%d, head_id:%d, tail_id:%d\n",
1053                       gtid + 1, this_thr->th.th_spin_here,
1054                       this_thr->th.th_next_waiting, head_id, tail_id);
1055
1056  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
1057
1058  if (lck->lk.head_id >= 1) {
1059    t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
1060    while (t > 0) {
1061      __kmp_printf_no_lock("-> %d ", t);
1062      t = __kmp_threads[t - 1]->th.th_next_waiting;
1063    }
1064  }
1065  __kmp_printf_no_lock(";  tail: %d ", lck->lk.tail_id);
1066  __kmp_printf_no_lock("\n\n");
1067}
1068
1069#endif /* DEBUG_QUEUING_LOCKS */
1070
1071static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
1072  return TCR_4(lck->lk.owner_id) - 1;
1073}
1074
1075static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
1076  return lck->lk.depth_locked != -1;
1077}
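// Queue representation: lk.head_id and lk.tail_id hold gtid + 1 of the first
// and last waiting threads (0 for none, head_id == -1 for "held, no waiters"),
// and each waiter's th.th_next_waiting field links it to the thread queued
// behind it.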
1078
/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes
   is a potential compiler problem with __forceinline. */
1083__forceinline static int
1084__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
1085                                          kmp_int32 gtid) {
1086  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1087  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1088  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1089  volatile kmp_uint32 *spin_here_p;
1090
1091#if OMPT_SUPPORT
1092  ompt_state_t prev_state = ompt_state_undefined;
1093#endif
1094
1095  KA_TRACE(1000,
1096           ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1097
1098  KMP_FSYNC_PREPARE(lck);
1099  KMP_DEBUG_ASSERT(this_thr != NULL);
1100  spin_here_p = &this_thr->th.th_spin_here;
1101
1102#ifdef DEBUG_QUEUING_LOCKS
1103  TRACE_LOCK(gtid + 1, "acq ent");
1104  if (*spin_here_p)
1105    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1106  if (this_thr->th.th_next_waiting != 0)
1107    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1108#endif
1109  KMP_DEBUG_ASSERT(!*spin_here_p);
1110  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1111
1112  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1113     head_id_p that may follow, not just in execution order, but also in
1114     visibility order. This way, when a releasing thread observes the changes to
1115     the queue by this thread, it can rightly assume that spin_here_p has
1116     already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1117     not premature.  If the releasing thread sets spin_here_p to FALSE before
1118     this thread sets it to TRUE, this thread will hang. */
1119  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1120
1121  while (1) {
1122    kmp_int32 enqueued;
1123    kmp_int32 head;
1124    kmp_int32 tail;
1125
1126    head = *head_id_p;
1127
1128    switch (head) {
1129
1130    case -1: {
1131#ifdef DEBUG_QUEUING_LOCKS
1132      tail = *tail_id_p;
1133      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1134#endif
      tail = 0; /* to make sure the next link read asynchronously is not set
                accidentally; this assignment prevents us from entering the
                if ( tail > 0 ) condition in the enqueued case below, which is
                not necessary for this state transition */
1139
1140      /* try (-1,0)->(tid,tid) */
1141      enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1142                                             KMP_PACK_64(-1, 0),
1143                                             KMP_PACK_64(gtid + 1, gtid + 1));
1144#ifdef DEBUG_QUEUING_LOCKS
1145      if (enqueued)
1146        TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1147#endif
1148    } break;
1149
1150    default: {
1151      tail = *tail_id_p;
1152      KMP_DEBUG_ASSERT(tail != gtid + 1);
1153
1154#ifdef DEBUG_QUEUING_LOCKS
1155      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1156#endif
1157
1158      if (tail == 0) {
1159        enqueued = FALSE;
1160      } else {
1161        /* try (h,t) or (h,h)->(h,tid) */
1162        enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1163
1164#ifdef DEBUG_QUEUING_LOCKS
1165        if (enqueued)
1166          TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1167#endif
1168      }
1169    } break;
1170
1171    case 0: /* empty queue */
1172    {
1173      kmp_int32 grabbed_lock;
1174
1175#ifdef DEBUG_QUEUING_LOCKS
1176      tail = *tail_id_p;
1177      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1178#endif
1179      /* try (0,0)->(-1,0) */
1180
1181      /* only legal transition out of head = 0 is head = -1 with no change to
1182       * tail */
1183      grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1184
1185      if (grabbed_lock) {
1186
1187        *spin_here_p = FALSE;
1188
1189        KA_TRACE(
1190            1000,
1191            ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1192             lck, gtid));
1193#ifdef DEBUG_QUEUING_LOCKS
1194        TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1195#endif
1196
1197#if OMPT_SUPPORT
1198        if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1199          /* change the state before clearing wait_id */
1200          this_thr->th.ompt_thread_info.state = prev_state;
1201          this_thr->th.ompt_thread_info.wait_id = 0;
1202        }
1203#endif
1204
1205        KMP_FSYNC_ACQUIRED(lck);
1206        return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1207      }
1208      enqueued = FALSE;
1209    } break;
1210    }
1211
1212#if OMPT_SUPPORT
1213    if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1214      /* this thread will spin; set wait_id before entering wait state */
1215      prev_state = this_thr->th.ompt_thread_info.state;
1216      this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1217      this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1218    }
1219#endif
1220
1221    if (enqueued) {
1222      if (tail > 0) {
1223        kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1224        KMP_ASSERT(tail_thr != NULL);
1225        tail_thr->th.th_next_waiting = gtid + 1;
1226        /* corresponding wait for this write in release code */
1227      }
1228      KA_TRACE(1000,
1229               ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1230                lck, gtid));
1231
1232      KMP_MB();
1233      // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1234      KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
1235      // Synchronize writes to both runtime thread structures
1236      // and writes in user code.
1237      KMP_MB();
1238
1239#ifdef DEBUG_QUEUING_LOCKS
1240      TRACE_LOCK(gtid + 1, "acq spin");
1241
1242      if (this_thr->th.th_next_waiting != 0)
1243        __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1244#endif
1245      KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1246      KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1247                      "waiting on queue\n",
1248                      lck, gtid));
1249
1250#ifdef DEBUG_QUEUING_LOCKS
1251      TRACE_LOCK(gtid + 1, "acq exit 2");
1252#endif
1253
1254#if OMPT_SUPPORT
1255      /* change the state before clearing wait_id */
1256      this_thr->th.ompt_thread_info.state = prev_state;
1257      this_thr->th.ompt_thread_info.wait_id = 0;
1258#endif
1259
1260      /* got lock, we were dequeued by the thread that released lock */
1261      return KMP_LOCK_ACQUIRED_FIRST;
1262    }
1263
1264    /* Yield if number of threads > number of logical processors */
1265    /* ToDo: Not sure why this should only be in oversubscription case,
1266       maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1267    KMP_YIELD_OVERSUB();
1268
1269#ifdef DEBUG_QUEUING_LOCKS
1270    TRACE_LOCK(gtid + 1, "acq retry");
1271#endif
1272  }
1273  KMP_ASSERT2(0, "should not get here");
1274  return KMP_LOCK_ACQUIRED_FIRST;
1275}
1276
1277int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1278  KMP_DEBUG_ASSERT(gtid >= 0);
1279
1280  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1281  return retval;
1282}
1283
1284static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1285                                                  kmp_int32 gtid) {
1286  char const *const func = "omp_set_lock";
1287  if (lck->lk.initialized != lck) {
1288    KMP_FATAL(LockIsUninitialized, func);
1289  }
1290  if (__kmp_is_queuing_lock_nestable(lck)) {
1291    KMP_FATAL(LockNestableUsedAsSimple, func);
1292  }
1293  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1294    KMP_FATAL(LockIsAlreadyOwned, func);
1295  }
1296
1297  __kmp_acquire_queuing_lock(lck, gtid);
1298
1299  lck->lk.owner_id = gtid + 1;
1300  return KMP_LOCK_ACQUIRED_FIRST;
1301}
1302
1303int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1304  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1305  kmp_int32 head;
1306#ifdef KMP_DEBUG
1307  kmp_info_t *this_thr;
1308#endif
1309
1310  KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1311  KMP_DEBUG_ASSERT(gtid >= 0);
1312#ifdef KMP_DEBUG
1313  this_thr = __kmp_thread_from_gtid(gtid);
1314  KMP_DEBUG_ASSERT(this_thr != NULL);
1315  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1316#endif
1317
1318  head = *head_id_p;
1319
1320  if (head == 0) { /* nobody on queue, nobody holding */
1321    /* try (0,0)->(-1,0) */
1322    if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1323      KA_TRACE(1000,
1324               ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1325      KMP_FSYNC_ACQUIRED(lck);
1326      return TRUE;
1327    }
1328  }
1329
1330  KA_TRACE(1000,
1331           ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1332  return FALSE;
1333}
1334
1335static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1336                                               kmp_int32 gtid) {
1337  char const *const func = "omp_test_lock";
1338  if (lck->lk.initialized != lck) {
1339    KMP_FATAL(LockIsUninitialized, func);
1340  }
1341  if (__kmp_is_queuing_lock_nestable(lck)) {
1342    KMP_FATAL(LockNestableUsedAsSimple, func);
1343  }
1344
1345  int retval = __kmp_test_queuing_lock(lck, gtid);
1346
1347  if (retval) {
1348    lck->lk.owner_id = gtid + 1;
1349  }
1350  return retval;
1351}
1352
1353int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1354  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1355  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1356
1357  KA_TRACE(1000,
1358           ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1359  KMP_DEBUG_ASSERT(gtid >= 0);
1360#if KMP_DEBUG || DEBUG_QUEUING_LOCKS
1361  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1362#endif
1363  KMP_DEBUG_ASSERT(this_thr != NULL);
1364#ifdef DEBUG_QUEUING_LOCKS
1365  TRACE_LOCK(gtid + 1, "rel ent");
1366
1367  if (this_thr->th.th_spin_here)
1368    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1369  if (this_thr->th.th_next_waiting != 0)
1370    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1371#endif
1372  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1373  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1374
1375  KMP_FSYNC_RELEASING(lck);
1376
1377  while (1) {
1378    kmp_int32 dequeued;
1379    kmp_int32 head;
1380    kmp_int32 tail;
1381
1382    head = *head_id_p;
1383
1384#ifdef DEBUG_QUEUING_LOCKS
1385    tail = *tail_id_p;
1386    TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1387    if (head == 0)
1388      __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1389#endif
1390    KMP_DEBUG_ASSERT(head !=
1391                     0); /* holding the lock, head must be -1 or queue head */
1392
1393    if (head == -1) { /* nobody on queue */
1394      /* try (-1,0)->(0,0) */
1395      if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1396        KA_TRACE(
1397            1000,
1398            ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1399             lck, gtid));
1400#ifdef DEBUG_QUEUING_LOCKS
1401        TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1402#endif
1403
1404#if OMPT_SUPPORT
1405/* nothing to do - no other thread is trying to shift blame */
1406#endif
1407        return KMP_LOCK_RELEASED;
1408      }
1409      dequeued = FALSE;
1410    } else {
1411      KMP_MB();
1412      tail = *tail_id_p;
1413      if (head == tail) { /* only one thread on the queue */
1414#ifdef DEBUG_QUEUING_LOCKS
1415        if (head <= 0)
1416          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1417#endif
1418        KMP_DEBUG_ASSERT(head > 0);
1419
1420        /* try (h,h)->(-1,0) */
1421        dequeued = KMP_COMPARE_AND_STORE_REL64(
1422            RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1423            KMP_PACK_64(-1, 0));
1424#ifdef DEBUG_QUEUING_LOCKS
1425        TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1426#endif
1427
1428      } else {
1429        volatile kmp_int32 *waiting_id_p;
1430        kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1431        KMP_DEBUG_ASSERT(head_thr != NULL);
1432        waiting_id_p = &head_thr->th.th_next_waiting;
1433
1434/* Does this require synchronous reads? */
1435#ifdef DEBUG_QUEUING_LOCKS
1436        if (head <= 0 || tail <= 0)
1437          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1438#endif
1439        KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1440
1441        /* try (h,t)->(h',t) or (t,t) */
1442        KMP_MB();
1443        /* make sure enqueuing thread has time to update next waiting thread
1444         * field */
1445        *head_id_p =
1446            KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
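        // The thread queued behind the current head stores its id into the
        // head's th_next_waiting only after its enqueueing CAS, so spin here
        // until that link is published and then advance head_id past the
        // dequeued head to its successor.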
1447#ifdef DEBUG_QUEUING_LOCKS
1448        TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1449#endif
1450        dequeued = TRUE;
1451      }
1452    }
1453
1454    if (dequeued) {
1455      kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1456      KMP_DEBUG_ASSERT(head_thr != NULL);
1457
1458/* Does this require synchronous reads? */
1459#ifdef DEBUG_QUEUING_LOCKS
1460      if (head <= 0 || tail <= 0)
1461        __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1462#endif
1463      KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1464
      /* Bookkeeping only: the waiting thread is not actually released until
         th_spin_here is cleared below, which prevents a race with the acquire
         code. */
1467      head_thr->th.th_next_waiting = 0;
1468#ifdef DEBUG_QUEUING_LOCKS
1469      TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1470#endif
1471
1472      KMP_MB();
1473      /* reset spin value */
1474      head_thr->th.th_spin_here = FALSE;
1475
1476      KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1477                      "dequeuing\n",
1478                      lck, gtid));
1479#ifdef DEBUG_QUEUING_LOCKS
1480      TRACE_LOCK(gtid + 1, "rel exit 2");
1481#endif
1482      return KMP_LOCK_RELEASED;
1483    }
1484    /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1485       threads */
1486
1487#ifdef DEBUG_QUEUING_LOCKS
1488    TRACE_LOCK(gtid + 1, "rel retry");
1489#endif
1490
1491  } /* while */
1492  KMP_ASSERT2(0, "should not get here");
1493  return KMP_LOCK_RELEASED;
1494}
1495
1496static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1497                                                  kmp_int32 gtid) {
1498  char const *const func = "omp_unset_lock";
1499  KMP_MB(); /* in case another processor initialized lock */
1500  if (lck->lk.initialized != lck) {
1501    KMP_FATAL(LockIsUninitialized, func);
1502  }
1503  if (__kmp_is_queuing_lock_nestable(lck)) {
1504    KMP_FATAL(LockNestableUsedAsSimple, func);
1505  }
1506  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1507    KMP_FATAL(LockUnsettingFree, func);
1508  }
1509  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1510    KMP_FATAL(LockUnsettingSetByAnother, func);
1511  }
1512  lck->lk.owner_id = 0;
1513  return __kmp_release_queuing_lock(lck, gtid);
1514}
1515
1516void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
1517  lck->lk.location = NULL;
1518  lck->lk.head_id = 0;
1519  lck->lk.tail_id = 0;
1520  lck->lk.next_ticket = 0;
1521  lck->lk.now_serving = 0;
1522  lck->lk.owner_id = 0; // no thread owns the lock.
1523  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1524  lck->lk.initialized = lck;
1525
1526  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1527}
1528
1529void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
1530  lck->lk.initialized = NULL;
1531  lck->lk.location = NULL;
1532  lck->lk.head_id = 0;
1533  lck->lk.tail_id = 0;
1534  lck->lk.next_ticket = 0;
1535  lck->lk.now_serving = 0;
1536  lck->lk.owner_id = 0;
1537  lck->lk.depth_locked = -1;
1538}
1539
1540static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1541  char const *const func = "omp_destroy_lock";
1542  if (lck->lk.initialized != lck) {
1543    KMP_FATAL(LockIsUninitialized, func);
1544  }
1545  if (__kmp_is_queuing_lock_nestable(lck)) {
1546    KMP_FATAL(LockNestableUsedAsSimple, func);
1547  }
1548  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1549    KMP_FATAL(LockStillOwned, func);
1550  }
1551  __kmp_destroy_queuing_lock(lck);
1552}
1553
1554// nested queuing locks
1555
1556int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1557  KMP_DEBUG_ASSERT(gtid >= 0);
1558
1559  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1560    lck->lk.depth_locked += 1;
1561    return KMP_LOCK_ACQUIRED_NEXT;
1562  } else {
1563    __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1564    KMP_MB();
1565    lck->lk.depth_locked = 1;
1566    KMP_MB();
1567    lck->lk.owner_id = gtid + 1;
1568    return KMP_LOCK_ACQUIRED_FIRST;
1569  }
1570}
1571
1572static int
1573__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1574                                              kmp_int32 gtid) {
1575  char const *const func = "omp_set_nest_lock";
1576  if (lck->lk.initialized != lck) {
1577    KMP_FATAL(LockIsUninitialized, func);
1578  }
1579  if (!__kmp_is_queuing_lock_nestable(lck)) {
1580    KMP_FATAL(LockSimpleUsedAsNestable, func);
1581  }
1582  return __kmp_acquire_nested_queuing_lock(lck, gtid);
1583}
1584
1585int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1586  int retval;
1587
1588  KMP_DEBUG_ASSERT(gtid >= 0);
1589
1590  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1591    retval = ++lck->lk.depth_locked;
1592  } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1593    retval = 0;
1594  } else {
1595    KMP_MB();
1596    retval = lck->lk.depth_locked = 1;
1597    KMP_MB();
1598    lck->lk.owner_id = gtid + 1;
1599  }
1600  return retval;
1601}
1602
1603static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1604                                                      kmp_int32 gtid) {
1605  char const *const func = "omp_test_nest_lock";
1606  if (lck->lk.initialized != lck) {
1607    KMP_FATAL(LockIsUninitialized, func);
1608  }
1609  if (!__kmp_is_queuing_lock_nestable(lck)) {
1610    KMP_FATAL(LockSimpleUsedAsNestable, func);
1611  }
1612  return __kmp_test_nested_queuing_lock(lck, gtid);
1613}
1614
1615int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1616  KMP_DEBUG_ASSERT(gtid >= 0);
1617
1618  KMP_MB();
1619  if (--(lck->lk.depth_locked) == 0) {
1620    KMP_MB();
1621    lck->lk.owner_id = 0;
1622    __kmp_release_queuing_lock(lck, gtid);
1623    return KMP_LOCK_RELEASED;
1624  }
1625  return KMP_LOCK_STILL_HELD;
1626}
1627
1628static int
1629__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1630                                              kmp_int32 gtid) {
1631  char const *const func = "omp_unset_nest_lock";
1632  KMP_MB(); /* in case another processor initialized lock */
1633  if (lck->lk.initialized != lck) {
1634    KMP_FATAL(LockIsUninitialized, func);
1635  }
1636  if (!__kmp_is_queuing_lock_nestable(lck)) {
1637    KMP_FATAL(LockSimpleUsedAsNestable, func);
1638  }
1639  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1640    KMP_FATAL(LockUnsettingFree, func);
1641  }
1642  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1643    KMP_FATAL(LockUnsettingSetByAnother, func);
1644  }
1645  return __kmp_release_nested_queuing_lock(lck, gtid);
1646}
1647
1648void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1649  __kmp_init_queuing_lock(lck);
1650  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1651}
1652
1653void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1654  __kmp_destroy_queuing_lock(lck);
1655  lck->lk.depth_locked = 0;
1656}
1657
1658static void
1659__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1660  char const *const func = "omp_destroy_nest_lock";
1661  if (lck->lk.initialized != lck) {
1662    KMP_FATAL(LockIsUninitialized, func);
1663  }
1664  if (!__kmp_is_queuing_lock_nestable(lck)) {
1665    KMP_FATAL(LockSimpleUsedAsNestable, func);
1666  }
1667  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1668    KMP_FATAL(LockStillOwned, func);
1669  }
1670  __kmp_destroy_nested_queuing_lock(lck);
1671}
1672
1673// access functions to fields which don't exist for all lock kinds.
1674
1675static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
1676  return lck->lk.location;
1677}
1678
1679static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
1680                                            const ident_t *loc) {
1681  lck->lk.location = loc;
1682}
1683
1684static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
1685  return lck->lk.flags;
1686}
1687
1688static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
1689                                         kmp_lock_flags_t flags) {
1690  lck->lk.flags = flags;
1691}
1692
1693#if KMP_USE_ADAPTIVE_LOCKS
1694
1695/* RTM Adaptive locks */
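// An adaptive lock combines a hardware (RTM) transaction with the queuing lock
// as the fall-back path: acquisition is first attempted speculatively, without
// storing to the lock word, and on failure it falls back to the real queuing
// lock. The "badness" bit mask maintained below throttles how often
// speculation is retried after failures.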
1696
1697#if KMP_HAVE_RTM_INTRINSICS
1698#include <immintrin.h>
1699#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1700
1701#else
1702
1703// Values from the status register after failed speculation.
1704#define _XBEGIN_STARTED (~0u)
1705#define _XABORT_EXPLICIT (1 << 0)
1706#define _XABORT_RETRY (1 << 1)
1707#define _XABORT_CONFLICT (1 << 2)
1708#define _XABORT_CAPACITY (1 << 3)
1709#define _XABORT_DEBUG (1 << 4)
1710#define _XABORT_NESTED (1 << 5)
1711#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
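// For example, an explicit abort issued as _xabort(0xff), as the RTM lock code
// below does, yields a status with _XABORT_EXPLICIT set and
// _XABORT_CODE(status) == 0xff.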
1712
1713// Aborts for which it's worth trying again immediately
1714#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1715
1716#define STRINGIZE_INTERNAL(arg) #arg
1717#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1718
1719// Access to RTM instructions
/* A version of XBegin which returns -1 on speculation, and the value of EAX on
   an abort. This is the same definition as the compiler intrinsic that will be
   supported at some point. */
1723static __inline int _xbegin() {
1724  int res = -1;
1725
1726#if KMP_OS_WINDOWS
1727#if KMP_ARCH_X86_64
1728  _asm {
1729        _emit 0xC7
1730        _emit 0xF8
1731        _emit 2
1732        _emit 0
1733        _emit 0
1734        _emit 0
1735        jmp   L2
1736        mov   res, eax
1737    L2:
1738  }
1739#else /* IA32 */
1740  _asm {
1741        _emit 0xC7
1742        _emit 0xF8
1743        _emit 2
1744        _emit 0
1745        _emit 0
1746        _emit 0
1747        jmp   L2
1748        mov   res, eax
1749    L2:
1750  }
1751#endif // KMP_ARCH_X86_64
1752#else
1753  /* Note that %eax must be noted as killed (clobbered), because the XSR is
1754     returned in %eax(%rax) on abort.  Other register values are restored, so
1755     don't need to be killed.
1756
1757     We must also mark 'res' as an input and an output, since otherwise
1758     'res=-1' may be dropped as being dead, whereas we do need the assignment on
1759     the successful (i.e., non-abort) path. */
1760  __asm__ volatile("1: .byte  0xC7; .byte 0xF8;\n"
1761                   "   .long  1f-1b-6\n"
1762                   "    jmp   2f\n"
1763                   "1:  movl  %%eax,%0\n"
1764                   "2:"
1765                   : "+r"(res)::"memory", "%eax");
1766#endif // KMP_OS_WINDOWS
1767  return res;
1768}
1769
1770/* Transaction end */
1771static __inline void _xend() {
1772#if KMP_OS_WINDOWS
1773  __asm {
1774        _emit 0x0f
1775        _emit 0x01
1776        _emit 0xd5
1777  }
1778#else
1779  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1780#endif
1781}
1782
/* This is a macro; the argument must be a single-byte constant which can be
   evaluated by the inline assembler, since it is emitted as a byte into the
   assembly code. */
1786// clang-format off
1787#if KMP_OS_WINDOWS
1788#define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1789#else
1790#define _xabort(ARG)                                                           \
1791  __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1792#endif
1793// clang-format on
#endif // KMP_HAVE_RTM_INTRINSICS
1795
// Statistics are collected for testing purposes
1797#if KMP_DEBUG_ADAPTIVE_LOCKS
1798
1799// We accumulate speculative lock statistics when the lock is destroyed. We
1800// keep locks that haven't been destroyed in the liveLocks list so that we can
1801// grab their statistics too.
1802static kmp_adaptive_lock_statistics_t destroyedStats;
1803
1804// To hold the list of live locks.
1805static kmp_adaptive_lock_info_t liveLocks;
1806
1807// A lock so we can safely update the list of locks.
1808static kmp_bootstrap_lock_t chain_lock =
1809    KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
1810
1811// Initialize the list of stats.
1812void __kmp_init_speculative_stats() {
1813  kmp_adaptive_lock_info_t *lck = &liveLocks;
1814
1815  memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1816         sizeof(lck->stats));
1817  lck->stats.next = lck;
1818  lck->stats.prev = lck;
1819
1820  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1821  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1822
1823  __kmp_init_bootstrap_lock(&chain_lock);
1824}
1825
1826// Insert the lock into the circular list
1827static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1828  __kmp_acquire_bootstrap_lock(&chain_lock);
1829
1830  lck->stats.next = liveLocks.stats.next;
1831  lck->stats.prev = &liveLocks;
1832
1833  liveLocks.stats.next = lck;
1834  lck->stats.next->stats.prev = lck;
1835
1836  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1837  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1838
1839  __kmp_release_bootstrap_lock(&chain_lock);
1840}
1841
1842static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1843  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1844  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1845
1846  kmp_adaptive_lock_info_t *n = lck->stats.next;
1847  kmp_adaptive_lock_info_t *p = lck->stats.prev;
1848
1849  n->stats.prev = p;
1850  p->stats.next = n;
1851}
1852
1853static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1854  memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
1855         sizeof(lck->stats));
1856  __kmp_remember_lock(lck);
1857}
1858
1859static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1860                            kmp_adaptive_lock_info_t *lck) {
1861  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1862
1863  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1864  t->successfulSpeculations += s->successfulSpeculations;
1865  t->hardFailedSpeculations += s->hardFailedSpeculations;
1866  t->softFailedSpeculations += s->softFailedSpeculations;
1867  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1868  t->lemmingYields += s->lemmingYields;
1869}
1870
1871static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1872  __kmp_acquire_bootstrap_lock(&chain_lock);
1873
1874  __kmp_add_stats(&destroyedStats, lck);
1875  __kmp_forget_lock(lck);
1876
1877  __kmp_release_bootstrap_lock(&chain_lock);
1878}
1879
1880static float percent(kmp_uint32 count, kmp_uint32 total) {
1881  return (total == 0) ? 0.0 : (100.0 * count) / total;
1882}
1883
1884void __kmp_print_speculative_stats() {
1885  kmp_adaptive_lock_statistics_t total = destroyedStats;
1886  kmp_adaptive_lock_info_t *lck;
1887
1888  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1889    __kmp_add_stats(&total, lck);
1890  }
1891  kmp_adaptive_lock_statistics_t *t = &total;
1892  kmp_uint32 totalSections =
1893      t->nonSpeculativeAcquires + t->successfulSpeculations;
1894  kmp_uint32 totalSpeculations = t->successfulSpeculations +
1895                                 t->hardFailedSpeculations +
1896                                 t->softFailedSpeculations;
1897  if (totalSections <= 0)
1898    return;
1899
1900  kmp_safe_raii_file_t statsFile;
1901  if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
1902    statsFile.set_stdout();
1903  } else {
1904    size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1905    char buffer[buffLen];
1906    KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1907                 (kmp_int32)getpid());
1908    statsFile.open(buffer, "w");
1909  }
1910
1911  fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1912  fprintf(statsFile,
1913          " Lock parameters: \n"
1914          "   max_soft_retries               : %10d\n"
1915          "   max_badness                    : %10d\n",
1916          __kmp_adaptive_backoff_params.max_soft_retries,
1917          __kmp_adaptive_backoff_params.max_badness);
1918  fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1919          t->nonSpeculativeAcquireAttempts);
1920  fprintf(statsFile, " Total critical sections          : %10d\n",
1921          totalSections);
1922  fprintf(statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
1923          t->successfulSpeculations,
1924          percent(t->successfulSpeculations, totalSections));
1925  fprintf(statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
1926          t->nonSpeculativeAcquires,
1927          percent(t->nonSpeculativeAcquires, totalSections));
1928  fprintf(statsFile, " Lemming yields                   : %10d\n\n",
1929          t->lemmingYields);
1930
1931  fprintf(statsFile, " Speculative acquire attempts     : %10d\n",
1932          totalSpeculations);
1933  fprintf(statsFile, " Successes                        : %10d (%5.1f%%)\n",
1934          t->successfulSpeculations,
1935          percent(t->successfulSpeculations, totalSpeculations));
1936  fprintf(statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
1937          t->softFailedSpeculations,
1938          percent(t->softFailedSpeculations, totalSpeculations));
1939  fprintf(statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
1940          t->hardFailedSpeculations,
1941          percent(t->hardFailedSpeculations, totalSpeculations));
1942}
1943
1944#define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1945#else
1946#define KMP_INC_STAT(lck, stat)
1947
1948#endif // KMP_DEBUG_ADAPTIVE_LOCKS
1949
1950static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
  // It is enough to check that head_id is zero;
  // there is no need to also check the tail.
1953  bool res = lck->lk.head_id == 0;
1954
1955// We need a fence here, since we must ensure that no memory operations
1956// from later in this thread float above that read.
1957#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
1958  _mm_mfence();
1959#else
1960  __sync_synchronize();
1961#endif
1962
1963  return res;
1964}
1965
1966// Functions for manipulating the badness
1967static __inline void
1968__kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
1969  // Reset the badness to zero so we eagerly try to speculate again
1970  lck->lk.adaptive.badness = 0;
1971  KMP_INC_STAT(lck, successfulSpeculations);
1972}
1973
1974// Create a bit mask with one more set bit.
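// Starting from zero, successive failures produce badness values 1, 3, 7, 15,
// ..., saturating once the next value would exceed max_badness.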
1975static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1976  kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1977  if (newBadness > lck->lk.adaptive.max_badness) {
1978    return;
1979  } else {
1980    lck->lk.adaptive.badness = newBadness;
1981  }
1982}
1983
1984// Check whether speculation should be attempted.
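// Speculation is attempted only when (acquire_attempts & badness) == 0, so
// with badness == 2^k - 1 roughly every 2^k-th non-speculative acquire attempt
// retries speculation; badness == 0 means always speculate.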
1985KMP_ATTRIBUTE_TARGET_RTM
1986static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
1987                                           kmp_int32 gtid) {
1988  kmp_uint32 badness = lck->lk.adaptive.badness;
1989  kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
1990  int res = (attempts & badness) == 0;
1991  return res;
1992}
1993
1994// Attempt to acquire only the speculative lock.
1995// Does not back off to the non-speculative lock.
1996KMP_ATTRIBUTE_TARGET_RTM
1997static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
1998                                         kmp_int32 gtid) {
1999  int retries = lck->lk.adaptive.max_soft_retries;
2000
2001  // We don't explicitly count the start of speculation, rather we record the
2002  // results (success, hard fail, soft fail). The sum of all of those is the
2003  // total number of times we started speculation since all speculations must
2004  // end one of those ways.
2005  do {
2006    kmp_uint32 status = _xbegin();
2007    // Switch this in to disable actual speculation but exercise at least some
2008    // of the rest of the code. Useful for debugging...
2009    // kmp_uint32 status = _XABORT_NESTED;
2010
2011    if (status == _XBEGIN_STARTED) {
2012      /* We have successfully started speculation. Check that no-one acquired
2013         the lock for real between when we last looked and now. This also gets
2014         the lock cache line into our read-set, which we need so that we'll
2015         abort if anyone later claims it for real. */
2016      if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2017        // Lock is now visibly acquired, so someone beat us to it. Abort the
2018        // transaction so we'll restart from _xbegin with the failure status.
2019        _xabort(0x01);
2020        KMP_ASSERT2(0, "should not get here");
2021      }
2022      return 1; // Lock has been acquired (speculatively)
2023    } else {
2024      // We have aborted, update the statistics
2025      if (status & SOFT_ABORT_MASK) {
2026        KMP_INC_STAT(lck, softFailedSpeculations);
2027        // and loop round to retry.
2028      } else {
2029        KMP_INC_STAT(lck, hardFailedSpeculations);
2030        // Give up if we had a hard failure.
2031        break;
2032      }
2033    }
2034  } while (retries--); // Loop while we have retries, and didn't fail hard.
2035
  // Either we had a hard failure or we didn't succeed softly after the full
  // set of retries, so increase the badness (i.e., back off speculation).
2038  __kmp_step_badness(lck);
2039  return 0;
2040}
2041
2042// Attempt to acquire the speculative lock, or back off to the non-speculative
2043// one if the speculative lock cannot be acquired.
2044// We can succeed speculatively, non-speculatively, or fail.
2045static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2046  // First try to acquire the lock speculatively
2047  if (__kmp_should_speculate(lck, gtid) &&
2048      __kmp_test_adaptive_lock_only(lck, gtid))
2049    return 1;
2050
2051  // Speculative acquisition failed, so try to acquire it non-speculatively.
2052  // Count the non-speculative acquire attempt
2053  lck->lk.adaptive.acquire_attempts++;
2054
2055  // Use base, non-speculative lock.
2056  if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2057    KMP_INC_STAT(lck, nonSpeculativeAcquires);
2058    return 1; // Lock is acquired (non-speculatively)
2059  } else {
2060    return 0; // Failed to acquire the lock, it's already visibly locked.
2061  }
2062}
2063
2064static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2065                                                kmp_int32 gtid) {
2066  char const *const func = "omp_test_lock";
2067  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2068    KMP_FATAL(LockIsUninitialized, func);
2069  }
2070
2071  int retval = __kmp_test_adaptive_lock(lck, gtid);
2072
2073  if (retval) {
2074    lck->lk.qlk.owner_id = gtid + 1;
2075  }
2076  return retval;
2077}
2078
// Block until we can acquire a speculative, adaptive lock. We check whether we
// should be trying to speculate. If we should be, we check the real lock to see
// if it is free, and, if not, pause without attempting to acquire it until it
// is. Then we try the speculative acquire. This means that although we suffer
// from lemmings a little (because we can't acquire the lock speculatively until
// the queue of waiting threads has cleared), we don't get into a state where we
// can never acquire the lock speculatively (because we force the queue to clear
// by preventing new arrivals from entering it). This does mean that when we're
// trying to break the lemming effect, the lock is no longer fair. However,
// OpenMP makes no guarantee that its locks are fair, so this isn't a real
// problem.
2090static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2091                                        kmp_int32 gtid) {
2092  if (__kmp_should_speculate(lck, gtid)) {
2093    if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2094      if (__kmp_test_adaptive_lock_only(lck, gtid))
2095        return;
2096      // We tried speculation and failed, so give up.
2097    } else {
      // We can't try speculation until the lock is free, so we pause here
      // (without suspending on the queuing lock) to allow it to drain, then
      // try again. All other threads will see the same result from
      // __kmp_should_speculate(), so they will be doing the same if they try
      // to claim the lock from now on.
2103      while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2104        KMP_INC_STAT(lck, lemmingYields);
2105        KMP_YIELD(TRUE);
2106      }
2107
2108      if (__kmp_test_adaptive_lock_only(lck, gtid))
2109        return;
2110    }
2111  }
2112
2113  // Speculative acquisition failed, so acquire it non-speculatively.
2114  // Count the non-speculative acquire attempt
2115  lck->lk.adaptive.acquire_attempts++;
2116
2117  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2118  // We have acquired the base lock, so count that.
2119  KMP_INC_STAT(lck, nonSpeculativeAcquires);
2120}
2121
2122static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2123                                                    kmp_int32 gtid) {
2124  char const *const func = "omp_set_lock";
2125  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2126    KMP_FATAL(LockIsUninitialized, func);
2127  }
2128  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2129    KMP_FATAL(LockIsAlreadyOwned, func);
2130  }
2131
2132  __kmp_acquire_adaptive_lock(lck, gtid);
2133
2134  lck->lk.qlk.owner_id = gtid + 1;
2135}
2136
2137KMP_ATTRIBUTE_TARGET_RTM
2138static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2139                                       kmp_int32 gtid) {
2140  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2141          lck))) { // If the lock doesn't look claimed we must be speculating.
2142    // (Or the user's code is buggy and they're releasing without locking;
2143    // if we had XTEST we'd be able to check that case...)
2144    _xend(); // Exit speculation
2145    __kmp_update_badness_after_success(lck);
2146  } else { // Since the lock *is* visibly locked we're not speculating,
2147    // so should use the underlying lock's release scheme.
2148    __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2149  }
2150  return KMP_LOCK_RELEASED;
2151}
2152
2153static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2154                                                   kmp_int32 gtid) {
2155  char const *const func = "omp_unset_lock";
2156  KMP_MB(); /* in case another processor initialized lock */
2157  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2158    KMP_FATAL(LockIsUninitialized, func);
2159  }
2160  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2161    KMP_FATAL(LockUnsettingFree, func);
2162  }
2163  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2164    KMP_FATAL(LockUnsettingSetByAnother, func);
2165  }
2166  lck->lk.qlk.owner_id = 0;
2167  __kmp_release_adaptive_lock(lck, gtid);
2168  return KMP_LOCK_RELEASED;
2169}
2170
2171static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2172  __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2173  lck->lk.adaptive.badness = 0;
2174  lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2175  lck->lk.adaptive.max_soft_retries =
2176      __kmp_adaptive_backoff_params.max_soft_retries;
2177  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2178#if KMP_DEBUG_ADAPTIVE_LOCKS
2179  __kmp_zero_speculative_stats(&lck->lk.adaptive);
2180#endif
2181  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2182}
2183
2184static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2185#if KMP_DEBUG_ADAPTIVE_LOCKS
2186  __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2187#endif
2188  __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2189  // Nothing needed for the speculative part.
2190}
2191
2192static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2193  char const *const func = "omp_destroy_lock";
2194  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2195    KMP_FATAL(LockIsUninitialized, func);
2196  }
2197  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2198    KMP_FATAL(LockStillOwned, func);
2199  }
2200  __kmp_destroy_adaptive_lock(lck);
2201}
2202
2203#endif // KMP_USE_ADAPTIVE_LOCKS
2204
2205/* ------------------------------------------------------------------------ */
2206/* DRDPA ticket locks                                                */
2207/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2208
2209static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2210  return lck->lk.owner_id - 1;
2211}
2212
2213static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2214  return lck->lk.depth_locked != -1;
2215}
2216
2217__forceinline static int
2218__kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2219  kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2220  kmp_uint64 mask = lck->lk.mask; // atomic load
2221  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2222
2223#ifdef USE_LOCK_PROFILE
2224  if (polls[ticket & mask] != ticket)
2225    __kmp_printf("LOCK CONTENTION: %p\n", lck);
2226/* else __kmp_printf( "." );*/
2227#endif /* USE_LOCK_PROFILE */
2228
2229  // Now spin-wait, but reload the polls pointer and mask, in case the
2230  // polling area has been reconfigured.  Unless it is reconfigured, the
2231  // reloads stay in L1 cache and are cheap.
2232  //
2233  // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2234  // The current implementation of KMP_WAIT doesn't allow for mask
2235  // and poll to be re-read every spin iteration.
2236  kmp_uint32 spins;
2237  kmp_uint64 time;
2238  KMP_FSYNC_PREPARE(lck);
2239  KMP_INIT_YIELD(spins);
2240  KMP_INIT_BACKOFF(time);
2241  while (polls[ticket & mask] < ticket) { // atomic load
2242    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
2243    // Re-read the mask and the poll pointer from the lock structure.
2244    //
2245    // Make certain that "mask" is read before "polls" !!!
2246    //
    // If another thread reconfigures the polling area and updates its values,
    // and we get the new value of mask and the old polls pointer, we could
    // access memory beyond the end of the old polling area.
2250    mask = lck->lk.mask; // atomic load
2251    polls = lck->lk.polls; // atomic load
2252  }
2253
2254  // Critical section starts here
2255  KMP_FSYNC_ACQUIRED(lck);
2256  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2257                  ticket, lck));
2258  lck->lk.now_serving = ticket; // non-volatile store
2259
2260  // Deallocate a garbage polling area if we know that we are the last
2261  // thread that could possibly access it.
2262  //
2263  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2264  // ticket.
2265  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2266    __kmp_free(lck->lk.old_polls);
2267    lck->lk.old_polls = NULL;
2268    lck->lk.cleanup_ticket = 0;
2269  }
2270
2271  // Check to see if we should reconfigure the polling area.
2272  // If there is still a garbage polling area to be deallocated from a
2273  // previous reconfiguration, let a later thread reconfigure it.
2274  if (lck->lk.old_polls == NULL) {
2275    bool reconfigure = false;
2276    std::atomic<kmp_uint64> *old_polls = polls;
2277    kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2278
2279    if (TCR_4(__kmp_nth) >
2280        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2281      // We are in oversubscription mode.  Contract the polling area
2282      // down to a single location, if that hasn't been done already.
2283      if (num_polls > 1) {
2284        reconfigure = true;
2285        num_polls = TCR_4(lck->lk.num_polls);
2286        mask = 0;
2287        num_polls = 1;
2288        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2289                                                          sizeof(*polls));
2290        polls[0] = ticket;
2291      }
2292    } else {
2293      // We are in under/fully subscribed mode.  Check the number of
2294      // threads waiting on the lock.  The size of the polling area
2295      // should be at least the number of threads waiting.
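      // For example, with num_polls == 4 (mask == 3) and five waiters, the
      // loop below doubles the area to num_polls == 8 with mask == 7.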
2296      kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2297      if (num_waiting > num_polls) {
2298        kmp_uint32 old_num_polls = num_polls;
2299        reconfigure = true;
2300        do {
2301          mask = (mask << 1) | 1;
2302          num_polls *= 2;
2303        } while (num_polls <= num_waiting);
2304
2305        // Allocate the new polling area, and copy the relevant portion
2306        // of the old polling area to the new area.  __kmp_allocate()
2307        // zeroes the memory it allocates, and most of the old area is
2308        // just zero padding, so we only copy the release counters.
2309        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2310                                                          sizeof(*polls));
2311        kmp_uint32 i;
2312        for (i = 0; i < old_num_polls; i++) {
2313          polls[i].store(old_polls[i]);
2314        }
2315      }
2316    }
2317
2318    if (reconfigure) {
2319      // Now write the updated fields back to the lock structure.
2320      //
2321      // Make certain that "polls" is written before "mask" !!!
2322      //
      // If another thread picks up the new value of mask and the old polls
      // pointer, it could access memory beyond the end of the old polling
      // area.
2326      //
2327      // On x86, we need memory fences.
2328      KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2329                      "lock %p to %d polls\n",
2330                      ticket, lck, num_polls));
2331
2332      lck->lk.old_polls = old_polls;
2333      lck->lk.polls = polls; // atomic store
2334
2335      KMP_MB();
2336
2337      lck->lk.num_polls = num_polls;
2338      lck->lk.mask = mask; // atomic store
2339
2340      KMP_MB();
2341
2342      // Only after the new polling area and mask have been flushed
2343      // to main memory can we update the cleanup ticket field.
2344      //
2345      // volatile load / non-volatile store
2346      lck->lk.cleanup_ticket = lck->lk.next_ticket;
2347    }
2348  }
2349  return KMP_LOCK_ACQUIRED_FIRST;
2350}
2351
2352int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2353  int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2354  return retval;
2355}
2356
2357static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2358                                                kmp_int32 gtid) {
2359  char const *const func = "omp_set_lock";
2360  if (lck->lk.initialized != lck) {
2361    KMP_FATAL(LockIsUninitialized, func);
2362  }
2363  if (__kmp_is_drdpa_lock_nestable(lck)) {
2364    KMP_FATAL(LockNestableUsedAsSimple, func);
2365  }
2366  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2367    KMP_FATAL(LockIsAlreadyOwned, func);
2368  }
2369
2370  __kmp_acquire_drdpa_lock(lck, gtid);
2371
2372  lck->lk.owner_id = gtid + 1;
2373  return KMP_LOCK_ACQUIRED_FIRST;
2374}
2375
2376int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2377  // First get a ticket, then read the polls pointer and the mask.
2378  // The polls pointer must be read before the mask!!! (See above)
2379  kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2380  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2381  kmp_uint64 mask = lck->lk.mask; // atomic load
2382  if (polls[ticket & mask] == ticket) {
2383    kmp_uint64 next_ticket = ticket + 1;
2384    if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2385                                       next_ticket)) {
2386      KMP_FSYNC_ACQUIRED(lck);
2387      KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2388                      ticket, lck));
2389      lck->lk.now_serving = ticket; // non-volatile store
2390
2391      // Since no threads are waiting, there is no possibility that we would
2392      // want to reconfigure the polling area.  We might have the cleanup ticket
2393      // value (which says that it is now safe to deallocate old_polls), but
      // we'll let a later thread which calls __kmp_acquire_lock do that - this
      // routine isn't supposed to block, and we would risk blocking if we
      // called __kmp_free() to do the deallocation.
2397      return TRUE;
2398    }
2399  }
2400  return FALSE;
2401}
2402
2403static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2404                                             kmp_int32 gtid) {
2405  char const *const func = "omp_test_lock";
2406  if (lck->lk.initialized != lck) {
2407    KMP_FATAL(LockIsUninitialized, func);
2408  }
2409  if (__kmp_is_drdpa_lock_nestable(lck)) {
2410    KMP_FATAL(LockNestableUsedAsSimple, func);
2411  }
2412
2413  int retval = __kmp_test_drdpa_lock(lck, gtid);
2414
2415  if (retval) {
2416    lck->lk.owner_id = gtid + 1;
2417  }
2418  return retval;
2419}
2420
2421int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2422  // Read the ticket value from the lock data struct, then the polls pointer and
2423  // the mask.  The polls pointer must be read before the mask!!! (See above)
2424  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2425  std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2426  kmp_uint64 mask = lck->lk.mask; // atomic load
2427  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2428                  ticket - 1, lck));
2429  KMP_FSYNC_RELEASING(lck);
2430  polls[ticket & mask] = ticket; // atomic store
2431  return KMP_LOCK_RELEASED;
2432}
2433
2434static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2435                                                kmp_int32 gtid) {
2436  char const *const func = "omp_unset_lock";
2437  KMP_MB(); /* in case another processor initialized lock */
2438  if (lck->lk.initialized != lck) {
2439    KMP_FATAL(LockIsUninitialized, func);
2440  }
2441  if (__kmp_is_drdpa_lock_nestable(lck)) {
2442    KMP_FATAL(LockNestableUsedAsSimple, func);
2443  }
2444  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2445    KMP_FATAL(LockUnsettingFree, func);
2446  }
2447  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2448      (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2449    KMP_FATAL(LockUnsettingSetByAnother, func);
2450  }
2451  lck->lk.owner_id = 0;
2452  return __kmp_release_drdpa_lock(lck, gtid);
2453}
2454
2455void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
2456  lck->lk.location = NULL;
2457  lck->lk.mask = 0;
2458  lck->lk.num_polls = 1;
2459  lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2460      lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2461  lck->lk.cleanup_ticket = 0;
2462  lck->lk.old_polls = NULL;
2463  lck->lk.next_ticket = 0;
2464  lck->lk.now_serving = 0;
2465  lck->lk.owner_id = 0; // no thread owns the lock.
2466  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2467  lck->lk.initialized = lck;
2468
2469  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2470}
2471
2472void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
2473  lck->lk.initialized = NULL;
2474  lck->lk.location = NULL;
2475  if (lck->lk.polls.load() != NULL) {
2476    __kmp_free(lck->lk.polls.load());
2477    lck->lk.polls = NULL;
2478  }
2479  if (lck->lk.old_polls != NULL) {
2480    __kmp_free(lck->lk.old_polls);
2481    lck->lk.old_polls = NULL;
2482  }
2483  lck->lk.mask = 0;
2484  lck->lk.num_polls = 0;
2485  lck->lk.cleanup_ticket = 0;
2486  lck->lk.next_ticket = 0;
2487  lck->lk.now_serving = 0;
2488  lck->lk.owner_id = 0;
2489  lck->lk.depth_locked = -1;
2490}
2491
2492static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2493  char const *const func = "omp_destroy_lock";
2494  if (lck->lk.initialized != lck) {
2495    KMP_FATAL(LockIsUninitialized, func);
2496  }
2497  if (__kmp_is_drdpa_lock_nestable(lck)) {
2498    KMP_FATAL(LockNestableUsedAsSimple, func);
2499  }
2500  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2501    KMP_FATAL(LockStillOwned, func);
2502  }
2503  __kmp_destroy_drdpa_lock(lck);
2504}
2505
2506// nested drdpa ticket locks
2507
2508int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2509  KMP_DEBUG_ASSERT(gtid >= 0);
2510
2511  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2512    lck->lk.depth_locked += 1;
2513    return KMP_LOCK_ACQUIRED_NEXT;
2514  } else {
2515    __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2516    KMP_MB();
2517    lck->lk.depth_locked = 1;
2518    KMP_MB();
2519    lck->lk.owner_id = gtid + 1;
2520    return KMP_LOCK_ACQUIRED_FIRST;
2521  }
2522}
2523
2524static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2525                                                        kmp_int32 gtid) {
2526  char const *const func = "omp_set_nest_lock";
2527  if (lck->lk.initialized != lck) {
2528    KMP_FATAL(LockIsUninitialized, func);
2529  }
2530  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2531    KMP_FATAL(LockSimpleUsedAsNestable, func);
2532  }
2533  __kmp_acquire_nested_drdpa_lock(lck, gtid);
2534}
2535
2536int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2537  int retval;
2538
2539  KMP_DEBUG_ASSERT(gtid >= 0);
2540
2541  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2542    retval = ++lck->lk.depth_locked;
2543  } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2544    retval = 0;
2545  } else {
2546    KMP_MB();
2547    retval = lck->lk.depth_locked = 1;
2548    KMP_MB();
2549    lck->lk.owner_id = gtid + 1;
2550  }
2551  return retval;
2552}
2553
2554static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2555                                                    kmp_int32 gtid) {
2556  char const *const func = "omp_test_nest_lock";
2557  if (lck->lk.initialized != lck) {
2558    KMP_FATAL(LockIsUninitialized, func);
2559  }
2560  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2561    KMP_FATAL(LockSimpleUsedAsNestable, func);
2562  }
2563  return __kmp_test_nested_drdpa_lock(lck, gtid);
2564}
2565
2566int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2567  KMP_DEBUG_ASSERT(gtid >= 0);
2568
2569  KMP_MB();
2570  if (--(lck->lk.depth_locked) == 0) {
2571    KMP_MB();
2572    lck->lk.owner_id = 0;
2573    __kmp_release_drdpa_lock(lck, gtid);
2574    return KMP_LOCK_RELEASED;
2575  }
2576  return KMP_LOCK_STILL_HELD;
2577}
2578
2579static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2580                                                       kmp_int32 gtid) {
2581  char const *const func = "omp_unset_nest_lock";
2582  KMP_MB(); /* in case another processor initialized lock */
2583  if (lck->lk.initialized != lck) {
2584    KMP_FATAL(LockIsUninitialized, func);
2585  }
2586  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2587    KMP_FATAL(LockSimpleUsedAsNestable, func);
2588  }
2589  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2590    KMP_FATAL(LockUnsettingFree, func);
2591  }
2592  if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2593    KMP_FATAL(LockUnsettingSetByAnother, func);
2594  }
2595  return __kmp_release_nested_drdpa_lock(lck, gtid);
2596}
2597
2598void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2599  __kmp_init_drdpa_lock(lck);
2600  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2601}
2602
2603void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2604  __kmp_destroy_drdpa_lock(lck);
2605  lck->lk.depth_locked = 0;
2606}
2607
2608static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2609  char const *const func = "omp_destroy_nest_lock";
2610  if (lck->lk.initialized != lck) {
2611    KMP_FATAL(LockIsUninitialized, func);
2612  }
2613  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2614    KMP_FATAL(LockSimpleUsedAsNestable, func);
2615  }
2616  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2617    KMP_FATAL(LockStillOwned, func);
2618  }
2619  __kmp_destroy_nested_drdpa_lock(lck);
2620}
2621
2622// access functions to fields which don't exist for all lock kinds.
2623
2624static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
2625  return lck->lk.location;
2626}
2627
2628static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
2629                                          const ident_t *loc) {
2630  lck->lk.location = loc;
2631}
2632
2633static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
2634  return lck->lk.flags;
2635}
2636
2637static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
2638                                       kmp_lock_flags_t flags) {
2639  lck->lk.flags = flags;
2640}
2641
2642// Time stamp counter
2643#if KMP_ARCH_X86 || KMP_ARCH_X86_64
2644#define __kmp_tsc() __kmp_hardware_timestamp()
2645// Runtime's default backoff parameters
2646kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2647#else
2648// Use nanoseconds for other platforms
2649extern kmp_uint64 __kmp_now_nsec();
2650kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2651#define __kmp_tsc() __kmp_now_nsec()
2652#endif
2653
2654// A useful predicate for dealing with timestamps that may wrap.
2655// Is a before b? Since the timestamps may wrap, this is asking whether it's
2656// shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
// Times where going clockwise is a shorter distance than going anti-clockwise
// are in the future, others are in the past. E.g., with a = MAX-1 and
// b = MAX+1 (= 0), a > b is true but does not mean a has reached b, whereas
// signed(a) = -2 and signed(b) = 0 capture the actual difference.
2661static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2662  return ((kmp_int64)b - (kmp_int64)a) > 0;
2663}
2664
2665// Truncated binary exponential backoff function
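// Each call spins for boff->step intervals of roughly boff->min_tick ticks,
// then updates step as (step << 1 | 1) & (max_backoff - 1); for a power-of-two
// max_backoff (as in the defaults above) the step sequence roughly doubles
// (1, 3, 7, ...) and saturates at max_backoff - 1.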
2666void __kmp_spin_backoff(kmp_backoff_t *boff) {
  // We could flatten this loop, but making it a nested loop gives better
  // results.
2668  kmp_uint32 i;
2669  for (i = boff->step; i > 0; i--) {
2670    kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2671#if KMP_HAVE_UMWAIT
2672    if (__kmp_umwait_enabled) {
2673      __kmp_tpause(0, boff->min_tick);
2674    } else {
2675#endif
2676      do {
2677        KMP_CPU_PAUSE();
2678      } while (before(__kmp_tsc(), goal));
2679#if KMP_HAVE_UMWAIT
2680    }
2681#endif
2682  }
2683  boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2684}
2685
2686#if KMP_USE_DYNAMIC_LOCK
2687
// Direct lock initializer. It simply writes a tag to the low 8 bits of the
// lock word.
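// The tag encoding keeps the low bit of the lock word set (see KMP_GET_D_TAG
// in kmp_lock.h), which is why the direct-lock jump tables further down only
// need their odd entries filled in.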
2690static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2691                                   kmp_dyna_lockseq_t seq) {
2692  TCW_4(((kmp_base_tas_lock_t *)lck)->poll, KMP_GET_D_TAG(seq));
2693  KA_TRACE(
2694      20,
2695      ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2696}
2697
2698#if KMP_USE_TSX
2699
2700// HLE lock functions - imported from the testbed runtime.
2701#define HLE_ACQUIRE ".byte 0xf2;"
2702#define HLE_RELEASE ".byte 0xf3;"
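// 0xf2/0xf3 are the XACQUIRE/XRELEASE instruction prefixes. On hardware
// without HLE support they are ignored (treated as plain REPNE/REP prefixes),
// so the code below degrades gracefully to an ordinary xchg-based spin lock.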
2703
2704static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2705  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2706  return v;
2707}
2708
2709static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2710
2711static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2712  TCW_4(*lck, 0);
2713}
2714
2715static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2716  // Use gtid for KMP_LOCK_BUSY if necessary
2717  if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2718    int delay = 1;
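    // Exponential backoff between polls of the lock word: delay grows
    // 1, 3, 7 and is then capped at 7 PAUSE instructions per poll.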
2719    do {
2720      while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2721        for (int i = delay; i != 0; --i)
2722          KMP_CPU_PAUSE();
2723        delay = ((delay << 1) | 1) & 7;
2724      }
2725    } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2726  }
2727}
2728
2729static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2730                                               kmp_int32 gtid) {
2731  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2732}
2733
2734static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2735  __asm__ volatile(HLE_RELEASE "movl %1,%0"
2736                   : "=m"(*lck)
2737                   : "r"(KMP_LOCK_FREE(hle))
2738                   : "memory");
2739  return KMP_LOCK_RELEASED;
2740}
2741
2742static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2743                                              kmp_int32 gtid) {
2744  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2745}
2746
2747static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2748  return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2749}
2750
2751static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2752                                           kmp_int32 gtid) {
2753  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2754}
2755
2756static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2757  __kmp_init_queuing_lock(lck);
2758}
2759
2760static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2761  __kmp_destroy_queuing_lock(lck);
2762}
2763
2764static void
2765__kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
2766  __kmp_destroy_queuing_lock_with_checks(lck);
2767}
2768
2769KMP_ATTRIBUTE_TARGET_RTM
2770static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2771                                           kmp_int32 gtid) {
2772  unsigned retries = 3, status;
2773  do {
2774    status = _xbegin();
2775    if (status == _XBEGIN_STARTED) {
2776      if (__kmp_is_unlocked_queuing_lock(lck))
2777        return;
2778      _xabort(0xff);
2779    }
2780    if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2781      // Wait until lock becomes free
2782      while (!__kmp_is_unlocked_queuing_lock(lck)) {
2783        KMP_YIELD(TRUE);
2784      }
2785    } else if (!(status & _XABORT_RETRY))
2786      break;
2787  } while (retries--);
2788
2789  // Fall-back non-speculative lock (xchg)
2790  __kmp_acquire_queuing_lock(lck, gtid);
2791}
2792
2793static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2794                                                       kmp_int32 gtid) {
2795  __kmp_acquire_rtm_queuing_lock(lck, gtid);
2796}
2797
2798KMP_ATTRIBUTE_TARGET_RTM
2799static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2800                                          kmp_int32 gtid) {
2801  if (__kmp_is_unlocked_queuing_lock(lck)) {
2802    // Releasing from speculation
2803    _xend();
2804  } else {
2805    // Releasing from a real lock
2806    __kmp_release_queuing_lock(lck, gtid);
2807  }
2808  return KMP_LOCK_RELEASED;
2809}
2810
2811static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2812                                                      kmp_int32 gtid) {
2813  return __kmp_release_rtm_queuing_lock(lck, gtid);
2814}
2815
2816KMP_ATTRIBUTE_TARGET_RTM
2817static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2818                                       kmp_int32 gtid) {
2819  unsigned retries = 3, status;
2820  do {
2821    status = _xbegin();
2822    if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2823      return 1;
2824    }
2825    if (!(status & _XABORT_RETRY))
2826      break;
2827  } while (retries--);
2828
2829  return __kmp_test_queuing_lock(lck, gtid);
2830}
2831
2832static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2833                                                   kmp_int32 gtid) {
2834  return __kmp_test_rtm_queuing_lock(lck, gtid);
2835}
2836
// Reuse kmp_tas_lock_t for the TSX lock, which uses RTM with a fall-back spin
// lock.
2838typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
2839
2840static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
2841  KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
2842}
2843
2844static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
2845  __kmp_destroy_rtm_spin_lock(lck);
2846}
2847
2848KMP_ATTRIBUTE_TARGET_RTM
2849static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2850                                       kmp_int32 gtid) {
2851  unsigned retries = 3, status;
2852  kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2853  kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2854  do {
2855    status = _xbegin();
2856    if (status == _XBEGIN_STARTED) {
2857      if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
2858        return KMP_LOCK_ACQUIRED_FIRST;
2859      _xabort(0xff);
2860    }
2861    if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2862      // Wait until lock becomes free
2863      while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
2864        KMP_YIELD(TRUE);
2865      }
2866    } else if (!(status & _XABORT_RETRY))
2867      break;
2868  } while (retries--);
2869
2870  // Fall-back spin lock
2871  KMP_FSYNC_PREPARE(lck);
2872  kmp_backoff_t backoff = __kmp_spin_backoff_params;
2873  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
2874         !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2875    __kmp_spin_backoff(&backoff);
2876  }
2877  KMP_FSYNC_ACQUIRED(lck);
2878  return KMP_LOCK_ACQUIRED_FIRST;
2879}
2880
2881static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2882                                                   kmp_int32 gtid) {
2883  return __kmp_acquire_rtm_spin_lock(lck, gtid);
2884}
2885
2886KMP_ATTRIBUTE_TARGET_RTM
2887static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2888                                       kmp_int32 gtid) {
2889  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
2890    // Releasing from speculation
2891    _xend();
2892  } else {
2893    // Releasing from a real lock
2894    KMP_FSYNC_RELEASING(lck);
2895    KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
2896  }
2897  return KMP_LOCK_RELEASED;
2898}
2899
2900static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2901                                                   kmp_int32 gtid) {
2902  return __kmp_release_rtm_spin_lock(lck, gtid);
2903}
2904
2905KMP_ATTRIBUTE_TARGET_RTM
2906static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
2907  unsigned retries = 3, status;
2908  kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2909  kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2910  do {
2911    status = _xbegin();
2912    if (status == _XBEGIN_STARTED &&
2913        KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
2914      return TRUE;
2915    }
2916    if (!(status & _XABORT_RETRY))
2917      break;
2918  } while (retries--);
2919
2920  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
2921      __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2922    KMP_FSYNC_ACQUIRED(lck);
2923    return TRUE;
2924  }
2925  return FALSE;
2926}
2927
2928static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2929                                                kmp_int32 gtid) {
2930  return __kmp_test_rtm_spin_lock(lck, gtid);
2931}
2932
2933#endif // KMP_USE_TSX
2934
2935// Entry functions for indirect locks (first element of direct lock jump tables)
2936static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2937                                     kmp_dyna_lockseq_t tag);
2938static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2939static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2940static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2941static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2942static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2943                                               kmp_int32);
2944static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2945                                                 kmp_int32);
2946static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2947                                                kmp_int32);
2948
2949// Lock function definitions for the union parameter type
2950#define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2951
2952#define expand1(lk, op)                                                        \
2953  static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) {               \
2954    __kmp_##op##_##lk##_##lock(&lock->lk);                                     \
2955  }
2956#define expand2(lk, op)                                                        \
2957  static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock,                  \
2958                                        kmp_int32 gtid) {                      \
2959    return __kmp_##op##_##lk##_##lock(&lock->lk, gtid);                        \
2960  }
2961#define expand3(lk, op)                                                        \
2962  static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock,              \
2963                                            kmp_lock_flags_t flags) {          \
2964    __kmp_set_##lk##_lock_flags(&lock->lk, flags);                             \
2965  }
2966#define expand4(lk, op)                                                        \
2967  static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock,           \
2968                                               const ident_t *loc) {           \
2969    __kmp_set_##lk##_lock_location(&lock->lk, loc);                            \
2970  }
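// As a sketch of what these generate: expand2(queuing, acquire) defines
//   static int __kmp_acquire_queuing_lock(kmp_user_lock_p lock,
//                                         kmp_int32 gtid) {
//     return __kmp_acquire_queuing_lock(&lock->queuing, gtid);
//   }
// i.e., a thin overload on the union pointer type that forwards to the typed
// routine (assuming the kmp_user_lock union member is named after the kind).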
2971
2972KMP_FOREACH_LOCK_KIND(expand1, init)
2973KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2974KMP_FOREACH_LOCK_KIND(expand1, destroy)
2975KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2976KMP_FOREACH_LOCK_KIND(expand2, acquire)
2977KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2978KMP_FOREACH_LOCK_KIND(expand2, release)
2979KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2980KMP_FOREACH_LOCK_KIND(expand2, test)
2981KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2982KMP_FOREACH_LOCK_KIND(expand3, )
2983KMP_FOREACH_LOCK_KIND(expand4, )
2984
2985#undef expand1
2986#undef expand2
2987#undef expand3
2988#undef expand4
2989
2990// Jump tables for the indirect lock functions
// Only fill in the odd entries; that avoids the need to shift out the low bit.
2992
2993// init functions
2994#define expand(l, op) 0, __kmp_init_direct_lock,
2995void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2996    __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2997#undef expand
2998
2999// destroy functions
3000#define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3001static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
3002    __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
3003#undef expand
3004#define expand(l, op)                                                          \
3005  0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
3006static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
3007    __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
3008#undef expand
3009
3010// set/acquire functions
3011#define expand(l, op)                                                          \
3012  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3013static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
3014    __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
3015#undef expand
3016#define expand(l, op)                                                          \
3017  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3018static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3019    __kmp_set_indirect_lock_with_checks, 0,
3020    KMP_FOREACH_D_LOCK(expand, acquire)};
3021#undef expand
3022
3023// unset/release and test functions
3024#define expand(l, op)                                                          \
3025  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3026static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
3027    __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
3028static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
3029    __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
3030#undef expand
3031#define expand(l, op)                                                          \
3032  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3033static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3034    __kmp_unset_indirect_lock_with_checks, 0,
3035    KMP_FOREACH_D_LOCK(expand, release)};
3036static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3037    __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
3038#undef expand
3039
3040// Exposes only one set of jump tables (*lock or *lock_with_checks).
3041void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
3042int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
3043int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
3044int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
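// Once __kmp_init_dynamic_user_locks() points these at either the checked or
// the unchecked tables, a dispatch site can conceptually do
//   __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
// where tag is the value extracted from the lock word (0 selects the indirect
// path). This is only an illustration; real call sites go through dispatch
// macros (KMP_I_LOCK_FUNC and KMP_LOOKUP_I_LOCK are the indirect-lock helpers
// used later in this file).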
3045
3046// Jump tables for the indirect lock functions
3047#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3048void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
3049    KMP_FOREACH_I_LOCK(expand, init)};
3050#undef expand
3051
3052#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3053static void (*indirect_destroy[])(kmp_user_lock_p) = {
3054    KMP_FOREACH_I_LOCK(expand, destroy)};
3055#undef expand
3056#define expand(l, op)                                                          \
3057  (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
3058static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
3059    KMP_FOREACH_I_LOCK(expand, destroy)};
3060#undef expand
3061
3062// set/acquire functions
3063#define expand(l, op)                                                          \
3064  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3065static int (*indirect_set[])(kmp_user_lock_p,
3066                             kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
3067#undef expand
3068#define expand(l, op)                                                          \
3069  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3070static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
3071    KMP_FOREACH_I_LOCK(expand, acquire)};
3072#undef expand
3073
3074// unset/release and test functions
3075#define expand(l, op)                                                          \
3076  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3077static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
3078    KMP_FOREACH_I_LOCK(expand, release)};
3079static int (*indirect_test[])(kmp_user_lock_p,
3080                              kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3081#undef expand
3082#define expand(l, op)                                                          \
3083  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3084static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
3085    KMP_FOREACH_I_LOCK(expand, release)};
3086static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
3087    KMP_FOREACH_I_LOCK(expand, test)};
3088#undef expand
3089
// Exposes only one set of jump tables (*lock or *lock_with_checks).
3091void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
3092int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
3093int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
3094int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3095
3096// Lock index table.
3097kmp_indirect_lock_table_t __kmp_i_lock_table;
3098
3099// Size of indirect locks.
3100static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3101
3102// Jump tables for lock accessor/modifier.
3103void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3104                                                     const ident_t *) = {0};
3105void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3106                                                  kmp_lock_flags_t) = {0};
3107const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3108    kmp_user_lock_p) = {0};
3109kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3110    kmp_user_lock_p) = {0};
3111
3112// Use different lock pools for different lock types.
3113static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3114
// User lock allocator for dynamically dispatched indirect locks. Every entry of
// the indirect lock table holds the address and type of the allocated indirect
// lock (kmp_indirect_lock_t). When a table fills up, a new table with double
// the capacity is chained to it. A destroyed indirect lock object is returned
// to a reusable pool of locks that is kept per lock type.
3120kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3121                                                  kmp_int32 gtid,
3122                                                  kmp_indirect_locktag_t tag) {
3123  kmp_indirect_lock_t *lck;
3124  kmp_lock_index_t idx, table_idx;
3125
3126  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3127
3128  if (__kmp_indirect_lock_pool[tag] != NULL) {
3129    // Reuse the allocated and destroyed lock object
3130    lck = __kmp_indirect_lock_pool[tag];
3131    if (OMP_LOCK_T_SIZE < sizeof(void *))
3132      idx = lck->lock->pool.index;
3133    __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3134    KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3135                  lck));
3136  } else {
3137    kmp_uint32 row, col;
3138    kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
3139    idx = 0;
3140    // Find location in list of lock tables to put new lock
3141    while (1) {
3142      table_idx = lock_table->next; // index within this table
3143      idx += lock_table->next; // global index within list of tables
3144      if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {
3145        row = table_idx / KMP_I_LOCK_CHUNK;
3146        col = table_idx % KMP_I_LOCK_CHUNK;
3147        // Allocate a new row of locks if necessary
3148        if (!lock_table->table[row]) {
3149          lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(
3150              sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);
3151        }
3152        break;
3153      }
      // If necessary, allocate a new lock table with double the capacity
3155      if (!lock_table->next_table) {
3156        kmp_indirect_lock_table_t *next_table =
3157            (kmp_indirect_lock_table_t *)__kmp_allocate(
3158                sizeof(kmp_indirect_lock_table_t));
3159        next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(
3160            sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);
3161        next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;
3162        next_table->next = 0;
3163        next_table->next_table = nullptr;
3164        lock_table->next_table = next_table;
3165      }
3166      lock_table = lock_table->next_table;
3167      KMP_ASSERT(lock_table);
3168    }
3169    lock_table->next++;
3170
3171    lck = &lock_table->table[row][col];
3172    // Allocate a new base lock object
3173    lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3174    KA_TRACE(20,
3175             ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3176  }
3177
3178  __kmp_release_lock(&__kmp_global_lock, gtid);
3179
3180  lck->type = tag;
3181
3182  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3183    *(kmp_lock_index_t *)&(((kmp_base_tas_lock_t *)user_lock)->poll) =
3184        idx << 1; // indirect lock word must be even
3185  } else {
3186    *((kmp_indirect_lock_t **)user_lock) = lck;
3187  }
3188
3189  return lck;
3190}
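// Illustration of the table walk above: every full table keeps `next` equal to
// its capacity (nrow_ptrs * KMP_I_LOCK_CHUNK), so the accumulated `idx` is the
// global index of the new slot across the whole chain of tables. On builds
// where omp_lock_t cannot hold a pointer, that global index (shifted left by
// one bit) is exactly what is written into the user's lock word above.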
3191
3192// User lock lookup for dynamically dispatched locks.
3193static __forceinline kmp_indirect_lock_t *
3194__kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3195  if (__kmp_env_consistency_check) {
3196    kmp_indirect_lock_t *lck = NULL;
3197    if (user_lock == NULL) {
3198      KMP_FATAL(LockIsUninitialized, func);
3199    }
3200    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3201      kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3202      lck = __kmp_get_i_lock(idx);
3203    } else {
3204      lck = *((kmp_indirect_lock_t **)user_lock);
3205    }
3206    if (lck == NULL) {
3207      KMP_FATAL(LockIsUninitialized, func);
3208    }
3209    return lck;
3210  } else {
3211    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3212      return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));
3213    } else {
3214      return *((kmp_indirect_lock_t **)user_lock);
3215    }
3216  }
3217}
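// Both branches above decode the lock word exactly as it was encoded in
// __kmp_allocate_indirect_lock(): an even (index << 1) value when omp_lock_t
// is smaller than a pointer, otherwise the kmp_indirect_lock_t pointer itself.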
3218
3219static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3220                                     kmp_dyna_lockseq_t seq) {
3221#if KMP_USE_ADAPTIVE_LOCKS
3222  if (seq == lockseq_adaptive && !__kmp_cpuinfo.flags.rtm) {
3223    KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3224    seq = lockseq_queuing;
3225  }
3226#endif
3227#if KMP_USE_TSX
3228  if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.flags.rtm) {
3229    seq = lockseq_queuing;
3230  }
3231#endif
3232  kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3233  kmp_indirect_lock_t *l =
3234      __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3235  KMP_I_LOCK_FUNC(l, init)(l->lock);
3236  KA_TRACE(
3237      20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3238           seq));
3239}
3240
3241static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3242  kmp_uint32 gtid = __kmp_entry_gtid();
3243  kmp_indirect_lock_t *l =
3244      __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3245  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3246  kmp_indirect_locktag_t tag = l->type;
3247
3248  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3249
3250  // Use the base lock's space to keep the pool chain.
3251  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3252  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3253    l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3254  }
3255  __kmp_indirect_lock_pool[tag] = l;
3256
3257  __kmp_release_lock(&__kmp_global_lock, gtid);
3258}
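// The pool chain reuses the destroyed base lock's own storage (pool.next and
// pool.index), which is why __kmp_allocate_indirect_lock() can read the index
// back out of lck->lock->pool.index when recycling an entry.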
3259
3260static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3261  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3262  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3263}
3264
3265static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3266  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3267  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3268}
3269
3270static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3271  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3272  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3273}
3274
3275static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3276                                               kmp_int32 gtid) {
3277  kmp_indirect_lock_t *l =
3278      __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3279  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3280}
3281
3282static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3283                                                 kmp_int32 gtid) {
3284  kmp_indirect_lock_t *l =
3285      __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3286  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3287}
3288
3289static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3290                                                kmp_int32 gtid) {
3291  kmp_indirect_lock_t *l =
3292      __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3293  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3294}
3295
3296kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3297
3298// This is used only in kmp_error.cpp when consistency checking is on.
3299kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
3300  switch (seq) {
3301  case lockseq_tas:
3302  case lockseq_nested_tas:
3303    return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3304#if KMP_USE_FUTEX
3305  case lockseq_futex:
3306  case lockseq_nested_futex:
3307    return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3308#endif
3309  case lockseq_ticket:
3310  case lockseq_nested_ticket:
3311    return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3312  case lockseq_queuing:
3313  case lockseq_nested_queuing:
3314#if KMP_USE_ADAPTIVE_LOCKS
3315  case lockseq_adaptive:
3316#endif
3317    return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3318  case lockseq_drdpa:
3319  case lockseq_nested_drdpa:
3320    return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3321  default:
3322    return 0;
3323  }
3324}
3325
3326// Initializes data for dynamic user locks.
3327void __kmp_init_dynamic_user_locks() {
3328  // Initialize jump table for the lock functions
3329  if (__kmp_env_consistency_check) {
3330    __kmp_direct_set = direct_set_check;
3331    __kmp_direct_unset = direct_unset_check;
3332    __kmp_direct_test = direct_test_check;
3333    __kmp_direct_destroy = direct_destroy_check;
3334    __kmp_indirect_set = indirect_set_check;
3335    __kmp_indirect_unset = indirect_unset_check;
3336    __kmp_indirect_test = indirect_test_check;
3337    __kmp_indirect_destroy = indirect_destroy_check;
3338  } else {
3339    __kmp_direct_set = direct_set;
3340    __kmp_direct_unset = direct_unset;
3341    __kmp_direct_test = direct_test;
3342    __kmp_direct_destroy = direct_destroy;
3343    __kmp_indirect_set = indirect_set;
3344    __kmp_indirect_unset = indirect_unset;
3345    __kmp_indirect_test = indirect_test;
3346    __kmp_indirect_destroy = indirect_destroy;
3347  }
3348  // If the user locks have already been initialized, then return. Allow the
3349  // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
3350  // new lock tables if they have already been allocated.
3351  if (__kmp_init_user_locks)
3352    return;
3353
3354  // Initialize lock index table
3355  __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;
3356  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
3357      sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);
3358  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
3359      KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3360  __kmp_i_lock_table.next = 0;
3361  __kmp_i_lock_table.next_table = nullptr;
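  // Note that only row 0 of the initial table gets its chunk of locks
  // allocated eagerly here; additional rows are allocated lazily in
  // __kmp_allocate_indirect_lock().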
3362
3363  // Indirect lock size
3364  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3365  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3366#if KMP_USE_ADAPTIVE_LOCKS
3367  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3368#endif
3369  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3370#if KMP_USE_TSX
3371  __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
3372#endif
3373  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3374#if KMP_USE_FUTEX
3375  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3376#endif
3377  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3378  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3379  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3380
3381// Initialize lock accessor/modifier
3382#define fill_jumps(table, expand, sep)                                         \
3383  {                                                                            \
3384    table[locktag##sep##ticket] = expand(ticket);                              \
3385    table[locktag##sep##queuing] = expand(queuing);                            \
3386    table[locktag##sep##drdpa] = expand(drdpa);                                \
3387  }
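// For illustration, fill_jumps(__kmp_indirect_set_location, expand, _)
// conceptually expands to:
//   __kmp_indirect_set_location[locktag_ticket] = expand(ticket);
//   __kmp_indirect_set_location[locktag_queuing] = expand(queuing);
//   __kmp_indirect_set_location[locktag_drdpa] = expand(drdpa);
// with `expand` itself redefined before each fill_table() use below.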
3388
3389#if KMP_USE_ADAPTIVE_LOCKS
3390#define fill_table(table, expand)                                              \
3391  {                                                                            \
3392    fill_jumps(table, expand, _);                                              \
3393    table[locktag_adaptive] = expand(queuing);                                 \
3394    fill_jumps(table, expand, _nested_);                                       \
3395  }
3396#else
3397#define fill_table(table, expand)                                              \
3398  {                                                                            \
3399    fill_jumps(table, expand, _);                                              \
3400    fill_jumps(table, expand, _nested_);                                       \
3401  }
3402#endif // KMP_USE_ADAPTIVE_LOCKS
3403
3404#define expand(l)                                                              \
3405  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3406  fill_table(__kmp_indirect_set_location, expand);
3407#undef expand
3408#define expand(l)                                                              \
3409  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3410  fill_table(__kmp_indirect_set_flags, expand);
3411#undef expand
3412#define expand(l)                                                              \
3413  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3414  fill_table(__kmp_indirect_get_location, expand);
3415#undef expand
3416#define expand(l)                                                              \
3417  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3418  fill_table(__kmp_indirect_get_flags, expand);
3419#undef expand
3420
3421  __kmp_init_user_locks = TRUE;
3422}
3423
3424// Clean up the lock table.
3425void __kmp_cleanup_indirect_user_locks() {
3426  int k;
3427
3428  // Clean up locks in the pools first (they were already destroyed before going
3429  // into the pools).
3430  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3431    kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3432    while (l != NULL) {
3433      kmp_indirect_lock_t *ll = l;
3434      l = (kmp_indirect_lock_t *)l->lock->pool.next;
3435      KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3436                    ll));
3437      __kmp_free(ll->lock);
3438      ll->lock = NULL;
3439    }
3440    __kmp_indirect_lock_pool[k] = NULL;
3441  }
3442  // Clean up the remaining undestroyed locks.
3443  kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
3444  while (ptr) {
3445    for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
3446      if (!ptr->table[row])
3447        continue;
3448      for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
3449        kmp_indirect_lock_t *l = &ptr->table[row][col];
3450        if (l->lock) {
3451          // Locks not destroyed explicitly need to be destroyed here.
3452          KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3453          KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
3454                        "from table\n",
3455                        l));
3456          __kmp_free(l->lock);
3457        }
3458      }
3459      __kmp_free(ptr->table[row]);
3460    }
3461    kmp_indirect_lock_table_t *next_table = ptr->next_table;
3462    if (ptr != &__kmp_i_lock_table)
3463      __kmp_free(ptr);
3464    ptr = next_table;
3465  }
3466
3467  __kmp_init_user_locks = FALSE;
3468}
3469
3470enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3471int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3472
3473#else // KMP_USE_DYNAMIC_LOCK
3474
3475static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3476  __kmp_init_tas_lock(lck);
3477}
3478
3479static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3480  __kmp_init_nested_tas_lock(lck);
3481}
3482
3483#if KMP_USE_FUTEX
3484static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3485  __kmp_init_futex_lock(lck);
3486}
3487
3488static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3489  __kmp_init_nested_futex_lock(lck);
3490}
3491#endif
3492
3493static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
3494  return lck == lck->lk.self;
3495}
3496
3497static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3498  __kmp_init_ticket_lock(lck);
3499}
3500
3501static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3502  __kmp_init_nested_ticket_lock(lck);
3503}
3504
3505static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
3506  return lck == lck->lk.initialized;
3507}
3508
3509static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3510  __kmp_init_queuing_lock(lck);
3511}
3512
3513static void
3514__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3515  __kmp_init_nested_queuing_lock(lck);
3516}
3517
3518#if KMP_USE_ADAPTIVE_LOCKS
3519static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
3520  __kmp_init_adaptive_lock(lck);
3521}
3522#endif
3523
3524static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
3525  return lck == lck->lk.initialized;
3526}
3527
3528static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3529  __kmp_init_drdpa_lock(lck);
3530}
3531
3532static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3533  __kmp_init_nested_drdpa_lock(lck);
3534}
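// The *_with_checks init wrappers above simply forward to the plain init
// functions; presumably they exist so that the KMP_BIND_*_WITH_CHECKS macros
// used in __kmp_set_user_lock_vptrs() below can refer to a consistently named
// "_with_checks" symbol for every operation, even though lock initialization
// itself needs no extra checking.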
3535
3536/* user locks
3537 * They are implemented as a table of function pointers which are set to the
3538 * lock functions of the appropriate kind, once that has been determined. */
3539
3540enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3541
3542size_t __kmp_base_user_lock_size = 0;
3543size_t __kmp_user_lock_size = 0;
3544
3545kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
3546int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
3547                                            kmp_int32 gtid) = NULL;
3548
3549int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
3550                                         kmp_int32 gtid) = NULL;
3551int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
3552                                            kmp_int32 gtid) = NULL;
3553void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3554void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
3555void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3556int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3557                                                   kmp_int32 gtid) = NULL;
3558
3559int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3560                                                kmp_int32 gtid) = NULL;
3561int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3562                                                   kmp_int32 gtid) = NULL;
3563void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3564void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3565
3566int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
3567const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
3568void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
3569                                      const ident_t *loc) = NULL;
3570kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
3571void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
3572                                   kmp_lock_flags_t flags) = NULL;
3573
3574void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
3575  switch (user_lock_kind) {
3576  case lk_default:
3577  default:
3578    KMP_ASSERT(0);
3579
3580  case lk_tas: {
3581    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
3582    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);
3583
3584    __kmp_get_user_lock_owner_ =
3585        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);
3586
3587    if (__kmp_env_consistency_check) {
3588      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3589      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3590    } else {
3591      KMP_BIND_USER_LOCK(tas);
3592      KMP_BIND_NESTED_USER_LOCK(tas);
3593    }
3594
3595    __kmp_destroy_user_lock_ =
3596        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);
3597
3598    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3599
3600    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3601
3602    __kmp_set_user_lock_location_ =
3603        (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3604
3605    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3606
3607    __kmp_set_user_lock_flags_ =
3608        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3609  } break;
3610
3611#if KMP_USE_FUTEX
3612
3613  case lk_futex: {
3614    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
3615    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);
3616
3617    __kmp_get_user_lock_owner_ =
3618        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);
3619
3620    if (__kmp_env_consistency_check) {
3621      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3622      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3623    } else {
3624      KMP_BIND_USER_LOCK(futex);
3625      KMP_BIND_NESTED_USER_LOCK(futex);
3626    }
3627
3628    __kmp_destroy_user_lock_ =
3629        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);
3630
3631    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3632
3633    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3634
3635    __kmp_set_user_lock_location_ =
3636        (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3637
3638    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3639
3640    __kmp_set_user_lock_flags_ =
3641        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3642  } break;
3643
3644#endif // KMP_USE_FUTEX
3645
3646  case lk_ticket: {
3647    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
3648    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);
3649
3650    __kmp_get_user_lock_owner_ =
3651        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);
3652
3653    if (__kmp_env_consistency_check) {
3654      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3655      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3656    } else {
3657      KMP_BIND_USER_LOCK(ticket);
3658      KMP_BIND_NESTED_USER_LOCK(ticket);
3659    }
3660
3661    __kmp_destroy_user_lock_ =
3662        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);
3663
3664    __kmp_is_user_lock_initialized_ =
3665        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);
3666
3667    __kmp_get_user_lock_location_ =
3668        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);
3669
3670    __kmp_set_user_lock_location_ = (void (*)(
3671        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);
3672
3673    __kmp_get_user_lock_flags_ =
3674        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);
3675
3676    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3677        &__kmp_set_ticket_lock_flags);
3678  } break;
3679
3680  case lk_queuing: {
3681    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
3682    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);
3683
3684    __kmp_get_user_lock_owner_ =
3685        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3686
3687    if (__kmp_env_consistency_check) {
3688      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3689      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3690    } else {
3691      KMP_BIND_USER_LOCK(queuing);
3692      KMP_BIND_NESTED_USER_LOCK(queuing);
3693    }
3694
3695    __kmp_destroy_user_lock_ =
3696        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);
3697
3698    __kmp_is_user_lock_initialized_ =
3699        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3700
3701    __kmp_get_user_lock_location_ =
3702        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3703
3704    __kmp_set_user_lock_location_ = (void (*)(
3705        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3706
3707    __kmp_get_user_lock_flags_ =
3708        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3709
3710    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3711        &__kmp_set_queuing_lock_flags);
3712  } break;
3713
3714#if KMP_USE_ADAPTIVE_LOCKS
3715  case lk_adaptive: {
3716    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
3717    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);
3718
3719    __kmp_get_user_lock_owner_ =
3720        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3721
3722    if (__kmp_env_consistency_check) {
3723      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3724    } else {
3725      KMP_BIND_USER_LOCK(adaptive);
3726    }
3727
3728    __kmp_destroy_user_lock_ =
3729        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);
3730
3731    __kmp_is_user_lock_initialized_ =
3732        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3733
3734    __kmp_get_user_lock_location_ =
3735        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3736
3737    __kmp_set_user_lock_location_ = (void (*)(
3738        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3739
3740    __kmp_get_user_lock_flags_ =
3741        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3742
3743    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3744        &__kmp_set_queuing_lock_flags);
3745
3746  } break;
3747#endif // KMP_USE_ADAPTIVE_LOCKS
3748
3749  case lk_drdpa: {
3750    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
3751    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);
3752
3753    __kmp_get_user_lock_owner_ =
3754        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);
3755
3756    if (__kmp_env_consistency_check) {
3757      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3758      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3759    } else {
3760      KMP_BIND_USER_LOCK(drdpa);
3761      KMP_BIND_NESTED_USER_LOCK(drdpa);
3762    }
3763
3764    __kmp_destroy_user_lock_ =
3765        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);
3766
3767    __kmp_is_user_lock_initialized_ =
3768        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);
3769
3770    __kmp_get_user_lock_location_ =
3771        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);
3772
3773    __kmp_set_user_lock_location_ = (void (*)(
3774        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);
3775
3776    __kmp_get_user_lock_flags_ =
3777        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);
3778
3779    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3780        &__kmp_set_drdpa_lock_flags);
3781  } break;
3782  }
3783}
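// Note that for the tas (and futex) kinds the location/flags accessors above
// are left NULL, since those compact lock structures carry no location or
// flags fields; callers such as __kmp_cleanup_user_locks() below check these
// function pointers for NULL before using them.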
3784
3785// ----------------------------------------------------------------------------
3786// User lock table & lock allocation
3787
3788kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
3789kmp_user_lock_p __kmp_lock_pool = NULL;
3790
3791// Lock block-allocation support.
3792kmp_block_of_locks *__kmp_lock_blocks = NULL;
3793int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3794
3795static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
3796  // Assume that kmp_global_lock is held upon entry/exit.
3797  kmp_lock_index_t index;
3798  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
3799    kmp_lock_index_t size;
3800    kmp_user_lock_p *table;
3801    // Reallocate lock table.
3802    if (__kmp_user_lock_table.allocated == 0) {
3803      size = 1024;
3804    } else {
3805      size = __kmp_user_lock_table.allocated * 2;
3806    }
3807    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
3808    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
3809               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
3810    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first
    // element of the new table. All the tables are organized into a list and
    // can be freed when the library shuts down.
3815    __kmp_user_lock_table.table = table;
3816    __kmp_user_lock_table.allocated = size;
3817  }
3818  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
3819                   __kmp_user_lock_table.allocated);
3820  index = __kmp_user_lock_table.used;
3821  __kmp_user_lock_table.table[index] = lck;
3822  ++__kmp_user_lock_table.used;
3823  return index;
3824}
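// Element [0] of each table is reserved: when the table grows, the pointer to
// the previous table is stashed there, and __kmp_user_lock_table.used starts
// at 1 so that real locks never occupy slot 0. __kmp_cleanup_user_locks()
// walks this chain to free the retired tables.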
3825
3826static kmp_user_lock_p __kmp_lock_block_allocate() {
3827  // Assume that kmp_global_lock is held upon entry/exit.
3828  static int last_index = 0;
3829  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
3830    // Restart the index.
3831    last_index = 0;
3832    // Need to allocate a new block.
3833    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3834    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3835    char *buffer =
3836        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
3837    // Set up the new block.
3838    kmp_block_of_locks *new_block =
3839        (kmp_block_of_locks *)(&buffer[space_for_locks]);
3840    new_block->next_block = __kmp_lock_blocks;
3841    new_block->locks = (void *)buffer;
3842    // Publish the new block.
3843    KMP_MB();
3844    __kmp_lock_blocks = new_block;
3845  }
3846  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
3847      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
3848  last_index++;
3849  return ret;
3850}
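// Layout note: a block is a single allocation holding space_for_locks bytes of
// lock storage followed by its kmp_block_of_locks descriptor, which is why
// __kmp_cleanup_user_locks() frees block_ptr->locks (the whole buffer) rather
// than block_ptr itself.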
3851
// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
3854kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
3855                                         kmp_lock_flags_t flags) {
3856  kmp_user_lock_p lck;
3857  kmp_lock_index_t index;
3858  KMP_DEBUG_ASSERT(user_lock);
3859
3860  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3861
3862  if (__kmp_lock_pool == NULL) {
3863    // Lock pool is empty. Allocate new memory.
3864
3865    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
3866      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
3867    } else {
3868      lck = __kmp_lock_block_allocate();
3869    }
3870
    // Insert the lock into the table so that it can be freed in __kmp_cleanup,
    // and so the debugger has info on all allocated locks.
3873    index = __kmp_lock_table_insert(lck);
3874  } else {
3875    // Pick up lock from pool.
3876    lck = __kmp_lock_pool;
3877    index = __kmp_lock_pool->pool.index;
3878    __kmp_lock_pool = __kmp_lock_pool->pool.next;
3879  }
3880
3881  // We could potentially differentiate between nested and regular locks
3882  // here, and do the lock table lookup for regular locks only.
3883  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3884    *((kmp_lock_index_t *)user_lock) = index;
3885  } else {
3886    *((kmp_user_lock_p *)user_lock) = lck;
3887  }
3888
  // Mark the lock if it is a critical section lock.
3890  __kmp_set_user_lock_flags(lck, flags);
3891
  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up
3893
3894  return lck;
3895}
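// The user's lock variable now holds either a lock table index (when
// omp_lock_t is too small to hold a pointer) or the kmp_user_lock_p itself;
// __kmp_lookup_user_lock() below reverses this mapping.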
3896
// Return the lock's memory to the pool for reuse.
3898void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
3899                          kmp_user_lock_p lck) {
3900  KMP_DEBUG_ASSERT(user_lock != NULL);
3901  KMP_DEBUG_ASSERT(lck != NULL);
3902
3903  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3904
3905  lck->pool.next = __kmp_lock_pool;
3906  __kmp_lock_pool = lck;
3907  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3908    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3909    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
3910    lck->pool.index = index;
3911  }
3912
3913  __kmp_release_lock(&__kmp_global_lock, gtid);
3914}
3915
3916kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
3917  kmp_user_lock_p lck = NULL;
3918
3919  if (__kmp_env_consistency_check) {
3920    if (user_lock == NULL) {
3921      KMP_FATAL(LockIsUninitialized, func);
3922    }
3923  }
3924
3925  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3926    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3927    if (__kmp_env_consistency_check) {
3928      if (!(0 < index && index < __kmp_user_lock_table.used)) {
3929        KMP_FATAL(LockIsUninitialized, func);
3930      }
3931    }
3932    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
3933    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3934    lck = __kmp_user_lock_table.table[index];
3935  } else {
3936    lck = *((kmp_user_lock_p *)user_lock);
3937  }
3938
3939  if (__kmp_env_consistency_check) {
3940    if (lck == NULL) {
3941      KMP_FATAL(LockIsUninitialized, func);
3942    }
3943  }
3944
3945  return lck;
3946}
3947
3948void __kmp_cleanup_user_locks(void) {
  // Reset the lock pool. Don't worry about the locks in the pool -- we will
  // free them when iterating through the lock table (it includes all locks,
  // dead or alive).
3951  __kmp_lock_pool = NULL;
3952
3953#define IS_CRITICAL(lck)                                                       \
3954  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
3955   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))
3956
3957  // Loop through lock table, free all locks.
3958  // Do not free item [0], it is reserved for lock tables list.
3959  //
3960  // FIXME - we are iterating through a list of (pointers to) objects of type
3961  // union kmp_user_lock, but we have no way of knowing whether the base type is
3962  // currently "pool" or whatever the global user lock type is.
3963  //
3964  // We are relying on the fact that for all of the user lock types
3965  // (except "tas"), the first field in the lock struct is the "initialized"
3966  // field, which is set to the address of the lock object itself when
3967  // the lock is initialized.  When the union is of type "pool", the
3968  // first field is a pointer to the next object in the free list, which
3969  // will not be the same address as the object itself.
3970  //
3971  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
3972  // for "pool" objects on the free list.  This must happen as the "location"
3973  // field of real user locks overlaps the "index" field of "pool" objects.
3974  //
3975  // It would be better to run through the free list, and remove all "pool"
3976  // objects from the lock table before executing this loop.  However,
3977  // "pool" objects do not always have their index field set (only on
3978  // lin_32e), and I don't want to search the lock table for the address
3979  // of every "pool" object on the free list.
3980  while (__kmp_user_lock_table.used > 1) {
3981    const ident *loc;
3982
    // Reduce __kmp_user_lock_table.used before freeing the lock, so that the
    // state of the locks stays consistent.
3985    kmp_user_lock_p lck =
3986        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];
3987
3988    if ((__kmp_is_user_lock_initialized_ != NULL) &&
3989        (*__kmp_is_user_lock_initialized_)(lck)) {
3990      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
3991      // it is NOT a critical section (user is not responsible for destroying
3992      // criticals) AND we know source location to report.
3993      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
3994          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
3995          (loc->psource != NULL)) {
3996        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
3997        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
3998        __kmp_str_loc_free(&str_loc);
3999      }
4000
4001#ifdef KMP_DEBUG
4002      if (IS_CRITICAL(lck)) {
4003        KA_TRACE(
4004            20,
4005            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
4006             lck, *(void **)lck));
4007      } else {
4008        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
4009                      *(void **)lck));
4010      }
4011#endif // KMP_DEBUG
4012
      // Clean up the lock's internal dynamic resources (particularly for
      // drdpa locks).
4014      __kmp_destroy_user_lock(lck);
4015    }
4016
4017    // Free the lock if block allocation of locks is not used.
4018    if (__kmp_lock_blocks == NULL) {
4019      __kmp_free(lck);
4020    }
4021  }
4022
4023#undef IS_CRITICAL
4024
4025  // delete lock table(s).
4026  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4027  __kmp_user_lock_table.table = NULL;
4028  __kmp_user_lock_table.allocated = 0;
4029
4030  while (table_ptr != NULL) {
4031    // In the first element we saved the pointer to the previous
4032    // (smaller) lock table.
4033    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
4034    __kmp_free(table_ptr);
4035    table_ptr = next;
4036  }
4037
4038  // Free buffers allocated for blocks of locks.
4039  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4040  __kmp_lock_blocks = NULL;
4041
4042  while (block_ptr != NULL) {
4043    kmp_block_of_locks_t *next = block_ptr->next_block;
4044    __kmp_free(block_ptr->locks);
4045    // *block_ptr itself was allocated at the end of the locks vector.
4046    block_ptr = next;
4047  }
4048
4049  TCW_4(__kmp_init_user_locks, FALSE);
4050}
4051
4052#endif // KMP_USE_DYNAMIC_LOCK
4053