/*
 * kmp_lock.cpp -- lock-related functions
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#include "tsan_annotations.h"

#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
// We should really include <futex.h>, but that causes compatibility problems
// on different Linux* OS distributions, which either require you to include
// (or break when you try to include) <pci/types.h>. Since all we need is the
// two macros below (which are part of the kernel ABI, so they can't change),
// we just define the constants here and don't include <futex.h>.
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif

42/* Implement spin locks for internal library use.             */
43/* The algorithm implemented is Lamport's bakery lock [1974]. */
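/* In outline, the bakery/ticket scheme used by the ticket locks below works
   as follows (simplified sketch, not the library's actual types):

     std::atomic<unsigned> next_ticket{0}, now_serving{0};

     void acquire() {
       unsigned my = next_ticket.fetch_add(1, std::memory_order_relaxed);
       while (now_serving.load(std::memory_order_acquire) != my)
         ; // spin; the real code backs off / yields via KMP_WAIT_PTR
     }
     void release() { now_serving.fetch_add(1, std::memory_order_release); }

   Correctness relies on unsigned arithmetic wrapping around, which
   __kmp_validate_locks() checks below. */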
44
45void __kmp_validate_locks(void) {
46  int i;
47  kmp_uint32 x, y;
48
  /* Check to make sure unsigned arithmetic wraps properly. */
50  x = ~((kmp_uint32)0) - 2;
51  y = x - 2;
52
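  /* Example: x starts at 0xFFFFFFFD and y at 0xFFFFFFFB.  As both are
     incremented across the 32-bit wrap (x becomes 0, 1, 2, ... while y is
     still 0xFFFFFFFE, 0xFFFFFFFF, ...), x - y must remain 2 in modulo-2^32
     arithmetic, e.g. 1 - 0xFFFFFFFF == 2.  The ticket locks below depend on
     exactly this wrap-around behavior. */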
53  for (i = 0; i < 8; ++i, ++x, ++y) {
54    kmp_uint32 z = (x - y);
55    KMP_ASSERT(z == 2);
56  }
57
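  /* The queuing lock code updates the adjacent tail_id/head_id pair with a
     single 64-bit compare-and-store, so tail_id must be 8-byte aligned. */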
58  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
59}
60
61/* ------------------------------------------------------------------------ */
62/* test and set locks */
63
64// For the non-nested locks, we can only assume that the first 4 bytes were
65// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
66// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
67// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
68//
69// gcc reserves >= 8 bytes for nested locks, so we can assume that the
70// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
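//
// In outline, a test-and-set lock is a single atomic word that holds
// KMP_LOCK_FREE(tas) when unlocked and the owner's encoded id
// (KMP_LOCK_BUSY(gtid + 1, tas)) when locked.  A minimal sketch of one
// acquire attempt (the real code below adds lock profiling, exponential
// backoff and yielding):
//
//   bool tas_try_acquire(std::atomic<kmp_int32> *poll, kmp_int32 gtid) {
//     kmp_int32 expected = KMP_LOCK_FREE(tas);
//     return poll->compare_exchange_strong(
//         expected, KMP_LOCK_BUSY(gtid + 1, tas), std::memory_order_acquire);
//   }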
71
72static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
73  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
74}
75
76static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
77  return lck->lk.depth_locked != -1;
78}
79
80__forceinline static int
81__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
82  KMP_MB();
83
84#ifdef USE_LOCK_PROFILE
85  kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
86  if ((curr != 0) && (curr != gtid + 1))
87    __kmp_printf("LOCK CONTENTION: %p\n", lck);
88/* else __kmp_printf( "." );*/
89#endif /* USE_LOCK_PROFILE */
90
91  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
92  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
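  // The poll word encodes ownership directly: it holds tas_free when the lock
  // is available and tas_busy (the owner's gtid + 1) once taken, which is what
  // __kmp_get_tas_lock_owner() strips back out.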
93
94  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
95      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
96    KMP_FSYNC_ACQUIRED(lck);
97    return KMP_LOCK_ACQUIRED_FIRST;
98  }
99
100  kmp_uint32 spins;
101  KMP_FSYNC_PREPARE(lck);
102  KMP_INIT_YIELD(spins);
103  kmp_backoff_t backoff = __kmp_spin_backoff_params;
104  do {
105    __kmp_spin_backoff(&backoff);
106    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
107  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
108           !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
109  KMP_FSYNC_ACQUIRED(lck);
110  return KMP_LOCK_ACQUIRED_FIRST;
111}
112
113int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
114  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
115  ANNOTATE_TAS_ACQUIRED(lck);
116  return retval;
117}
118
119static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
120                                              kmp_int32 gtid) {
121  char const *const func = "omp_set_lock";
122  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
123      __kmp_is_tas_lock_nestable(lck)) {
124    KMP_FATAL(LockNestableUsedAsSimple, func);
125  }
126  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
127    KMP_FATAL(LockIsAlreadyOwned, func);
128  }
129  return __kmp_acquire_tas_lock(lck, gtid);
130}
131
132int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
133  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
134  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
135  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
136      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
137    KMP_FSYNC_ACQUIRED(lck);
138    return TRUE;
139  }
140  return FALSE;
141}
142
143static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
144                                           kmp_int32 gtid) {
145  char const *const func = "omp_test_lock";
146  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
147      __kmp_is_tas_lock_nestable(lck)) {
148    KMP_FATAL(LockNestableUsedAsSimple, func);
149  }
150  return __kmp_test_tas_lock(lck, gtid);
151}
152
153int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
154  KMP_MB(); /* Flush all pending memory write invalidates.  */
155
156  KMP_FSYNC_RELEASING(lck);
157  ANNOTATE_TAS_RELEASED(lck);
158  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
159  KMP_MB(); /* Flush all pending memory write invalidates.  */
160
161  KMP_YIELD_OVERSUB();
162  return KMP_LOCK_RELEASED;
163}
164
165static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
166                                              kmp_int32 gtid) {
167  char const *const func = "omp_unset_lock";
168  KMP_MB(); /* in case another processor initialized lock */
169  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
170      __kmp_is_tas_lock_nestable(lck)) {
171    KMP_FATAL(LockNestableUsedAsSimple, func);
172  }
173  if (__kmp_get_tas_lock_owner(lck) == -1) {
174    KMP_FATAL(LockUnsettingFree, func);
175  }
176  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
177      (__kmp_get_tas_lock_owner(lck) != gtid)) {
178    KMP_FATAL(LockUnsettingSetByAnother, func);
179  }
180  return __kmp_release_tas_lock(lck, gtid);
181}
182
183void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
184  lck->lk.poll = KMP_LOCK_FREE(tas);
185}
186
187void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }
188
189static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
190  char const *const func = "omp_destroy_lock";
191  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
192      __kmp_is_tas_lock_nestable(lck)) {
193    KMP_FATAL(LockNestableUsedAsSimple, func);
194  }
195  if (__kmp_get_tas_lock_owner(lck) != -1) {
196    KMP_FATAL(LockStillOwned, func);
197  }
198  __kmp_destroy_tas_lock(lck);
199}
200
201// nested test and set locks
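// For reference, the nest-lock semantics these routines implement (user-level
// sketch using the public OpenMP API; depth_locked is the field updated
// below):
//
//   omp_nest_lock_t l;
//   omp_init_nest_lock(&l);
//   omp_set_nest_lock(&l);   // depth_locked becomes 1
//   omp_set_nest_lock(&l);   // same thread re-acquires: depth_locked 2
//   omp_unset_nest_lock(&l); // depth_locked back to 1, lock still held
//   omp_unset_nest_lock(&l); // depth_locked 0, lock actually released
//   omp_destroy_nest_lock(&l);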
202
203int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
204  KMP_DEBUG_ASSERT(gtid >= 0);
205
206  if (__kmp_get_tas_lock_owner(lck) == gtid) {
207    lck->lk.depth_locked += 1;
208    return KMP_LOCK_ACQUIRED_NEXT;
209  } else {
210    __kmp_acquire_tas_lock_timed_template(lck, gtid);
211    ANNOTATE_TAS_ACQUIRED(lck);
212    lck->lk.depth_locked = 1;
213    return KMP_LOCK_ACQUIRED_FIRST;
214  }
215}
216
217static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
218                                                     kmp_int32 gtid) {
219  char const *const func = "omp_set_nest_lock";
220  if (!__kmp_is_tas_lock_nestable(lck)) {
221    KMP_FATAL(LockSimpleUsedAsNestable, func);
222  }
223  return __kmp_acquire_nested_tas_lock(lck, gtid);
224}
225
226int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
227  int retval;
228
229  KMP_DEBUG_ASSERT(gtid >= 0);
230
231  if (__kmp_get_tas_lock_owner(lck) == gtid) {
232    retval = ++lck->lk.depth_locked;
233  } else if (!__kmp_test_tas_lock(lck, gtid)) {
234    retval = 0;
235  } else {
236    KMP_MB();
237    retval = lck->lk.depth_locked = 1;
238  }
239  return retval;
240}
241
242static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
243                                                  kmp_int32 gtid) {
244  char const *const func = "omp_test_nest_lock";
245  if (!__kmp_is_tas_lock_nestable(lck)) {
246    KMP_FATAL(LockSimpleUsedAsNestable, func);
247  }
248  return __kmp_test_nested_tas_lock(lck, gtid);
249}
250
251int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
252  KMP_DEBUG_ASSERT(gtid >= 0);
253
254  KMP_MB();
255  if (--(lck->lk.depth_locked) == 0) {
256    __kmp_release_tas_lock(lck, gtid);
257    return KMP_LOCK_RELEASED;
258  }
259  return KMP_LOCK_STILL_HELD;
260}
261
262static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
263                                                     kmp_int32 gtid) {
264  char const *const func = "omp_unset_nest_lock";
265  KMP_MB(); /* in case another processor initialized lock */
266  if (!__kmp_is_tas_lock_nestable(lck)) {
267    KMP_FATAL(LockSimpleUsedAsNestable, func);
268  }
269  if (__kmp_get_tas_lock_owner(lck) == -1) {
270    KMP_FATAL(LockUnsettingFree, func);
271  }
272  if (__kmp_get_tas_lock_owner(lck) != gtid) {
273    KMP_FATAL(LockUnsettingSetByAnother, func);
274  }
275  return __kmp_release_nested_tas_lock(lck, gtid);
276}
277
278void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
279  __kmp_init_tas_lock(lck);
280  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
281}
282
283void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
284  __kmp_destroy_tas_lock(lck);
285  lck->lk.depth_locked = 0;
286}
287
288static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
289  char const *const func = "omp_destroy_nest_lock";
290  if (!__kmp_is_tas_lock_nestable(lck)) {
291    KMP_FATAL(LockSimpleUsedAsNestable, func);
292  }
293  if (__kmp_get_tas_lock_owner(lck) != -1) {
294    KMP_FATAL(LockStillOwned, func);
295  }
296  __kmp_destroy_nested_tas_lock(lck);
297}
298
299#if KMP_USE_FUTEX
300
301/* ------------------------------------------------------------------------ */
302/* futex locks */
303
304// futex locks are really just test and set locks, with a different method
305// of handling contention.  They take the same amount of space as test and
306// set locks, and are allocated the same way (i.e. use the area allocated by
307// the compiler for non-nested locks / allocate nested locks on the heap).
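//
// Rough shape of that contention handling (sketch only; the real code below
// also packs a "waiter" bit into the poll word and retries on races):
//
//   // contended acquire: ask the kernel to sleep us until poll changes
//   syscall(__NR_futex, &lck->lk.poll, FUTEX_WAIT, observed_poll_val,
//           NULL, NULL, 0);
//
//   // release: hand the lock back, then wake one sleeper if the waiter bit
//   // was set
//   syscall(__NR_futex, &lck->lk.poll, FUTEX_WAKE, 1, NULL, NULL, 0);
//
// FUTEX_WAIT fails immediately (EWOULDBLOCK) if *poll no longer equals
// observed_poll_val, which is what keeps the retry loop race-free.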
308
309static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
310  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
311}
312
313static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
314  return lck->lk.depth_locked != -1;
315}
316
317__forceinline static int
318__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
319  kmp_int32 gtid_code = (gtid + 1) << 1;
320
321  KMP_MB();
322
323#ifdef USE_LOCK_PROFILE
324  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
325  if ((curr != 0) && (curr != gtid_code))
326    __kmp_printf("LOCK CONTENTION: %p\n", lck);
327/* else __kmp_printf( "." );*/
328#endif /* USE_LOCK_PROFILE */
329
330  KMP_FSYNC_PREPARE(lck);
331  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
332                  lck, lck->lk.poll, gtid));
333
334  kmp_int32 poll_val;
335
336  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
337              &(lck->lk.poll), KMP_LOCK_FREE(futex),
338              KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {
339
340    kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
341    KA_TRACE(
342        1000,
343        ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
344         lck, gtid, poll_val, cond));
345
    // NOTE: if you try to use the following condition for this branch
    //
    //   if ( poll_val & 1 == 0 )
    //
    // then the block below is always skipped, regardless of the value of the
    // LSB of poll_val (this was once blamed on a 12.0 compiler bug, but it is
    // simply operator precedence: == binds tighter than &, so the test parses
    // as poll_val & (1 == 0), which is always 0).
352    if (!cond) {
353      // Try to set the lsb in the poll to indicate to the owner
354      // thread that they need to wake this thread up.
355      if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
356                                       poll_val | KMP_LOCK_BUSY(1, futex))) {
357        KA_TRACE(
358            1000,
359            ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
360             lck, lck->lk.poll, gtid));
361        continue;
362      }
363      poll_val |= KMP_LOCK_BUSY(1, futex);
364
365      KA_TRACE(1000,
366               ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
367                lck->lk.poll, gtid));
368    }
369
370    KA_TRACE(
371        1000,
372        ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
373         lck, gtid, poll_val));
374
375    kmp_int32 rc;
376    if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
377                      NULL, 0)) != 0) {
378      KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
379                      "failed (rc=%d errno=%d)\n",
380                      lck, gtid, poll_val, rc, errno));
381      continue;
382    }
383
384    KA_TRACE(1000,
385             ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
386              lck, gtid, poll_val));
387    // This thread has now done a successful futex wait call and was entered on
388    // the OS futex queue.  We must now perform a futex wake call when releasing
389    // the lock, as we have no idea how many other threads are in the queue.
390    gtid_code |= 1;
391  }
392
393  KMP_FSYNC_ACQUIRED(lck);
394  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
395                  lck->lk.poll, gtid));
396  return KMP_LOCK_ACQUIRED_FIRST;
397}
398
399int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
400  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
401  ANNOTATE_FUTEX_ACQUIRED(lck);
402  return retval;
403}
404
405static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
406                                                kmp_int32 gtid) {
407  char const *const func = "omp_set_lock";
408  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
409      __kmp_is_futex_lock_nestable(lck)) {
410    KMP_FATAL(LockNestableUsedAsSimple, func);
411  }
412  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
413    KMP_FATAL(LockIsAlreadyOwned, func);
414  }
415  return __kmp_acquire_futex_lock(lck, gtid);
416}
417
418int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
419  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
420                                  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
421    KMP_FSYNC_ACQUIRED(lck);
422    return TRUE;
423  }
424  return FALSE;
425}
426
427static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
428                                             kmp_int32 gtid) {
429  char const *const func = "omp_test_lock";
430  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
431      __kmp_is_futex_lock_nestable(lck)) {
432    KMP_FATAL(LockNestableUsedAsSimple, func);
433  }
434  return __kmp_test_futex_lock(lck, gtid);
435}
436
437int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
438  KMP_MB(); /* Flush all pending memory write invalidates.  */
439
440  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
441                  lck, lck->lk.poll, gtid));
442
443  KMP_FSYNC_RELEASING(lck);
444  ANNOTATE_FUTEX_RELEASED(lck);
445
446  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));
447
448  KA_TRACE(1000,
449           ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
450            lck, gtid, poll_val));
451
452  if (KMP_LOCK_STRIP(poll_val) & 1) {
453    KA_TRACE(1000,
454             ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
455              lck, gtid));
456    syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
457            NULL, NULL, 0);
458  }
459
460  KMP_MB(); /* Flush all pending memory write invalidates.  */
461
462  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
463                  lck->lk.poll, gtid));
464
465  KMP_YIELD_OVERSUB();
466  return KMP_LOCK_RELEASED;
467}
468
469static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
470                                                kmp_int32 gtid) {
471  char const *const func = "omp_unset_lock";
472  KMP_MB(); /* in case another processor initialized lock */
473  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
474      __kmp_is_futex_lock_nestable(lck)) {
475    KMP_FATAL(LockNestableUsedAsSimple, func);
476  }
477  if (__kmp_get_futex_lock_owner(lck) == -1) {
478    KMP_FATAL(LockUnsettingFree, func);
479  }
480  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
481      (__kmp_get_futex_lock_owner(lck) != gtid)) {
482    KMP_FATAL(LockUnsettingSetByAnother, func);
483  }
484  return __kmp_release_futex_lock(lck, gtid);
485}
486
487void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
488  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
489}
490
491void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
492
493static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
494  char const *const func = "omp_destroy_lock";
495  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
496      __kmp_is_futex_lock_nestable(lck)) {
497    KMP_FATAL(LockNestableUsedAsSimple, func);
498  }
499  if (__kmp_get_futex_lock_owner(lck) != -1) {
500    KMP_FATAL(LockStillOwned, func);
501  }
502  __kmp_destroy_futex_lock(lck);
503}
504
505// nested futex locks
506
507int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
508  KMP_DEBUG_ASSERT(gtid >= 0);
509
510  if (__kmp_get_futex_lock_owner(lck) == gtid) {
511    lck->lk.depth_locked += 1;
512    return KMP_LOCK_ACQUIRED_NEXT;
513  } else {
514    __kmp_acquire_futex_lock_timed_template(lck, gtid);
515    ANNOTATE_FUTEX_ACQUIRED(lck);
516    lck->lk.depth_locked = 1;
517    return KMP_LOCK_ACQUIRED_FIRST;
518  }
519}
520
521static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
522                                                       kmp_int32 gtid) {
523  char const *const func = "omp_set_nest_lock";
524  if (!__kmp_is_futex_lock_nestable(lck)) {
525    KMP_FATAL(LockSimpleUsedAsNestable, func);
526  }
527  return __kmp_acquire_nested_futex_lock(lck, gtid);
528}
529
530int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
531  int retval;
532
533  KMP_DEBUG_ASSERT(gtid >= 0);
534
535  if (__kmp_get_futex_lock_owner(lck) == gtid) {
536    retval = ++lck->lk.depth_locked;
537  } else if (!__kmp_test_futex_lock(lck, gtid)) {
538    retval = 0;
539  } else {
540    KMP_MB();
541    retval = lck->lk.depth_locked = 1;
542  }
543  return retval;
544}
545
546static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
547                                                    kmp_int32 gtid) {
548  char const *const func = "omp_test_nest_lock";
549  if (!__kmp_is_futex_lock_nestable(lck)) {
550    KMP_FATAL(LockSimpleUsedAsNestable, func);
551  }
552  return __kmp_test_nested_futex_lock(lck, gtid);
553}
554
555int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
556  KMP_DEBUG_ASSERT(gtid >= 0);
557
558  KMP_MB();
559  if (--(lck->lk.depth_locked) == 0) {
560    __kmp_release_futex_lock(lck, gtid);
561    return KMP_LOCK_RELEASED;
562  }
563  return KMP_LOCK_STILL_HELD;
564}
565
566static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
567                                                       kmp_int32 gtid) {
568  char const *const func = "omp_unset_nest_lock";
569  KMP_MB(); /* in case another processor initialized lock */
570  if (!__kmp_is_futex_lock_nestable(lck)) {
571    KMP_FATAL(LockSimpleUsedAsNestable, func);
572  }
573  if (__kmp_get_futex_lock_owner(lck) == -1) {
574    KMP_FATAL(LockUnsettingFree, func);
575  }
576  if (__kmp_get_futex_lock_owner(lck) != gtid) {
577    KMP_FATAL(LockUnsettingSetByAnother, func);
578  }
579  return __kmp_release_nested_futex_lock(lck, gtid);
580}
581
582void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
583  __kmp_init_futex_lock(lck);
584  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
585}
586
587void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
588  __kmp_destroy_futex_lock(lck);
589  lck->lk.depth_locked = 0;
590}
591
592static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
593  char const *const func = "omp_destroy_nest_lock";
594  if (!__kmp_is_futex_lock_nestable(lck)) {
595    KMP_FATAL(LockSimpleUsedAsNestable, func);
596  }
597  if (__kmp_get_futex_lock_owner(lck) != -1) {
598    KMP_FATAL(LockStillOwned, func);
599  }
600  __kmp_destroy_nested_futex_lock(lck);
601}
602
603#endif // KMP_USE_FUTEX
604
605/* ------------------------------------------------------------------------ */
606/* ticket (bakery) locks */
607
608static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
609  return std::atomic_load_explicit(&lck->lk.owner_id,
610                                   std::memory_order_relaxed) -
611         1;
612}
613
614static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
615  return std::atomic_load_explicit(&lck->lk.depth_locked,
616                                   std::memory_order_relaxed) != -1;
617}
618
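// Spin predicate handed to KMP_WAIT_PTR() below: a waiter keeps spinning
// until now_serving reaches the ticket it drew.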
619static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
620  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
621                                   std::memory_order_acquire) == my_ticket;
622}
623
624__forceinline static int
625__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
626                                         kmp_int32 gtid) {
627  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
628      &lck->lk.next_ticket, 1U, std::memory_order_relaxed);
629
630#ifdef USE_LOCK_PROFILE
631  if (std::atomic_load_explicit(&lck->lk.now_serving,
632                                std::memory_order_relaxed) != my_ticket)
633    __kmp_printf("LOCK CONTENTION: %p\n", lck);
634/* else __kmp_printf( "." );*/
635#endif /* USE_LOCK_PROFILE */
636
637  if (std::atomic_load_explicit(&lck->lk.now_serving,
638                                std::memory_order_acquire) == my_ticket) {
639    return KMP_LOCK_ACQUIRED_FIRST;
640  }
641  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
642  return KMP_LOCK_ACQUIRED_FIRST;
643}
644
645int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
646  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
647  ANNOTATE_TICKET_ACQUIRED(lck);
648  return retval;
649}
650
651static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
652                                                 kmp_int32 gtid) {
653  char const *const func = "omp_set_lock";
654
655  if (!std::atomic_load_explicit(&lck->lk.initialized,
656                                 std::memory_order_relaxed)) {
657    KMP_FATAL(LockIsUninitialized, func);
658  }
659  if (lck->lk.self != lck) {
660    KMP_FATAL(LockIsUninitialized, func);
661  }
662  if (__kmp_is_ticket_lock_nestable(lck)) {
663    KMP_FATAL(LockNestableUsedAsSimple, func);
664  }
665  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
666    KMP_FATAL(LockIsAlreadyOwned, func);
667  }
668
669  __kmp_acquire_ticket_lock(lck, gtid);
670
671  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
672                             std::memory_order_relaxed);
673  return KMP_LOCK_ACQUIRED_FIRST;
674}
675
676int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
677  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
678                                                   std::memory_order_relaxed);
679
680  if (std::atomic_load_explicit(&lck->lk.now_serving,
681                                std::memory_order_relaxed) == my_ticket) {
682    kmp_uint32 next_ticket = my_ticket + 1;
683    if (std::atomic_compare_exchange_strong_explicit(
684            &lck->lk.next_ticket, &my_ticket, next_ticket,
685            std::memory_order_acquire, std::memory_order_acquire)) {
686      return TRUE;
687    }
688  }
689  return FALSE;
690}
691
692static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
693                                              kmp_int32 gtid) {
694  char const *const func = "omp_test_lock";
695
696  if (!std::atomic_load_explicit(&lck->lk.initialized,
697                                 std::memory_order_relaxed)) {
698    KMP_FATAL(LockIsUninitialized, func);
699  }
700  if (lck->lk.self != lck) {
701    KMP_FATAL(LockIsUninitialized, func);
702  }
703  if (__kmp_is_ticket_lock_nestable(lck)) {
704    KMP_FATAL(LockNestableUsedAsSimple, func);
705  }
706
707  int retval = __kmp_test_ticket_lock(lck, gtid);
708
709  if (retval) {
710    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
711                               std::memory_order_relaxed);
712  }
713  return retval;
714}
715
716int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
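  // Estimate how many threads are still waiting: tickets handed out
  // (next_ticket) minus tickets already served (now_serving).  Unsigned
  // wrap-around keeps this difference meaningful (see __kmp_validate_locks);
  // it is used below to decide whether to yield after releasing.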
717  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
718                                                  std::memory_order_relaxed) -
719                        std::atomic_load_explicit(&lck->lk.now_serving,
720                                                  std::memory_order_relaxed);
721
722  ANNOTATE_TICKET_RELEASED(lck);
723  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
724                                 std::memory_order_release);
725
726  KMP_YIELD(distance >
727            (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
728  return KMP_LOCK_RELEASED;
729}
730
731static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
732                                                 kmp_int32 gtid) {
733  char const *const func = "omp_unset_lock";
734
735  if (!std::atomic_load_explicit(&lck->lk.initialized,
736                                 std::memory_order_relaxed)) {
737    KMP_FATAL(LockIsUninitialized, func);
738  }
739  if (lck->lk.self != lck) {
740    KMP_FATAL(LockIsUninitialized, func);
741  }
742  if (__kmp_is_ticket_lock_nestable(lck)) {
743    KMP_FATAL(LockNestableUsedAsSimple, func);
744  }
745  if (__kmp_get_ticket_lock_owner(lck) == -1) {
746    KMP_FATAL(LockUnsettingFree, func);
747  }
748  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
749      (__kmp_get_ticket_lock_owner(lck) != gtid)) {
750    KMP_FATAL(LockUnsettingSetByAnother, func);
751  }
752  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
753  return __kmp_release_ticket_lock(lck, gtid);
754}
755
756void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
757  lck->lk.location = NULL;
758  lck->lk.self = lck;
759  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
760                             std::memory_order_relaxed);
761  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
762                             std::memory_order_relaxed);
763  std::atomic_store_explicit(
764      &lck->lk.owner_id, 0,
765      std::memory_order_relaxed); // no thread owns the lock.
766  std::atomic_store_explicit(
767      &lck->lk.depth_locked, -1,
768      std::memory_order_relaxed); // -1 => not a nested lock.
769  std::atomic_store_explicit(&lck->lk.initialized, true,
770                             std::memory_order_release);
771}
772
773void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
774  std::atomic_store_explicit(&lck->lk.initialized, false,
775                             std::memory_order_release);
776  lck->lk.self = NULL;
777  lck->lk.location = NULL;
778  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
779                             std::memory_order_relaxed);
780  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
781                             std::memory_order_relaxed);
782  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
783  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
784                             std::memory_order_relaxed);
785}
786
787static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
788  char const *const func = "omp_destroy_lock";
789
790  if (!std::atomic_load_explicit(&lck->lk.initialized,
791                                 std::memory_order_relaxed)) {
792    KMP_FATAL(LockIsUninitialized, func);
793  }
794  if (lck->lk.self != lck) {
795    KMP_FATAL(LockIsUninitialized, func);
796  }
797  if (__kmp_is_ticket_lock_nestable(lck)) {
798    KMP_FATAL(LockNestableUsedAsSimple, func);
799  }
800  if (__kmp_get_ticket_lock_owner(lck) != -1) {
801    KMP_FATAL(LockStillOwned, func);
802  }
803  __kmp_destroy_ticket_lock(lck);
804}
805
806// nested ticket locks
807
808int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
809  KMP_DEBUG_ASSERT(gtid >= 0);
810
811  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
812    std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
813                                   std::memory_order_relaxed);
814    return KMP_LOCK_ACQUIRED_NEXT;
815  } else {
816    __kmp_acquire_ticket_lock_timed_template(lck, gtid);
817    ANNOTATE_TICKET_ACQUIRED(lck);
818    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
819                               std::memory_order_relaxed);
820    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
821                               std::memory_order_relaxed);
822    return KMP_LOCK_ACQUIRED_FIRST;
823  }
824}
825
826static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
827                                                        kmp_int32 gtid) {
828  char const *const func = "omp_set_nest_lock";
829
830  if (!std::atomic_load_explicit(&lck->lk.initialized,
831                                 std::memory_order_relaxed)) {
832    KMP_FATAL(LockIsUninitialized, func);
833  }
834  if (lck->lk.self != lck) {
835    KMP_FATAL(LockIsUninitialized, func);
836  }
837  if (!__kmp_is_ticket_lock_nestable(lck)) {
838    KMP_FATAL(LockSimpleUsedAsNestable, func);
839  }
840  return __kmp_acquire_nested_ticket_lock(lck, gtid);
841}
842
843int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
844  int retval;
845
846  KMP_DEBUG_ASSERT(gtid >= 0);
847
848  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
849    retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
850                                            std::memory_order_relaxed) +
851             1;
852  } else if (!__kmp_test_ticket_lock(lck, gtid)) {
853    retval = 0;
854  } else {
855    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
856                               std::memory_order_relaxed);
857    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
858                               std::memory_order_relaxed);
859    retval = 1;
860  }
861  return retval;
862}
863
864static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
865                                                     kmp_int32 gtid) {
866  char const *const func = "omp_test_nest_lock";
867
868  if (!std::atomic_load_explicit(&lck->lk.initialized,
869                                 std::memory_order_relaxed)) {
870    KMP_FATAL(LockIsUninitialized, func);
871  }
872  if (lck->lk.self != lck) {
873    KMP_FATAL(LockIsUninitialized, func);
874  }
875  if (!__kmp_is_ticket_lock_nestable(lck)) {
876    KMP_FATAL(LockSimpleUsedAsNestable, func);
877  }
878  return __kmp_test_nested_ticket_lock(lck, gtid);
879}
880
881int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
882  KMP_DEBUG_ASSERT(gtid >= 0);
883
884  if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
885                                      std::memory_order_relaxed) -
886       1) == 0) {
887    std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
888    __kmp_release_ticket_lock(lck, gtid);
889    return KMP_LOCK_RELEASED;
890  }
891  return KMP_LOCK_STILL_HELD;
892}
893
894static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
895                                                        kmp_int32 gtid) {
896  char const *const func = "omp_unset_nest_lock";
897
898  if (!std::atomic_load_explicit(&lck->lk.initialized,
899                                 std::memory_order_relaxed)) {
900    KMP_FATAL(LockIsUninitialized, func);
901  }
902  if (lck->lk.self != lck) {
903    KMP_FATAL(LockIsUninitialized, func);
904  }
905  if (!__kmp_is_ticket_lock_nestable(lck)) {
906    KMP_FATAL(LockSimpleUsedAsNestable, func);
907  }
908  if (__kmp_get_ticket_lock_owner(lck) == -1) {
909    KMP_FATAL(LockUnsettingFree, func);
910  }
911  if (__kmp_get_ticket_lock_owner(lck) != gtid) {
912    KMP_FATAL(LockUnsettingSetByAnother, func);
913  }
914  return __kmp_release_nested_ticket_lock(lck, gtid);
915}
916
917void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
918  __kmp_init_ticket_lock(lck);
919  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
920                             std::memory_order_relaxed);
921  // >= 0 for nestable locks, -1 for simple locks
922}
923
924void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
925  __kmp_destroy_ticket_lock(lck);
926  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
927                             std::memory_order_relaxed);
928}
929
930static void
931__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
932  char const *const func = "omp_destroy_nest_lock";
933
934  if (!std::atomic_load_explicit(&lck->lk.initialized,
935                                 std::memory_order_relaxed)) {
936    KMP_FATAL(LockIsUninitialized, func);
937  }
938  if (lck->lk.self != lck) {
939    KMP_FATAL(LockIsUninitialized, func);
940  }
941  if (!__kmp_is_ticket_lock_nestable(lck)) {
942    KMP_FATAL(LockSimpleUsedAsNestable, func);
943  }
944  if (__kmp_get_ticket_lock_owner(lck) != -1) {
945    KMP_FATAL(LockStillOwned, func);
946  }
947  __kmp_destroy_nested_ticket_lock(lck);
948}
949
950// access functions to fields which don't exist for all lock kinds.
951
952static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
953  return lck->lk.location;
954}
955
956static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
957                                           const ident_t *loc) {
958  lck->lk.location = loc;
959}
960
961static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
962  return lck->lk.flags;
963}
964
965static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
966                                        kmp_lock_flags_t flags) {
967  lck->lk.flags = flags;
968}
969
970/* ------------------------------------------------------------------------ */
971/* queuing locks */
972
973/* First the states
974   (head,tail) =              0, 0  means lock is unheld, nobody on queue
975                 UINT_MAX or -1, 0  means lock is held, nobody on queue
976                              h, h  means lock held or about to transition,
977                                    1 element on queue
978                              h, t  h <> t, means lock is held or about to
979                                    transition, >1 elements on queue
980
981   Now the transitions
982      Acquire(0,0)  = -1 ,0
983      Release(0,0)  = Error
984      Acquire(-1,0) =  h ,h    h > 0
985      Release(-1,0) =  0 ,0
986      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
987      Release(h,h)  = -1 ,0    h > 0
988      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
989      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
990
991   And pictorially
992
993           +-----+
994           | 0, 0|------- release -------> Error
995           +-----+
996             |  ^
997      acquire|  |release
998             |  |
999             |  |
1000             v  |
1001           +-----+
1002           |-1, 0|
1003           +-----+
1004             |  ^
1005      acquire|  |release
1006             |  |
1007             |  |
1008             v  |
1009           +-----+
1010           | h, h|
1011           +-----+
1012             |  ^
1013      acquire|  |release
1014             |  |
1015             |  |
1016             v  |
1017           +-----+
1018           | h, t|----- acquire, release loopback ---+
1019           +-----+                                   |
1020                ^                                    |
1021                |                                    |
1022                +------------------------------------+
1023 */
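
/* Worked example of the table above (ids are stored as gtid+1): T#5 and then
   T#7 contend for a free lock.

     initial state                (head,tail) = ( 0, 0)  unlocked, queue empty
     T#5 acquires                 (head,tail) = (-1, 0)  T#5 owns, queue empty
     T#7 contends and enqueues    (head,tail) = ( 8, 8)  T#5 still owns;
                                                         T#7 (id 8) waits
     T#5 releases                 (head,tail) = (-1, 0)  T#7 dequeued, now owns
     T#7 releases                 (head,tail) = ( 0, 0)  unlocked again

   Note that a thread's id never appears in the tuple while that thread owns
   the lock; head/tail only ever name waiters. */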
1024
1025#ifdef DEBUG_QUEUING_LOCKS
1026
1027/* Stuff for circular trace buffer */
1028#define TRACE_BUF_ELE 1024
1029static char traces[TRACE_BUF_ELE][128] = {0};
1030static int tc = 0;
1031#define TRACE_LOCK(X, Y)                                                       \
1032  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1033#define TRACE_LOCK_T(X, Y, Z)                                                  \
1034  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1035#define TRACE_LOCK_HT(X, Y, Z, Q)                                              \
1036  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y,   \
1037               Z, Q);
1038
1039static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
1040                                    kmp_queuing_lock_t *lck, kmp_int32 head_id,
1041                                    kmp_int32 tail_id) {
1042  kmp_int32 t, i;
1043
1044  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1045
1046  i = tc % TRACE_BUF_ELE;
1047  __kmp_printf_no_lock("%s\n", traces[i]);
1048  i = (i + 1) % TRACE_BUF_ELE;
1049  while (i != (tc % TRACE_BUF_ELE)) {
1050    __kmp_printf_no_lock("%s", traces[i]);
1051    i = (i + 1) % TRACE_BUF_ELE;
1052  }
1053  __kmp_printf_no_lock("\n");
1054
1055  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1056                       "next_wait:%d, head_id:%d, tail_id:%d\n",
1057                       gtid + 1, this_thr->th.th_spin_here,
1058                       this_thr->th.th_next_waiting, head_id, tail_id);
1059
1060  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
1061
1062  if (lck->lk.head_id >= 1) {
1063    t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
1064    while (t > 0) {
1065      __kmp_printf_no_lock("-> %d ", t);
1066      t = __kmp_threads[t - 1]->th.th_next_waiting;
1067    }
1068  }
1069  __kmp_printf_no_lock(";  tail: %d ", lck->lk.tail_id);
1070  __kmp_printf_no_lock("\n\n");
1071}
1072
1073#endif /* DEBUG_QUEUING_LOCKS */
1074
1075static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
1076  return TCR_4(lck->lk.owner_id) - 1;
1077}
1078
1079static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
1080  return lck->lk.depth_locked != -1;
1081}
1082
/* Acquire a lock using the queuing lock implementation */
1084template <bool takeTime>
1085/* [TLW] The unused template above is left behind because of what BEB believes
1086   is a potential compiler problem with __forceinline. */
1087__forceinline static int
1088__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
1089                                          kmp_int32 gtid) {
1090  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1091  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1092  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1093  volatile kmp_uint32 *spin_here_p;
1094  kmp_int32 need_mf = 1;
1095
1096#if OMPT_SUPPORT
1097  ompt_state_t prev_state = ompt_state_undefined;
1098#endif
1099
1100  KA_TRACE(1000,
1101           ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1102
1103  KMP_FSYNC_PREPARE(lck);
1104  KMP_DEBUG_ASSERT(this_thr != NULL);
1105  spin_here_p = &this_thr->th.th_spin_here;
1106
1107#ifdef DEBUG_QUEUING_LOCKS
1108  TRACE_LOCK(gtid + 1, "acq ent");
1109  if (*spin_here_p)
1110    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1111  if (this_thr->th.th_next_waiting != 0)
1112    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1113#endif
1114  KMP_DEBUG_ASSERT(!*spin_here_p);
1115  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1116
1117  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1118     head_id_p that may follow, not just in execution order, but also in
1119     visibility order. This way, when a releasing thread observes the changes to
1120     the queue by this thread, it can rightly assume that spin_here_p has
1121     already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1122     not premature.  If the releasing thread sets spin_here_p to FALSE before
1123     this thread sets it to TRUE, this thread will hang. */
1124  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1125
1126  while (1) {
1127    kmp_int32 enqueued;
1128    kmp_int32 head;
1129    kmp_int32 tail;
1130
1131    head = *head_id_p;
1132
1133    switch (head) {
1134
1135    case -1: {
1136#ifdef DEBUG_QUEUING_LOCKS
1137      tail = *tail_id_p;
1138      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1139#endif
      tail = 0; /* to make sure a stale, asynchronously read tail does not
                cause the next-waiting link to be set accidentally; this
                assignment keeps us out of the if ( tail > 0 ) branch in the
                enqueued case below, which is not needed for this state
                transition */
1144
1145      need_mf = 0;
1146      /* try (-1,0)->(tid,tid) */
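      /* Note: the 64-bit compare-and-store below covers tail_id and the
         adjacent head_id as one KMP_PACK_64 value, so both fields change
         atomically (this is what the 8-byte alignment assertion in
         __kmp_validate_locks() guarantees is safe). */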
1147      enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1148                                             KMP_PACK_64(-1, 0),
1149                                             KMP_PACK_64(gtid + 1, gtid + 1));
1150#ifdef DEBUG_QUEUING_LOCKS
1151      if (enqueued)
1152        TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1153#endif
1154    } break;
1155
1156    default: {
1157      tail = *tail_id_p;
1158      KMP_DEBUG_ASSERT(tail != gtid + 1);
1159
1160#ifdef DEBUG_QUEUING_LOCKS
1161      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1162#endif
1163
1164      if (tail == 0) {
1165        enqueued = FALSE;
1166      } else {
1167        need_mf = 0;
1168        /* try (h,t) or (h,h)->(h,tid) */
1169        enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1170
1171#ifdef DEBUG_QUEUING_LOCKS
1172        if (enqueued)
1173          TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1174#endif
1175      }
1176    } break;
1177
1178    case 0: /* empty queue */
1179    {
1180      kmp_int32 grabbed_lock;
1181
1182#ifdef DEBUG_QUEUING_LOCKS
1183      tail = *tail_id_p;
1184      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1185#endif
1186      /* try (0,0)->(-1,0) */
1187
1188      /* only legal transition out of head = 0 is head = -1 with no change to
1189       * tail */
1190      grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1191
1192      if (grabbed_lock) {
1193
1194        *spin_here_p = FALSE;
1195
1196        KA_TRACE(
1197            1000,
1198            ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1199             lck, gtid));
1200#ifdef DEBUG_QUEUING_LOCKS
1201        TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1202#endif
1203
1204#if OMPT_SUPPORT
1205        if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1206          /* change the state before clearing wait_id */
1207          this_thr->th.ompt_thread_info.state = prev_state;
1208          this_thr->th.ompt_thread_info.wait_id = 0;
1209        }
1210#endif
1211
1212        KMP_FSYNC_ACQUIRED(lck);
1213        return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1214      }
1215      enqueued = FALSE;
1216    } break;
1217    }
1218
1219#if OMPT_SUPPORT
1220    if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1221      /* this thread will spin; set wait_id before entering wait state */
1222      prev_state = this_thr->th.ompt_thread_info.state;
1223      this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1224      this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1225    }
1226#endif
1227
1228    if (enqueued) {
1229      if (tail > 0) {
1230        kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1231        KMP_ASSERT(tail_thr != NULL);
1232        tail_thr->th.th_next_waiting = gtid + 1;
1233        /* corresponding wait for this write in release code */
1234      }
1235      KA_TRACE(1000,
1236               ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1237                lck, gtid));
1238
1239      KMP_MB();
1240      // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1241      KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
1242
1243#ifdef DEBUG_QUEUING_LOCKS
1244      TRACE_LOCK(gtid + 1, "acq spin");
1245
1246      if (this_thr->th.th_next_waiting != 0)
1247        __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1248#endif
1249      KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1250      KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1251                      "waiting on queue\n",
1252                      lck, gtid));
1253
1254#ifdef DEBUG_QUEUING_LOCKS
1255      TRACE_LOCK(gtid + 1, "acq exit 2");
1256#endif
1257
1258#if OMPT_SUPPORT
1259      /* change the state before clearing wait_id */
1260      this_thr->th.ompt_thread_info.state = prev_state;
1261      this_thr->th.ompt_thread_info.wait_id = 0;
1262#endif
1263
1264      /* got lock, we were dequeued by the thread that released lock */
1265      return KMP_LOCK_ACQUIRED_FIRST;
1266    }
1267
1268    /* Yield if number of threads > number of logical processors */
1269    /* ToDo: Not sure why this should only be in oversubscription case,
1270       maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1271    KMP_YIELD_OVERSUB();
1272
1273#ifdef DEBUG_QUEUING_LOCKS
1274    TRACE_LOCK(gtid + 1, "acq retry");
1275#endif
1276  }
1277  KMP_ASSERT2(0, "should not get here");
1278  return KMP_LOCK_ACQUIRED_FIRST;
1279}
1280
1281int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1282  KMP_DEBUG_ASSERT(gtid >= 0);
1283
1284  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1285  ANNOTATE_QUEUING_ACQUIRED(lck);
1286  return retval;
1287}
1288
1289static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1290                                                  kmp_int32 gtid) {
1291  char const *const func = "omp_set_lock";
1292  if (lck->lk.initialized != lck) {
1293    KMP_FATAL(LockIsUninitialized, func);
1294  }
1295  if (__kmp_is_queuing_lock_nestable(lck)) {
1296    KMP_FATAL(LockNestableUsedAsSimple, func);
1297  }
1298  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1299    KMP_FATAL(LockIsAlreadyOwned, func);
1300  }
1301
1302  __kmp_acquire_queuing_lock(lck, gtid);
1303
1304  lck->lk.owner_id = gtid + 1;
1305  return KMP_LOCK_ACQUIRED_FIRST;
1306}
1307
1308int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1309  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1310  kmp_int32 head;
1311#ifdef KMP_DEBUG
1312  kmp_info_t *this_thr;
1313#endif
1314
1315  KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1316  KMP_DEBUG_ASSERT(gtid >= 0);
1317#ifdef KMP_DEBUG
1318  this_thr = __kmp_thread_from_gtid(gtid);
1319  KMP_DEBUG_ASSERT(this_thr != NULL);
1320  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1321#endif
1322
1323  head = *head_id_p;
1324
1325  if (head == 0) { /* nobody on queue, nobody holding */
1326    /* try (0,0)->(-1,0) */
1327    if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1328      KA_TRACE(1000,
1329               ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1330      KMP_FSYNC_ACQUIRED(lck);
1331      ANNOTATE_QUEUING_ACQUIRED(lck);
1332      return TRUE;
1333    }
1334  }
1335
1336  KA_TRACE(1000,
1337           ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1338  return FALSE;
1339}
1340
1341static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1342                                               kmp_int32 gtid) {
1343  char const *const func = "omp_test_lock";
1344  if (lck->lk.initialized != lck) {
1345    KMP_FATAL(LockIsUninitialized, func);
1346  }
1347  if (__kmp_is_queuing_lock_nestable(lck)) {
1348    KMP_FATAL(LockNestableUsedAsSimple, func);
1349  }
1350
1351  int retval = __kmp_test_queuing_lock(lck, gtid);
1352
1353  if (retval) {
1354    lck->lk.owner_id = gtid + 1;
1355  }
1356  return retval;
1357}
1358
1359int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1360  kmp_info_t *this_thr;
1361  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1362  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1363
1364  KA_TRACE(1000,
1365           ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1366  KMP_DEBUG_ASSERT(gtid >= 0);
1367  this_thr = __kmp_thread_from_gtid(gtid);
1368  KMP_DEBUG_ASSERT(this_thr != NULL);
1369#ifdef DEBUG_QUEUING_LOCKS
1370  TRACE_LOCK(gtid + 1, "rel ent");
1371
1372  if (this_thr->th.th_spin_here)
1373    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1374  if (this_thr->th.th_next_waiting != 0)
1375    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1376#endif
1377  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1378  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1379
1380  KMP_FSYNC_RELEASING(lck);
1381  ANNOTATE_QUEUING_RELEASED(lck);
1382
1383  while (1) {
1384    kmp_int32 dequeued;
1385    kmp_int32 head;
1386    kmp_int32 tail;
1387
1388    head = *head_id_p;
1389
1390#ifdef DEBUG_QUEUING_LOCKS
1391    tail = *tail_id_p;
1392    TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1393    if (head == 0)
1394      __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1395#endif
1396    KMP_DEBUG_ASSERT(head !=
1397                     0); /* holding the lock, head must be -1 or queue head */
1398
1399    if (head == -1) { /* nobody on queue */
1400      /* try (-1,0)->(0,0) */
1401      if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1402        KA_TRACE(
1403            1000,
1404            ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1405             lck, gtid));
1406#ifdef DEBUG_QUEUING_LOCKS
1407        TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1408#endif
1409
1410#if OMPT_SUPPORT
1411/* nothing to do - no other thread is trying to shift blame */
1412#endif
1413        return KMP_LOCK_RELEASED;
1414      }
1415      dequeued = FALSE;
1416    } else {
1417      KMP_MB();
1418      tail = *tail_id_p;
1419      if (head == tail) { /* only one thread on the queue */
1420#ifdef DEBUG_QUEUING_LOCKS
1421        if (head <= 0)
1422          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1423#endif
1424        KMP_DEBUG_ASSERT(head > 0);
1425
1426        /* try (h,h)->(-1,0) */
1427        dequeued = KMP_COMPARE_AND_STORE_REL64(
1428            RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1429            KMP_PACK_64(-1, 0));
1430#ifdef DEBUG_QUEUING_LOCKS
1431        TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1432#endif
1433
1434      } else {
1435        volatile kmp_int32 *waiting_id_p;
1436        kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1437        KMP_DEBUG_ASSERT(head_thr != NULL);
1438        waiting_id_p = &head_thr->th.th_next_waiting;
1439
1440/* Does this require synchronous reads? */
1441#ifdef DEBUG_QUEUING_LOCKS
1442        if (head <= 0 || tail <= 0)
1443          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1444#endif
1445        KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1446
1447        /* try (h,t)->(h',t) or (t,t) */
1448        KMP_MB();
1449        /* make sure enqueuing thread has time to update next waiting thread
1450         * field */
1451        *head_id_p =
1452            KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
1453#ifdef DEBUG_QUEUING_LOCKS
1454        TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1455#endif
1456        dequeued = TRUE;
1457      }
1458    }
1459
1460    if (dequeued) {
1461      kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1462      KMP_DEBUG_ASSERT(head_thr != NULL);
1463
1464/* Does this require synchronous reads? */
1465#ifdef DEBUG_QUEUING_LOCKS
1466      if (head <= 0 || tail <= 0)
1467        __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1468#endif
1469      KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1470
      /* For clean code only. The waiting thread is not released until the
         next statement, which prevents a race with the acquire code. */
1473      head_thr->th.th_next_waiting = 0;
1474#ifdef DEBUG_QUEUING_LOCKS
1475      TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1476#endif
1477
1478      KMP_MB();
1479      /* reset spin value */
1480      head_thr->th.th_spin_here = FALSE;
1481
1482      KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1483                      "dequeuing\n",
1484                      lck, gtid));
1485#ifdef DEBUG_QUEUING_LOCKS
1486      TRACE_LOCK(gtid + 1, "rel exit 2");
1487#endif
1488      return KMP_LOCK_RELEASED;
1489    }
1490/* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1491   threads */
1492
1493#ifdef DEBUG_QUEUING_LOCKS
1494    TRACE_LOCK(gtid + 1, "rel retry");
1495#endif
1496
1497  } /* while */
1498  KMP_ASSERT2(0, "should not get here");
1499  return KMP_LOCK_RELEASED;
1500}
1501
1502static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1503                                                  kmp_int32 gtid) {
1504  char const *const func = "omp_unset_lock";
1505  KMP_MB(); /* in case another processor initialized lock */
1506  if (lck->lk.initialized != lck) {
1507    KMP_FATAL(LockIsUninitialized, func);
1508  }
1509  if (__kmp_is_queuing_lock_nestable(lck)) {
1510    KMP_FATAL(LockNestableUsedAsSimple, func);
1511  }
1512  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1513    KMP_FATAL(LockUnsettingFree, func);
1514  }
1515  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1516    KMP_FATAL(LockUnsettingSetByAnother, func);
1517  }
1518  lck->lk.owner_id = 0;
1519  return __kmp_release_queuing_lock(lck, gtid);
1520}
1521
1522void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
1523  lck->lk.location = NULL;
1524  lck->lk.head_id = 0;
1525  lck->lk.tail_id = 0;
1526  lck->lk.next_ticket = 0;
1527  lck->lk.now_serving = 0;
1528  lck->lk.owner_id = 0; // no thread owns the lock.
1529  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1530  lck->lk.initialized = lck;
1531
1532  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1533}
1534
1535void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
1536  lck->lk.initialized = NULL;
1537  lck->lk.location = NULL;
1538  lck->lk.head_id = 0;
1539  lck->lk.tail_id = 0;
1540  lck->lk.next_ticket = 0;
1541  lck->lk.now_serving = 0;
1542  lck->lk.owner_id = 0;
1543  lck->lk.depth_locked = -1;
1544}
1545
1546static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1547  char const *const func = "omp_destroy_lock";
1548  if (lck->lk.initialized != lck) {
1549    KMP_FATAL(LockIsUninitialized, func);
1550  }
1551  if (__kmp_is_queuing_lock_nestable(lck)) {
1552    KMP_FATAL(LockNestableUsedAsSimple, func);
1553  }
1554  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1555    KMP_FATAL(LockStillOwned, func);
1556  }
1557  __kmp_destroy_queuing_lock(lck);
1558}
1559
1560// nested queuing locks
1561
1562int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1563  KMP_DEBUG_ASSERT(gtid >= 0);
1564
1565  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1566    lck->lk.depth_locked += 1;
1567    return KMP_LOCK_ACQUIRED_NEXT;
1568  } else {
1569    __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1570    ANNOTATE_QUEUING_ACQUIRED(lck);
1571    KMP_MB();
1572    lck->lk.depth_locked = 1;
1573    KMP_MB();
1574    lck->lk.owner_id = gtid + 1;
1575    return KMP_LOCK_ACQUIRED_FIRST;
1576  }
1577}
1578
1579static int
1580__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1581                                              kmp_int32 gtid) {
1582  char const *const func = "omp_set_nest_lock";
1583  if (lck->lk.initialized != lck) {
1584    KMP_FATAL(LockIsUninitialized, func);
1585  }
1586  if (!__kmp_is_queuing_lock_nestable(lck)) {
1587    KMP_FATAL(LockSimpleUsedAsNestable, func);
1588  }
1589  return __kmp_acquire_nested_queuing_lock(lck, gtid);
1590}
1591
1592int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1593  int retval;
1594
1595  KMP_DEBUG_ASSERT(gtid >= 0);
1596
1597  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1598    retval = ++lck->lk.depth_locked;
1599  } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1600    retval = 0;
1601  } else {
1602    KMP_MB();
1603    retval = lck->lk.depth_locked = 1;
1604    KMP_MB();
1605    lck->lk.owner_id = gtid + 1;
1606  }
1607  return retval;
1608}
1609
1610static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1611                                                      kmp_int32 gtid) {
1612  char const *const func = "omp_test_nest_lock";
1613  if (lck->lk.initialized != lck) {
1614    KMP_FATAL(LockIsUninitialized, func);
1615  }
1616  if (!__kmp_is_queuing_lock_nestable(lck)) {
1617    KMP_FATAL(LockSimpleUsedAsNestable, func);
1618  }
1619  return __kmp_test_nested_queuing_lock(lck, gtid);
1620}
1621
1622int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1623  KMP_DEBUG_ASSERT(gtid >= 0);
1624
1625  KMP_MB();
1626  if (--(lck->lk.depth_locked) == 0) {
1627    KMP_MB();
1628    lck->lk.owner_id = 0;
1629    __kmp_release_queuing_lock(lck, gtid);
1630    return KMP_LOCK_RELEASED;
1631  }
1632  return KMP_LOCK_STILL_HELD;
1633}
1634
1635static int
1636__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1637                                              kmp_int32 gtid) {
1638  char const *const func = "omp_unset_nest_lock";
1639  KMP_MB(); /* in case another processor initialized lock */
1640  if (lck->lk.initialized != lck) {
1641    KMP_FATAL(LockIsUninitialized, func);
1642  }
1643  if (!__kmp_is_queuing_lock_nestable(lck)) {
1644    KMP_FATAL(LockSimpleUsedAsNestable, func);
1645  }
1646  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1647    KMP_FATAL(LockUnsettingFree, func);
1648  }
1649  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1650    KMP_FATAL(LockUnsettingSetByAnother, func);
1651  }
1652  return __kmp_release_nested_queuing_lock(lck, gtid);
1653}
1654
1655void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1656  __kmp_init_queuing_lock(lck);
1657  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1658}
1659
1660void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1661  __kmp_destroy_queuing_lock(lck);
1662  lck->lk.depth_locked = 0;
1663}
1664
1665static void
1666__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1667  char const *const func = "omp_destroy_nest_lock";
1668  if (lck->lk.initialized != lck) {
1669    KMP_FATAL(LockIsUninitialized, func);
1670  }
1671  if (!__kmp_is_queuing_lock_nestable(lck)) {
1672    KMP_FATAL(LockSimpleUsedAsNestable, func);
1673  }
1674  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1675    KMP_FATAL(LockStillOwned, func);
1676  }
1677  __kmp_destroy_nested_queuing_lock(lck);
1678}
1679
1680// access functions to fields which don't exist for all lock kinds.
1681
1682static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
1683  return lck->lk.location;
1684}
1685
1686static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
1687                                            const ident_t *loc) {
1688  lck->lk.location = loc;
1689}
1690
1691static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
1692  return lck->lk.flags;
1693}
1694
1695static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
1696                                         kmp_lock_flags_t flags) {
1697  lck->lk.flags = flags;
1698}
1699
1700#if KMP_USE_ADAPTIVE_LOCKS
1701
1702/* RTM Adaptive locks */
1703
1704#if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) ||                          \
1705    (KMP_COMPILER_MSVC && _MSC_VER >= 1700) ||                                 \
1706    (KMP_COMPILER_CLANG && KMP_MSVC_COMPAT)
1707
1708#include <immintrin.h>
1709#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1710
1711#else
1712
1713// Values from the status register after failed speculation.
1714#define _XBEGIN_STARTED (~0u)
1715#define _XABORT_EXPLICIT (1 << 0)
1716#define _XABORT_RETRY (1 << 1)
1717#define _XABORT_CONFLICT (1 << 2)
1718#define _XABORT_CAPACITY (1 << 3)
1719#define _XABORT_DEBUG (1 << 4)
1720#define _XABORT_NESTED (1 << 5)
1721#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1722
1723// Aborts for which it's worth trying again immediately
1724#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
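// Worked example (illustrative): with the bit values above, an abort status of
// (_XABORT_EXPLICIT | _XABORT_RETRY) == 0x3 intersects SOFT_ABORT_MASK, so the
// speculation is retried immediately, whereas _XABORT_CAPACITY == 0x8 does not
// and is treated as a hard failure (see __kmp_test_adaptive_lock_only below).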
1725
1726#define STRINGIZE_INTERNAL(arg) #arg
1727#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1728
1729// Access to RTM instructions
1730/* A version of XBegin which returns -1 when speculation starts successfully,
1731   and the value of EAX on an abort. This is the same definition as the
1732   compiler intrinsic that will be supported at some point. */
1733static __inline int _xbegin() {
1734  int res = -1;
1735
1736#if KMP_OS_WINDOWS
1737#if KMP_ARCH_X86_64
1738  _asm {
1739        _emit 0xC7
1740        _emit 0xF8
1741        _emit 2
1742        _emit 0
1743        _emit 0
1744        _emit 0
1745        jmp   L2
1746        mov   res, eax
1747    L2:
1748  }
1749#else /* IA32 */
1750  _asm {
1751        _emit 0xC7
1752        _emit 0xF8
1753        _emit 2
1754        _emit 0
1755        _emit 0
1756        _emit 0
1757        jmp   L2
1758        mov   res, eax
1759    L2:
1760  }
1761#endif // KMP_ARCH_X86_64
1762#else
1763  /* Note that %eax must be noted as killed (clobbered), because the XSR is
1764     returned in %eax(%rax) on abort.  Other register values are restored, so
1765     don't need to be killed.
1766
1767     We must also mark 'res' as an input and an output, since otherwise
1768     'res=-1' may be dropped as being dead, whereas we do need the assignment on
1769     the successful (i.e., non-abort) path. */
1770  __asm__ volatile("1: .byte  0xC7; .byte 0xF8;\n"
1771                   "   .long  1f-1b-6\n"
1772                   "    jmp   2f\n"
1773                   "1:  movl  %%eax,%0\n"
1774                   "2:"
1775                   : "+r"(res)::"memory", "%eax");
1776#endif // KMP_OS_WINDOWS
1777  return res;
1778}
1779
1780/* Transaction end */
1781static __inline void _xend() {
1782#if KMP_OS_WINDOWS
1783  __asm {
1784        _emit 0x0f
1785        _emit 0x01
1786        _emit 0xd5
1787  }
1788#else
1789  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1790#endif
1791}
1792
1793/* This is a macro; the argument must be a single-byte constant that can be
1794   evaluated by the inline assembler, since it is emitted as a byte into the
1795   assembly code. */
1796// clang-format off
1797#if KMP_OS_WINDOWS
1798#define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1799#else
1800#define _xabort(ARG)                                                           \
1801  __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1802#endif
1803// clang-format on
1804#endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
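// Illustrative sketch (not part of the runtime) of how the primitives above
// compose into the usual RTM pattern that the adaptive and RTM locks below
// follow:
//
//   kmp_uint32 status = _xbegin();
//   if (status == _XBEGIN_STARTED) {
//     /* ... speculative critical section ... */
//     _xend(); // commit the transaction
//   } else if (status & SOFT_ABORT_MASK) {
//     /* transient abort: worth retrying _xbegin() */
//   } else {
//     /* hard abort: fall back to a real lock acquisition */
//   }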
1805
1806// Statistics are collected for testing purposes.
1807#if KMP_DEBUG_ADAPTIVE_LOCKS
1808
1809// We accumulate speculative lock statistics when the lock is destroyed. We
1810// keep locks that haven't been destroyed in the liveLocks list so that we can
1811// grab their statistics too.
1812static kmp_adaptive_lock_statistics_t destroyedStats;
1813
1814// To hold the list of live locks.
1815static kmp_adaptive_lock_info_t liveLocks;
1816
1817// A lock so we can safely update the list of locks.
1818static kmp_bootstrap_lock_t chain_lock =
1819    KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
1820
1821// Initialize the list of stats.
1822void __kmp_init_speculative_stats() {
1823  kmp_adaptive_lock_info_t *lck = &liveLocks;
1824
1825  memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1826         sizeof(lck->stats));
1827  lck->stats.next = lck;
1828  lck->stats.prev = lck;
1829
1830  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1831  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1832
1833  __kmp_init_bootstrap_lock(&chain_lock);
1834}
1835
1836// Insert the lock into the circular list
1837static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1838  __kmp_acquire_bootstrap_lock(&chain_lock);
1839
1840  lck->stats.next = liveLocks.stats.next;
1841  lck->stats.prev = &liveLocks;
1842
1843  liveLocks.stats.next = lck;
1844  lck->stats.next->stats.prev = lck;
1845
1846  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1847  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1848
1849  __kmp_release_bootstrap_lock(&chain_lock);
1850}
1851
1852static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1853  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1854  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1855
1856  kmp_adaptive_lock_info_t *n = lck->stats.next;
1857  kmp_adaptive_lock_info_t *p = lck->stats.prev;
1858
1859  n->stats.prev = p;
1860  p->stats.next = n;
1861}
1862
1863static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1864  memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
1865         sizeof(lck->stats));
1866  __kmp_remember_lock(lck);
1867}
1868
1869static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1870                            kmp_adaptive_lock_info_t *lck) {
1871  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1872
1873  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1874  t->successfulSpeculations += s->successfulSpeculations;
1875  t->hardFailedSpeculations += s->hardFailedSpeculations;
1876  t->softFailedSpeculations += s->softFailedSpeculations;
1877  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1878  t->lemmingYields += s->lemmingYields;
1879}
1880
1881static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1882  __kmp_acquire_bootstrap_lock(&chain_lock);
1883
1884  __kmp_add_stats(&destroyedStats, lck);
1885  __kmp_forget_lock(lck);
1886
1887  __kmp_release_bootstrap_lock(&chain_lock);
1888}
1889
1890static float percent(kmp_uint32 count, kmp_uint32 total) {
1891  return (total == 0) ? 0.0 : (100.0 * count) / total;
1892}
1893
1894static FILE *__kmp_open_stats_file() {
1895  if (strcmp(__kmp_speculative_statsfile, "-") == 0)
1896    return stdout;
1897
1898  size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1899  char buffer[buffLen];
1900  KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1901               (kmp_int32)getpid());
1902  FILE *result = fopen(&buffer[0], "w");
1903
1904  // Maybe we should issue a warning here...
1905  return result ? result : stdout;
1906}
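// For example: because __kmp_speculative_statsfile is passed to KMP_SNPRINTF
// as a printf-style template above, a value such as "adaptive-%d.log" would
// produce "adaptive-12345.log" for pid 12345, while "-" sends the statistics
// to stdout.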
1907
1908void __kmp_print_speculative_stats() {
1909  kmp_adaptive_lock_statistics_t total = destroyedStats;
1910  kmp_adaptive_lock_info_t *lck;
1911
1912  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1913    __kmp_add_stats(&total, lck);
1914  }
1915  kmp_adaptive_lock_statistics_t *t = &total;
1916  kmp_uint32 totalSections =
1917      t->nonSpeculativeAcquires + t->successfulSpeculations;
1918  kmp_uint32 totalSpeculations = t->successfulSpeculations +
1919                                 t->hardFailedSpeculations +
1920                                 t->softFailedSpeculations;
1921  if (totalSections <= 0)
1922    return;
1923
1924  FILE *statsFile = __kmp_open_stats_file();
1925
1926  fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1927  fprintf(statsFile, " Lock parameters: \n"
1928                     "   max_soft_retries               : %10d\n"
1929                     "   max_badness                    : %10d\n",
1930          __kmp_adaptive_backoff_params.max_soft_retries,
1931          __kmp_adaptive_backoff_params.max_badness);
1932  fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1933          t->nonSpeculativeAcquireAttempts);
1934  fprintf(statsFile, " Total critical sections          : %10d\n",
1935          totalSections);
1936  fprintf(statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
1937          t->successfulSpeculations,
1938          percent(t->successfulSpeculations, totalSections));
1939  fprintf(statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
1940          t->nonSpeculativeAcquires,
1941          percent(t->nonSpeculativeAcquires, totalSections));
1942  fprintf(statsFile, " Lemming yields                   : %10d\n\n",
1943          t->lemmingYields);
1944
1945  fprintf(statsFile, " Speculative acquire attempts     : %10d\n",
1946          totalSpeculations);
1947  fprintf(statsFile, " Successes                        : %10d (%5.1f%%)\n",
1948          t->successfulSpeculations,
1949          percent(t->successfulSpeculations, totalSpeculations));
1950  fprintf(statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
1951          t->softFailedSpeculations,
1952          percent(t->softFailedSpeculations, totalSpeculations));
1953  fprintf(statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
1954          t->hardFailedSpeculations,
1955          percent(t->hardFailedSpeculations, totalSpeculations));
1956
1957  if (statsFile != stdout)
1958    fclose(statsFile);
1959}
1960
1961#define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1962#else
1963#define KMP_INC_STAT(lck, stat)
1964
1965#endif // KMP_DEBUG_ADAPTIVE_LOCKS
1966
1967static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
1968  // It is enough to check that the head_id is zero.
1969  // We don't also need to check the tail.
1970  bool res = lck->lk.head_id == 0;
1971
1972// We need a fence here, since we must ensure that no memory operations
1973// from later in this thread float above that read.
1974#if KMP_COMPILER_ICC
1975  _mm_mfence();
1976#else
1977  __sync_synchronize();
1978#endif
1979
1980  return res;
1981}
1982
1983// Functions for manipulating the badness
1984static __inline void
1985__kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
1986  // Reset the badness to zero so we eagerly try to speculate again
1987  lck->lk.adaptive.badness = 0;
1988  KMP_INC_STAT(lck, successfulSpeculations);
1989}
1990
1991// Create a bit mask with one more set bit.
1992static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1993  kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1994  if (newBadness > lck->lk.adaptive.max_badness) {
1995    return;
1996  } else {
1997    lck->lk.adaptive.badness = newBadness;
1998  }
1999}
2000
2001// Check whether speculation should be attempted.
2002static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
2003                                           kmp_int32 gtid) {
2004  kmp_uint32 badness = lck->lk.adaptive.badness;
2005  kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
2006  int res = (attempts & badness) == 0;
2007  return res;
2008}
2009
2010// Attempt to acquire only the speculative lock.
2011// Does not back off to the non-speculative lock.
2012static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
2013                                         kmp_int32 gtid) {
2014  int retries = lck->lk.adaptive.max_soft_retries;
2015
2016  // We don't explicitly count the start of speculation; rather, we record the
2017  // results (success, hard fail, soft fail). The sum of all of those is the
2018  // total number of times we started speculation, since all speculations must
2019  // end in one of those ways.
2020  do {
2021    kmp_uint32 status = _xbegin();
2022    // Switch this in to disable actual speculation but exercise at least some
2023    // of the rest of the code. Useful for debugging...
2024    // kmp_uint32 status = _XABORT_NESTED;
2025
2026    if (status == _XBEGIN_STARTED) {
2027      /* We have successfully started speculation. Check that no-one acquired
2028         the lock for real between when we last looked and now. This also gets
2029         the lock cache line into our read-set, which we need so that we'll
2030         abort if anyone later claims it for real. */
2031      if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2032        // Lock is now visibly acquired, so someone beat us to it. Abort the
2033        // transaction so we'll restart from _xbegin with the failure status.
2034        _xabort(0x01);
2035        KMP_ASSERT2(0, "should not get here");
2036      }
2037      return 1; // Lock has been acquired (speculatively)
2038    } else {
2039      // We have aborted, update the statistics
2040      if (status & SOFT_ABORT_MASK) {
2041        KMP_INC_STAT(lck, softFailedSpeculations);
2042        // and loop round to retry.
2043      } else {
2044        KMP_INC_STAT(lck, hardFailedSpeculations);
2045        // Give up if we had a hard failure.
2046        break;
2047      }
2048    }
2049  } while (retries--); // Loop while we have retries, and didn't fail hard.
2050
2051  // Either we had a hard failure or we didn't succeed softly after the full
2052  // set of attempts, so increase the badness to back off from speculating.
2053  __kmp_step_badness(lck);
2054  return 0;
2055}
2056
2057// Attempt to acquire the speculative lock, or back off to the non-speculative
2058// one if the speculative lock cannot be acquired.
2059// We can succeed speculatively, non-speculatively, or fail.
2060static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2061  // First try to acquire the lock speculatively
2062  if (__kmp_should_speculate(lck, gtid) &&
2063      __kmp_test_adaptive_lock_only(lck, gtid))
2064    return 1;
2065
2066  // Speculative acquisition failed, so try to acquire it non-speculatively.
2067  // Count the non-speculative acquire attempt
2068  lck->lk.adaptive.acquire_attempts++;
2069
2070  // Use base, non-speculative lock.
2071  if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2072    KMP_INC_STAT(lck, nonSpeculativeAcquires);
2073    return 1; // Lock is acquired (non-speculatively)
2074  } else {
2075    return 0; // Failed to acquire the lock, it's already visibly locked.
2076  }
2077}
2078
2079static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2080                                                kmp_int32 gtid) {
2081  char const *const func = "omp_test_lock";
2082  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2083    KMP_FATAL(LockIsUninitialized, func);
2084  }
2085
2086  int retval = __kmp_test_adaptive_lock(lck, gtid);
2087
2088  if (retval) {
2089    lck->lk.qlk.owner_id = gtid + 1;
2090  }
2091  return retval;
2092}
2093
2094// Block until we can acquire a speculative, adaptive lock. We check whether we
2095// should be trying to speculate. If we should be, we check the real lock to see
2096// if it is free, and, if not, pause without attempting to acquire it until it
2097// is. Then we try the speculative acquire. This means that although we suffer
2098// from lemmings a little (because we can't acquire the lock speculatively
2099// until the queue of waiting threads has cleared), we don't get into a state
2100// where we can never acquire the lock speculatively (because we force the queue
2101// to clear by preventing new arrivals from entering the queue). This does mean
2102// that when we're trying to break lemmings, the lock is no longer fair. However,
2103// OpenMP makes no guarantee that its locks are fair, so this isn't a real
2104// problem.
2105static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2106                                        kmp_int32 gtid) {
2107  if (__kmp_should_speculate(lck, gtid)) {
2108    if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2109      if (__kmp_test_adaptive_lock_only(lck, gtid))
2110        return;
2111      // We tried speculation and failed, so give up.
2112    } else {
2113      // We can't try speculation until the lock is free, so we pause here
2114      // (without suspending on the queuing lock) to allow it to drain, then
2115      // try again. All other threads will also see the same result for
2116      // shouldSpeculate, so will be doing the same if they try to claim the
2117      // lock from now on.
2118      while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2119        KMP_INC_STAT(lck, lemmingYields);
2120        KMP_YIELD(TRUE);
2121      }
2122
2123      if (__kmp_test_adaptive_lock_only(lck, gtid))
2124        return;
2125    }
2126  }
2127
2128  // Speculative acquisition failed, so acquire it non-speculatively.
2129  // Count the non-speculative acquire attempt
2130  lck->lk.adaptive.acquire_attempts++;
2131
2132  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2133  // We have acquired the base lock, so count that.
2134  KMP_INC_STAT(lck, nonSpeculativeAcquires);
2135  ANNOTATE_QUEUING_ACQUIRED(lck);
2136}
2137
2138static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2139                                                    kmp_int32 gtid) {
2140  char const *const func = "omp_set_lock";
2141  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2142    KMP_FATAL(LockIsUninitialized, func);
2143  }
2144  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2145    KMP_FATAL(LockIsAlreadyOwned, func);
2146  }
2147
2148  __kmp_acquire_adaptive_lock(lck, gtid);
2149
2150  lck->lk.qlk.owner_id = gtid + 1;
2151}
2152
2153static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2154                                       kmp_int32 gtid) {
2155  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2156          lck))) { // If the lock doesn't look claimed we must be speculating.
2157    // (Or the user's code is buggy and they're releasing without locking;
2158    // if we had XTEST we'd be able to check that case...)
2159    _xend(); // Exit speculation
2160    __kmp_update_badness_after_success(lck);
2161  } else { // Since the lock *is* visibly locked we're not speculating,
2162    // so should use the underlying lock's release scheme.
2163    __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2164  }
2165  return KMP_LOCK_RELEASED;
2166}
2167
2168static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2169                                                   kmp_int32 gtid) {
2170  char const *const func = "omp_unset_lock";
2171  KMP_MB(); /* in case another processor initialized lock */
2172  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2173    KMP_FATAL(LockIsUninitialized, func);
2174  }
2175  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2176    KMP_FATAL(LockUnsettingFree, func);
2177  }
2178  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2179    KMP_FATAL(LockUnsettingSetByAnother, func);
2180  }
2181  lck->lk.qlk.owner_id = 0;
2182  __kmp_release_adaptive_lock(lck, gtid);
2183  return KMP_LOCK_RELEASED;
2184}
2185
2186static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2187  __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2188  lck->lk.adaptive.badness = 0;
2189  lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2190  lck->lk.adaptive.max_soft_retries =
2191      __kmp_adaptive_backoff_params.max_soft_retries;
2192  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2193#if KMP_DEBUG_ADAPTIVE_LOCKS
2194  __kmp_zero_speculative_stats(&lck->lk.adaptive);
2195#endif
2196  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2197}
2198
2199static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2200#if KMP_DEBUG_ADAPTIVE_LOCKS
2201  __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2202#endif
2203  __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2204  // Nothing needed for the speculative part.
2205}
2206
2207static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2208  char const *const func = "omp_destroy_lock";
2209  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2210    KMP_FATAL(LockIsUninitialized, func);
2211  }
2212  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2213    KMP_FATAL(LockStillOwned, func);
2214  }
2215  __kmp_destroy_adaptive_lock(lck);
2216}
2217
2218#endif // KMP_USE_ADAPTIVE_LOCKS
2219
2220/* ------------------------------------------------------------------------ */
2221/* DRDPA ticket locks                                                */
2222/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2223
2224static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2225  return lck->lk.owner_id - 1;
2226}
2227
2228static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2229  return lck->lk.depth_locked != -1;
2230}
2231
2232__forceinline static int
2233__kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2234  kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2235  kmp_uint64 mask = lck->lk.mask; // atomic load
2236  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2237
2238#ifdef USE_LOCK_PROFILE
2239  if (polls[ticket & mask] != ticket)
2240    __kmp_printf("LOCK CONTENTION: %p\n", lck);
2241/* else __kmp_printf( "." );*/
2242#endif /* USE_LOCK_PROFILE */
2243
2244  // Now spin-wait, but reload the polls pointer and mask, in case the
2245  // polling area has been reconfigured.  Unless it is reconfigured, the
2246  // reloads stay in L1 cache and are cheap.
2247  //
2248  // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2249  // The current implementation of KMP_WAIT doesn't allow for mask
2250  // and poll to be re-read every spin iteration.
2251  kmp_uint32 spins;
2252  KMP_FSYNC_PREPARE(lck);
2253  KMP_INIT_YIELD(spins);
2254  while (polls[ticket & mask] < ticket) { // atomic load
2255    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
2256    // Re-read the mask and the poll pointer from the lock structure.
2257    //
2258    // Make certain that "mask" is read before "polls" !!!
2259    //
2260    // If another thread reconfigures the polling area and updates these
2261    // values, and we get the new value of mask and the old polls pointer, we
2262    // could access memory beyond the end of the old polling area.
2263    mask = lck->lk.mask; // atomic load
2264    polls = lck->lk.polls; // atomic load
2265  }
2266
2267  // Critical section starts here
2268  KMP_FSYNC_ACQUIRED(lck);
2269  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2270                  ticket, lck));
2271  lck->lk.now_serving = ticket; // non-volatile store
2272
2273  // Deallocate a garbage polling area if we know that we are the last
2274  // thread that could possibly access it.
2275  //
2276  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2277  // ticket.
2278  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2279    __kmp_free(lck->lk.old_polls);
2280    lck->lk.old_polls = NULL;
2281    lck->lk.cleanup_ticket = 0;
2282  }
2283
2284  // Check to see if we should reconfigure the polling area.
2285  // If there is still a garbage polling area to be deallocated from a
2286  // previous reconfiguration, let a later thread reconfigure it.
2287  if (lck->lk.old_polls == NULL) {
2288    bool reconfigure = false;
2289    std::atomic<kmp_uint64> *old_polls = polls;
2290    kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2291
2292    if (TCR_4(__kmp_nth) >
2293        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2294      // We are in oversubscription mode.  Contract the polling area
2295      // down to a single location, if that hasn't been done already.
2296      if (num_polls > 1) {
2297        reconfigure = true;
2298        num_polls = TCR_4(lck->lk.num_polls);
2299        mask = 0;
2300        num_polls = 1;
2301        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2302                                                          sizeof(*polls));
2303        polls[0] = ticket;
2304      }
2305    } else {
2306      // We are in under/fully subscribed mode.  Check the number of
2307      // threads waiting on the lock.  The size of the polling area
2308      // should be at least the number of threads waiting.
2309      kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2310      if (num_waiting > num_polls) {
2311        kmp_uint32 old_num_polls = num_polls;
2312        reconfigure = true;
2313        do {
2314          mask = (mask << 1) | 1;
2315          num_polls *= 2;
2316        } while (num_polls <= num_waiting);
2317
2318        // Allocate the new polling area, and copy the relevant portion
2319        // of the old polling area to the new area.  __kmp_allocate()
2320        // zeroes the memory it allocates, and most of the old area is
2321        // just zero padding, so we only copy the release counters.
2322        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2323                                                          sizeof(*polls));
2324        kmp_uint32 i;
2325        for (i = 0; i < old_num_polls; i++) {
2326          polls[i].store(old_polls[i]);
2327        }
2328      }
2329    }
2330
2331    if (reconfigure) {
2332      // Now write the updated fields back to the lock structure.
2333      //
2334      // Make certain that "polls" is written before "mask" !!!
2335      //
2336      // If another thread picks up the new value of mask and the old polls
2337      // pointer, it could access memory beyond the end of the old polling
2338      // area.
2339      //
2340      // On x86, we need memory fences.
2341      KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2342                      "lock %p to %d polls\n",
2343                      ticket, lck, num_polls));
2344
2345      lck->lk.old_polls = old_polls;
2346      lck->lk.polls = polls; // atomic store
2347
2348      KMP_MB();
2349
2350      lck->lk.num_polls = num_polls;
2351      lck->lk.mask = mask; // atomic store
2352
2353      KMP_MB();
2354
2355      // Only after the new polling area and mask have been flushed
2356      // to main memory can we update the cleanup ticket field.
2357      //
2358      // volatile load / non-volatile store
2359      lck->lk.cleanup_ticket = lck->lk.next_ticket;
2360    }
2361  }
2362  return KMP_LOCK_ACQUIRED_FIRST;
2363}
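// Sizing example (illustrative): with num_polls == 4, mask == 0x3 and five
// waiters, the loop above doubles num_polls to 8 and grows mask to 0x7, so
// each waiter ends up spinning on its own slot polls[ticket & 0x7] instead of
// a single shared word -- which is the point of the reconfiguration.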
2364
2365int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2366  int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2367  ANNOTATE_DRDPA_ACQUIRED(lck);
2368  return retval;
2369}
2370
2371static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2372                                                kmp_int32 gtid) {
2373  char const *const func = "omp_set_lock";
2374  if (lck->lk.initialized != lck) {
2375    KMP_FATAL(LockIsUninitialized, func);
2376  }
2377  if (__kmp_is_drdpa_lock_nestable(lck)) {
2378    KMP_FATAL(LockNestableUsedAsSimple, func);
2379  }
2380  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2381    KMP_FATAL(LockIsAlreadyOwned, func);
2382  }
2383
2384  __kmp_acquire_drdpa_lock(lck, gtid);
2385
2386  lck->lk.owner_id = gtid + 1;
2387  return KMP_LOCK_ACQUIRED_FIRST;
2388}
2389
2390int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2391  // First get a ticket, then read the polls pointer and the mask.
2392  // The polls pointer must be read before the mask!!! (See above)
2393  kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2394  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2395  kmp_uint64 mask = lck->lk.mask; // atomic load
2396  if (polls[ticket & mask] == ticket) {
2397    kmp_uint64 next_ticket = ticket + 1;
2398    if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2399                                       next_ticket)) {
2400      KMP_FSYNC_ACQUIRED(lck);
2401      KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2402                      ticket, lck));
2403      lck->lk.now_serving = ticket; // non-volatile store
2404
2405      // Since no threads are waiting, there is no possibility that we would
2406      // want to reconfigure the polling area.  We might have the cleanup ticket
2407      // value (which says that it is now safe to deallocate old_polls), but
2408      // we'll let a later thread which calls __kmp_acquire_lock do that - this
2409      // routine isn't supposed to block, and we would risk blocking if we called
2410      // __kmp_free() to do the deallocation.
2411      return TRUE;
2412    }
2413  }
2414  return FALSE;
2415}
2416
2417static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2418                                             kmp_int32 gtid) {
2419  char const *const func = "omp_test_lock";
2420  if (lck->lk.initialized != lck) {
2421    KMP_FATAL(LockIsUninitialized, func);
2422  }
2423  if (__kmp_is_drdpa_lock_nestable(lck)) {
2424    KMP_FATAL(LockNestableUsedAsSimple, func);
2425  }
2426
2427  int retval = __kmp_test_drdpa_lock(lck, gtid);
2428
2429  if (retval) {
2430    lck->lk.owner_id = gtid + 1;
2431  }
2432  return retval;
2433}
2434
2435int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2436  // Read the ticket value from the lock data struct, then the polls pointer and
2437  // the mask.  The polls pointer must be read before the mask!!! (See above)
2438  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2439  std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2440  kmp_uint64 mask = lck->lk.mask; // atomic load
2441  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2442                  ticket - 1, lck));
2443  KMP_FSYNC_RELEASING(lck);
2444  ANNOTATE_DRDPA_RELEASED(lck);
2445  polls[ticket & mask] = ticket; // atomic store
2446  return KMP_LOCK_RELEASED;
2447}
2448
2449static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2450                                                kmp_int32 gtid) {
2451  char const *const func = "omp_unset_lock";
2452  KMP_MB(); /* in case another processor initialized lock */
2453  if (lck->lk.initialized != lck) {
2454    KMP_FATAL(LockIsUninitialized, func);
2455  }
2456  if (__kmp_is_drdpa_lock_nestable(lck)) {
2457    KMP_FATAL(LockNestableUsedAsSimple, func);
2458  }
2459  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2460    KMP_FATAL(LockUnsettingFree, func);
2461  }
2462  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2463      (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2464    KMP_FATAL(LockUnsettingSetByAnother, func);
2465  }
2466  lck->lk.owner_id = 0;
2467  return __kmp_release_drdpa_lock(lck, gtid);
2468}
2469
2470void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
2471  lck->lk.location = NULL;
2472  lck->lk.mask = 0;
2473  lck->lk.num_polls = 1;
2474  lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2475      lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2476  lck->lk.cleanup_ticket = 0;
2477  lck->lk.old_polls = NULL;
2478  lck->lk.next_ticket = 0;
2479  lck->lk.now_serving = 0;
2480  lck->lk.owner_id = 0; // no thread owns the lock.
2481  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2482  lck->lk.initialized = lck;
2483
2484  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2485}
2486
2487void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
2488  lck->lk.initialized = NULL;
2489  lck->lk.location = NULL;
2490  if (lck->lk.polls.load() != NULL) {
2491    __kmp_free(lck->lk.polls.load());
2492    lck->lk.polls = NULL;
2493  }
2494  if (lck->lk.old_polls != NULL) {
2495    __kmp_free(lck->lk.old_polls);
2496    lck->lk.old_polls = NULL;
2497  }
2498  lck->lk.mask = 0;
2499  lck->lk.num_polls = 0;
2500  lck->lk.cleanup_ticket = 0;
2501  lck->lk.next_ticket = 0;
2502  lck->lk.now_serving = 0;
2503  lck->lk.owner_id = 0;
2504  lck->lk.depth_locked = -1;
2505}
2506
2507static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2508  char const *const func = "omp_destroy_lock";
2509  if (lck->lk.initialized != lck) {
2510    KMP_FATAL(LockIsUninitialized, func);
2511  }
2512  if (__kmp_is_drdpa_lock_nestable(lck)) {
2513    KMP_FATAL(LockNestableUsedAsSimple, func);
2514  }
2515  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2516    KMP_FATAL(LockStillOwned, func);
2517  }
2518  __kmp_destroy_drdpa_lock(lck);
2519}
2520
2521// nested drdpa ticket locks
2522
2523int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2524  KMP_DEBUG_ASSERT(gtid >= 0);
2525
2526  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2527    lck->lk.depth_locked += 1;
2528    return KMP_LOCK_ACQUIRED_NEXT;
2529  } else {
2530    __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2531    ANNOTATE_DRDPA_ACQUIRED(lck);
2532    KMP_MB();
2533    lck->lk.depth_locked = 1;
2534    KMP_MB();
2535    lck->lk.owner_id = gtid + 1;
2536    return KMP_LOCK_ACQUIRED_FIRST;
2537  }
2538}
2539
2540static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2541                                                        kmp_int32 gtid) {
2542  char const *const func = "omp_set_nest_lock";
2543  if (lck->lk.initialized != lck) {
2544    KMP_FATAL(LockIsUninitialized, func);
2545  }
2546  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2547    KMP_FATAL(LockSimpleUsedAsNestable, func);
2548  }
2549  __kmp_acquire_nested_drdpa_lock(lck, gtid);
2550}
2551
2552int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2553  int retval;
2554
2555  KMP_DEBUG_ASSERT(gtid >= 0);
2556
2557  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2558    retval = ++lck->lk.depth_locked;
2559  } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2560    retval = 0;
2561  } else {
2562    KMP_MB();
2563    retval = lck->lk.depth_locked = 1;
2564    KMP_MB();
2565    lck->lk.owner_id = gtid + 1;
2566  }
2567  return retval;
2568}
2569
2570static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2571                                                    kmp_int32 gtid) {
2572  char const *const func = "omp_test_nest_lock";
2573  if (lck->lk.initialized != lck) {
2574    KMP_FATAL(LockIsUninitialized, func);
2575  }
2576  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2577    KMP_FATAL(LockSimpleUsedAsNestable, func);
2578  }
2579  return __kmp_test_nested_drdpa_lock(lck, gtid);
2580}
2581
2582int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2583  KMP_DEBUG_ASSERT(gtid >= 0);
2584
2585  KMP_MB();
2586  if (--(lck->lk.depth_locked) == 0) {
2587    KMP_MB();
2588    lck->lk.owner_id = 0;
2589    __kmp_release_drdpa_lock(lck, gtid);
2590    return KMP_LOCK_RELEASED;
2591  }
2592  return KMP_LOCK_STILL_HELD;
2593}
2594
2595static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2596                                                       kmp_int32 gtid) {
2597  char const *const func = "omp_unset_nest_lock";
2598  KMP_MB(); /* in case another processor initialized lock */
2599  if (lck->lk.initialized != lck) {
2600    KMP_FATAL(LockIsUninitialized, func);
2601  }
2602  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2603    KMP_FATAL(LockSimpleUsedAsNestable, func);
2604  }
2605  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2606    KMP_FATAL(LockUnsettingFree, func);
2607  }
2608  if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2609    KMP_FATAL(LockUnsettingSetByAnother, func);
2610  }
2611  return __kmp_release_nested_drdpa_lock(lck, gtid);
2612}
2613
2614void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2615  __kmp_init_drdpa_lock(lck);
2616  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2617}
2618
2619void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2620  __kmp_destroy_drdpa_lock(lck);
2621  lck->lk.depth_locked = 0;
2622}
2623
2624static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2625  char const *const func = "omp_destroy_nest_lock";
2626  if (lck->lk.initialized != lck) {
2627    KMP_FATAL(LockIsUninitialized, func);
2628  }
2629  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2630    KMP_FATAL(LockSimpleUsedAsNestable, func);
2631  }
2632  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2633    KMP_FATAL(LockStillOwned, func);
2634  }
2635  __kmp_destroy_nested_drdpa_lock(lck);
2636}
2637
2638// access functions to fields which don't exist for all lock kinds.
2639
2640static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
2641  return lck->lk.location;
2642}
2643
2644static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
2645                                          const ident_t *loc) {
2646  lck->lk.location = loc;
2647}
2648
2649static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
2650  return lck->lk.flags;
2651}
2652
2653static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
2654                                       kmp_lock_flags_t flags) {
2655  lck->lk.flags = flags;
2656}
2657
2658// Time stamp counter
2659#if KMP_ARCH_X86 || KMP_ARCH_X86_64
2660#define __kmp_tsc() __kmp_hardware_timestamp()
2661// Runtime's default backoff parameters
2662kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2663#else
2664// Use nanoseconds for other platforms
2665extern kmp_uint64 __kmp_now_nsec();
2666kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2667#define __kmp_tsc() __kmp_now_nsec()
2668#endif
2669
2670// A useful predicate for dealing with timestamps that may wrap.
2671// Is a before b? Since the timestamps may wrap, this is asking whether it's
2672// shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2673// Times where going clockwise is less distance than going anti-clockwise
2674// are in the future, others are in the past. E.g., a = MAX-1, b = MAX+1 (=0):
2675// then a > b (true) does not mean a reached b; whereas signed(a) = -2,
2676// signed(b) = 0 captures the actual difference.
2677static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2678  return ((kmp_int64)b - (kmp_int64)a) > 0;
2679}
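// Worked example (illustrative): for 64-bit counters, a = 2^64 - 2 and b = 1
// give (kmp_int64)b - (kmp_int64)a == 3 > 0, so before(a, b) is true even
// though a > b as unsigned values -- exactly the wrap-friendly ordering
// described above.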
2680
2681// Truncated binary exponential backoff function
2682void __kmp_spin_backoff(kmp_backoff_t *boff) {
2683  // We could flatten this loop, but making it a nested loop gives better results.
2684  kmp_uint32 i;
2685  for (i = boff->step; i > 0; i--) {
2686    kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2687    do {
2688      KMP_CPU_PAUSE();
2689    } while (before(__kmp_tsc(), goal));
2690  }
2691  boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2692}
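// Progression example (illustrative, assuming the initializer above reads as
// {step, max_backoff, min_tick}): each call spins for boff->step rounds of at
// least min_tick timestamp ticks, and the final update walks step through
// 1, 3, 7, 15, ... until the "& (max_backoff - 1)" clamp pins it at 4095 with
// the x86 defaults, i.e. a truncated binary exponential backoff.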
2693
2694#if KMP_USE_DYNAMIC_LOCK
2695
2696// Direct lock initializer. It simply writes a tag to the low 8 bits of the
2697// lock word.
2698static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2699                                   kmp_dyna_lockseq_t seq) {
2700  TCW_4(*lck, KMP_GET_D_TAG(seq));
2701  KA_TRACE(
2702      20,
2703      ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2704}
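// Note (assumed encoding, consistent with the jump tables and the "indirect
// lock word must be even" comment later in this file): direct locks keep an
// odd tag in the lock word, while indirect locks store an even value (the lock
// index shifted left by one), so the low bit alone tells the dispatch code
// which kind of lock it is looking at.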
2705
2706#if KMP_USE_TSX
2707
2708// HLE lock functions - imported from the testbed runtime.
2709#define HLE_ACQUIRE ".byte 0xf2;"
2710#define HLE_RELEASE ".byte 0xf3;"
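// 0xf2 and 0xf3 are the legacy REPNE/REP prefixes that HLE reuses as XACQUIRE
// and XRELEASE; prepending them to the xchg/mov below lets capable hardware
// elide the lock, while older processors simply ignore the prefixes.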
2711
2712static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2713  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2714  return v;
2715}
2716
2717static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2718
2719static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2720  TCW_4(*lck, 0);
2721}
2722
2723static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2724  // Use gtid for KMP_LOCK_BUSY if necessary
2725  if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2726    int delay = 1;
2727    do {
2728      while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2729        for (int i = delay; i != 0; --i)
2730          KMP_CPU_PAUSE();
2731        delay = ((delay << 1) | 1) & 7;
2732      }
2733    } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2734  }
2735}
2736
2737static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2738                                               kmp_int32 gtid) {
2739  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2740}
2741
2742static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2743  __asm__ volatile(HLE_RELEASE "movl %1,%0"
2744                   : "=m"(*lck)
2745                   : "r"(KMP_LOCK_FREE(hle))
2746                   : "memory");
2747  return KMP_LOCK_RELEASED;
2748}
2749
2750static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2751                                              kmp_int32 gtid) {
2752  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2753}
2754
2755static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2756  return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2757}
2758
2759static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2760                                           kmp_int32 gtid) {
2761  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2762}
2763
2764static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) {
2765  __kmp_init_queuing_lock(lck);
2766}
2767
2768static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) {
2769  __kmp_destroy_queuing_lock(lck);
2770}
2771
2772static void __kmp_destroy_rtm_lock_with_checks(kmp_queuing_lock_t *lck) {
2773  __kmp_destroy_queuing_lock_with_checks(lck);
2774}
2775
2776static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
2777  unsigned retries = 3, status;
2778  do {
2779    status = _xbegin();
2780    if (status == _XBEGIN_STARTED) {
2781      if (__kmp_is_unlocked_queuing_lock(lck))
2782        return;
2783      _xabort(0xff);
2784    }
2785    if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2786      // Wait until lock becomes free
2787      while (!__kmp_is_unlocked_queuing_lock(lck)) {
2788        KMP_YIELD(TRUE);
2789      }
2790    } else if (!(status & _XABORT_RETRY))
2791      break;
2792  } while (retries--);
2793
2794  // Fall-back non-speculative lock (xchg)
2795  __kmp_acquire_queuing_lock(lck, gtid);
2796}
2797
2798static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
2799                                               kmp_int32 gtid) {
2800  __kmp_acquire_rtm_lock(lck, gtid);
2801}
2802
2803static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
2804  if (__kmp_is_unlocked_queuing_lock(lck)) {
2805    // Releasing from speculation
2806    _xend();
2807  } else {
2808    // Releasing from a real lock
2809    __kmp_release_queuing_lock(lck, gtid);
2810  }
2811  return KMP_LOCK_RELEASED;
2812}
2813
2814static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
2815                                              kmp_int32 gtid) {
2816  return __kmp_release_rtm_lock(lck, gtid);
2817}
2818
2819static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
2820  unsigned retries = 3, status;
2821  do {
2822    status = _xbegin();
2823    if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2824      return 1;
2825    }
2826    if (!(status & _XABORT_RETRY))
2827      break;
2828  } while (retries--);
2829
2830  return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0;
2831}
2832
2833static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
2834                                           kmp_int32 gtid) {
2835  return __kmp_test_rtm_lock(lck, gtid);
2836}
2837
2838#endif // KMP_USE_TSX
2839
2840// Entry functions for indirect locks (first element of direct lock jump tables)
2841static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2842                                     kmp_dyna_lockseq_t tag);
2843static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2844static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2845static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2846static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2847static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2848                                               kmp_int32);
2849static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2850                                                 kmp_int32);
2851static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2852                                                kmp_int32);
2853
2854// Lock function definitions for the union parameter type
2855#define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2856
2857#define expand1(lk, op)                                                        \
2858  static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) {               \
2859    __kmp_##op##_##lk##_##lock(&lock->lk);                                     \
2860  }
2861#define expand2(lk, op)                                                        \
2862  static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock,                  \
2863                                        kmp_int32 gtid) {                      \
2864    return __kmp_##op##_##lk##_##lock(&lock->lk, gtid);                        \
2865  }
2866#define expand3(lk, op)                                                        \
2867  static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock,              \
2868                                            kmp_lock_flags_t flags) {          \
2869    __kmp_set_##lk##_lock_flags(&lock->lk, flags);                             \
2870  }
2871#define expand4(lk, op)                                                        \
2872  static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock,           \
2873                                               const ident_t *loc) {           \
2874    __kmp_set_##lk##_lock_location(&lock->lk, loc);                            \
2875  }
2876
2877KMP_FOREACH_LOCK_KIND(expand1, init)
2878KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2879KMP_FOREACH_LOCK_KIND(expand1, destroy)
2880KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2881KMP_FOREACH_LOCK_KIND(expand2, acquire)
2882KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2883KMP_FOREACH_LOCK_KIND(expand2, release)
2884KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2885KMP_FOREACH_LOCK_KIND(expand2, test)
2886KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2887KMP_FOREACH_LOCK_KIND(expand3, )
2888KMP_FOREACH_LOCK_KIND(expand4, )
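// For instance, expand2(queuing, acquire) above generates (roughly) the
// following forwarding wrapper, letting the union-typed kmp_user_lock_p tables
// below reuse the strongly typed implementations:
//
//   static int __kmp_acquire_queuing_lock(kmp_user_lock_p lock,
//                                         kmp_int32 gtid) {
//     return __kmp_acquire_queuing_lock(&lock->queuing, gtid);
//   }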
2889
2890#undef expand1
2891#undef expand2
2892#undef expand3
2893#undef expand4
2894
2895// Jump tables for the direct lock functions
2896// Only fill in the odd entries; that avoids the need to shift out the low bit.
2897
2898// init functions
2899#define expand(l, op) 0, __kmp_init_direct_lock,
2900void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2901    __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2902#undef expand
2903
2904// destroy functions
2905#define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
2906static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
2907    __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2908#undef expand
2909#define expand(l, op)                                                          \
2910  0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
2911static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
2912    __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2913#undef expand
2914
2915// set/acquire functions
2916#define expand(l, op)                                                          \
2917  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
2918static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
2919    __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
2920#undef expand
2921#define expand(l, op)                                                          \
2922  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
2923static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
2924    __kmp_set_indirect_lock_with_checks, 0,
2925    KMP_FOREACH_D_LOCK(expand, acquire)};
2926#undef expand
2927
2928// unset/release and test functions
2929#define expand(l, op)                                                          \
2930  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
2931static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
2932    __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
2933static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
2934    __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
2935#undef expand
2936#define expand(l, op)                                                          \
2937  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
2938static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
2939    __kmp_unset_indirect_lock_with_checks, 0,
2940    KMP_FOREACH_D_LOCK(expand, release)};
2941static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
2942    __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
2943#undef expand
2944
2945// Exposes only one set of jump tables (*lock or *lock_with_checks).
2946void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
2947int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
2948int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
2949int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
2950
2951// Jump tables for the indirect lock functions
2952#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
2953void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
2954    KMP_FOREACH_I_LOCK(expand, init)};
2955#undef expand
2956
2957#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
2958static void (*indirect_destroy[])(kmp_user_lock_p) = {
2959    KMP_FOREACH_I_LOCK(expand, destroy)};
2960#undef expand
2961#define expand(l, op)                                                          \
2962  (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
2963static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
2964    KMP_FOREACH_I_LOCK(expand, destroy)};
2965#undef expand
2966
2967// set/acquire functions
2968#define expand(l, op)                                                          \
2969  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
2970static int (*indirect_set[])(kmp_user_lock_p,
2971                             kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
2972#undef expand
2973#define expand(l, op)                                                          \
2974  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
2975static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
2976    KMP_FOREACH_I_LOCK(expand, acquire)};
2977#undef expand
2978
2979// unset/release and test functions
2980#define expand(l, op)                                                          \
2981  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
2982static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
2983    KMP_FOREACH_I_LOCK(expand, release)};
2984static int (*indirect_test[])(kmp_user_lock_p,
2985                              kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
2986#undef expand
2987#define expand(l, op)                                                          \
2988  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
2989static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
2990    KMP_FOREACH_I_LOCK(expand, release)};
2991static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
2992    KMP_FOREACH_I_LOCK(expand, test)};
2993#undef expand
2994
2995// Exposes only one set of jump tables (*lock or *lock_with_checks).
2996void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
2997int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
2998int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
2999int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3000
3001// Lock index table.
3002kmp_indirect_lock_table_t __kmp_i_lock_table;
3003
3004// Size of each indirect lock type, indexed by indirect lock tag.
3005static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3006
3007// Jump tables for lock accessor/modifier.
3008void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3009                                                     const ident_t *) = {0};
3010void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3011                                                  kmp_lock_flags_t) = {0};
3012const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3013    kmp_user_lock_p) = {0};
3014kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3015    kmp_user_lock_p) = {0};
3016
3017// Use different lock pools for different lock types.
3018static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3019
3020// User lock allocator for dynamically dispatched indirect locks. Every entry of
3021// the indirect lock table holds the address and type of the allocated indirect
3022// lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3023// full. A destroyed indirect lock object is returned to the reusable pool of
3024// locks, unique to each lock type.
3025kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3026                                                  kmp_int32 gtid,
3027                                                  kmp_indirect_locktag_t tag) {
3028  kmp_indirect_lock_t *lck;
3029  kmp_lock_index_t idx;
3030
3031  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3032
3033  if (__kmp_indirect_lock_pool[tag] != NULL) {
3034    // Reuse the allocated and destroyed lock object
3035    lck = __kmp_indirect_lock_pool[tag];
3036    if (OMP_LOCK_T_SIZE < sizeof(void *))
3037      idx = lck->lock->pool.index;
3038    __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3039    KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3040                  lck));
3041  } else {
3042    idx = __kmp_i_lock_table.next;
3043    // Check capacity and double the size if it is full
3044    if (idx == __kmp_i_lock_table.size) {
3045      // Double up the space for block pointers
3046      int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
3047      kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
3048          2 * row * sizeof(kmp_indirect_lock_t *));
3049      KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
3050                 row * sizeof(kmp_indirect_lock_t *));
3051      kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3052      __kmp_i_lock_table.table = new_table;
3053      __kmp_free(old_table);
3054      // Allocate new objects in the new blocks
3055      for (int i = row; i < 2 * row; ++i)
3056        *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
3057            KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3058      __kmp_i_lock_table.size = 2 * idx;
3059    }
3060    __kmp_i_lock_table.next++;
3061    lck = KMP_GET_I_LOCK(idx);
3062    // Allocate a new base lock object
3063    lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3064    KA_TRACE(20,
3065             ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3066  }
3067
3068  __kmp_release_lock(&__kmp_global_lock, gtid);
3069
3070  lck->type = tag;
3071
3072  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3073    *((kmp_lock_index_t *)user_lock) = idx
3074                                       << 1; // indirect lock word must be even
3075  } else {
3076    *((kmp_indirect_lock_t **)user_lock) = lck;
3077  }
3078
3079  return lck;
3080}
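
// Illustrative sketch, not part of the library source (kept under #if 0): the
// allocator above keeps a two-level table -- an array of row pointers, each
// row holding KMP_I_LOCK_CHUNK lock objects. Only the row-pointer array is
// reallocated on growth, so existing lock objects never move. A standalone
// sketch of that doubling scheme follows; CHUNK, entry_t and the sketch_*
// names are hypothetical.
#if 0
#include <stdlib.h>
#include <string.h>

enum { CHUNK = 8 };
struct entry_t { void *payload; };

struct sketch_table_t {
  entry_t **rows;  // array of row pointers (this is what gets reallocated)
  unsigned size;   // total capacity, in entries
  unsigned next;   // next unused entry index
};

static entry_t *sketch_table_push(sketch_table_t *t) {
  if (t->next == t->size) {
    unsigned old_rows = t->size / CHUNK;
    entry_t **new_rows =
        (entry_t **)malloc(2 * old_rows * sizeof(entry_t *));
    memcpy(new_rows, t->rows, old_rows * sizeof(entry_t *));
    for (unsigned i = old_rows; i < 2 * old_rows; ++i)
      new_rows[i] = (entry_t *)malloc(CHUNK * sizeof(entry_t));
    free(t->rows); // safe here: the entries themselves were not moved
    t->rows = new_rows;
    t->size *= 2;
  }
  unsigned idx = t->next++;
  return &t->rows[idx / CHUNK][idx % CHUNK];
}
#endif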
3081
3082// User lock lookup for dynamically dispatched locks.
3083static __forceinline kmp_indirect_lock_t *
3084__kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3085  if (__kmp_env_consistency_check) {
3086    kmp_indirect_lock_t *lck = NULL;
3087    if (user_lock == NULL) {
3088      KMP_FATAL(LockIsUninitialized, func);
3089    }
3090    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3091      kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3092      if (idx >= __kmp_i_lock_table.size) {
3093        KMP_FATAL(LockIsUninitialized, func);
3094      }
3095      lck = KMP_GET_I_LOCK(idx);
3096    } else {
3097      lck = *((kmp_indirect_lock_t **)user_lock);
3098    }
3099    if (lck == NULL) {
3100      KMP_FATAL(LockIsUninitialized, func);
3101    }
3102    return lck;
3103  } else {
3104    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3105      return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3106    } else {
3107      return *((kmp_indirect_lock_t **)user_lock);
3108    }
3109  }
3110}
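
// Illustrative sketch, not part of the library source (kept under #if 0):
// when the user's lock word is narrower than a pointer, the allocator above
// stores (index << 1) in it -- hence the "must be even" comment -- and the
// lookup shifts it back; otherwise the word holds the kmp_indirect_lock_t
// pointer directly. A minimal sketch of that encoding follows (sketch_* names
// are hypothetical; treating odd values as reserved for non-indirect lock
// words is an assumption drawn from the "must be even" comment).
#if 0
#include <stdint.h>

typedef uint32_t sketch_lock_word_t;

static inline sketch_lock_word_t sketch_encode_index(uint32_t idx) {
  return idx << 1; // even value: "this word is a table index"
}
static inline uint32_t sketch_decode_index(sketch_lock_word_t w) {
  return w >> 1;
}
static inline bool sketch_word_is_index(sketch_lock_word_t w) {
  return (w & 1) == 0;
}
#endif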
3111
3112static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3113                                     kmp_dyna_lockseq_t seq) {
3114#if KMP_USE_ADAPTIVE_LOCKS
3115  if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3116    KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3117    seq = lockseq_queuing;
3118  }
3119#endif
3120#if KMP_USE_TSX
3121  if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
3122    seq = lockseq_queuing;
3123  }
3124#endif
3125  kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3126  kmp_indirect_lock_t *l =
3127      __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3128  KMP_I_LOCK_FUNC(l, init)(l->lock);
3129  KA_TRACE(
3130      20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3131           seq));
3132}
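
// Illustrative sketch, not part of the library source (kept under #if 0): the
// initializer above quietly downgrades lock kinds that need TSX/RTM hardware
// support to a plain queuing lock when the CPU lacks the feature. A tiny
// standalone sketch of that fallback follows (sketch_* names are
// hypothetical).
#if 0
enum sketch_seq_t { SKETCH_SEQ_QUEUING, SKETCH_SEQ_ADAPTIVE, SKETCH_SEQ_RTM };

static sketch_seq_t sketch_resolve_seq(sketch_seq_t requested, bool has_rtm) {
  if ((requested == SKETCH_SEQ_ADAPTIVE || requested == SKETCH_SEQ_RTM) &&
      !has_rtm)
    return SKETCH_SEQ_QUEUING; // degrade to a kind that is always available
  return requested;
}
#endif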
3133
3134static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3135  kmp_uint32 gtid = __kmp_entry_gtid();
3136  kmp_indirect_lock_t *l =
3137      __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3138  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3139  kmp_indirect_locktag_t tag = l->type;
3140
3141  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3142
3143  // Use the base lock's space to keep the pool chain.
3144  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3145  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3146    l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3147  }
3148  __kmp_indirect_lock_pool[tag] = l;
3149
3150  __kmp_release_lock(&__kmp_global_lock, gtid);
3151}
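
// Illustrative sketch, not part of the library source (kept under #if 0):
// destroy above does not free the object; it pushes it onto a per-type LIFO
// free list threaded through the base lock's pool.next field, and the
// allocator pops from that list first. A minimal sketch of the push/pop
// follows (node_t and sketch_* names are hypothetical; the real code does
// this while holding __kmp_global_lock).
#if 0
#include <stddef.h>

struct node_t { node_t *next; };

static node_t *sketch_pool_head[4]; // one free-list head per lock type

static void sketch_pool_push(int tag, node_t *n) {
  n->next = sketch_pool_head[tag];
  sketch_pool_head[tag] = n;
}

static node_t *sketch_pool_pop(int tag) {
  node_t *n = sketch_pool_head[tag];
  if (n != NULL)
    sketch_pool_head[tag] = n->next;
  return n;
}
#endif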
3152
3153static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3154  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3155  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3156}
3157
3158static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3159  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3160  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3161}
3162
3163static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3164  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3165  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3166}
3167
3168static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3169                                               kmp_int32 gtid) {
3170  kmp_indirect_lock_t *l =
3171      __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3172  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3173}
3174
3175static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3176                                                 kmp_int32 gtid) {
3177  kmp_indirect_lock_t *l =
3178      __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3179  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3180}
3181
3182static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3183                                                kmp_int32 gtid) {
3184  kmp_indirect_lock_t *l =
3185      __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3186  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3187}
3188
3189kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3190
3191// This is used only in kmp_error.cpp when consistency checking is on.
3192kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
3193  switch (seq) {
3194  case lockseq_tas:
3195  case lockseq_nested_tas:
3196    return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3197#if KMP_USE_FUTEX
3198  case lockseq_futex:
3199  case lockseq_nested_futex:
3200    return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3201#endif
3202  case lockseq_ticket:
3203  case lockseq_nested_ticket:
3204    return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3205  case lockseq_queuing:
3206  case lockseq_nested_queuing:
3207#if KMP_USE_ADAPTIVE_LOCKS
3208  case lockseq_adaptive:
3209#endif
3210    return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3211  case lockseq_drdpa:
3212  case lockseq_nested_drdpa:
3213    return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3214  default:
3215    return 0;
3216  }
3217}
3218
3219// Initializes data for dynamic user locks.
3220void __kmp_init_dynamic_user_locks() {
3221  // Initialize jump table for the lock functions
3222  if (__kmp_env_consistency_check) {
3223    __kmp_direct_set = direct_set_check;
3224    __kmp_direct_unset = direct_unset_check;
3225    __kmp_direct_test = direct_test_check;
3226    __kmp_direct_destroy = direct_destroy_check;
3227    __kmp_indirect_set = indirect_set_check;
3228    __kmp_indirect_unset = indirect_unset_check;
3229    __kmp_indirect_test = indirect_test_check;
3230    __kmp_indirect_destroy = indirect_destroy_check;
3231  } else {
3232    __kmp_direct_set = direct_set;
3233    __kmp_direct_unset = direct_unset;
3234    __kmp_direct_test = direct_test;
3235    __kmp_direct_destroy = direct_destroy;
3236    __kmp_indirect_set = indirect_set;
3237    __kmp_indirect_unset = indirect_unset;
3238    __kmp_indirect_test = indirect_test;
3239    __kmp_indirect_destroy = indirect_destroy;
3240  }
3241  // If the user locks have already been initialized, then return. Allow the
3242  // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
3243  // new lock tables if they have already been allocated.
3244  if (__kmp_init_user_locks)
3245    return;
3246
3247  // Initialize lock index table
3248  __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3249  __kmp_i_lock_table.table =
3250      (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3251  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
3252      KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3253  __kmp_i_lock_table.next = 0;
3254
3255  // Indirect lock size
3256  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3257  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3258#if KMP_USE_ADAPTIVE_LOCKS
3259  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3260#endif
3261  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3262#if KMP_USE_TSX
3263  __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
3264#endif
3265  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3266#if KMP_USE_FUTEX
3267  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3268#endif
3269  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3270  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3271  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3272
3273// Initialize lock accessor/modifier jump tables
3274#define fill_jumps(table, expand, sep)                                         \
3275  {                                                                            \
3276    table[locktag##sep##ticket] = expand(ticket);                              \
3277    table[locktag##sep##queuing] = expand(queuing);                            \
3278    table[locktag##sep##drdpa] = expand(drdpa);                                \
3279  }
3280
3281#if KMP_USE_ADAPTIVE_LOCKS
3282#define fill_table(table, expand)                                              \
3283  {                                                                            \
3284    fill_jumps(table, expand, _);                                              \
3285    table[locktag_adaptive] = expand(queuing);                                 \
3286    fill_jumps(table, expand, _nested_);                                       \
3287  }
3288#else
3289#define fill_table(table, expand)                                              \
3290  {                                                                            \
3291    fill_jumps(table, expand, _);                                              \
3292    fill_jumps(table, expand, _nested_);                                       \
3293  }
3294#endif // KMP_USE_ADAPTIVE_LOCKS
3295
3296#define expand(l)                                                              \
3297  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3298  fill_table(__kmp_indirect_set_location, expand);
3299#undef expand
3300#define expand(l)                                                              \
3301  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3302  fill_table(__kmp_indirect_set_flags, expand);
3303#undef expand
3304#define expand(l)                                                              \
3305  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3306  fill_table(__kmp_indirect_get_location, expand);
3307#undef expand
3308#define expand(l)                                                              \
3309  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3310  fill_table(__kmp_indirect_get_flags, expand);
3311#undef expand
3312
3313  __kmp_init_user_locks = TRUE;
3314}
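
// Illustrative sketch, not part of the library source (kept under #if 0):
// __kmp_init_dynamic_user_locks selects between the plain and *_with_checks
// jump tables exactly once, so every later lock operation is a single
// indirect call through the chosen table. A condensed sketch of that pattern
// follows (sketch_* names are hypothetical).
#if 0
typedef int (*sketch_op_t)(void *lock, int gtid);

static int sketch_fast_set(void *lock, int gtid) { return 1; }
static int sketch_checked_set(void *lock, int gtid) { return 1; }

static sketch_op_t sketch_set_table[] = {sketch_fast_set};
static sketch_op_t sketch_set_table_check[] = {sketch_checked_set};

static sketch_op_t *sketch_active_set_table = 0;

static void sketch_init_tables(bool consistency_check) {
  sketch_active_set_table =
      consistency_check ? sketch_set_table_check : sketch_set_table;
}

static int sketch_set_lock(int kind, void *lock, int gtid) {
  return sketch_active_set_table[kind](lock, gtid); // single indirect call
}
#endif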
3315
3316// Clean up the lock table.
3317void __kmp_cleanup_indirect_user_locks() {
3318  kmp_lock_index_t i;
3319  int k;
3320
3321  // Clean up locks in the pools first (they were already destroyed before going
3322  // into the pools).
3323  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3324    kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3325    while (l != NULL) {
3326      kmp_indirect_lock_t *ll = l;
3327      l = (kmp_indirect_lock_t *)l->lock->pool.next;
3328      KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3329                    ll));
3330      __kmp_free(ll->lock);
3331      ll->lock = NULL;
3332    }
3333    __kmp_indirect_lock_pool[k] = NULL;
3334  }
3335  // Clean up the remaining undestroyed locks.
3336  for (i = 0; i < __kmp_i_lock_table.next; i++) {
3337    kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3338    if (l->lock != NULL) {
3339      // Locks not destroyed explicitly need to be destroyed here.
3340      KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3341      KA_TRACE(
3342          20,
3343          ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
3344           l));
3345      __kmp_free(l->lock);
3346    }
3347  }
3348  // Free the table
3349  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3350    __kmp_free(__kmp_i_lock_table.table[i]);
3351  __kmp_free(__kmp_i_lock_table.table);
3352
3353  __kmp_init_user_locks = FALSE;
3354}
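
// Illustrative sketch, not part of the library source (kept under #if 0): the
// cleanup above runs in two passes -- first it frees everything sitting in
// the free pools and clears the corresponding entry's lock pointer, then it
// scans the whole table and destroys only entries whose pointer is still
// non-NULL, so nothing is freed twice. A simplified standalone sketch follows
// (sketch_* names are hypothetical, and the free-list link is kept in the
// handle here for brevity).
#if 0
#include <stdlib.h>

struct sketch_handle_t {
  void *payload;         // underlying lock object, or NULL once freed
  sketch_handle_t *next; // free-pool link (meaningful only when pooled)
};

static void sketch_cleanup(sketch_handle_t **table, unsigned used,
                           sketch_handle_t *pool) {
  // Pass 1: pooled entries were already destroyed; just release their memory.
  for (sketch_handle_t *h = pool; h != NULL; h = h->next) {
    free(h->payload);
    h->payload = NULL;
  }
  // Pass 2: whatever is still live must be destroyed and freed here.
  for (unsigned i = 0; i < used; ++i) {
    if (table[i]->payload != NULL)
      free(table[i]->payload);
  }
}
#endif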
3355
3356enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3357int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3358
3359#else // KMP_USE_DYNAMIC_LOCK
3360
3361static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3362  __kmp_init_tas_lock(lck);
3363}
3364
3365static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3366  __kmp_init_nested_tas_lock(lck);
3367}
3368
3369#if KMP_USE_FUTEX
3370static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3371  __kmp_init_futex_lock(lck);
3372}
3373
3374static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3375  __kmp_init_nested_futex_lock(lck);
3376}
3377#endif
3378
3379static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
3380  return lck == lck->lk.self;
3381}
3382
3383static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3384  __kmp_init_ticket_lock(lck);
3385}
3386
3387static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3388  __kmp_init_nested_ticket_lock(lck);
3389}
3390
3391static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
3392  return lck == lck->lk.initialized;
3393}
3394
3395static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3396  __kmp_init_queuing_lock(lck);
3397}
3398
3399static void
3400__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3401  __kmp_init_nested_queuing_lock(lck);
3402}
3403
3404#if KMP_USE_ADAPTIVE_LOCKS
3405static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
3406  __kmp_init_adaptive_lock(lck);
3407}
3408#endif
3409
3410static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
3411  return lck == lck->lk.initialized;
3412}
3413
3414static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3415  __kmp_init_drdpa_lock(lck);
3416}
3417
3418static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3419  __kmp_init_nested_drdpa_lock(lck);
3420}
3421
3422/* user locks
3423 * They are implemented as a table of function pointers which are set to the
3424 * lock functions of the appropriate kind, once that has been determined. */
3425
3426enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3427
3428size_t __kmp_base_user_lock_size = 0;
3429size_t __kmp_user_lock_size = 0;
3430
3431kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
3432int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
3433                                            kmp_int32 gtid) = NULL;
3434
3435int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
3436                                         kmp_int32 gtid) = NULL;
3437int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
3438                                            kmp_int32 gtid) = NULL;
3439void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3440void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
3441void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3442int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3443                                                   kmp_int32 gtid) = NULL;
3444
3445int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3446                                                kmp_int32 gtid) = NULL;
3447int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3448                                                   kmp_int32 gtid) = NULL;
3449void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3450void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3451
3452int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
3453const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
3454void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
3455                                      const ident_t *loc) = NULL;
3456kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
3457void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
3458                                   kmp_lock_flags_t flags) = NULL;
3459
3460void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
3461  switch (user_lock_kind) {
3462  case lk_default:
3463  default:
3464    KMP_ASSERT(0);
3465
3466  case lk_tas: {
3467    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
3468    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);
3469
3470    __kmp_get_user_lock_owner_ =
3471        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);
3472
3473    if (__kmp_env_consistency_check) {
3474      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3475      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3476    } else {
3477      KMP_BIND_USER_LOCK(tas);
3478      KMP_BIND_NESTED_USER_LOCK(tas);
3479    }
3480
3481    __kmp_destroy_user_lock_ =
3482        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);
3483
3484    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3485
3486    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3487
3488    __kmp_set_user_lock_location_ =
3489        (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3490
3491    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3492
3493    __kmp_set_user_lock_flags_ =
3494        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3495  } break;
3496
3497#if KMP_USE_FUTEX
3498
3499  case lk_futex: {
3500    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
3501    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);
3502
3503    __kmp_get_user_lock_owner_ =
3504        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);
3505
3506    if (__kmp_env_consistency_check) {
3507      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3508      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3509    } else {
3510      KMP_BIND_USER_LOCK(futex);
3511      KMP_BIND_NESTED_USER_LOCK(futex);
3512    }
3513
3514    __kmp_destroy_user_lock_ =
3515        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);
3516
3517    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3518
3519    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3520
3521    __kmp_set_user_lock_location_ =
3522        (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3523
3524    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3525
3526    __kmp_set_user_lock_flags_ =
3527        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3528  } break;
3529
3530#endif // KMP_USE_FUTEX
3531
3532  case lk_ticket: {
3533    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
3534    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);
3535
3536    __kmp_get_user_lock_owner_ =
3537        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);
3538
3539    if (__kmp_env_consistency_check) {
3540      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3541      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3542    } else {
3543      KMP_BIND_USER_LOCK(ticket);
3544      KMP_BIND_NESTED_USER_LOCK(ticket);
3545    }
3546
3547    __kmp_destroy_user_lock_ =
3548        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);
3549
3550    __kmp_is_user_lock_initialized_ =
3551        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);
3552
3553    __kmp_get_user_lock_location_ =
3554        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);
3555
3556    __kmp_set_user_lock_location_ = (void (*)(
3557        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);
3558
3559    __kmp_get_user_lock_flags_ =
3560        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);
3561
3562    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3563        &__kmp_set_ticket_lock_flags);
3564  } break;
3565
3566  case lk_queuing: {
3567    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
3568    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);
3569
3570    __kmp_get_user_lock_owner_ =
3571        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3572
3573    if (__kmp_env_consistency_check) {
3574      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3575      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3576    } else {
3577      KMP_BIND_USER_LOCK(queuing);
3578      KMP_BIND_NESTED_USER_LOCK(queuing);
3579    }
3580
3581    __kmp_destroy_user_lock_ =
3582        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);
3583
3584    __kmp_is_user_lock_initialized_ =
3585        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3586
3587    __kmp_get_user_lock_location_ =
3588        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3589
3590    __kmp_set_user_lock_location_ = (void (*)(
3591        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3592
3593    __kmp_get_user_lock_flags_ =
3594        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3595
3596    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3597        &__kmp_set_queuing_lock_flags);
3598  } break;
3599
3600#if KMP_USE_ADAPTIVE_LOCKS
3601  case lk_adaptive: {
3602    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
3603    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);
3604
3605    __kmp_get_user_lock_owner_ =
3606        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3607
3608    if (__kmp_env_consistency_check) {
3609      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3610    } else {
3611      KMP_BIND_USER_LOCK(adaptive);
3612    }
3613
3614    __kmp_destroy_user_lock_ =
3615        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);
3616
3617    __kmp_is_user_lock_initialized_ =
3618        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3619
3620    __kmp_get_user_lock_location_ =
3621        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3622
3623    __kmp_set_user_lock_location_ = (void (*)(
3624        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3625
3626    __kmp_get_user_lock_flags_ =
3627        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3628
3629    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3630        &__kmp_set_queuing_lock_flags);
3631
3632  } break;
3633#endif // KMP_USE_ADAPTIVE_LOCKS
3634
3635  case lk_drdpa: {
3636    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
3637    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);
3638
3639    __kmp_get_user_lock_owner_ =
3640        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);
3641
3642    if (__kmp_env_consistency_check) {
3643      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3644      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3645    } else {
3646      KMP_BIND_USER_LOCK(drdpa);
3647      KMP_BIND_NESTED_USER_LOCK(drdpa);
3648    }
3649
3650    __kmp_destroy_user_lock_ =
3651        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);
3652
3653    __kmp_is_user_lock_initialized_ =
3654        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);
3655
3656    __kmp_get_user_lock_location_ =
3657        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);
3658
3659    __kmp_set_user_lock_location_ = (void (*)(
3660        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);
3661
3662    __kmp_get_user_lock_flags_ =
3663        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);
3664
3665    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3666        &__kmp_set_drdpa_lock_flags);
3667  } break;
3668  }
3669}
3670
3671// ----------------------------------------------------------------------------
3672// User lock table & lock allocation
3673
3674kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
3675kmp_user_lock_p __kmp_lock_pool = NULL;
3676
3677// Lock block-allocation support.
3678kmp_block_of_locks *__kmp_lock_blocks = NULL;
3679int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3680
3681static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
3682  // Assume that kmp_global_lock is held upon entry/exit.
3683  kmp_lock_index_t index;
3684  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
3685    kmp_lock_index_t size;
3686    kmp_user_lock_p *table;
3687    // Reallocate lock table.
3688    if (__kmp_user_lock_table.allocated == 0) {
3689      size = 1024;
3690    } else {
3691      size = __kmp_user_lock_table.allocated * 2;
3692    }
3693    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
3694    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
3695               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
3696    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3697    // We cannot free the previous table now, since it may be in use by other
3698    // threads. So save the pointer to the previous table in the first
3699    // element of the new table. All the tables will be organized into a list,
3700    // and can be freed when the library shuts down.
3701    __kmp_user_lock_table.table = table;
3702    __kmp_user_lock_table.allocated = size;
3703  }
3704  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
3705                   __kmp_user_lock_table.allocated);
3706  index = __kmp_user_lock_table.used;
3707  __kmp_user_lock_table.table[index] = lck;
3708  ++__kmp_user_lock_table.used;
3709  return index;
3710}
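
// Illustrative sketch, not part of the library source (kept under #if 0):
// because other threads may still be reading the old table, the code above
// does not free it; instead the new table's element [0] stores a pointer to
// the previous table, forming a chain that is walked and freed at shutdown
// (see __kmp_cleanup_user_locks). A minimal sketch of that chain walk follows
// (the sketch_* name is hypothetical).
#if 0
#include <stdlib.h>

static void sketch_free_table_chain(void **table) {
  while (table != NULL) {
    void **older = (void **)table[0]; // previous (smaller) table, or NULL
    free(table);
    table = older;
  }
}
#endif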
3711
3712static kmp_user_lock_p __kmp_lock_block_allocate() {
3713  // Assume that kmp_global_lock is held upon entry/exit.
3714  static int last_index = 0;
3715  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
3716    // Restart the index.
3717    last_index = 0;
3718    // Need to allocate a new block.
3719    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3720    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3721    char *buffer =
3722        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
3723    // Set up the new block.
3724    kmp_block_of_locks *new_block =
3725        (kmp_block_of_locks *)(&buffer[space_for_locks]);
3726    new_block->next_block = __kmp_lock_blocks;
3727    new_block->locks = (void *)buffer;
3728    // Publish the new block.
3729    KMP_MB();
3730    __kmp_lock_blocks = new_block;
3731  }
3732  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
3733      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
3734  last_index++;
3735  return ret;
3736}
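
// Illustrative sketch, not part of the library source (kept under #if 0): a
// lock block is one allocation holding `count` lock-sized slots followed by
// the block header at the very end, so the header lives at
// buffer + count * lock_size and the i-th lock at buffer + i * lock_size.
// A standalone sketch of that layout follows (sketch_* names are
// hypothetical).
#if 0
#include <stdlib.h>

struct sketch_block_t {
  sketch_block_t *next_block;
  void *locks;
};

static sketch_block_t *sketch_new_block(size_t lock_size, int count,
                                        sketch_block_t *head) {
  size_t space_for_locks = lock_size * count;
  char *buffer = (char *)malloc(space_for_locks + sizeof(sketch_block_t));
  sketch_block_t *blk = (sketch_block_t *)(buffer + space_for_locks);
  blk->next_block = head; // chain blocks so they can be freed at shutdown
  blk->locks = buffer;    // lock i lives at buffer + i * lock_size
  return blk;
}
#endif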
3737
3738// Get memory for a lock. It may be freshly allocated memory or memory reused
3739// from the lock pool.
3740kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
3741                                         kmp_lock_flags_t flags) {
3742  kmp_user_lock_p lck;
3743  kmp_lock_index_t index;
3744  KMP_DEBUG_ASSERT(user_lock);
3745
3746  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3747
3748  if (__kmp_lock_pool == NULL) {
3749    // Lock pool is empty. Allocate new memory.
3750
3751    // ANNOTATION: Found no good way to express the synchronisation
3752    // between allocation and usage, so ignore the allocation
3753    ANNOTATE_IGNORE_WRITES_BEGIN();
3754    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
3755      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
3756    } else {
3757      lck = __kmp_lock_block_allocate();
3758    }
3759    ANNOTATE_IGNORE_WRITES_END();
3760
3761    // Insert lock in the table so that it can be freed in __kmp_cleanup,
3762    // and debugger has info on all allocated locks.
3763    index = __kmp_lock_table_insert(lck);
3764  } else {
3765    // Pick up lock from pool.
3766    lck = __kmp_lock_pool;
3767    index = __kmp_lock_pool->pool.index;
3768    __kmp_lock_pool = __kmp_lock_pool->pool.next;
3769  }
3770
3771  // We could potentially differentiate between nested and regular locks
3772  // here, and do the lock table lookup for regular locks only.
3773  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3774    *((kmp_lock_index_t *)user_lock) = index;
3775  } else {
3776    *((kmp_user_lock_p *)user_lock) = lck;
3777  }
3778
3779  // Mark the lock if it is a critical section lock.
3780  __kmp_set_user_lock_flags(lck, flags);
3781
3782  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up
3783
3784  return lck;
3785}
3786
3787// Return the lock's memory to the pool for reuse.
3788void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
3789                          kmp_user_lock_p lck) {
3790  KMP_DEBUG_ASSERT(user_lock != NULL);
3791  KMP_DEBUG_ASSERT(lck != NULL);
3792
3793  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3794
3795  lck->pool.next = __kmp_lock_pool;
3796  __kmp_lock_pool = lck;
3797  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3798    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3799    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
3800    lck->pool.index = index;
3801  }
3802
3803  __kmp_release_lock(&__kmp_global_lock, gtid);
3804}
3805
3806kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
3807  kmp_user_lock_p lck = NULL;
3808
3809  if (__kmp_env_consistency_check) {
3810    if (user_lock == NULL) {
3811      KMP_FATAL(LockIsUninitialized, func);
3812    }
3813  }
3814
3815  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3816    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3817    if (__kmp_env_consistency_check) {
3818      if (!(0 < index && index < __kmp_user_lock_table.used)) {
3819        KMP_FATAL(LockIsUninitialized, func);
3820      }
3821    }
3822    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
3823    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3824    lck = __kmp_user_lock_table.table[index];
3825  } else {
3826    lck = *((kmp_user_lock_p *)user_lock);
3827  }
3828
3829  if (__kmp_env_consistency_check) {
3830    if (lck == NULL) {
3831      KMP_FATAL(LockIsUninitialized, func);
3832    }
3833  }
3834
3835  return lck;
3836}
3837
3838void __kmp_cleanup_user_locks(void) {
3839  // Reset lock pool. Don't worry about locks in the pool--we will free them
3840  // when iterating through the lock table (it includes all locks, dead or alive).
3841  __kmp_lock_pool = NULL;
3842
3843#define IS_CRITICAL(lck)                                                       \
3844  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
3845   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))
3846
3847  // Loop through lock table, free all locks.
3848  // Do not free item [0]; it is reserved for the lock tables list.
3849  //
3850  // FIXME - we are iterating through a list of (pointers to) objects of type
3851  // union kmp_user_lock, but we have no way of knowing whether the base type is
3852  // currently "pool" or whatever the global user lock type is.
3853  //
3854  // We are relying on the fact that for all of the user lock types
3855  // (except "tas"), the first field in the lock struct is the "initialized"
3856  // field, which is set to the address of the lock object itself when
3857  // the lock is initialized.  When the union is of type "pool", the
3858  // first field is a pointer to the next object in the free list, which
3859  // will not be the same address as the object itself.
3860  //
3861  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
3862  // for "pool" objects on the free list.  This must happen as the "location"
3863  // field of real user locks overlaps the "index" field of "pool" objects.
3864  //
3865  // It would be better to run through the free list, and remove all "pool"
3866  // objects from the lock table before executing this loop.  However,
3867  // "pool" objects do not always have their index field set (only on
3868  // lin_32e), and I don't want to search the lock table for the address
3869  // of every "pool" object on the free list.
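
  // Illustrative sketch, not part of the library source (kept under #if 0):
  // the comment above relies on a live lock storing its own address in its
  // first ("initialized") field, while a pooled object overlays that field
  // with a free-list link. A minimal sketch of that discrimination follows
  // (sketch_* types are hypothetical stand-ins for the real union
  // kmp_user_lock).
#if 0
  struct sketch_lock_t {
    sketch_lock_t *initialized; // set to `this` when the lock is initialized
  };
  union sketch_user_lock_t {
    sketch_lock_t lk;
    struct sketch_pool_view_t {
      sketch_user_lock_t *next; // overlays `initialized` when pooled
    } pool;
  };
  static bool sketch_is_live(sketch_user_lock_t *u) {
    return u->lk.initialized == &u->lk;
  }
#endif
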
3870  while (__kmp_user_lock_table.used > 1) {
3871    const ident *loc;
3872
3873    // reduce __kmp_user_lock_table.used before freeing the lock,
3874    // so that the state of the locks is consistent
3875    kmp_user_lock_p lck =
3876        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];
3877
3878    if ((__kmp_is_user_lock_initialized_ != NULL) &&
3879        (*__kmp_is_user_lock_initialized_)(lck)) {
3880      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
3881      // it is NOT a critical section (user is not responsible for destroying
3882      // criticals) AND we know source location to report.
3883      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
3884          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
3885          (loc->psource != NULL)) {
3886        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0);
3887        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
3888        __kmp_str_loc_free(&str_loc);
3889      }
3890
3891#ifdef KMP_DEBUG
3892      if (IS_CRITICAL(lck)) {
3893        KA_TRACE(
3894            20,
3895            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
3896             lck, *(void **)lck));
3897      } else {
3898        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
3899                      *(void **)lck));
3900      }
3901#endif // KMP_DEBUG
3902
3903      // Clean up internal dynamic lock resources (particularly for drdpa locks).
3904      __kmp_destroy_user_lock(lck);
3905    }
3906
3907    // Free the lock if block allocation of locks is not used.
3908    if (__kmp_lock_blocks == NULL) {
3909      __kmp_free(lck);
3910    }
3911  }
3912
3913#undef IS_CRITICAL
3914
3915  // Delete the lock table(s).
3916  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
3917  __kmp_user_lock_table.table = NULL;
3918  __kmp_user_lock_table.allocated = 0;
3919
3920  while (table_ptr != NULL) {
3921    // In the first element we saved the pointer to the previous
3922    // (smaller) lock table.
3923    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
3924    __kmp_free(table_ptr);
3925    table_ptr = next;
3926  }
3927
3928  // Free buffers allocated for blocks of locks.
3929  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
3930  __kmp_lock_blocks = NULL;
3931
3932  while (block_ptr != NULL) {
3933    kmp_block_of_locks_t *next = block_ptr->next_block;
3934    __kmp_free(block_ptr->locks);
3935    // *block_ptr itself was allocated at the end of the locks vector.
3936    block_ptr = next;
3937  }
3938
3939  TCW_4(__kmp_init_user_locks, FALSE);
3940}
3941
3942#endif // KMP_USE_DYNAMIC_LOCK
3943