kmp_atomic.cpp revision 360784
/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;

/*!
@defgroup ATOMIC_OPS Atomic Operations
These functions are used for implementing the many different varieties of atomic
operations.

The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance on IA-32 architecture an
atomic like this can be inlined
@code
static int s = 0;
#pragma omp atomic
    s++;
@endcode
using the single instruction: `lock; incl s`

However, the runtime does provide entrypoints for these operations to support
compilers that choose not to inline them. (For instance,
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)

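As an illustration (a hypothetical lowering, not output from any particular
compiler), the increment above might then be compiled into a call such as
@code
// hypothetical compiler-generated code for "#pragma omp atomic  s++;"
__kmpc_atomic_fixed4_add(&loc, __kmpc_global_thread_num(&loc), &s, 1);
@endcode
where `loc` is a compiler-generated source location descriptor (`ident_t`).
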
The names of the functions are encoded by using the data type name and the
operation name, as in these tables.

Data Type  | Data type encoding
-----------|---------------
int8_t     | `fixed1`
uint8_t    | `fixed1u`
int16_t    | `fixed2`
uint16_t   | `fixed2u`
int32_t    | `fixed4`
uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
float      | `float4`
double     | `float8`
long double (80-bit x87 extended float) | `float10`
_Quad (128-bit float) | `float16`
complex<float>   |  `cmplx4`
complex<double>  | `cmplx8`
complex<long double> | `cmplx10`
complex<_Quad>   | `cmplx16`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^  | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv.   | eqv
.neqv.  | neqv

<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.

Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`) is
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs );
@endcode
@param id_ref  a pointer to the source location
@param gtid  the global thread id
@param lhs   a pointer to the left operand
@param rhs   the right operand

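For example, instantiated for `float8` addition this general form gives
@code
void __kmpc_atomic_float8_add( ident_t *id_ref, int gtid, kmp_real64 * lhs,
                               kmp_real64 rhs );
@endcode
which atomically performs `*lhs += rhs` on a `double`.
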
`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the update or the value after it. They take an
additional argument to determine which result is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs, int flag );
@endcode
@param id_ref  a pointer to the source location
@param gtid  the global thread id
@param lhs   a pointer to the left operand
@param rhs   the right operand
@param flag  one if the result is to be captured *after* the operation, zero if
captured *before*.

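As an illustrative use (hypothetical caller code, with `loc`, `gtid` and `s`
assumed to be available), capturing the new or the old value of an atomic
increment would look like
@code
kmp_int32 new_val = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 1, 1); // value after
kmp_int32 old_val = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 1, 0); // value before
@endcode
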
The one exception to this is the `complex<float>` type, where the value is not
returned; instead an extra output pointer argument is passed.

These functions look like
@code
void __kmpc_atomic_cmplx4_<op>_cpt(  ident_t *id_ref, int gtid, kmp_cmplx32 *
lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
@endcode

Read and Write Operations
=========================
The OpenMP<sup>*</sup> standard now supports atomic operations that simply
ensure that the value is read or written atomically, with no modification
performed. In many cases on IA-32 architecture these operations can be inlined
since the architecture guarantees that no tearing occurs on aligned objects
accessed with a single memory operation of up to 64 bits in size.

The general form of the read operations is
@code
TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
@endcode

For the write operations the form is
@code
void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
);
@endcode

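For instance, instantiated for `float8` these forms give
@code
kmp_real64 __kmpc_atomic_float8_rd ( ident_t *id_ref, int gtid, kmp_real64 * loc );
void __kmpc_atomic_float8_wr ( ident_t *id_ref, int gtid, kmp_real64 * lhs,
                               kmp_real64 rhs );
@endcode
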
Full list of functions
======================
This leads to the generation of 376 atomic functions, as follows.

Functions for integers
---------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
@code
    __kmpc_atomic_fixed1_add
    __kmpc_atomic_fixed1_add_cpt
    __kmpc_atomic_fixed1_add_fp
    __kmpc_atomic_fixed1_andb
    __kmpc_atomic_fixed1_andb_cpt
    __kmpc_atomic_fixed1_andl
    __kmpc_atomic_fixed1_andl_cpt
    __kmpc_atomic_fixed1_div
    __kmpc_atomic_fixed1_div_cpt
    __kmpc_atomic_fixed1_div_cpt_rev
    __kmpc_atomic_fixed1_div_float8
    __kmpc_atomic_fixed1_div_fp
    __kmpc_atomic_fixed1_div_rev
    __kmpc_atomic_fixed1_eqv
    __kmpc_atomic_fixed1_eqv_cpt
    __kmpc_atomic_fixed1_max
    __kmpc_atomic_fixed1_max_cpt
    __kmpc_atomic_fixed1_min
    __kmpc_atomic_fixed1_min_cpt
    __kmpc_atomic_fixed1_mul
    __kmpc_atomic_fixed1_mul_cpt
    __kmpc_atomic_fixed1_mul_float8
    __kmpc_atomic_fixed1_mul_fp
    __kmpc_atomic_fixed1_neqv
    __kmpc_atomic_fixed1_neqv_cpt
    __kmpc_atomic_fixed1_orb
    __kmpc_atomic_fixed1_orb_cpt
    __kmpc_atomic_fixed1_orl
    __kmpc_atomic_fixed1_orl_cpt
    __kmpc_atomic_fixed1_rd
    __kmpc_atomic_fixed1_shl
    __kmpc_atomic_fixed1_shl_cpt
    __kmpc_atomic_fixed1_shl_cpt_rev
    __kmpc_atomic_fixed1_shl_rev
    __kmpc_atomic_fixed1_shr
    __kmpc_atomic_fixed1_shr_cpt
    __kmpc_atomic_fixed1_shr_cpt_rev
    __kmpc_atomic_fixed1_shr_rev
    __kmpc_atomic_fixed1_sub
    __kmpc_atomic_fixed1_sub_cpt
    __kmpc_atomic_fixed1_sub_cpt_rev
    __kmpc_atomic_fixed1_sub_fp
    __kmpc_atomic_fixed1_sub_rev
    __kmpc_atomic_fixed1_swp
    __kmpc_atomic_fixed1_wr
    __kmpc_atomic_fixed1_xor
    __kmpc_atomic_fixed1_xor_cpt
    __kmpc_atomic_fixed1u_add_fp
    __kmpc_atomic_fixed1u_sub_fp
    __kmpc_atomic_fixed1u_mul_fp
    __kmpc_atomic_fixed1u_div
    __kmpc_atomic_fixed1u_div_cpt
    __kmpc_atomic_fixed1u_div_cpt_rev
    __kmpc_atomic_fixed1u_div_fp
    __kmpc_atomic_fixed1u_div_rev
    __kmpc_atomic_fixed1u_shr
    __kmpc_atomic_fixed1u_shr_cpt
    __kmpc_atomic_fixed1u_shr_cpt_rev
    __kmpc_atomic_fixed1u_shr_rev
    __kmpc_atomic_fixed2_add
    __kmpc_atomic_fixed2_add_cpt
    __kmpc_atomic_fixed2_add_fp
    __kmpc_atomic_fixed2_andb
    __kmpc_atomic_fixed2_andb_cpt
    __kmpc_atomic_fixed2_andl
    __kmpc_atomic_fixed2_andl_cpt
    __kmpc_atomic_fixed2_div
    __kmpc_atomic_fixed2_div_cpt
    __kmpc_atomic_fixed2_div_cpt_rev
    __kmpc_atomic_fixed2_div_float8
    __kmpc_atomic_fixed2_div_fp
    __kmpc_atomic_fixed2_div_rev
    __kmpc_atomic_fixed2_eqv
    __kmpc_atomic_fixed2_eqv_cpt
    __kmpc_atomic_fixed2_max
    __kmpc_atomic_fixed2_max_cpt
    __kmpc_atomic_fixed2_min
    __kmpc_atomic_fixed2_min_cpt
    __kmpc_atomic_fixed2_mul
    __kmpc_atomic_fixed2_mul_cpt
    __kmpc_atomic_fixed2_mul_float8
    __kmpc_atomic_fixed2_mul_fp
    __kmpc_atomic_fixed2_neqv
    __kmpc_atomic_fixed2_neqv_cpt
    __kmpc_atomic_fixed2_orb
    __kmpc_atomic_fixed2_orb_cpt
    __kmpc_atomic_fixed2_orl
    __kmpc_atomic_fixed2_orl_cpt
    __kmpc_atomic_fixed2_rd
    __kmpc_atomic_fixed2_shl
    __kmpc_atomic_fixed2_shl_cpt
    __kmpc_atomic_fixed2_shl_cpt_rev
    __kmpc_atomic_fixed2_shl_rev
    __kmpc_atomic_fixed2_shr
    __kmpc_atomic_fixed2_shr_cpt
    __kmpc_atomic_fixed2_shr_cpt_rev
    __kmpc_atomic_fixed2_shr_rev
    __kmpc_atomic_fixed2_sub
    __kmpc_atomic_fixed2_sub_cpt
    __kmpc_atomic_fixed2_sub_cpt_rev
    __kmpc_atomic_fixed2_sub_fp
    __kmpc_atomic_fixed2_sub_rev
    __kmpc_atomic_fixed2_swp
    __kmpc_atomic_fixed2_wr
    __kmpc_atomic_fixed2_xor
    __kmpc_atomic_fixed2_xor_cpt
    __kmpc_atomic_fixed2u_add_fp
    __kmpc_atomic_fixed2u_sub_fp
    __kmpc_atomic_fixed2u_mul_fp
    __kmpc_atomic_fixed2u_div
    __kmpc_atomic_fixed2u_div_cpt
    __kmpc_atomic_fixed2u_div_cpt_rev
    __kmpc_atomic_fixed2u_div_fp
    __kmpc_atomic_fixed2u_div_rev
    __kmpc_atomic_fixed2u_shr
    __kmpc_atomic_fixed2u_shr_cpt
    __kmpc_atomic_fixed2u_shr_cpt_rev
    __kmpc_atomic_fixed2u_shr_rev
    __kmpc_atomic_fixed4_add
    __kmpc_atomic_fixed4_add_cpt
    __kmpc_atomic_fixed4_add_fp
    __kmpc_atomic_fixed4_andb
    __kmpc_atomic_fixed4_andb_cpt
    __kmpc_atomic_fixed4_andl
    __kmpc_atomic_fixed4_andl_cpt
    __kmpc_atomic_fixed4_div
    __kmpc_atomic_fixed4_div_cpt
    __kmpc_atomic_fixed4_div_cpt_rev
    __kmpc_atomic_fixed4_div_float8
    __kmpc_atomic_fixed4_div_fp
    __kmpc_atomic_fixed4_div_rev
    __kmpc_atomic_fixed4_eqv
    __kmpc_atomic_fixed4_eqv_cpt
    __kmpc_atomic_fixed4_max
    __kmpc_atomic_fixed4_max_cpt
    __kmpc_atomic_fixed4_min
    __kmpc_atomic_fixed4_min_cpt
    __kmpc_atomic_fixed4_mul
    __kmpc_atomic_fixed4_mul_cpt
    __kmpc_atomic_fixed4_mul_float8
    __kmpc_atomic_fixed4_mul_fp
    __kmpc_atomic_fixed4_neqv
    __kmpc_atomic_fixed4_neqv_cpt
    __kmpc_atomic_fixed4_orb
    __kmpc_atomic_fixed4_orb_cpt
    __kmpc_atomic_fixed4_orl
    __kmpc_atomic_fixed4_orl_cpt
    __kmpc_atomic_fixed4_rd
    __kmpc_atomic_fixed4_shl
    __kmpc_atomic_fixed4_shl_cpt
    __kmpc_atomic_fixed4_shl_cpt_rev
    __kmpc_atomic_fixed4_shl_rev
    __kmpc_atomic_fixed4_shr
    __kmpc_atomic_fixed4_shr_cpt
    __kmpc_atomic_fixed4_shr_cpt_rev
    __kmpc_atomic_fixed4_shr_rev
    __kmpc_atomic_fixed4_sub
    __kmpc_atomic_fixed4_sub_cpt
    __kmpc_atomic_fixed4_sub_cpt_rev
    __kmpc_atomic_fixed4_sub_fp
    __kmpc_atomic_fixed4_sub_rev
    __kmpc_atomic_fixed4_swp
    __kmpc_atomic_fixed4_wr
    __kmpc_atomic_fixed4_xor
    __kmpc_atomic_fixed4_xor_cpt
    __kmpc_atomic_fixed4u_add_fp
    __kmpc_atomic_fixed4u_sub_fp
    __kmpc_atomic_fixed4u_mul_fp
    __kmpc_atomic_fixed4u_div
    __kmpc_atomic_fixed4u_div_cpt
    __kmpc_atomic_fixed4u_div_cpt_rev
    __kmpc_atomic_fixed4u_div_fp
    __kmpc_atomic_fixed4u_div_rev
    __kmpc_atomic_fixed4u_shr
    __kmpc_atomic_fixed4u_shr_cpt
    __kmpc_atomic_fixed4u_shr_cpt_rev
    __kmpc_atomic_fixed4u_shr_rev
    __kmpc_atomic_fixed8_add
    __kmpc_atomic_fixed8_add_cpt
    __kmpc_atomic_fixed8_add_fp
    __kmpc_atomic_fixed8_andb
    __kmpc_atomic_fixed8_andb_cpt
    __kmpc_atomic_fixed8_andl
    __kmpc_atomic_fixed8_andl_cpt
    __kmpc_atomic_fixed8_div
    __kmpc_atomic_fixed8_div_cpt
    __kmpc_atomic_fixed8_div_cpt_rev
    __kmpc_atomic_fixed8_div_float8
    __kmpc_atomic_fixed8_div_fp
    __kmpc_atomic_fixed8_div_rev
    __kmpc_atomic_fixed8_eqv
    __kmpc_atomic_fixed8_eqv_cpt
    __kmpc_atomic_fixed8_max
    __kmpc_atomic_fixed8_max_cpt
    __kmpc_atomic_fixed8_min
    __kmpc_atomic_fixed8_min_cpt
    __kmpc_atomic_fixed8_mul
    __kmpc_atomic_fixed8_mul_cpt
    __kmpc_atomic_fixed8_mul_float8
    __kmpc_atomic_fixed8_mul_fp
    __kmpc_atomic_fixed8_neqv
    __kmpc_atomic_fixed8_neqv_cpt
    __kmpc_atomic_fixed8_orb
    __kmpc_atomic_fixed8_orb_cpt
    __kmpc_atomic_fixed8_orl
    __kmpc_atomic_fixed8_orl_cpt
    __kmpc_atomic_fixed8_rd
    __kmpc_atomic_fixed8_shl
    __kmpc_atomic_fixed8_shl_cpt
    __kmpc_atomic_fixed8_shl_cpt_rev
    __kmpc_atomic_fixed8_shl_rev
    __kmpc_atomic_fixed8_shr
    __kmpc_atomic_fixed8_shr_cpt
    __kmpc_atomic_fixed8_shr_cpt_rev
    __kmpc_atomic_fixed8_shr_rev
    __kmpc_atomic_fixed8_sub
    __kmpc_atomic_fixed8_sub_cpt
    __kmpc_atomic_fixed8_sub_cpt_rev
    __kmpc_atomic_fixed8_sub_fp
    __kmpc_atomic_fixed8_sub_rev
    __kmpc_atomic_fixed8_swp
    __kmpc_atomic_fixed8_wr
    __kmpc_atomic_fixed8_xor
    __kmpc_atomic_fixed8_xor_cpt
    __kmpc_atomic_fixed8u_add_fp
    __kmpc_atomic_fixed8u_sub_fp
    __kmpc_atomic_fixed8u_mul_fp
    __kmpc_atomic_fixed8u_div
    __kmpc_atomic_fixed8u_div_cpt
    __kmpc_atomic_fixed8u_div_cpt_rev
    __kmpc_atomic_fixed8u_div_fp
    __kmpc_atomic_fixed8u_div_rev
    __kmpc_atomic_fixed8u_shr
    __kmpc_atomic_fixed8u_shr_cpt
    __kmpc_atomic_fixed8u_shr_cpt_rev
    __kmpc_atomic_fixed8u_shr_rev
@endcode

Functions for floating point
----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten-byte floats are used by the x87 FPU, but are now rare.)
@code
    __kmpc_atomic_float4_add
    __kmpc_atomic_float4_add_cpt
    __kmpc_atomic_float4_add_float8
    __kmpc_atomic_float4_add_fp
    __kmpc_atomic_float4_div
    __kmpc_atomic_float4_div_cpt
    __kmpc_atomic_float4_div_cpt_rev
    __kmpc_atomic_float4_div_float8
    __kmpc_atomic_float4_div_fp
    __kmpc_atomic_float4_div_rev
    __kmpc_atomic_float4_max
    __kmpc_atomic_float4_max_cpt
    __kmpc_atomic_float4_min
    __kmpc_atomic_float4_min_cpt
    __kmpc_atomic_float4_mul
    __kmpc_atomic_float4_mul_cpt
    __kmpc_atomic_float4_mul_float8
    __kmpc_atomic_float4_mul_fp
    __kmpc_atomic_float4_rd
    __kmpc_atomic_float4_sub
    __kmpc_atomic_float4_sub_cpt
    __kmpc_atomic_float4_sub_cpt_rev
    __kmpc_atomic_float4_sub_float8
    __kmpc_atomic_float4_sub_fp
    __kmpc_atomic_float4_sub_rev
    __kmpc_atomic_float4_swp
    __kmpc_atomic_float4_wr
    __kmpc_atomic_float8_add
    __kmpc_atomic_float8_add_cpt
    __kmpc_atomic_float8_add_fp
    __kmpc_atomic_float8_div
    __kmpc_atomic_float8_div_cpt
    __kmpc_atomic_float8_div_cpt_rev
    __kmpc_atomic_float8_div_fp
    __kmpc_atomic_float8_div_rev
    __kmpc_atomic_float8_max
    __kmpc_atomic_float8_max_cpt
    __kmpc_atomic_float8_min
    __kmpc_atomic_float8_min_cpt
    __kmpc_atomic_float8_mul
    __kmpc_atomic_float8_mul_cpt
    __kmpc_atomic_float8_mul_fp
    __kmpc_atomic_float8_rd
    __kmpc_atomic_float8_sub
    __kmpc_atomic_float8_sub_cpt
    __kmpc_atomic_float8_sub_cpt_rev
    __kmpc_atomic_float8_sub_fp
    __kmpc_atomic_float8_sub_rev
    __kmpc_atomic_float8_swp
    __kmpc_atomic_float8_wr
    __kmpc_atomic_float10_add
    __kmpc_atomic_float10_add_cpt
    __kmpc_atomic_float10_add_fp
    __kmpc_atomic_float10_div
    __kmpc_atomic_float10_div_cpt
    __kmpc_atomic_float10_div_cpt_rev
    __kmpc_atomic_float10_div_fp
    __kmpc_atomic_float10_div_rev
    __kmpc_atomic_float10_mul
    __kmpc_atomic_float10_mul_cpt
    __kmpc_atomic_float10_mul_fp
    __kmpc_atomic_float10_rd
    __kmpc_atomic_float10_sub
    __kmpc_atomic_float10_sub_cpt
    __kmpc_atomic_float10_sub_cpt_rev
    __kmpc_atomic_float10_sub_fp
    __kmpc_atomic_float10_sub_rev
    __kmpc_atomic_float10_swp
    __kmpc_atomic_float10_wr
    __kmpc_atomic_float16_add
    __kmpc_atomic_float16_add_cpt
    __kmpc_atomic_float16_div
    __kmpc_atomic_float16_div_cpt
    __kmpc_atomic_float16_div_cpt_rev
    __kmpc_atomic_float16_div_rev
    __kmpc_atomic_float16_max
    __kmpc_atomic_float16_max_cpt
    __kmpc_atomic_float16_min
    __kmpc_atomic_float16_min_cpt
    __kmpc_atomic_float16_mul
    __kmpc_atomic_float16_mul_cpt
    __kmpc_atomic_float16_rd
    __kmpc_atomic_float16_sub
    __kmpc_atomic_float16_sub_cpt
    __kmpc_atomic_float16_sub_cpt_rev
    __kmpc_atomic_float16_sub_rev
    __kmpc_atomic_float16_swp
    __kmpc_atomic_float16_wr
@endcode

Functions for Complex types
---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the component
float, *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an
operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.

@code
    __kmpc_atomic_cmplx4_add
    __kmpc_atomic_cmplx4_add_cmplx8
    __kmpc_atomic_cmplx4_add_cpt
    __kmpc_atomic_cmplx4_div
    __kmpc_atomic_cmplx4_div_cmplx8
    __kmpc_atomic_cmplx4_div_cpt
    __kmpc_atomic_cmplx4_div_cpt_rev
    __kmpc_atomic_cmplx4_div_rev
    __kmpc_atomic_cmplx4_mul
    __kmpc_atomic_cmplx4_mul_cmplx8
    __kmpc_atomic_cmplx4_mul_cpt
    __kmpc_atomic_cmplx4_rd
    __kmpc_atomic_cmplx4_sub
    __kmpc_atomic_cmplx4_sub_cmplx8
    __kmpc_atomic_cmplx4_sub_cpt
    __kmpc_atomic_cmplx4_sub_cpt_rev
    __kmpc_atomic_cmplx4_sub_rev
    __kmpc_atomic_cmplx4_swp
    __kmpc_atomic_cmplx4_wr
    __kmpc_atomic_cmplx8_add
    __kmpc_atomic_cmplx8_add_cpt
    __kmpc_atomic_cmplx8_div
    __kmpc_atomic_cmplx8_div_cpt
    __kmpc_atomic_cmplx8_div_cpt_rev
    __kmpc_atomic_cmplx8_div_rev
    __kmpc_atomic_cmplx8_mul
    __kmpc_atomic_cmplx8_mul_cpt
    __kmpc_atomic_cmplx8_rd
    __kmpc_atomic_cmplx8_sub
    __kmpc_atomic_cmplx8_sub_cpt
    __kmpc_atomic_cmplx8_sub_cpt_rev
    __kmpc_atomic_cmplx8_sub_rev
    __kmpc_atomic_cmplx8_swp
    __kmpc_atomic_cmplx8_wr
    __kmpc_atomic_cmplx10_add
    __kmpc_atomic_cmplx10_add_cpt
    __kmpc_atomic_cmplx10_div
    __kmpc_atomic_cmplx10_div_cpt
    __kmpc_atomic_cmplx10_div_cpt_rev
    __kmpc_atomic_cmplx10_div_rev
    __kmpc_atomic_cmplx10_mul
    __kmpc_atomic_cmplx10_mul_cpt
    __kmpc_atomic_cmplx10_rd
    __kmpc_atomic_cmplx10_sub
    __kmpc_atomic_cmplx10_sub_cpt
    __kmpc_atomic_cmplx10_sub_cpt_rev
    __kmpc_atomic_cmplx10_sub_rev
    __kmpc_atomic_cmplx10_swp
    __kmpc_atomic_cmplx10_wr
    __kmpc_atomic_cmplx16_add
    __kmpc_atomic_cmplx16_add_cpt
    __kmpc_atomic_cmplx16_div
    __kmpc_atomic_cmplx16_div_cpt
    __kmpc_atomic_cmplx16_div_cpt_rev
    __kmpc_atomic_cmplx16_div_rev
    __kmpc_atomic_cmplx16_mul
    __kmpc_atomic_cmplx16_mul_cpt
    __kmpc_atomic_cmplx16_rd
    __kmpc_atomic_cmplx16_sub
    __kmpc_atomic_cmplx16_sub_cpt
    __kmpc_atomic_cmplx16_sub_cpt_rev
    __kmpc_atomic_cmplx16_swp
    __kmpc_atomic_cmplx16_wr
@endcode
*/

/*!
@ingroup ATOMIC_OPS
@{
*/

/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for complex byte data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
   on *_32 and *_32e. This is just a temporary workaround for the problem. It
   seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
   in assembler language. */
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q /= rhs.q;
}

static inline void operator+=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q /= rhs.q;
}

#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID                                                         \
  if (gtid == KMP_GTID_UNKNOWN) {                                              \
    gtid = __kmp_entry_gtid();                                                 \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
                                             TYPE *lhs, TYPE rhs) {            \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

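// For illustration only (not compiled): ATOMIC_BEGIN(fixed4, add, kmp_int32, void)
// opens a definition roughly equivalent to
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
// with the body and the closing brace supplied by the macro that invokes it.
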
// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: gtid is not checked here since it should always be valid;
// 1- and 2-byte operands: a valid gtid is expected; other sizes: checked
// before this macro is used.
#define OP_CRITICAL(OP, LCK_ID)                                                \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  (*lhs) OP(rhs);                                                              \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

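// For illustration only (not compiled): OP_CRITICAL(+=, 10r) expands to
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   (*lhs) += (rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
// i.e. the update is simply serialized under the per-type lock.
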
// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section.  On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange.  Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1.  If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG)                                             \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(OP, 0);                                                        \
    return;                                                                    \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE KMP_CPU_PAUSE()
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
#define OP_CMPXCHG(TYPE, BITS, OP)                                             \
  {                                                                            \
    TYPE old_value, new_value;                                                 \
    old_value = *(TYPE volatile *)lhs;                                         \
    new_value = old_value OP rhs;                                              \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      old_value = *(TYPE volatile *)lhs;                                       \
      new_value = old_value OP rhs;                                            \
    }                                                                          \
  }

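// For illustration only (not compiled): OP_CMPXCHG(kmp_real32, 32, +) is a
// classic read-modify-CAS retry loop on the 32-bit image of the operand:
//   kmp_real32 old_value, new_value;
//   old_value = *(kmp_real32 volatile *)lhs;
//   new_value = old_value + rhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ32(
//       (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//       *VOLATILE_CAST(kmp_int32 *) & new_value)) {
//     KMP_DO_PAUSE;
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = old_value + rhs;
//   }
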
#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                  \
  {                                                                            \
    struct _sss {                                                              \
      TYPE cmp;                                                                \
      kmp_int##BITS *vvv;                                                      \
    };                                                                         \
    struct _sss old_value, new_value;                                          \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
    new_value.cmp = old_value.cmp OP rhs;                                      \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
      new_value.cmp = old_value.cmp OP rhs;                                    \
    }                                                                          \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                        \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
                       GOMP_FLAG)                                              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
                                  MASK, GOMP_FLAG)                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */          \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                      \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
  }                                                                            \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
                       GOMP_FLAG)                                              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
  }                                                                            \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
                                  MASK, GOMP_FLAG)                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
  }                                                                            \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

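// For illustration only (not compiled): on x86/x86_64 the
// ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 0) invocation above
// expands to roughly
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     OP_GOMP_CRITICAL(+=, 0) /* dead code here since GOMP_FLAG is 0 */
//     KMP_TEST_THEN_ADD32(lhs, +rhs); /* single atomic fetch-and-add */
//   }
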
ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub

// ------------------------------------------------------------------------
// Entries definition for integer operands
//     TYPE_ID - operands type and size (fixed4, float4)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operand type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator (used in critical section)
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
//     MASK    - used for alignment check

//               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
//              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
//              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and ||                         */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
//   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  OP_CRITICAL(= *lhs OP, LCK_ID)                                               \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */              \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no C counterpart:                */
/* MAX, MIN, .EQV., .NEQV.                                                   */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}           */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}  */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - comparison operator used to check whether any update is needed
#define MIN_MAX_CRITSECT(OP, LCK_ID)                                           \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (*lhs OP rhs) { /* still need actions? */                                 \
    *lhs = rhs;                                                                \
  }                                                                            \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                        \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    MIN_MAX_CRITSECT(OP, 0);                                                   \
    return;                                                                    \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                        \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value;                                                            \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    while (old_value OP rhs && /* still need actions? */                       \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
               (kmp_int##BITS *)lhs,                                           \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
      KMP_CPU_PAUSE();                                                         \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
    }                                                                          \
  }

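// Note (illustration, not compiled): for max the comparison operator passed in
// is '<', so MIN_MAX_CMPXCHG(kmp_int32, 32, <) keeps retrying the
// compare-and-store only while (old_value < rhs), i.e. only while the stored
// value still needs to be replaced by the larger rhs; otherwise it exits
// without writing at all.
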
// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) { /* need actions? */                                       \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    MIN_MAX_CRITSECT(OP, LCK_ID)                                               \
  }                                                                            \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                            \
  }                                                                            \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                    \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                    \
    } else {                                                                   \
      KMP_CHECK_GTID;                                                          \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                     \
    }                                                                          \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
                 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
                 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
                 1) // __kmpc_atomic_float16_min_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
1198// ------------------------------------------------------------------------
1199// Need separate macros for .EQV. because of the need of complement (~)
1200// OP ignored for critical sections, ^=~ used instead
1201#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1202  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1203  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                      \
1204  OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */               \
1205  }
1206
1207// ------------------------------------------------------------------------
1208#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1209// ------------------------------------------------------------------------
1210// X86 or X86_64: no alignment problems ===================================
1211#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1212                        GOMP_FLAG)                                             \
1213  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1214  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                      \
1215  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1216  }
1217// ------------------------------------------------------------------------
1218#else
1219// ------------------------------------------------------------------------
1220// Code for other architectures that don't handle unaligned accesses.
1221#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1222                        GOMP_FLAG)                                             \
1223  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1224  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG)                                            \
1225  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1226    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1227  } else {                                                                     \
1228    KMP_CHECK_GTID;                                                            \
1229    OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */           \
1230  }                                                                            \
1231  }
1232#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
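// Illustrative sketch (not compiled): the ATOMIC_CMPX_EQV instantiations
// below generate, e.g., __kmpc_atomic_fixed4_eqv, which on x86 behaves
// roughly like an atomic "*lhs = *lhs ^ ~rhs":
//   kmp_int32 old_value = *lhs;
//   kmp_int32 new_value = old_value ^ ~rhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, new_value)) {
//     old_value = *lhs;
//     new_value = old_value ^ ~rhs;
//   }
// On architectures without unaligned-access support, the unaligned case falls
// back to the critical-section path instead.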
1233
1234ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1235               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1236ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1237               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1238ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1239               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1240ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1241               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1242ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1243                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1244ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1245                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1246ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1247                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1248ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1249                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1250
1251// ------------------------------------------------------------------------
1252// Routines for Extended types: long double, _Quad, complex flavours (use
1253// critical section)
1254//     TYPE_ID, OP_ID, TYPE - detailed above
1255//     OP      - operator
1256//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1257#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1258  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1259  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                    \
1260  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                            \
1261  }
1262
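// Illustrative sketch (not compiled): ATOMIC_CRITICAL(float10, add,
// long double, +, 10r, 1) below generates roughly
//   void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid,
//                                  long double *lhs, long double rhs) {
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//     (*lhs) += rhs;
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   }
// plus the GOMP-compatibility prologue; the lock name shown is an
// approximation of what ATOMIC_LOCK##LCK_ID resolves to.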
1263/* ------------------------------------------------------------------------- */
1264// routines for long double type
1265ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1266                1) // __kmpc_atomic_float10_add
1267ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1268                1) // __kmpc_atomic_float10_sub
1269ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1270                1) // __kmpc_atomic_float10_mul
1271ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1272                1) // __kmpc_atomic_float10_div
1273#if KMP_HAVE_QUAD
1274// routines for _Quad type
1275ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1276                1) // __kmpc_atomic_float16_add
1277ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1278                1) // __kmpc_atomic_float16_sub
1279ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1280                1) // __kmpc_atomic_float16_mul
1281ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1282                1) // __kmpc_atomic_float16_div
1283#if (KMP_ARCH_X86)
1284ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1285                1) // __kmpc_atomic_float16_add_a16
1286ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1287                1) // __kmpc_atomic_float16_sub_a16
1288ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1289                1) // __kmpc_atomic_float16_mul_a16
1290ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1291                1) // __kmpc_atomic_float16_div_a16
1292#endif // (KMP_ARCH_X86)
1293#endif // KMP_HAVE_QUAD
1294// routines for complex types
1295
1296#if USE_CMPXCHG_FIX
1297// workaround for C78287 (complex(kind=4) data type)
1298ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1299                          1) // __kmpc_atomic_cmplx4_add
1300ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1301                          1) // __kmpc_atomic_cmplx4_sub
1302ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1303                          1) // __kmpc_atomic_cmplx4_mul
1304ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1305                          1) // __kmpc_atomic_cmplx4_div
1306// end of the workaround for C78287
1307#else
1308ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1309ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1310ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1311ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1312#endif // USE_CMPXCHG_FIX
1313
1314ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1315ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1316ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1317ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1318ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1319                1) // __kmpc_atomic_cmplx10_add
1320ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1321                1) // __kmpc_atomic_cmplx10_sub
1322ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1323                1) // __kmpc_atomic_cmplx10_mul
1324ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1325                1) // __kmpc_atomic_cmplx10_div
1326#if KMP_HAVE_QUAD
1327ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1328                1) // __kmpc_atomic_cmplx16_add
1329ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1330                1) // __kmpc_atomic_cmplx16_sub
1331ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1332                1) // __kmpc_atomic_cmplx16_mul
1333ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1334                1) // __kmpc_atomic_cmplx16_div
1335#if (KMP_ARCH_X86)
1336ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1337                1) // __kmpc_atomic_cmplx16_add_a16
1338ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1339                1) // __kmpc_atomic_cmplx16_sub_a16
1340ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1341                1) // __kmpc_atomic_cmplx16_mul_a16
1342ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1343                1) // __kmpc_atomic_cmplx16_div_a16
1344#endif // (KMP_ARCH_X86)
1345#endif // KMP_HAVE_QUAD
1346
1347// OpenMP 4.0: x = expr binop x for non-commutative operations.
1348// Supported only on IA-32 architecture and Intel(R) 64
1349#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1350
1351// ------------------------------------------------------------------------
1352// Operation on *lhs, rhs bound by critical section
1353//     OP     - operator (it's supposed to contain an assignment)
1354//     LCK_ID - lock identifier
1355// Note: don't check gtid as it should always be valid
1356// 1- and 2-byte types: a valid gtid is expected; other types: check it before this macro
1357#define OP_CRITICAL_REV(OP, LCK_ID)                                            \
1358  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1359                                                                               \
1360  (*lhs) = (rhs)OP(*lhs);                                                      \
1361                                                                               \
1362  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1363
1364#ifdef KMP_GOMP_COMPAT
1365#define OP_GOMP_CRITICAL_REV(OP, FLAG)                                         \
1366  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1367    KMP_CHECK_GTID;                                                            \
1368    OP_CRITICAL_REV(OP, 0);                                                    \
1369    return;                                                                    \
1370  }
1371#else
1372#define OP_GOMP_CRITICAL_REV(OP, FLAG)
1373#endif /* KMP_GOMP_COMPAT */
1374
1375// Beginning of a definition (provides name, parameters, debug trace)
1376//     TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
1377//     unsigned fixed-point types)
1378//     OP_ID   - operation identifier (add, sub, mul, ...)
1379//     TYPE    - operands' type
1380#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
1381  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
1382                                                   TYPE *lhs, TYPE rhs) {      \
1383    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1384    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1385
1386// ------------------------------------------------------------------------
1387// Operation on *lhs, rhs using "compare_and_store" routine
1388//     TYPE    - operands' type
1389//     BITS    - size in bits, used to distinguish low level calls
1390//     OP      - operator
1391// Note: temp_val introduced in order to force the compiler to read
1392//       *lhs only once (w/o it the compiler reads *lhs twice)
1393#define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
1394  {                                                                            \
1395    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1396    TYPE old_value, new_value;                                                 \
1397    temp_val = *lhs;                                                           \
1398    old_value = temp_val;                                                      \
1399    new_value = rhs OP old_value;                                              \
1400    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1401        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
1402        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
1403      KMP_DO_PAUSE;                                                            \
1404                                                                               \
1405      temp_val = *lhs;                                                         \
1406      old_value = temp_val;                                                    \
1407      new_value = rhs OP old_value;                                            \
1408    }                                                                          \
1409  }
1410
1411// -------------------------------------------------------------------------
1412#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
1413  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1414  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1415  OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1416  }
1417
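// Illustrative sketch (not compiled): for a reverse operation the operand
// order is swapped, e.g. __kmpc_atomic_fixed4_div_rev (generated below)
// implements "x = expr / x" and loops roughly as
//   kmp_int32 old_value = *lhs;
//   kmp_int32 new_value = rhs / old_value;
//   while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, new_value)) {
//     KMP_DO_PAUSE;
//     old_value = *lhs;
//     new_value = rhs / old_value;
//   }
// whereas the forward version computes old_value OP rhs.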
1418// ------------------------------------------------------------------------
1419// Entries definition for integer operands
1420//     TYPE_ID - operands type and size (fixed4, float4)
1421//     OP_ID   - operation identifier (add, sub, mul, ...)
1422//     TYPE    - operand type
1423//     BITS    - size in bits, used to distinguish low level calls
1424//     OP      - operator (used in critical section)
1425//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1426
1427//               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,GOMP_FLAG
1428// ------------------------------------------------------------------------
1429// Routines for ATOMIC integer and real operands, reversed operators
1430// ------------------------------------------------------------------------
1431//                  TYPE_ID,OP_ID, TYPE,    BITS, OP, LCK_ID, GOMP_FLAG
1432ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1433                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1434ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1435                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1436ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1437                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1438ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1439                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1440ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1441                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1442ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1443                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1444
1445ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1446                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1447ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1448                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1449ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1450                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1451ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1452                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1453ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1454                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1455ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1456                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1457
1458ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1459                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1460ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1461                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1462ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1463                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1464ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1465                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1466ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1467                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1468ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1469                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1470
1471ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1472                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1473ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1474                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1475ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1476                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1477ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1478                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1479ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1480                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1481ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1482                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1483
1484ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1485                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1486ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1487                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1488
1489ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1490                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1491ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1492                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1493//                  TYPE_ID,OP_ID, TYPE,     BITS,OP,LCK_ID, GOMP_FLAG
1494
1495// ------------------------------------------------------------------------
1496// Routines for Extended types: long double, _Quad, complex flavours (use
1497// critical section)
1498//     TYPE_ID, OP_ID, TYPE - detailed above
1499//     OP      - operator
1500//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1501#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
1502  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1503  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1504  OP_CRITICAL_REV(OP, LCK_ID)                                                  \
1505  }
1506
1507/* ------------------------------------------------------------------------- */
1508// routines for long double type
1509ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1510                    1) // __kmpc_atomic_float10_sub_rev
1511ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1512                    1) // __kmpc_atomic_float10_div_rev
1513#if KMP_HAVE_QUAD
1514// routines for _Quad type
1515ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1516                    1) // __kmpc_atomic_float16_sub_rev
1517ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1518                    1) // __kmpc_atomic_float16_div_rev
1519#if (KMP_ARCH_X86)
1520ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1521                    1) // __kmpc_atomic_float16_sub_a16_rev
1522ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1523                    1) // __kmpc_atomic_float16_div_a16_rev
1524#endif // KMP_ARCH_X86
1525#endif // KMP_HAVE_QUAD
1526
1527// routines for complex types
1528ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1529                    1) // __kmpc_atomic_cmplx4_sub_rev
1530ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1531                    1) // __kmpc_atomic_cmplx4_div_rev
1532ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1533                    1) // __kmpc_atomic_cmplx8_sub_rev
1534ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1535                    1) // __kmpc_atomic_cmplx8_div_rev
1536ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1537                    1) // __kmpc_atomic_cmplx10_sub_rev
1538ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1539                    1) // __kmpc_atomic_cmplx10_div_rev
1540#if KMP_HAVE_QUAD
1541ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1542                    1) // __kmpc_atomic_cmplx16_sub_rev
1543ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1544                    1) // __kmpc_atomic_cmplx16_div_rev
1545#if (KMP_ARCH_X86)
1546ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1547                    1) // __kmpc_atomic_cmplx16_sub_a16_rev
1548ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1549                    1) // __kmpc_atomic_cmplx16_div_a16_rev
1550#endif // KMP_ARCH_X86
1551#endif // KMP_HAVE_QUAD
1552
1553#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1554// End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1555
1556/* ------------------------------------------------------------------------ */
1557/* Routines for mixed types of LHS and RHS, when RHS is "larger"            */
1558/* Note: to reduce the total number of type combinations, the compiler is   */
1559/*       expected to convert the RHS to the longest floating type, that is  */
1560/*       _Quad, before calling any of these routines                        */
1561/* The conversion to _Quad is done by the compiler during the calculation;  */
1562/*    the conversion back to TYPE happens just before the assignment, like: */
1563/*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                 */
1564/* Performance penalty expected (software _Quad emulation); see sketch below*/
1565/* ------------------------------------------------------------------------ */
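// Illustrative sketch (not compiled): for a mixed-type update such as
//   int i; _Quad q;
//   #pragma omp atomic
//   i *= q;
// the compiler is expected to emit a call like
//   __kmpc_atomic_fixed4_mul_fp(id_ref, gtid, &i, q);
// (id_ref and gtid are supplied by the compiler/runtime), and the routine
// performs, under the appropriate protection,
//   i = (int)((_Quad)i * q);
// The exact entry point chosen depends on the LHS type and the operation.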
1566
1567#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                \
1568  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
1569      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                       \
1570    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1571    KA_TRACE(100,                                                              \
1572             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
1573              gtid));
1574
1575// -------------------------------------------------------------------------
1576#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID,  \
1577                           GOMP_FLAG)                                          \
1578  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1579  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                    \
1580  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                            \
1581  }
1582
1583// -------------------------------------------------------------------------
1584#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1585// -------------------------------------------------------------------------
1586// X86 or X86_64: no alignment problems ====================================
1587#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1588                           LCK_ID, MASK, GOMP_FLAG)                            \
1589  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1590  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1591  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1592  }
1593// -------------------------------------------------------------------------
1594#else
1595// ------------------------------------------------------------------------
1596// Code for other architectures that don't handle unaligned accesses.
1597#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1598                           LCK_ID, MASK, GOMP_FLAG)                            \
1599  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1600  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1601  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1602    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1603  } else {                                                                     \
1604    KMP_CHECK_GTID;                                                            \
1605    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
1606  }                                                                            \
1607  }
1608#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1609
1610// -------------------------------------------------------------------------
1611#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1612// -------------------------------------------------------------------------
1613#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
1614                               RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
1615  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1616  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1617  OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1618  }
1619#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,      \
1620                               LCK_ID, GOMP_FLAG)                              \
1621  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1622  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1623  OP_CRITICAL_REV(OP, LCK_ID)                                                  \
1624  }
1625#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1626
1627// RHS=float8
1628ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1629                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1630ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1631                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1632ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1633                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1634ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1635                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1636ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1637                   0) // __kmpc_atomic_fixed4_mul_float8
1638ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1639                   0) // __kmpc_atomic_fixed4_div_float8
1640ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1641                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1642ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1643                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1644ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1645                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1646ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1647                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1648ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1649                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1650ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1651                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1652
1653// RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1654// use them)
1655#if KMP_HAVE_QUAD
1656ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1657                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1658ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1659                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1660ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1661                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1662ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1663                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1664ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1665                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1666ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1667                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1668ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1669                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1670ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1671                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1672
1673ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1674                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1675ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1676                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1677ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1678                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1679ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1680                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1681ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1682                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1683ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1684                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1685ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1686                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1687ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1688                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1689
1690ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1691                   0) // __kmpc_atomic_fixed4_add_fp
1692ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1693                   0) // __kmpc_atomic_fixed4u_add_fp
1694ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1695                   0) // __kmpc_atomic_fixed4_sub_fp
1696ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1697                   0) // __kmpc_atomic_fixed4u_sub_fp
1698ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1699                   0) // __kmpc_atomic_fixed4_mul_fp
1700ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1701                   0) // __kmpc_atomic_fixed4u_mul_fp
1702ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1703                   0) // __kmpc_atomic_fixed4_div_fp
1704ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1705                   0) // __kmpc_atomic_fixed4u_div_fp
1706
1707ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1708                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1709ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1710                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1711ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1712                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1713ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1714                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1715ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1716                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1717ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1718                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1719ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1720                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1721ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1722                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1723
1724ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1725                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1726ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1727                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1728ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1729                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1730ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1731                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1732
1733ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1734                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1735ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1736                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1737ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1738                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1739ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1740                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1741
1742ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1743                   1) // __kmpc_atomic_float10_add_fp
1744ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1745                   1) // __kmpc_atomic_float10_sub_fp
1746ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1747                   1) // __kmpc_atomic_float10_mul_fp
1748ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1749                   1) // __kmpc_atomic_float10_div_fp
1750
1751#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1752// Reverse operations
1753ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1754                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1755ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1756                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1757ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1758                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1759ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1760                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1761
1762ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1763                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1764ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1765                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1766ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1767                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1768ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1769                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1770
1771ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1772                       0) // __kmpc_atomic_fixed4_sub_rev_fp
1773ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1774                       0) // __kmpc_atomic_fixed4u_sub_rev_fp
1775ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1776                       0) // __kmpc_atomic_fixed4_div_rev_fp
1777ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1778                       0) // __kmpc_atomic_fixed4u_div_rev_fp
1779
1780ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1781                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1782ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1783                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1784ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1785                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1786ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1787                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1788
1789ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1790                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1791ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1792                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1793
1794ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1795                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1796ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1797                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1798
1799ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1800                       1) // __kmpc_atomic_float10_sub_rev_fp
1801ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1802                       1) // __kmpc_atomic_float10_div_rev_fp
1803#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1804
1805#endif // KMP_HAVE_QUAD
1806
1807#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1808// ------------------------------------------------------------------------
1809// X86 or X86_64: no alignment problems ====================================
1810#if USE_CMPXCHG_FIX
1811// workaround for C78287 (complex(kind=4) data type)
1812#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1813                             LCK_ID, MASK, GOMP_FLAG)                          \
1814  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1815  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1816  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
1817  }
1818// end of the second part of the workaround for C78287
1819#else
1820#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1821                             LCK_ID, MASK, GOMP_FLAG)                          \
1822  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1823  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1824  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1825  }
1826#endif // USE_CMPXCHG_FIX
1827#else
1828// ------------------------------------------------------------------------
1829// Code for other architectures that don't handle unaligned accesses.
1830#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1831                             LCK_ID, MASK, GOMP_FLAG)                          \
1832  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1833  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1834  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1835    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1836  } else {                                                                     \
1837    KMP_CHECK_GTID;                                                            \
1838    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
1839  }                                                                            \
1840  }
1841#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1842
1843ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1844                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1845ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1846                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1847ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1848                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1849ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1850                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1851
1852// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1853#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1854
1855// ------------------------------------------------------------------------
1856// Atomic READ routines
1857
1858// ------------------------------------------------------------------------
1859// Beginning of a definition (provides name, parameters, debug trace)
1860//     TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
1861//     unsigned fixed-point types)
1862//     OP_ID   - operation identifier (add, sub, mul, ...)
1863//     TYPE    - operands' type
1864#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
1865  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
1866                                             TYPE *loc) {                      \
1867    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1868    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1869
1870// ------------------------------------------------------------------------
1871// Operation on *lhs, rhs using "compare_and_store_ret" routine
1872//     TYPE    - operands' type
1873//     BITS    - size in bits, used to distinguish low level calls
1874//     OP      - operator
1875// Note: temp_val introduced in order to force the compiler to read
1876//       *lhs only once (w/o it the compiler reads *lhs twice)
1877// TODO: check if it is still necessary
1878// Return old value regardless of the result of the "compare & swap" operation
1879#define OP_CMPXCHG_READ(TYPE, BITS, OP)                                        \
1880  {                                                                            \
1881    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1882    union f_i_union {                                                          \
1883      TYPE f_val;                                                              \
1884      kmp_int##BITS i_val;                                                     \
1885    };                                                                         \
1886    union f_i_union old_value;                                                 \
1887    temp_val = *loc;                                                           \
1888    old_value.f_val = temp_val;                                                \
1889    old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                         \
1890        (kmp_int##BITS *)loc,                                                  \
1891        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                     \
1892        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                    \
1893    new_value = old_value.f_val;                                               \
1894    return new_value;                                                          \
1895  }
1896
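// Illustrative sketch (not compiled): OP_CMPXCHG_READ above performs an
// atomic load by issuing a compare-and-swap whose "new" value equals the
// expected "old" value, so memory is never modified but the current contents
// are obtained atomically. For a 64-bit float it behaves roughly like
//   union { kmp_real64 f_val; kmp_int64 i_val; } old_value;
//   old_value.f_val = *loc;                       // speculative read
//   old_value.i_val = KMP_COMPARE_AND_STORE_RET64(
//       (kmp_int64 *)loc, old_value.i_val, old_value.i_val);
//   return old_value.f_val;                       // value seen by the CAS
// The union only reinterprets the bits between the float and integer views
// expected by the intrinsic.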
1897// -------------------------------------------------------------------------
1898// Operation on *lhs, rhs bound by critical section
1899//     OP     - operator (it's supposed to contain an assignment)
1900//     LCK_ID - lock identifier
1901// Note: don't check gtid as it should always be valid
1902// 1- and 2-byte types: a valid gtid is expected; other types: check it before this macro
1903#define OP_CRITICAL_READ(OP, LCK_ID)                                           \
1904  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1905                                                                               \
1906  new_value = (*loc);                                                          \
1907                                                                               \
1908  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1909
1910// -------------------------------------------------------------------------
1911#ifdef KMP_GOMP_COMPAT
1912#define OP_GOMP_CRITICAL_READ(OP, FLAG)                                        \
1913  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1914    KMP_CHECK_GTID;                                                            \
1915    OP_CRITICAL_READ(OP, 0);                                                   \
1916    return new_value;                                                          \
1917  }
1918#else
1919#define OP_GOMP_CRITICAL_READ(OP, FLAG)
1920#endif /* KMP_GOMP_COMPAT */
1921
1922// -------------------------------------------------------------------------
1923#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
1924  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1925  TYPE new_value;                                                              \
1926  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1927  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                              \
1928  return new_value;                                                            \
1929  }
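// Illustrative sketch (not compiled): ATOMIC_FIXED_READ above reads an
// integer atomically by adding zero with a locked fetch-and-add, e.g. for
// fixed4 the body expands roughly to
//   new_value = KMP_TEST_THEN_ADD32(loc, + 0);  // returns the value at *loc
//   return new_value;
// Adding zero leaves *loc unchanged while still giving the read the same
// atomicity guarantees as the other fixed-point operations.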
1930// -------------------------------------------------------------------------
1931#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
1932  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1933  TYPE new_value;                                                              \
1934  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1935  OP_CMPXCHG_READ(TYPE, BITS, OP)                                              \
1936  }
1937// ------------------------------------------------------------------------
1938// Routines for Extended types: long double, _Quad, complex flavours (use
1939// critical section)
1940//     TYPE_ID, OP_ID, TYPE - detailed above
1941//     OP      - operator
1942//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1943#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
1944  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1945  TYPE new_value;                                                              \
1946  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */               \
1947  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */                           \
1948  return new_value;                                                            \
1949  }
1950
1951// ------------------------------------------------------------------------
1952// Fix for cmplx4 read (CQ220361) on Windows* OS: the regular routine, which
1953// returns the value, doesn't work there.
1954// Instead, the read value is returned through an additional output parameter.
1955#if (KMP_OS_WINDOWS)
1956
1957#define OP_CRITICAL_READ_WRK(OP, LCK_ID)                                       \
1958  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1959                                                                               \
1960  (*out) = (*loc);                                                             \
1961                                                                               \
1962  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1963// ------------------------------------------------------------------------
1964#ifdef KMP_GOMP_COMPAT
1965#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)                                    \
1966  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1967    KMP_CHECK_GTID;                                                            \
1968    OP_CRITICAL_READ_WRK(OP, 0);                                               \
1969  }
1970#else
1971#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1972#endif /* KMP_GOMP_COMPAT */
1973// ------------------------------------------------------------------------
1974#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                            \
1975  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1976                                         TYPE *loc) {                          \
1977    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1978    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1979
1980// ------------------------------------------------------------------------
1981#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
1982  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                                  \
1983  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */           \
1984  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */                       \
1985  }
1986
1987#endif // KMP_OS_WINDOWS
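// Illustrative sketch (not compiled): on Windows* OS the cmplx4 read entry
// generated by ATOMIC_CRITICAL_READ_WRK passes the result back by pointer:
//   void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
//                                kmp_cmplx32 *loc);
// whereas on other systems the regular form returns the value directly:
//   kmp_cmplx32 __kmpc_atomic_cmplx4_rd(ident_t *id_ref, int gtid,
//                                       kmp_cmplx32 *loc);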
1988
1989// ------------------------------------------------------------------------
1990//                  TYPE_ID,OP_ID, TYPE,      OP, GOMP_FLAG
1991ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
1992ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
1993                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
1994ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
1995                    KMP_ARCH_X86) // __kmpc_atomic_float4_rd
1996ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
1997                    KMP_ARCH_X86) // __kmpc_atomic_float8_rd
1998
1999// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2000ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2001                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2002ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2003                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2004
2005ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2006                     1) // __kmpc_atomic_float10_rd
2007#if KMP_HAVE_QUAD
2008ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2009                     1) // __kmpc_atomic_float16_rd
2010#endif // KMP_HAVE_QUAD
2011
2012// Fix for CQ220361 on Windows* OS
2013#if (KMP_OS_WINDOWS)
2014ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2015                         1) // __kmpc_atomic_cmplx4_rd
2016#else
2017ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2018                     1) // __kmpc_atomic_cmplx4_rd
2019#endif // (KMP_OS_WINDOWS)
2020ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2021                     1) // __kmpc_atomic_cmplx8_rd
2022ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2023                     1) // __kmpc_atomic_cmplx10_rd
2024#if KMP_HAVE_QUAD
2025ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2026                     1) // __kmpc_atomic_cmplx16_rd
2027#if (KMP_ARCH_X86)
2028ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2029                     1) // __kmpc_atomic_float16_a16_rd
2030ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2031                     1) // __kmpc_atomic_cmplx16_a16_rd
2032#endif // (KMP_ARCH_X86)
2033#endif // KMP_HAVE_QUAD
2034
2035// ------------------------------------------------------------------------
2036// Atomic WRITE routines
2037
2038#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)              \
2039  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2040  OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2041  KMP_XCHG_FIXED##BITS(lhs, rhs);                                              \
2042  }
2043// ------------------------------------------------------------------------
2044#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2045  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2046  OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2047  KMP_XCHG_REAL##BITS(lhs, rhs);                                               \
2048  }
2049
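// Illustrative sketch (not compiled): the write entries simply store rhs into
// *lhs with an atomic exchange and discard the previous value, e.g.
//   void __kmpc_atomic_fixed4_wr(ident_t *id_ref, int gtid,
//                                kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_XCHG_FIXED32(lhs, rhs);  // old value is ignored
//   }
// On 32-bit x86 the 64-bit variants use the compare-and-swap form defined
// further below instead, since a plain 64-bit exchange is not available there.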
2050// ------------------------------------------------------------------------
2051// Operation on *lhs, rhs using "compare_and_store" routine
2052//     TYPE    - operands' type
2053//     BITS    - size in bits, used to distinguish low level calls
2054//     OP      - operator
2055// Note: temp_val introduced in order to force the compiler to read
2056//       *lhs only once (w/o it the compiler reads *lhs twice)
2057#define OP_CMPXCHG_WR(TYPE, BITS, OP)                                          \
2058  {                                                                            \
2059    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2060    TYPE old_value, new_value;                                                 \
2061    temp_val = *lhs;                                                           \
2062    old_value = temp_val;                                                      \
2063    new_value = rhs;                                                           \
2064    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2065        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2066        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2067      KMP_CPU_PAUSE();                                                         \
2068                                                                               \
2069      temp_val = *lhs;                                                         \
2070      old_value = temp_val;                                                    \
2071      new_value = rhs;                                                         \
2072    }                                                                          \
2073  }
2074
2075// -------------------------------------------------------------------------
2076#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2077  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2078  OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2079  OP_CMPXCHG_WR(TYPE, BITS, OP)                                                \
2080  }
2081
2082// ------------------------------------------------------------------------
2083// Routines for Extended types: long double, _Quad, complex flavours (use
2084// critical section)
2085//     TYPE_ID, OP_ID, TYPE - detailed above
2086//     OP      - operator
2087//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2088#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)        \
2089  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2090  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */                        \
2091  OP_CRITICAL(OP, LCK_ID) /* send assignment */                                \
2092  }
2093// -------------------------------------------------------------------------
2094
2095ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2096               KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2097ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2098               KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2099ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2100               KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2101#if (KMP_ARCH_X86)
2102ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2103                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2104#else
2105ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2106               KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2107#endif // (KMP_ARCH_X86)
2108
2109ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2110                     KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2111#if (KMP_ARCH_X86)
2112ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2113                  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2114#else
2115ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2116                     KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2117#endif // (KMP_ARCH_X86)
2118
2119ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2120                   1) // __kmpc_atomic_float10_wr
2121#if KMP_HAVE_QUAD
2122ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2123                   1) // __kmpc_atomic_float16_wr
2124#endif // KMP_HAVE_QUAD
2125ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2126ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2127                   1) // __kmpc_atomic_cmplx8_wr
2128ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2129                   1) // __kmpc_atomic_cmplx10_wr
2130#if KMP_HAVE_QUAD
2131ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2132                   1) // __kmpc_atomic_cmplx16_wr
2133#if (KMP_ARCH_X86)
2134ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2135                   1) // __kmpc_atomic_float16_a16_wr
2136ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2137                   1) // __kmpc_atomic_cmplx16_a16_wr
2138#endif // (KMP_ARCH_X86)
2139#endif // KMP_HAVE_QUAD
2140
2141// ------------------------------------------------------------------------
2142// Atomic CAPTURE routines
2143
2144// Beginning of a definition (provides name, parameters, debug trace)
2145//     TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
2146//     unsigned fixed-point types)
2147//     OP_ID   - operation identifier (add, sub, mul, ...)
2148//     TYPE    - operands' type
2149#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
2150  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
2151                                             TYPE *lhs, TYPE rhs, int flag) {  \
2152    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2153    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2154
2155// -------------------------------------------------------------------------
2156// Operation on *lhs, rhs bound by critical section
2157//     OP     - operator (it's supposed to contain an assignment)
2158//     LCK_ID - lock identifier
2159// Note: don't check gtid as it should always be valid
2160// 1- and 2-byte types: a valid gtid is expected; other types: check it before this macro
2161#define OP_CRITICAL_CPT(OP, LCK_ID)                                            \
2162  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2163                                                                               \
2164  if (flag) {                                                                  \
2165    (*lhs) OP rhs;                                                             \
2166    new_value = (*lhs);                                                        \
2167  } else {                                                                     \
2168    new_value = (*lhs);                                                        \
2169    (*lhs) OP rhs;                                                             \
2170  }                                                                            \
2171                                                                               \
2172  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2173  return new_value;
2174
2175// ------------------------------------------------------------------------
2176#ifdef KMP_GOMP_COMPAT
2177#define OP_GOMP_CRITICAL_CPT(OP, FLAG)                                         \
2178  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2179    KMP_CHECK_GTID;                                                            \
2180    OP_CRITICAL_CPT(OP## =, 0);                                                \
2181  }
2182#else
2183#define OP_GOMP_CRITICAL_CPT(OP, FLAG)
2184#endif /* KMP_GOMP_COMPAT */
2185
2186// ------------------------------------------------------------------------
2187// Operation on *lhs, rhs using "compare_and_store" routine
2188//     TYPE    - operands' type
2189//     BITS    - size in bits, used to distinguish low level calls
2190//     OP      - operator
2191// Note: temp_val introduced in order to force the compiler to read
2192//       *lhs only once (w/o it the compiler reads *lhs twice)
2193#define OP_CMPXCHG_CPT(TYPE, BITS, OP)                                         \
2194  {                                                                            \
2195    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2196    TYPE old_value, new_value;                                                 \
2197    temp_val = *lhs;                                                           \
2198    old_value = temp_val;                                                      \
2199    new_value = old_value OP rhs;                                              \
2200    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2201        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2202        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2203      KMP_CPU_PAUSE();                                                         \
2204                                                                               \
2205      temp_val = *lhs;                                                         \
2206      old_value = temp_val;                                                    \
2207      new_value = old_value OP rhs;                                            \
2208    }                                                                          \
2209    if (flag) {                                                                \
2210      return new_value;                                                        \
2211    } else                                                                     \
2212      return old_value;                                                        \
2213  }
2214
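// A standalone sketch of the same read-modify-CAS loop using std::atomic, for
// illustration only; the runtime itself uses the KMP_COMPARE_AND_STORE_ACQ*
// primitives above, and "cmpxchg_capture" is a hypothetical name.
#if 0
#include <atomic>
template <typename T, typename Op>
T cmpxchg_capture(std::atomic<T> *lhs, T rhs, Op op, int flag) {
  T old_value = lhs->load();
  T new_value = op(old_value, rhs);
  // compare_exchange_weak reloads old_value on failure, so recompute and retry
  while (!lhs->compare_exchange_weak(old_value, new_value))
    new_value = op(old_value, rhs);
  return flag ? new_value : old_value; // flag selects capture-after vs. -before
}
#endif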
2215// -------------------------------------------------------------------------
2216#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
2217  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2218  TYPE new_value;                                                              \
2219  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
2220  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2221  }
2222
2223// -------------------------------------------------------------------------
2224#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2225  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2226  TYPE old_value, new_value;                                                   \
2227  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
2228  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
2229  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                            \
2230  if (flag) {                                                                  \
2231    return old_value OP rhs;                                                   \
2232  } else                                                                       \
2233    return old_value;                                                          \
2234  }
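// A standalone sketch of the fetch-and-add fast path above using std::atomic,
// for illustration only; KMP_TEST_THEN_ADD##BITS returns the old value, so the
// "captured after" result is reconstructed as old_value + rhs. The helper name
// is hypothetical.
#if 0
#include <atomic>
inline int fixed_add_cpt_sketch(std::atomic<int> *lhs, int rhs, int flag) {
  int old_value = lhs->fetch_add(rhs); // atomic update, returns previous value
  return flag ? old_value + rhs : old_value;
}
#endif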
2235// -------------------------------------------------------------------------
2236
2237ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2238                     0) // __kmpc_atomic_fixed4_add_cpt
2239ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2240                     0) // __kmpc_atomic_fixed4_sub_cpt
2241ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2242                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2243ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2244                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2245
2246ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2247                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2248ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2249                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2250ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2251                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2252ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2253                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2254
2255// ------------------------------------------------------------------------
2256// Definition of entries for integer operands
2257//     TYPE_ID - operands type and size (fixed4, float4)
2258//     OP_ID   - operation identifier (add, sub, mul, ...)
2259//     TYPE    - operand type
2260//     BITS    - size in bits, used to distinguish low level calls
2261//     OP      - operator (used in critical section)
2262//               TYPE_ID,OP_ID,  TYPE,   BITS,OP,GOMP_FLAG
2263// ------------------------------------------------------------------------
2264// Routines for ATOMIC integer operands, other operators
2265// ------------------------------------------------------------------------
2266//              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2267ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2268                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2269ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2270                   0) // __kmpc_atomic_fixed1_andb_cpt
2271ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2272                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2273ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2274                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2275ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2276                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2277ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2278                   0) // __kmpc_atomic_fixed1_orb_cpt
2279ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2280                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2281ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2282                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2283ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2284                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2285ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2286                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2287ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2288                   0) // __kmpc_atomic_fixed1_xor_cpt
2289ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2290                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2291ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2292                   0) // __kmpc_atomic_fixed2_andb_cpt
2293ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2294                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2295ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2296                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2297ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2298                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2299ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2300                   0) // __kmpc_atomic_fixed2_orb_cpt
2301ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2302                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2303ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2304                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2305ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2306                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2307ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2308                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2309ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2310                   0) // __kmpc_atomic_fixed2_xor_cpt
2311ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2312                   0) // __kmpc_atomic_fixed4_andb_cpt
2313ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2314                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2315ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2316                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2317ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2318                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2319ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2320                   0) // __kmpc_atomic_fixed4_orb_cpt
2321ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2322                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2323ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2324                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2325ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2326                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2327ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2328                   0) // __kmpc_atomic_fixed4_xor_cpt
2329ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2330                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2331ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2332                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2333ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2334                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2335ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2336                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2337ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2338                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2339ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2340                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2341ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2342                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2343ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2344                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2345ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2346                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2347ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2348                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2349ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2350                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2351ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2352                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2353ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2354                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2355//              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2356
2357// CAPTURE routines for mixed types RHS=float16
2358#if KMP_HAVE_QUAD
2359
2360// Beginning of a definition (provides name, parameters, debug trace)
2361//     TYPE_ID - operand type and size (fixed* for signed, fixed*u for
2362//     unsigned fixed)
2363//     OP_ID   - operation identifier (add, sub, mul, ...)
2364//     TYPE    - operands' type
2365#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)            \
2366  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
2367      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) {             \
2368    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2369    KA_TRACE(100,                                                              \
2370             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
2371              gtid));
2372
2373// -------------------------------------------------------------------------
2374#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
2375                               RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
2376  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2377  TYPE new_value;                                                              \
2378  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
2379  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2380  }
2381
2382// -------------------------------------------------------------------------
2383#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,     \
2384                                LCK_ID, GOMP_FLAG)                             \
2385  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2386  TYPE new_value;                                                              \
2387  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */                    \
2388  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */                        \
2389  }
2390
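// For reference, ATOMIC_BEGIN_CPT_MIX(fixed1, add_cpt, char, fp, _Quad) opens
// a mixed-type entry point whose RHS is a _Quad while the atomic location
// stays 8-bit:
//   char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
//                                        _Quad rhs, int flag) { ... }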
2391ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2392                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2393ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2394                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2395ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2396                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2397ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2398                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2399ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2400                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2401ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2402                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2403ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2404                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2405ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2406                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2407
2408ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2409                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2410ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2411                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2412ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2413                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2414ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2415                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2416ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2417                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2418ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2419                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2420ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2421                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2422ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2423                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2424
2425ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2426                       0) // __kmpc_atomic_fixed4_add_cpt_fp
2427ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2428                       0) // __kmpc_atomic_fixed4u_add_cpt_fp
2429ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2430                       0) // __kmpc_atomic_fixed4_sub_cpt_fp
2431ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2432                       0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2433ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2434                       0) // __kmpc_atomic_fixed4_mul_cpt_fp
2435ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2436                       0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2437ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2438                       0) // __kmpc_atomic_fixed4_div_cpt_fp
2439ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2440                       0) // __kmpc_atomic_fixed4u_div_cpt_fp
2441
2442ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2443                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2444ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2445                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2446ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2447                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2448ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2449                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2450ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2451                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2452ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2453                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2454ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2455                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2456ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2457                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2458
2459ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2460                       KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2461ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2462                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2463ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2464                       KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2465ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2466                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2467
2468ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2469                       KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2470ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2471                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2472ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2473                       KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2474ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2475                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2476
2477ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2478                        1) // __kmpc_atomic_float10_add_cpt_fp
2479ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2480                        1) // __kmpc_atomic_float10_sub_cpt_fp
2481ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2482                        1) // __kmpc_atomic_float10_mul_cpt_fp
2483ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2484                        1) // __kmpc_atomic_float10_div_cpt_fp
2485
2486#endif // KMP_HAVE_QUAD
2487
2488// ------------------------------------------------------------------------
2489// Routines for C/C++ Reduction operators && and ||
2490
2491// -------------------------------------------------------------------------
2492// Operation on *lhs, rhs bound by critical section
2493//     OP     - operator (it's supposed to contain an assignment)
2494//     LCK_ID - lock identifier
2495// Note: don't check gtid as it should always be valid
2496// 1- and 2-byte operands expect a valid parameter; others are checked before this macro
2497#define OP_CRITICAL_L_CPT(OP, LCK_ID)                                          \
2498  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2499                                                                               \
2500  if (flag) {                                                                  \
2501    new_value OP rhs;                                                          \
2502  } else                                                                       \
2503    new_value = (*lhs);                                                        \
2504                                                                               \
2505  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2506
2507// ------------------------------------------------------------------------
2508#ifdef KMP_GOMP_COMPAT
2509#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)                                       \
2510  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2511    KMP_CHECK_GTID;                                                            \
2512    OP_CRITICAL_L_CPT(OP, 0);                                                  \
2513    return new_value;                                                          \
2514  }
2515#else
2516#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2517#endif /* KMP_GOMP_COMPAT */
2518
2519// ------------------------------------------------------------------------
2520// Need separate macros for &&, || because there is no combined assignment
2521#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2522  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2523  TYPE new_value;                                                              \
2524  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG)                                 \
2525  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2526  }
2527
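// For reference, ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&, 0)
// passes "= *lhs &&" as OP to OP_GOMP_CRITICAL_L_CPT, so (when KMP_GOMP_COMPAT
// is enabled) the critical-section statement "new_value OP rhs" expands to
// "new_value = *lhs && rhs"; the logical result is built explicitly because C
// has no "&&=" operator.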
2528ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2529                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2530ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2531                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2532ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2533                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2534ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2535                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2536ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2537                  0) // __kmpc_atomic_fixed4_andl_cpt
2538ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2539                  0) // __kmpc_atomic_fixed4_orl_cpt
2540ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2541                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2542ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2543                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2544
2545// -------------------------------------------------------------------------
2546// Routines for Fortran operators that have no C counterpart:
2547// MAX, MIN, .EQV., .NEQV.
2548// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2549// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2550
2551// -------------------------------------------------------------------------
2552// MIN and MAX need separate macros
2553// OP - comparison operator used to check whether any update is needed
2554#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                       \
2555  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2556                                                                               \
2557  if (*lhs OP rhs) { /* still need actions? */                                 \
2558    old_value = *lhs;                                                          \
2559    *lhs = rhs;                                                                \
2560    if (flag)                                                                  \
2561      new_value = rhs;                                                         \
2562    else                                                                       \
2563      new_value = old_value;                                                   \
2564  } else {                                                                     \
2565    new_value = *lhs;                                                          \
2566  }                                                                            \
2567  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2568  return new_value;
2569
2570// -------------------------------------------------------------------------
2571#ifdef KMP_GOMP_COMPAT
2572#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)                                    \
2573  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2574    KMP_CHECK_GTID;                                                            \
2575    MIN_MAX_CRITSECT_CPT(OP, 0);                                               \
2576  }
2577#else
2578#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2579#endif /* KMP_GOMP_COMPAT */
2580
2581// -------------------------------------------------------------------------
2582#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                    \
2583  {                                                                            \
2584    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2585    /*TYPE old_value; */                                                       \
2586    temp_val = *lhs;                                                           \
2587    old_value = temp_val;                                                      \
2588    while (old_value OP rhs && /* still need actions? */                       \
2589           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2590               (kmp_int##BITS *)lhs,                                           \
2591               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
2592               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
2593      KMP_CPU_PAUSE();                                                         \
2594      temp_val = *lhs;                                                         \
2595      old_value = temp_val;                                                    \
2596    }                                                                          \
2597    if (flag)                                                                  \
2598      return rhs;                                                              \
2599    else                                                                       \
2600      return old_value;                                                        \
2601  }
2602
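// A standalone sketch of the compare-and-swap min/max capture above using
// std::atomic, for illustration only (shown for max, i.e. OP is "<"); the
// helper name is hypothetical.
#if 0
#include <atomic>
inline int max_cpt_sketch(std::atomic<int> *lhs, int rhs, int flag) {
  int old_value = lhs->load();
  // Keep trying while the stored value is still smaller than rhs; on CAS
  // failure compare_exchange_weak refreshes old_value with the current value.
  while (old_value < rhs && !lhs->compare_exchange_weak(old_value, rhs)) {
  }
  if (flag)
    return old_value < rhs ? rhs : old_value; // value of *lhs after the update
  return old_value;                           // value observed before the update
}
#endif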
2603// -------------------------------------------------------------------------
2604// 1-byte, 2-byte operands - use critical section
2605#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
2606  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2607  TYPE new_value, old_value;                                                   \
2608  if (*lhs OP rhs) { /* need actions? */                                       \
2609    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2610    MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                           \
2611  }                                                                            \
2612  return *lhs;                                                                 \
2613  }
2614
2615#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2616  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2617  TYPE new_value, old_value;                                                   \
2618  if (*lhs OP rhs) {                                                           \
2619    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2620    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                        \
2621  }                                                                            \
2622  return *lhs;                                                                 \
2623  }
2624
2625MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2626                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2627MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2628                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2629MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2630                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2631MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2632                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2633MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2634                     0) // __kmpc_atomic_fixed4_max_cpt
2635MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2636                     0) // __kmpc_atomic_fixed4_min_cpt
2637MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2638                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2639MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2640                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2641MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2642                     KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2643MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2644                     KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2645MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2646                     KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2647MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2648                     KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2649#if KMP_HAVE_QUAD
2650MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2651                     1) // __kmpc_atomic_float16_max_cpt
2652MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2653                     1) // __kmpc_atomic_float16_min_cpt
2654#if (KMP_ARCH_X86)
2655MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2656                     1) // __kmpc_atomic_float16_max_a16_cpt
2657MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2658                     1) // __kmpc_atomic_float16_min_a16_cpt
2659#endif // (KMP_ARCH_X86)
2660#endif // KMP_HAVE_QUAD
2661
2662// ------------------------------------------------------------------------
2663#ifdef KMP_GOMP_COMPAT
2664#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                     \
2665  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2666    KMP_CHECK_GTID;                                                            \
2667    OP_CRITICAL_CPT(OP, 0);                                                    \
2668  }
2669#else
2670#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2671#endif /* KMP_GOMP_COMPAT */
2672// ------------------------------------------------------------------------
2673#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
2674  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2675  TYPE new_value;                                                              \
2676  OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */              \
2677  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2678  }
2679
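// For reference: .EQV. (bitwise equivalence) is implemented as x ^ ~rhs, since
// x EQV rhs == ~(x ^ rhs) == x ^ ~rhs. The OP token "^~" therefore turns
// "old_value OP rhs" in OP_CMPXCHG_CPT into "old_value ^ ~rhs", and the GOMP
// critical path is sent "^= ~" as the combined assignment.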
2680// ------------------------------------------------------------------------
2681
2682ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2683                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2684ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2685                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2686ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2687                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2688ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2689                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2690ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2691                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2692ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2693                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2694ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2695                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2696ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2697                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2698
2699// ------------------------------------------------------------------------
2700// Routines for Extended types: long double, _Quad, complex flavours (use
2701// critical section)
2702//     TYPE_ID, OP_ID, TYPE - detailed above
2703//     OP      - operator
2704//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2705#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
2706  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2707  TYPE new_value;                                                              \
2708  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */                    \
2709  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */                        \
2710  }
2711
2712// ------------------------------------------------------------------------
2713// Workaround for cmplx4. Regular routines with return value don't work
2714// on Win_32e. Let's return captured values through the additional parameter.
2715#define OP_CRITICAL_CPT_WRK(OP, LCK_ID)                                        \
2716  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2717                                                                               \
2718  if (flag) {                                                                  \
2719    (*lhs) OP rhs;                                                             \
2720    (*out) = (*lhs);                                                           \
2721  } else {                                                                     \
2722    (*out) = (*lhs);                                                           \
2723    (*lhs) OP rhs;                                                             \
2724  }                                                                            \
2725                                                                               \
2726  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2727  return;
2728// ------------------------------------------------------------------------
2729
2730#ifdef KMP_GOMP_COMPAT
2731#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)                                     \
2732  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2733    KMP_CHECK_GTID;                                                            \
2734    OP_CRITICAL_CPT_WRK(OP## =, 0);                                            \
2735  }
2736#else
2737#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2738#endif /* KMP_GOMP_COMPAT */
2739// ------------------------------------------------------------------------
2740
2741#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                 \
2742  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2743                                         TYPE rhs, TYPE *out, int flag) {      \
2744    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2745    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2746// ------------------------------------------------------------------------
2747
2748#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
2749  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
2750  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG)                                      \
2751  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID)                                          \
2752  }
2753// The end of workaround for cmplx4
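// For reference, ATOMIC_BEGIN_WRK(cmplx4, add_cpt, kmp_cmplx32) opens a void
// entry point that hands the captured value back through "out" instead of a
// return value:
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag) { ... }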
2754
2755/* ------------------------------------------------------------------------- */
2756// routines for long double type
2757ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2758                    1) // __kmpc_atomic_float10_add_cpt
2759ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2760                    1) // __kmpc_atomic_float10_sub_cpt
2761ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2762                    1) // __kmpc_atomic_float10_mul_cpt
2763ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2764                    1) // __kmpc_atomic_float10_div_cpt
2765#if KMP_HAVE_QUAD
2766// routines for _Quad type
2767ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2768                    1) // __kmpc_atomic_float16_add_cpt
2769ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2770                    1) // __kmpc_atomic_float16_sub_cpt
2771ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2772                    1) // __kmpc_atomic_float16_mul_cpt
2773ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2774                    1) // __kmpc_atomic_float16_div_cpt
2775#if (KMP_ARCH_X86)
2776ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2777                    1) // __kmpc_atomic_float16_add_a16_cpt
2778ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2779                    1) // __kmpc_atomic_float16_sub_a16_cpt
2780ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2781                    1) // __kmpc_atomic_float16_mul_a16_cpt
2782ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2783                    1) // __kmpc_atomic_float16_div_a16_cpt
2784#endif // (KMP_ARCH_X86)
2785#endif // KMP_HAVE_QUAD
2786
2787// routines for complex types
2788
2789// cmplx4 routines to return void
2790ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2791                        1) // __kmpc_atomic_cmplx4_add_cpt
2792ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2793                        1) // __kmpc_atomic_cmplx4_sub_cpt
2794ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2795                        1) // __kmpc_atomic_cmplx4_mul_cpt
2796ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2797                        1) // __kmpc_atomic_cmplx4_div_cpt
2798
2799ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2800                    1) // __kmpc_atomic_cmplx8_add_cpt
2801ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2802                    1) // __kmpc_atomic_cmplx8_sub_cpt
2803ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2804                    1) // __kmpc_atomic_cmplx8_mul_cpt
2805ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2806                    1) // __kmpc_atomic_cmplx8_div_cpt
2807ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2808                    1) // __kmpc_atomic_cmplx10_add_cpt
2809ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2810                    1) // __kmpc_atomic_cmplx10_sub_cpt
2811ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2812                    1) // __kmpc_atomic_cmplx10_mul_cpt
2813ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2814                    1) // __kmpc_atomic_cmplx10_div_cpt
2815#if KMP_HAVE_QUAD
2816ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2817                    1) // __kmpc_atomic_cmplx16_add_cpt
2818ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2819                    1) // __kmpc_atomic_cmplx16_sub_cpt
2820ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2821                    1) // __kmpc_atomic_cmplx16_mul_cpt
2822ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2823                    1) // __kmpc_atomic_cmplx16_div_cpt
2824#if (KMP_ARCH_X86)
2825ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2826                    1) // __kmpc_atomic_cmplx16_add_a16_cpt
2827ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2828                    1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2829ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2830                    1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2831ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2832                    1) // __kmpc_atomic_cmplx16_div_a16_cpt
2833#endif // (KMP_ARCH_X86)
2834#endif // KMP_HAVE_QUAD
2835
2836// OpenMP 4.0 capture forms for non-commutative operations: v = x = expr
2837// binop x;  { v = x; x = expr binop x; }  { x = expr binop x; v = x; }
2838// Supported only on IA-32 architecture and Intel(R) 64
2839
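// Illustrative sketch only (not compiled; "loc", "gtid" and the variables are
// hypothetical): a reversed, non-commutative capture such as "x = expr - x"
// maps onto the *_cpt_rev entry points defined below.
#if 0
int x = 10, v, expr = 3;
#pragma omp atomic capture
{ v = x; x = expr - x; } // v = __kmpc_atomic_fixed4_sub_cpt_rev(loc, gtid, &x, expr, 0);
#endif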
2840// -------------------------------------------------------------------------
2841// Operation on *lhs, rhs bound by critical section
2842//     OP     - operator (it's supposed to contain an assignment)
2843//     LCK_ID - lock identifier
2844// Note: don't check gtid as it should always be valid
2845// 1- and 2-byte operands expect a valid parameter; others are checked before this macro
2846#define OP_CRITICAL_CPT_REV(OP, LCK_ID)                                        \
2847  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2848                                                                               \
2849  if (flag) {                                                                  \
2850    /*temp_val = (*lhs);*/                                                     \
2851    (*lhs) = (rhs)OP(*lhs);                                                    \
2852    new_value = (*lhs);                                                        \
2853  } else {                                                                     \
2854    new_value = (*lhs);                                                        \
2855    (*lhs) = (rhs)OP(*lhs);                                                    \
2856  }                                                                            \
2857  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2858  return new_value;
2859
2860// ------------------------------------------------------------------------
2861#ifdef KMP_GOMP_COMPAT
2862#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)                                     \
2863  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2864    KMP_CHECK_GTID;                                                            \
2865    OP_CRITICAL_CPT_REV(OP, 0);                                                \
2866  }
2867#else
2868#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
2869#endif /* KMP_GOMP_COMPAT */
2870
2871// ------------------------------------------------------------------------
2872// Operation on *lhs, rhs using "compare_and_store" routine
2873//     TYPE    - operands' type
2874//     BITS    - size in bits, used to distinguish low level calls
2875//     OP      - operator
2876// Note: temp_val introduced in order to force the compiler to read
2877//       *lhs only once (w/o it the compiler reads *lhs twice)
2878#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                     \
2879  {                                                                            \
2880    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2881    TYPE old_value, new_value;                                                 \
2882    temp_val = *lhs;                                                           \
2883    old_value = temp_val;                                                      \
2884    new_value = rhs OP old_value;                                              \
2885    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2886        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2887        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2888      KMP_CPU_PAUSE();                                                         \
2889                                                                               \
2890      temp_val = *lhs;                                                         \
2891      old_value = temp_val;                                                    \
2892      new_value = rhs OP old_value;                                            \
2893    }                                                                          \
2894    if (flag) {                                                                \
2895      return new_value;                                                        \
2896    } else                                                                     \
2897      return old_value;                                                        \
2898  }
2899
2900// -------------------------------------------------------------------------
2901#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)      \
2902  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2903  TYPE new_value;                                                              \
2904  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
2905  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
2906  }
2907
2908ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2909                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2910ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2911                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2912ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2913                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2914ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2915                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2916ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2917                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2918ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2919                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2920ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2921                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2922ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2923                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2924ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2925                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2926ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2927                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2928ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2929                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2930ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2931                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2932ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2933                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2934ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2935                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2936ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2937                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2938ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2939                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2940ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2941                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2942ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2943                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2944ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2945                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2946ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2947                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2948ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2949                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2950ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2951                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2952ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2953                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2954ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2955                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2956ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2957                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2958ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2959                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
2960ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
2961                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
2962ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
2963                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
2964//              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2965
2966// ------------------------------------------------------------------------
2967// Routines for Extended types: long double, _Quad, complex flavours (use
2968// critical section)
2969//     TYPE_ID, OP_ID, TYPE - detailed above
2970//     OP      - operator
2971//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2972#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
2973  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2974  TYPE new_value;                                                              \
2975  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/                   \
2976  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
2977  OP_CRITICAL_CPT_REV(OP, LCK_ID)                                              \
2978  }
2979
2980/* ------------------------------------------------------------------------- */
2981// routines for long double type
2982ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
2983                        1) // __kmpc_atomic_float10_sub_cpt_rev
2984ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
2985                        1) // __kmpc_atomic_float10_div_cpt_rev
2986#if KMP_HAVE_QUAD
2987// routines for _Quad type
2988ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
2989                        1) // __kmpc_atomic_float16_sub_cpt_rev
2990ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
2991                        1) // __kmpc_atomic_float16_div_cpt_rev
2992#if (KMP_ARCH_X86)
2993ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
2994                        1) // __kmpc_atomic_float16_sub_a16_cpt_rev
2995ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
2996                        1) // __kmpc_atomic_float16_div_a16_cpt_rev
2997#endif // (KMP_ARCH_X86)
2998#endif // KMP_HAVE_QUAD
2999
3000// routines for complex types
3001
3002// ------------------------------------------------------------------------
3003// Workaround for cmplx4. Regular routines with return value don't work
3004// on Win_32e. Let's return captured values through the additional parameter.
3005#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                    \
3006  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3007                                                                               \
3008  if (flag) {                                                                  \
3009    (*lhs) = (rhs)OP(*lhs);                                                    \
3010    (*out) = (*lhs);                                                           \
3011  } else {                                                                     \
3012    (*out) = (*lhs);                                                           \
3013    (*lhs) = (rhs)OP(*lhs);                                                    \
3014  }                                                                            \
3015                                                                               \
3016  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3017  return;
3018// ------------------------------------------------------------------------
3019
3020#ifdef KMP_GOMP_COMPAT
3021#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)                                 \
3022  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3023    KMP_CHECK_GTID;                                                            \
3024    OP_CRITICAL_CPT_REV_WRK(OP, 0);                                            \
3025  }
3026#else
3027#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3028#endif /* KMP_GOMP_COMPAT */
3029// ------------------------------------------------------------------------
3030
3031#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID,          \
3032                                    GOMP_FLAG)                                 \
3033  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
3034  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG)                                  \
3035  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                          \
3036  }
3037// The end of workaround for cmplx4
3038
3039// !!! TODO: check if we need to return void for cmplx4 routines
3040// cmplx4 routines to return void
3041ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3042                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3043ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3044                            1) // __kmpc_atomic_cmplx4_div_cpt_rev
3045
3046ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3047                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3048ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3049                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
3050ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3051                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3052ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3053                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
3054#if KMP_HAVE_QUAD
3055ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3056                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3057ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3058                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
3059#if (KMP_ARCH_X86)
3060ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3061                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3062ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3063                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3064#endif // (KMP_ARCH_X86)
3065#endif // KMP_HAVE_QUAD
3066
3067// Capture reverse for mixed type: RHS=float16
3068#if KMP_HAVE_QUAD
3069
3070// Beginning of a definition (provides name, parameters, debug trace)
3071//     TYPE_ID - operands' type and size (fixed*, fixed*u for signed,
3072//     unsigned fixed)
3073//     OP_ID   - operation identifier (add, sub, mul, ...)
3074//     TYPE    - operands' type
3075// -------------------------------------------------------------------------
3076#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
3077                                   RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
3078  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3079  TYPE new_value;                                                              \
3080  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
3081  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
3082  }
3083
3084// -------------------------------------------------------------------------
3085#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3086                                    LCK_ID, GOMP_FLAG)                         \
3087  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3088  TYPE new_value;                                                              \
3089  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */                \
3090  OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */                        \
3091  }
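// Illustrative sketch (not compiled): the instantiations below generate
// capture-reverse entry points whose right-hand side is a _Quad, assuming
// ATOMIC_BEGIN_CPT_MIX (defined earlier in this file) emits a prototype of
// the form
//
//   TYPE __kmpc_atomic_<TYPE_ID>_<OP_ID>_<RTYPE_ID>(
//       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag);
//
// e.g. for the first instantiation below:
//
//   char __kmpc_atomic_fixed1_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
//                                            char *lhs, _Quad rhs, int flag);
//
// The reversed operation computes "new_value = rhs OP old_value" (operands
// swapped relative to the plain capture form) and returns the old or new
// value depending on `flag`.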
3092
3093ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3094                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3095ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3096                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3097ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3098                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3099ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3100                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3101
3102ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3103                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3104ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3105                           1,
3106                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3107ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3108                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3109ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3110                           1,
3111                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3112
3113ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3114                           3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3115ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3116                           4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3117ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3118                           3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3119ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3120                           4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3121
3122ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3123                           7,
3124                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3125ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3126                           8i, 7,
3127                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3128ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3129                           7,
3130                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3131ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3132                           8i, 7,
3133                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3134
3135ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3136                           4r, 3,
3137                           KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3138ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3139                           4r, 3,
3140                           KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3141
3142ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3143                           8r, 7,
3144                           KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3145ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3146                           8r, 7,
3147                           KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3148
3149ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3150                            10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3151ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3152                            10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3153
3154#endif // KMP_HAVE_QUAD
3155
3156//   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3157
3158#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                        \
3159  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3160                                     TYPE rhs) {                               \
3161    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3162    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3163
3164#define CRITICAL_SWP(LCK_ID)                                                   \
3165  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3166                                                                               \
3167  old_value = (*lhs);                                                          \
3168  (*lhs) = rhs;                                                                \
3169                                                                               \
3170  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3171  return old_value;
3172
3173// ------------------------------------------------------------------------
3174#ifdef KMP_GOMP_COMPAT
3175#define GOMP_CRITICAL_SWP(FLAG)                                                \
3176  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3177    KMP_CHECK_GTID;                                                            \
3178    CRITICAL_SWP(0);                                                           \
3179  }
3180#else
3181#define GOMP_CRITICAL_SWP(FLAG)
3182#endif /* KMP_GOMP_COMPAT */
3183
3184#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                        \
3185  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3186  TYPE old_value;                                                              \
3187  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3188  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs);                                  \
3189  return old_value;                                                            \
3190  }
3191// ------------------------------------------------------------------------
3192#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                  \
3193  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3194  TYPE old_value;                                                              \
3195  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3196  old_value = KMP_XCHG_REAL##BITS(lhs, rhs);                                   \
3197  return old_value;                                                            \
3198  }
3199
3200// ------------------------------------------------------------------------
3201#define CMPXCHG_SWP(TYPE, BITS)                                                \
3202  {                                                                            \
3203    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
3204    TYPE old_value, new_value;                                                 \
3205    temp_val = *lhs;                                                           \
3206    old_value = temp_val;                                                      \
3207    new_value = rhs;                                                           \
3208    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
3209        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
3210        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
3211      KMP_CPU_PAUSE();                                                         \
3212                                                                               \
3213      temp_val = *lhs;                                                         \
3214      old_value = temp_val;                                                    \
3215      new_value = rhs;                                                         \
3216    }                                                                          \
3217    return old_value;                                                          \
3218  }
3219
3220// -------------------------------------------------------------------------
3221#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                     \
3222  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3223  TYPE old_value;                                                              \
3224  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3225  CMPXCHG_SWP(TYPE, BITS)                                                      \
3226  }
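// Illustrative sketch (not compiled): a compiler that chooses not to inline
// the exchange could lower the OpenMP 4.0 capture-write (swap) form
//
//   #pragma omp atomic capture
//   { v = x; x = expr; }
//
// for a 4-byte integer x to a call on the entry point instantiated just
// below by ATOMIC_XCHG_SWP(fixed4, ...):
//
//   v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, expr);
//
// where `loc` and `gtid` stand for the source-location descriptor and the
// global thread id the compiler already has at hand (names illustrative).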
3227
3228ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3229ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3230ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3231
3232ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3233                      KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3234
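// Note (assumption about intent): IA-32 has no 8-byte exchange instruction,
// so on KMP_ARCH_X86 the 8-byte swaps take the compare-and-store loop
// (CMPXCHG_SWP, typically cmpxchg8b-based) rather than KMP_XCHG_FIXED64 /
// KMP_XCHG_REAL64.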
3235#if (KMP_ARCH_X86)
3236ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3237                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3238ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3239                   KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3240#else
3241ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3242ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3243                      KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3244#endif // (KMP_ARCH_X86)
3245
3246// ------------------------------------------------------------------------
3247// Routines for Extended types: long double, _Quad, complex flavours (use
3248// critical section)
3249#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)                  \
3250  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3251  TYPE old_value;                                                              \
3252  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3253  CRITICAL_SWP(LCK_ID)                                                         \
3254  }
3255
3256// ------------------------------------------------------------------------
3257// !!! TODO: check if we need to return void for cmplx4 routines
3258// Workaround for cmplx4. Regular routines with return value don't work
3259// on Win_32e. Let's return captured values through the additional parameter.
3260
3261#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                    \
3262  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3263                                     TYPE rhs, TYPE *out) {                    \
3264    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3265    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3266
3267#define CRITICAL_SWP_WRK(LCK_ID)                                               \
3268  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3269                                                                               \
3270  tmp = (*lhs);                                                                \
3271  (*lhs) = (rhs);                                                              \
3272  (*out) = tmp;                                                                \
3273  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3274  return;
3275// ------------------------------------------------------------------------
3276
3277#ifdef KMP_GOMP_COMPAT
3278#define GOMP_CRITICAL_SWP_WRK(FLAG)                                            \
3279  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3280    KMP_CHECK_GTID;                                                            \
3281    CRITICAL_SWP_WRK(0);                                                       \
3282  }
3283#else
3284#define GOMP_CRITICAL_SWP_WRK(FLAG)
3285#endif /* KMP_GOMP_COMPAT */
3286// ------------------------------------------------------------------------
3287
3288#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)              \
3289  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                          \
3290  TYPE tmp;                                                                    \
3291  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                             \
3292  CRITICAL_SWP_WRK(LCK_ID)                                                     \
3293  }
3294// The end of workaround for cmplx4
3295
3296ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3297#if KMP_HAVE_QUAD
3298ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3299#endif // KMP_HAVE_QUAD
3300// cmplx4 routine to return void
3301ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
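// Per ATOMIC_BEGIN_SWP_WRK above, the cmplx4 routine returns the captured
// value through the extra parameter rather than as a return value:
//
//   void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
//                                 kmp_cmplx32 rhs, kmp_cmplx32 *out);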
3302
3303// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32,  8c,   1 )           //
3304// __kmpc_atomic_cmplx4_swp
3305
3306ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3307ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3308#if KMP_HAVE_QUAD
3309ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3310#if (KMP_ARCH_X86)
3311ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3312                    1) // __kmpc_atomic_float16_a16_swp
3313ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3314                    1) // __kmpc_atomic_cmplx16_a16_swp
3315#endif // (KMP_ARCH_X86)
3316#endif // KMP_HAVE_QUAD
3317
3318// End of OpenMP 4.0 Capture
3319
3320#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3321
3322#undef OP_CRITICAL
3323
3324/* ------------------------------------------------------------------------ */
3325/* Generic atomic routines                                                  */
3326
3327void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3328                     void (*f)(void *, void *, void *)) {
3329  KMP_DEBUG_ASSERT(__kmp_init_serial);
3330
3331  if (
3332#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3333      FALSE /* must use lock */
3334#else
3335      TRUE
3336#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3337      ) {
3338    kmp_int8 old_value, new_value;
3339
3340    old_value = *(kmp_int8 *)lhs;
3341    (*f)(&new_value, &old_value, rhs);
3342
3343    /* TODO: Should this be acquire or release? */
3344    while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3345                                       *(kmp_int8 *)&new_value)) {
3346      KMP_CPU_PAUSE();
3347
3348      old_value = *(kmp_int8 *)lhs;
3349      (*f)(&new_value, &old_value, rhs);
3350    }
3351
3352    return;
3353  } else {
3354// All 1-byte data is of integer data type.
3355
3356#ifdef KMP_GOMP_COMPAT
3357    if (__kmp_atomic_mode == 2) {
3358      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3359    } else
3360#endif /* KMP_GOMP_COMPAT */
3361      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3362
3363    (*f)(lhs, lhs, rhs);
3364
3365#ifdef KMP_GOMP_COMPAT
3366    if (__kmp_atomic_mode == 2) {
3367      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3368    } else
3369#endif /* KMP_GOMP_COMPAT */
3370      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3371  }
3372}
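// Illustrative sketch (not compiled): how a compiler might drive the generic
// 1-byte entry point above.  The callback must write "*lhs_op OP *rhs_op"
// into its first argument; the callback and surrounding code here are
// hypothetical.
//
//   static void byte_and(void *out, void *lhs_op, void *rhs_op) {
//     *(unsigned char *)out =
//         *(unsigned char *)lhs_op & *(unsigned char *)rhs_op;
//   }
//
//   // for:  #pragma omp atomic
//   //       flags &= mask;
//   __kmpc_atomic_1(&loc, gtid, &flags, &mask, byte_and);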
3373
3374void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3375                     void (*f)(void *, void *, void *)) {
3376  if (
3377#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3378      FALSE /* must use lock */
3379#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3380      TRUE /* no alignment problems */
3381#else
3382      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3383#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3384      ) {
3385    kmp_int16 old_value, new_value;
3386
3387    old_value = *(kmp_int16 *)lhs;
3388    (*f)(&new_value, &old_value, rhs);
3389
3390    /* TODO: Should this be acquire or release? */
3391    while (!KMP_COMPARE_AND_STORE_ACQ16(
3392        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3393      KMP_CPU_PAUSE();
3394
3395      old_value = *(kmp_int16 *)lhs;
3396      (*f)(&new_value, &old_value, rhs);
3397    }
3398
3399    return;
3400  } else {
3401// All 2-byte data is of integer data type.
3402
3403#ifdef KMP_GOMP_COMPAT
3404    if (__kmp_atomic_mode == 2) {
3405      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3406    } else
3407#endif /* KMP_GOMP_COMPAT */
3408      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3409
3410    (*f)(lhs, lhs, rhs);
3411
3412#ifdef KMP_GOMP_COMPAT
3413    if (__kmp_atomic_mode == 2) {
3414      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3415    } else
3416#endif /* KMP_GOMP_COMPAT */
3417      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3418  }
3419}
3420
3421void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3422                     void (*f)(void *, void *, void *)) {
3423  KMP_DEBUG_ASSERT(__kmp_init_serial);
3424
3425  if (
3426// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3427// Gomp compatibility is broken if this routine is called for floats.
3428#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3429      TRUE /* no alignment problems */
3430#else
3431      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3432#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3433      ) {
3434    kmp_int32 old_value, new_value;
3435
3436    old_value = *(kmp_int32 *)lhs;
3437    (*f)(&new_value, &old_value, rhs);
3438
3439    /* TODO: Should this be acquire or release? */
3440    while (!KMP_COMPARE_AND_STORE_ACQ32(
3441        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3442      KMP_CPU_PAUSE();
3443
3444      old_value = *(kmp_int32 *)lhs;
3445      (*f)(&new_value, &old_value, rhs);
3446    }
3447
3448    return;
3449  } else {
3450// Use __kmp_atomic_lock_4i for all 4-byte data,
3451// even if it isn't of integer data type.
3452
3453#ifdef KMP_GOMP_COMPAT
3454    if (__kmp_atomic_mode == 2) {
3455      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3456    } else
3457#endif /* KMP_GOMP_COMPAT */
3458      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3459
3460    (*f)(lhs, lhs, rhs);
3461
3462#ifdef KMP_GOMP_COMPAT
3463    if (__kmp_atomic_mode == 2) {
3464      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3465    } else
3466#endif /* KMP_GOMP_COMPAT */
3467      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3468  }
3469}
3470
3471void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3472                     void (*f)(void *, void *, void *)) {
3473  KMP_DEBUG_ASSERT(__kmp_init_serial);
3474  if (
3475
3476#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3477      FALSE /* must use lock */
3478#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3479      TRUE /* no alignment problems */
3480#else
3481      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3482#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3483      ) {
3484    kmp_int64 old_value, new_value;
3485
3486    old_value = *(kmp_int64 *)lhs;
3487    (*f)(&new_value, &old_value, rhs);
3488    /* TODO: Should this be acquire or release? */
3489    while (!KMP_COMPARE_AND_STORE_ACQ64(
3490        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3491      KMP_CPU_PAUSE();
3492
3493      old_value = *(kmp_int64 *)lhs;
3494      (*f)(&new_value, &old_value, rhs);
3495    }
3496
3497    return;
3498  } else {
3499// Use __kmp_atomic_lock_8i for all 8-byte data,
3500// even if it isn't of integer data type.
3501
3502#ifdef KMP_GOMP_COMPAT
3503    if (__kmp_atomic_mode == 2) {
3504      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3505    } else
3506#endif /* KMP_GOMP_COMPAT */
3507      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3508
3509    (*f)(lhs, lhs, rhs);
3510
3511#ifdef KMP_GOMP_COMPAT
3512    if (__kmp_atomic_mode == 2) {
3513      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3514    } else
3515#endif /* KMP_GOMP_COMPAT */
3516      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3517  }
3518}
3519
3520void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3521                      void (*f)(void *, void *, void *)) {
3522  KMP_DEBUG_ASSERT(__kmp_init_serial);
3523
3524#ifdef KMP_GOMP_COMPAT
3525  if (__kmp_atomic_mode == 2) {
3526    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3527  } else
3528#endif /* KMP_GOMP_COMPAT */
3529    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3530
3531  (*f)(lhs, lhs, rhs);
3532
3533#ifdef KMP_GOMP_COMPAT
3534  if (__kmp_atomic_mode == 2) {
3535    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3536  } else
3537#endif /* KMP_GOMP_COMPAT */
3538    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3539}
3540
3541void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3542                      void (*f)(void *, void *, void *)) {
3543  KMP_DEBUG_ASSERT(__kmp_init_serial);
3544
3545#ifdef KMP_GOMP_COMPAT
3546  if (__kmp_atomic_mode == 2) {
3547    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3548  } else
3549#endif /* KMP_GOMP_COMPAT */
3550    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3551
3552  (*f)(lhs, lhs, rhs);
3553
3554#ifdef KMP_GOMP_COMPAT
3555  if (__kmp_atomic_mode == 2) {
3556    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3557  } else
3558#endif /* KMP_GOMP_COMPAT */
3559    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3560}
3561
3562void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3563                      void (*f)(void *, void *, void *)) {
3564  KMP_DEBUG_ASSERT(__kmp_init_serial);
3565
3566#ifdef KMP_GOMP_COMPAT
3567  if (__kmp_atomic_mode == 2) {
3568    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3569  } else
3570#endif /* KMP_GOMP_COMPAT */
3571    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3572
3573  (*f)(lhs, lhs, rhs);
3574
3575#ifdef KMP_GOMP_COMPAT
3576  if (__kmp_atomic_mode == 2) {
3577    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3578  } else
3579#endif /* KMP_GOMP_COMPAT */
3580    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3581}
3582
3583void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3584                      void (*f)(void *, void *, void *)) {
3585  KMP_DEBUG_ASSERT(__kmp_init_serial);
3586
3587#ifdef KMP_GOMP_COMPAT
3588  if (__kmp_atomic_mode == 2) {
3589    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3590  } else
3591#endif /* KMP_GOMP_COMPAT */
3592    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3593
3594  (*f)(lhs, lhs, rhs);
3595
3596#ifdef KMP_GOMP_COMPAT
3597  if (__kmp_atomic_mode == 2) {
3598    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3599  } else
3600#endif /* KMP_GOMP_COMPAT */
3601    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3602}
3603
3604// AC: same two routines as GOMP_atomic_start/end, but will be called by our
3605// compiler; duplicated so as not to use third-party names in pure Intel code
3606// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3607void __kmpc_atomic_start(void) {
3608  int gtid = __kmp_entry_gtid();
3609  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3610  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3611}
3612
3613void __kmpc_atomic_end(void) {
3614  int gtid = __kmp_get_gtid();
3615  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3616  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3617}
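// Illustrative sketch (not compiled): for an atomic update the compiler
// cannot map onto one of the typed entry points above, it may bracket the
// update with the global atomic lock, mirroring GOMP_atomic_start/end:
//
//   __kmpc_atomic_start();
//   x = hypothetical_update(x); // plain, non-atomic update of x
//   __kmpc_atomic_end();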
3618
3619/*!
3620@}
3621*/
3622
3623// end of file
3624