kmp_atomic.cpp revision 360784
1/* 2 * kmp_atomic.cpp -- ATOMIC implementation routines 3 */ 4 5//===----------------------------------------------------------------------===// 6// 7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8// See https://llvm.org/LICENSE.txt for license information. 9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10// 11//===----------------------------------------------------------------------===// 12 13#include "kmp_atomic.h" 14#include "kmp.h" // TRUE, asm routines prototypes 15 16typedef unsigned char uchar; 17typedef unsigned short ushort; 18 19/*! 20@defgroup ATOMIC_OPS Atomic Operations 21These functions are used for implementing the many different varieties of atomic 22operations. 23 24The compiler is at liberty to inline atomic operations that are naturally 25supported by the target architecture. For instance on IA-32 architecture an 26atomic like this can be inlined 27@code 28static int s = 0; 29#pragma omp atomic 30 s++; 31@endcode 32using the single instruction: `lock; incl s` 33 34However the runtime does provide entrypoints for these operations to support 35compilers that choose not to inline them. (For instance, 36`__kmpc_atomic_fixed4_add` could be used to perform the increment above.) 37 38The names of the functions are encoded by using the data type name and the 39operation name, as in these tables. 
Data Type  | Data type encoding
-----------|---------------
int8_t     | `fixed1`
uint8_t    | `fixed1u`
int16_t    | `fixed2`
uint16_t   | `fixed2u`
int32_t    | `fixed4`
uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
float      | `float4`
double     | `float8`
float 10 (8087 eighty bit float) | `float10`
complex<float>   | `cmplx4`
complex<double>  | `cmplx8`
complex<float10> | `cmplx10`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^ | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv. | eqv
.neqv. | neqv

<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.

Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`)
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand

`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the capture, or that after. They take an additional
argument to determine which result is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs, int flag );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
@param flag one if the result is to be captured *after* the operation, zero if
captured *before*.
111 112The one set of exceptions to this is the `complex<float>` type where the value 113is not returned, rather an extra argument pointer is passed. 114 115They look like 116@code 117void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * 118lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); 119@endcode 120 121Read and Write Operations 122========================= 123The OpenMP<sup>*</sup> standard now supports atomic operations that simply 124ensure that the value is read or written atomically, with no modification 125performed. In many cases on IA-32 architecture these operations can be inlined 126since the architecture guarantees that no tearing occurs on aligned objects 127accessed with a single memory operation of up to 64 bits in size. 128 129The general form of the read operations is 130@code 131TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc ); 132@endcode 133 134For the write operations the form is 135@code 136void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs 137); 138@endcode 139 140Full list of functions 141====================== 142This leads to the generation of 376 atomic functions, as follows. 143 144Functions for integers 145--------------------- 146There are versions here for integers of size 1,2,4 and 8 bytes both signed and 147unsigned (where that matters). 
148@code 149 __kmpc_atomic_fixed1_add 150 __kmpc_atomic_fixed1_add_cpt 151 __kmpc_atomic_fixed1_add_fp 152 __kmpc_atomic_fixed1_andb 153 __kmpc_atomic_fixed1_andb_cpt 154 __kmpc_atomic_fixed1_andl 155 __kmpc_atomic_fixed1_andl_cpt 156 __kmpc_atomic_fixed1_div 157 __kmpc_atomic_fixed1_div_cpt 158 __kmpc_atomic_fixed1_div_cpt_rev 159 __kmpc_atomic_fixed1_div_float8 160 __kmpc_atomic_fixed1_div_fp 161 __kmpc_atomic_fixed1_div_rev 162 __kmpc_atomic_fixed1_eqv 163 __kmpc_atomic_fixed1_eqv_cpt 164 __kmpc_atomic_fixed1_max 165 __kmpc_atomic_fixed1_max_cpt 166 __kmpc_atomic_fixed1_min 167 __kmpc_atomic_fixed1_min_cpt 168 __kmpc_atomic_fixed1_mul 169 __kmpc_atomic_fixed1_mul_cpt 170 __kmpc_atomic_fixed1_mul_float8 171 __kmpc_atomic_fixed1_mul_fp 172 __kmpc_atomic_fixed1_neqv 173 __kmpc_atomic_fixed1_neqv_cpt 174 __kmpc_atomic_fixed1_orb 175 __kmpc_atomic_fixed1_orb_cpt 176 __kmpc_atomic_fixed1_orl 177 __kmpc_atomic_fixed1_orl_cpt 178 __kmpc_atomic_fixed1_rd 179 __kmpc_atomic_fixed1_shl 180 __kmpc_atomic_fixed1_shl_cpt 181 __kmpc_atomic_fixed1_shl_cpt_rev 182 __kmpc_atomic_fixed1_shl_rev 183 __kmpc_atomic_fixed1_shr 184 __kmpc_atomic_fixed1_shr_cpt 185 __kmpc_atomic_fixed1_shr_cpt_rev 186 __kmpc_atomic_fixed1_shr_rev 187 __kmpc_atomic_fixed1_sub 188 __kmpc_atomic_fixed1_sub_cpt 189 __kmpc_atomic_fixed1_sub_cpt_rev 190 __kmpc_atomic_fixed1_sub_fp 191 __kmpc_atomic_fixed1_sub_rev 192 __kmpc_atomic_fixed1_swp 193 __kmpc_atomic_fixed1_wr 194 __kmpc_atomic_fixed1_xor 195 __kmpc_atomic_fixed1_xor_cpt 196 __kmpc_atomic_fixed1u_add_fp 197 __kmpc_atomic_fixed1u_sub_fp 198 __kmpc_atomic_fixed1u_mul_fp 199 __kmpc_atomic_fixed1u_div 200 __kmpc_atomic_fixed1u_div_cpt 201 __kmpc_atomic_fixed1u_div_cpt_rev 202 __kmpc_atomic_fixed1u_div_fp 203 __kmpc_atomic_fixed1u_div_rev 204 __kmpc_atomic_fixed1u_shr 205 __kmpc_atomic_fixed1u_shr_cpt 206 __kmpc_atomic_fixed1u_shr_cpt_rev 207 __kmpc_atomic_fixed1u_shr_rev 208 __kmpc_atomic_fixed2_add 209 __kmpc_atomic_fixed2_add_cpt 210 
__kmpc_atomic_fixed2_add_fp 211 __kmpc_atomic_fixed2_andb 212 __kmpc_atomic_fixed2_andb_cpt 213 __kmpc_atomic_fixed2_andl 214 __kmpc_atomic_fixed2_andl_cpt 215 __kmpc_atomic_fixed2_div 216 __kmpc_atomic_fixed2_div_cpt 217 __kmpc_atomic_fixed2_div_cpt_rev 218 __kmpc_atomic_fixed2_div_float8 219 __kmpc_atomic_fixed2_div_fp 220 __kmpc_atomic_fixed2_div_rev 221 __kmpc_atomic_fixed2_eqv 222 __kmpc_atomic_fixed2_eqv_cpt 223 __kmpc_atomic_fixed2_max 224 __kmpc_atomic_fixed2_max_cpt 225 __kmpc_atomic_fixed2_min 226 __kmpc_atomic_fixed2_min_cpt 227 __kmpc_atomic_fixed2_mul 228 __kmpc_atomic_fixed2_mul_cpt 229 __kmpc_atomic_fixed2_mul_float8 230 __kmpc_atomic_fixed2_mul_fp 231 __kmpc_atomic_fixed2_neqv 232 __kmpc_atomic_fixed2_neqv_cpt 233 __kmpc_atomic_fixed2_orb 234 __kmpc_atomic_fixed2_orb_cpt 235 __kmpc_atomic_fixed2_orl 236 __kmpc_atomic_fixed2_orl_cpt 237 __kmpc_atomic_fixed2_rd 238 __kmpc_atomic_fixed2_shl 239 __kmpc_atomic_fixed2_shl_cpt 240 __kmpc_atomic_fixed2_shl_cpt_rev 241 __kmpc_atomic_fixed2_shl_rev 242 __kmpc_atomic_fixed2_shr 243 __kmpc_atomic_fixed2_shr_cpt 244 __kmpc_atomic_fixed2_shr_cpt_rev 245 __kmpc_atomic_fixed2_shr_rev 246 __kmpc_atomic_fixed2_sub 247 __kmpc_atomic_fixed2_sub_cpt 248 __kmpc_atomic_fixed2_sub_cpt_rev 249 __kmpc_atomic_fixed2_sub_fp 250 __kmpc_atomic_fixed2_sub_rev 251 __kmpc_atomic_fixed2_swp 252 __kmpc_atomic_fixed2_wr 253 __kmpc_atomic_fixed2_xor 254 __kmpc_atomic_fixed2_xor_cpt 255 __kmpc_atomic_fixed2u_add_fp 256 __kmpc_atomic_fixed2u_sub_fp 257 __kmpc_atomic_fixed2u_mul_fp 258 __kmpc_atomic_fixed2u_div 259 __kmpc_atomic_fixed2u_div_cpt 260 __kmpc_atomic_fixed2u_div_cpt_rev 261 __kmpc_atomic_fixed2u_div_fp 262 __kmpc_atomic_fixed2u_div_rev 263 __kmpc_atomic_fixed2u_shr 264 __kmpc_atomic_fixed2u_shr_cpt 265 __kmpc_atomic_fixed2u_shr_cpt_rev 266 __kmpc_atomic_fixed2u_shr_rev 267 __kmpc_atomic_fixed4_add 268 __kmpc_atomic_fixed4_add_cpt 269 __kmpc_atomic_fixed4_add_fp 270 __kmpc_atomic_fixed4_andb 271 __kmpc_atomic_fixed4_andb_cpt 
272 __kmpc_atomic_fixed4_andl 273 __kmpc_atomic_fixed4_andl_cpt 274 __kmpc_atomic_fixed4_div 275 __kmpc_atomic_fixed4_div_cpt 276 __kmpc_atomic_fixed4_div_cpt_rev 277 __kmpc_atomic_fixed4_div_float8 278 __kmpc_atomic_fixed4_div_fp 279 __kmpc_atomic_fixed4_div_rev 280 __kmpc_atomic_fixed4_eqv 281 __kmpc_atomic_fixed4_eqv_cpt 282 __kmpc_atomic_fixed4_max 283 __kmpc_atomic_fixed4_max_cpt 284 __kmpc_atomic_fixed4_min 285 __kmpc_atomic_fixed4_min_cpt 286 __kmpc_atomic_fixed4_mul 287 __kmpc_atomic_fixed4_mul_cpt 288 __kmpc_atomic_fixed4_mul_float8 289 __kmpc_atomic_fixed4_mul_fp 290 __kmpc_atomic_fixed4_neqv 291 __kmpc_atomic_fixed4_neqv_cpt 292 __kmpc_atomic_fixed4_orb 293 __kmpc_atomic_fixed4_orb_cpt 294 __kmpc_atomic_fixed4_orl 295 __kmpc_atomic_fixed4_orl_cpt 296 __kmpc_atomic_fixed4_rd 297 __kmpc_atomic_fixed4_shl 298 __kmpc_atomic_fixed4_shl_cpt 299 __kmpc_atomic_fixed4_shl_cpt_rev 300 __kmpc_atomic_fixed4_shl_rev 301 __kmpc_atomic_fixed4_shr 302 __kmpc_atomic_fixed4_shr_cpt 303 __kmpc_atomic_fixed4_shr_cpt_rev 304 __kmpc_atomic_fixed4_shr_rev 305 __kmpc_atomic_fixed4_sub 306 __kmpc_atomic_fixed4_sub_cpt 307 __kmpc_atomic_fixed4_sub_cpt_rev 308 __kmpc_atomic_fixed4_sub_fp 309 __kmpc_atomic_fixed4_sub_rev 310 __kmpc_atomic_fixed4_swp 311 __kmpc_atomic_fixed4_wr 312 __kmpc_atomic_fixed4_xor 313 __kmpc_atomic_fixed4_xor_cpt 314 __kmpc_atomic_fixed4u_add_fp 315 __kmpc_atomic_fixed4u_sub_fp 316 __kmpc_atomic_fixed4u_mul_fp 317 __kmpc_atomic_fixed4u_div 318 __kmpc_atomic_fixed4u_div_cpt 319 __kmpc_atomic_fixed4u_div_cpt_rev 320 __kmpc_atomic_fixed4u_div_fp 321 __kmpc_atomic_fixed4u_div_rev 322 __kmpc_atomic_fixed4u_shr 323 __kmpc_atomic_fixed4u_shr_cpt 324 __kmpc_atomic_fixed4u_shr_cpt_rev 325 __kmpc_atomic_fixed4u_shr_rev 326 __kmpc_atomic_fixed8_add 327 __kmpc_atomic_fixed8_add_cpt 328 __kmpc_atomic_fixed8_add_fp 329 __kmpc_atomic_fixed8_andb 330 __kmpc_atomic_fixed8_andb_cpt 331 __kmpc_atomic_fixed8_andl 332 __kmpc_atomic_fixed8_andl_cpt 333 __kmpc_atomic_fixed8_div 
334 __kmpc_atomic_fixed8_div_cpt 335 __kmpc_atomic_fixed8_div_cpt_rev 336 __kmpc_atomic_fixed8_div_float8 337 __kmpc_atomic_fixed8_div_fp 338 __kmpc_atomic_fixed8_div_rev 339 __kmpc_atomic_fixed8_eqv 340 __kmpc_atomic_fixed8_eqv_cpt 341 __kmpc_atomic_fixed8_max 342 __kmpc_atomic_fixed8_max_cpt 343 __kmpc_atomic_fixed8_min 344 __kmpc_atomic_fixed8_min_cpt 345 __kmpc_atomic_fixed8_mul 346 __kmpc_atomic_fixed8_mul_cpt 347 __kmpc_atomic_fixed8_mul_float8 348 __kmpc_atomic_fixed8_mul_fp 349 __kmpc_atomic_fixed8_neqv 350 __kmpc_atomic_fixed8_neqv_cpt 351 __kmpc_atomic_fixed8_orb 352 __kmpc_atomic_fixed8_orb_cpt 353 __kmpc_atomic_fixed8_orl 354 __kmpc_atomic_fixed8_orl_cpt 355 __kmpc_atomic_fixed8_rd 356 __kmpc_atomic_fixed8_shl 357 __kmpc_atomic_fixed8_shl_cpt 358 __kmpc_atomic_fixed8_shl_cpt_rev 359 __kmpc_atomic_fixed8_shl_rev 360 __kmpc_atomic_fixed8_shr 361 __kmpc_atomic_fixed8_shr_cpt 362 __kmpc_atomic_fixed8_shr_cpt_rev 363 __kmpc_atomic_fixed8_shr_rev 364 __kmpc_atomic_fixed8_sub 365 __kmpc_atomic_fixed8_sub_cpt 366 __kmpc_atomic_fixed8_sub_cpt_rev 367 __kmpc_atomic_fixed8_sub_fp 368 __kmpc_atomic_fixed8_sub_rev 369 __kmpc_atomic_fixed8_swp 370 __kmpc_atomic_fixed8_wr 371 __kmpc_atomic_fixed8_xor 372 __kmpc_atomic_fixed8_xor_cpt 373 __kmpc_atomic_fixed8u_add_fp 374 __kmpc_atomic_fixed8u_sub_fp 375 __kmpc_atomic_fixed8u_mul_fp 376 __kmpc_atomic_fixed8u_div 377 __kmpc_atomic_fixed8u_div_cpt 378 __kmpc_atomic_fixed8u_div_cpt_rev 379 __kmpc_atomic_fixed8u_div_fp 380 __kmpc_atomic_fixed8u_div_rev 381 __kmpc_atomic_fixed8u_shr 382 __kmpc_atomic_fixed8u_shr_cpt 383 __kmpc_atomic_fixed8u_shr_cpt_rev 384 __kmpc_atomic_fixed8u_shr_rev 385@endcode 386 387Functions for floating point 388---------------------------- 389There are versions here for floating point numbers of size 4, 8, 10 and 16 390bytes. (Ten byte floats are used by X87, but are now rare). 
391@code 392 __kmpc_atomic_float4_add 393 __kmpc_atomic_float4_add_cpt 394 __kmpc_atomic_float4_add_float8 395 __kmpc_atomic_float4_add_fp 396 __kmpc_atomic_float4_div 397 __kmpc_atomic_float4_div_cpt 398 __kmpc_atomic_float4_div_cpt_rev 399 __kmpc_atomic_float4_div_float8 400 __kmpc_atomic_float4_div_fp 401 __kmpc_atomic_float4_div_rev 402 __kmpc_atomic_float4_max 403 __kmpc_atomic_float4_max_cpt 404 __kmpc_atomic_float4_min 405 __kmpc_atomic_float4_min_cpt 406 __kmpc_atomic_float4_mul 407 __kmpc_atomic_float4_mul_cpt 408 __kmpc_atomic_float4_mul_float8 409 __kmpc_atomic_float4_mul_fp 410 __kmpc_atomic_float4_rd 411 __kmpc_atomic_float4_sub 412 __kmpc_atomic_float4_sub_cpt 413 __kmpc_atomic_float4_sub_cpt_rev 414 __kmpc_atomic_float4_sub_float8 415 __kmpc_atomic_float4_sub_fp 416 __kmpc_atomic_float4_sub_rev 417 __kmpc_atomic_float4_swp 418 __kmpc_atomic_float4_wr 419 __kmpc_atomic_float8_add 420 __kmpc_atomic_float8_add_cpt 421 __kmpc_atomic_float8_add_fp 422 __kmpc_atomic_float8_div 423 __kmpc_atomic_float8_div_cpt 424 __kmpc_atomic_float8_div_cpt_rev 425 __kmpc_atomic_float8_div_fp 426 __kmpc_atomic_float8_div_rev 427 __kmpc_atomic_float8_max 428 __kmpc_atomic_float8_max_cpt 429 __kmpc_atomic_float8_min 430 __kmpc_atomic_float8_min_cpt 431 __kmpc_atomic_float8_mul 432 __kmpc_atomic_float8_mul_cpt 433 __kmpc_atomic_float8_mul_fp 434 __kmpc_atomic_float8_rd 435 __kmpc_atomic_float8_sub 436 __kmpc_atomic_float8_sub_cpt 437 __kmpc_atomic_float8_sub_cpt_rev 438 __kmpc_atomic_float8_sub_fp 439 __kmpc_atomic_float8_sub_rev 440 __kmpc_atomic_float8_swp 441 __kmpc_atomic_float8_wr 442 __kmpc_atomic_float10_add 443 __kmpc_atomic_float10_add_cpt 444 __kmpc_atomic_float10_add_fp 445 __kmpc_atomic_float10_div 446 __kmpc_atomic_float10_div_cpt 447 __kmpc_atomic_float10_div_cpt_rev 448 __kmpc_atomic_float10_div_fp 449 __kmpc_atomic_float10_div_rev 450 __kmpc_atomic_float10_mul 451 __kmpc_atomic_float10_mul_cpt 452 __kmpc_atomic_float10_mul_fp 453 __kmpc_atomic_float10_rd 454 
__kmpc_atomic_float10_sub 455 __kmpc_atomic_float10_sub_cpt 456 __kmpc_atomic_float10_sub_cpt_rev 457 __kmpc_atomic_float10_sub_fp 458 __kmpc_atomic_float10_sub_rev 459 __kmpc_atomic_float10_swp 460 __kmpc_atomic_float10_wr 461 __kmpc_atomic_float16_add 462 __kmpc_atomic_float16_add_cpt 463 __kmpc_atomic_float16_div 464 __kmpc_atomic_float16_div_cpt 465 __kmpc_atomic_float16_div_cpt_rev 466 __kmpc_atomic_float16_div_rev 467 __kmpc_atomic_float16_max 468 __kmpc_atomic_float16_max_cpt 469 __kmpc_atomic_float16_min 470 __kmpc_atomic_float16_min_cpt 471 __kmpc_atomic_float16_mul 472 __kmpc_atomic_float16_mul_cpt 473 __kmpc_atomic_float16_rd 474 __kmpc_atomic_float16_sub 475 __kmpc_atomic_float16_sub_cpt 476 __kmpc_atomic_float16_sub_cpt_rev 477 __kmpc_atomic_float16_sub_rev 478 __kmpc_atomic_float16_swp 479 __kmpc_atomic_float16_wr 480@endcode 481 482Functions for Complex types 483--------------------------- 484Functions for complex types whose component floating point variables are of size 4854,8,10 or 16 bytes. The names here are based on the size of the component float, 486*not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an 487operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`. 
488 489@code 490 __kmpc_atomic_cmplx4_add 491 __kmpc_atomic_cmplx4_add_cmplx8 492 __kmpc_atomic_cmplx4_add_cpt 493 __kmpc_atomic_cmplx4_div 494 __kmpc_atomic_cmplx4_div_cmplx8 495 __kmpc_atomic_cmplx4_div_cpt 496 __kmpc_atomic_cmplx4_div_cpt_rev 497 __kmpc_atomic_cmplx4_div_rev 498 __kmpc_atomic_cmplx4_mul 499 __kmpc_atomic_cmplx4_mul_cmplx8 500 __kmpc_atomic_cmplx4_mul_cpt 501 __kmpc_atomic_cmplx4_rd 502 __kmpc_atomic_cmplx4_sub 503 __kmpc_atomic_cmplx4_sub_cmplx8 504 __kmpc_atomic_cmplx4_sub_cpt 505 __kmpc_atomic_cmplx4_sub_cpt_rev 506 __kmpc_atomic_cmplx4_sub_rev 507 __kmpc_atomic_cmplx4_swp 508 __kmpc_atomic_cmplx4_wr 509 __kmpc_atomic_cmplx8_add 510 __kmpc_atomic_cmplx8_add_cpt 511 __kmpc_atomic_cmplx8_div 512 __kmpc_atomic_cmplx8_div_cpt 513 __kmpc_atomic_cmplx8_div_cpt_rev 514 __kmpc_atomic_cmplx8_div_rev 515 __kmpc_atomic_cmplx8_mul 516 __kmpc_atomic_cmplx8_mul_cpt 517 __kmpc_atomic_cmplx8_rd 518 __kmpc_atomic_cmplx8_sub 519 __kmpc_atomic_cmplx8_sub_cpt 520 __kmpc_atomic_cmplx8_sub_cpt_rev 521 __kmpc_atomic_cmplx8_sub_rev 522 __kmpc_atomic_cmplx8_swp 523 __kmpc_atomic_cmplx8_wr 524 __kmpc_atomic_cmplx10_add 525 __kmpc_atomic_cmplx10_add_cpt 526 __kmpc_atomic_cmplx10_div 527 __kmpc_atomic_cmplx10_div_cpt 528 __kmpc_atomic_cmplx10_div_cpt_rev 529 __kmpc_atomic_cmplx10_div_rev 530 __kmpc_atomic_cmplx10_mul 531 __kmpc_atomic_cmplx10_mul_cpt 532 __kmpc_atomic_cmplx10_rd 533 __kmpc_atomic_cmplx10_sub 534 __kmpc_atomic_cmplx10_sub_cpt 535 __kmpc_atomic_cmplx10_sub_cpt_rev 536 __kmpc_atomic_cmplx10_sub_rev 537 __kmpc_atomic_cmplx10_swp 538 __kmpc_atomic_cmplx10_wr 539 __kmpc_atomic_cmplx16_add 540 __kmpc_atomic_cmplx16_add_cpt 541 __kmpc_atomic_cmplx16_div 542 __kmpc_atomic_cmplx16_div_cpt 543 __kmpc_atomic_cmplx16_div_cpt_rev 544 __kmpc_atomic_cmplx16_div_rev 545 __kmpc_atomic_cmplx16_mul 546 __kmpc_atomic_cmplx16_mul_cpt 547 __kmpc_atomic_cmplx16_rd 548 __kmpc_atomic_cmplx16_sub 549 __kmpc_atomic_cmplx16_sub_cpt 550 __kmpc_atomic_cmplx16_sub_cpt_rev 551 
  __kmpc_atomic_cmplx16_swp
  __kmpc_atomic_cmplx16_wr
@endcode
*/

/*!
@ingroup ATOMIC_OPS
@{
*/

/*
 * Global vars
 */

// Selects the atomic-implementation mode for user-coded atomics: 1 uses the
// Intel lock scheme below, 2 routes through a single GOMP-compatible lock.
#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

// Align the first lock to a cache-line-sized boundary to avoid false sharing.
KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for complex byte data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
   on *_32 and *_32e.
   This is just a temporary workaround for the problem. It
   seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
   in assembler language. */
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

// Arithmetic and comparison operators for the padded/aligned quad-precision
// wrapper types, so the generic macros below can apply +=, <, etc. uniformly.
static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q /= rhs.q;
}

static inline void operator+=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q /= rhs.q;
}

#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

// Lazily resolve the global thread id when the caller passed
// KMP_GTID_UNKNOWN (only needed on paths that take a lock).
#define KMP_CHECK_GTID                                                         \
  if (gtid == KMP_GTID_UNKNOWN) {                                              \
    gtid = __kmp_entry_gtid();                                                 \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//               fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
// Note: this macro intentionally leaves the function body open; the *closing*
// brace is supplied by the macro that expands it.
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
                                             TYPE *lhs, TYPE rhs) {            \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID)                                                \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  (*lhs) OP(rhs);                                                              \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extension which
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG)                                             \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(OP, 0);                                                        \
    return;                                                                    \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Back-off primitive used inside compare-and-swap retry loops.
#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE KMP_CPU_PAUSE()
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE - operands' type
//     BITS - size in bits, used to distinguish low level calls
//     OP   - operator
// Classic CAS loop: recompute new_value from a fresh snapshot of *lhs until
// the compare-and-store succeeds.
#define OP_CMPXCHG(TYPE, BITS, OP)                                             \
  {                                                                            \
    TYPE old_value, new_value;                                                 \
    old_value = *(TYPE volatile *)lhs;                                         \
    new_value = old_value OP rhs;                                              \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      old_value = *(TYPE volatile *)lhs;                                       \
      new_value = old_value OP rhs;                                            \
    }                                                                          \
  }

#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
// Same CAS loop as OP_CMPXCHG, but routes the snapshot through a struct with
// an explicit pointer member so the compiler cannot drop the volatile reload
// (see the C78287 note above).
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                  \
  {                                                                            \
    struct _sss {                                                              \
      TYPE cmp;                                                                \
      kmp_int##BITS *vvv;                                                      \
    };                                                                         \
    struct _sss old_value, new_value;                                          \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
    new_value.cmp = old_value.cmp OP rhs;                                      \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
      new_value.cmp = old_value.cmp OP rhs;                                    \
    }                                                                          \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
// Emit an entry point built on an atomic fetch-and-add; the closing brace
// completes the body opened by ATOMIC_BEGIN.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                        \
  }
// -------------------------------------------------------------------------
// Emit an entry point built on the compare-and-swap loop.
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
                       GOMP_FLAG)                                              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
                                  MASK, GOMP_FLAG)                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
// MASK is the low-address-bit mask for the operand size; an address with any
// of those bits set is unaligned and falls back to the per-size lock.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */          \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                      \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
  }                                                                            \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
                       GOMP_FLAG)                                              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
  }                                                                            \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
                                  MASK, GOMP_FLAG)                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
  }                                                                            \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// Routines for ATOMIC 4-byte operands addition and subtraction
// Each invocation below expands to one __kmpc_atomic_* entry point; the
// trailing comment names the function generated.
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub

// ------------------------------------------------------------------------
// Entries definition for integer operands
//     TYPE_ID - operands type and size (fixed4, float4)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operand type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator (used in critical section)
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
//     MASK    - used for alignment check

//              TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
//              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
//              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and ||                         */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
//   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  OP_CRITICAL(= *lhs OP, LCK_ID)                                               \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
// Logical ops are implemented as "*lhs = *lhs OP rhs" via cmpxchg; the
// "= *lhs OP" fragment is spliced into the critical-section assignment.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */              \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that matched no one in C:                  */
/* MAX, MIN, .EQV., .NEQV.                                                   */
/* Operators .AND., .OR.
are covered by __kmpc_atomic_*_{andl,orl} */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
//     OP - operator to check if we need any actions?
// Lock-protected conditional store: only writes when "*lhs OP rhs" still
// holds after the lock is taken (another thread may have updated *lhs).
#define MIN_MAX_CRITSECT(OP, LCK_ID)                                           \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (*lhs OP rhs) { /* still need actions? */                                 \
    *lhs = rhs;                                                                \
  }                                                                            \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                        \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    MIN_MAX_CRITSECT(OP, 0);                                                   \
    return;                                                                    \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
// Lock-free min/max: retry the compare-and-store until either the store
// succeeds or the current value no longer needs replacing.
// Note: temp_val forces a single volatile read of *lhs per iteration.
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                        \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value;                                                            \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    while (old_value OP rhs && /* still need actions? */                       \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
               (kmp_int##BITS *)lhs,                                           \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
      KMP_CPU_PAUSE();                                                         \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
    }                                                                          \
  }

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) { /* need actions? */                                       \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    MIN_MAX_CRITSECT(OP, LCK_ID)                                               \
  }                                                                            \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                            \
  }                                                                            \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                    \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                    \
    } else {                                                                   \
      KMP_CHECK_GTID;                                                          \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                     \
    }                                                                          \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// max uses OP "<" (replace when *lhs < rhs), min uses ">".
MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
                 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
                 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
                 1) // __kmpc_atomic_float16_min_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need of complement (~)
//     OP ignored for critical sections, ^=~ used instead
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                      \
  OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */               \
  }

// ------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
                        GOMP_FLAG)                                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                      \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }
// ------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
                        GOMP_FLAG)                                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG)                                            \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */           \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// .NEQV. is plain xor; .EQV. is xor-with-complement ("^~" operator token).
ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP - operator
//     LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                    \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                            \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
                1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                1) // __kmpc_atomic_float10_div
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
                1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
                1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
                1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
                1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
                1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
                1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
                1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
                1) // __kmpc_atomic_float16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
// kmp_cmplx32 is 8 bytes, hence BITS=64 / lock 8c / mask 7 below.
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
                1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                1) // __kmpc_atomic_cmplx10_div
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
                1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
                1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// Reversed operand order: computes (rhs OP *lhs), not (*lhs OP rhs).
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(OP, LCK_ID)                                            \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  (*lhs) = (rhs)OP(*lhs);                                                      \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(OP, FLAG)                                         \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_REV(OP, 0);                                                    \
    return;                                                                    \
  }
#else
#define OP_GOMP_CRITICAL_REV(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//               fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
                                                   TYPE *lhs, TYPE rhs) {      \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = rhs OP old_value;                                              \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = rhs OP old_value;                                            \
    }                                                                          \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
  OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
  }

// ------------------------------------------------------------------------
// Entries definition for integer operands
//     TYPE_ID - operands type and size (fixed4, float4)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operand type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator (used in critical section)
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable

//              TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// Only non-commutative operators get _rev entries (sub, div, shifts).
//              TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev

ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev

ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev

ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev

ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev

ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
//              TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP - operator
//     LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
  OP_CRITICAL_REV(OP, LCK_ID)                                                  \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_rev
ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_rev
ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_rev
ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD

// routines for complex types
ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
                    1) // __kmpc_atomic_cmplx4_sub_rev
ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
                    1) // __kmpc_atomic_cmplx4_div_rev
ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_rev
ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_rev
ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_rev
ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_rev
ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_rev
ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
// End of OpenMP 4.0: x = expr binop x for non-commutative operations.

/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger"            */
/* Note: in order to reduce the total number of types combinations          */
/*       it is supposed that compiler converts RHS to longest floating type,*/
/*       that is _Quad, before call to any of these routines                */
/* Conversion to _Quad will be done by the compiler during calculation,     */
/*    conversion back to TYPE - before the assignment, like:                */
/*        *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                             */
/* Performance penalty expected because of SW emulation use                 */
/* ------------------------------------------------------------------------ */

// Entry-point prologue for mixed-type routines: name carries both the LHS
// type id (TYPE_ID) and the RHS type id (RTYPE_ID), e.g.
// __kmpc_atomic_fixed1_mul_float8.
#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                       \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100,                                                              \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
              gtid));

// -------------------------------------------------------------------------
// Mixed-type update bound by a critical section (extended RHS types).
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID,  \
                           GOMP_FLAG)                                          \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                    \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                            \
  }

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
// Mixed-type update via compare-and-swap on the (smaller) LHS object.
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
                           LCK_ID, MASK, GOMP_FLAG)                            \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
                           LCK_ID, MASK, GOMP_FLAG)                            \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// Reversed (rhs OP *lhs) flavours of the mixed-type routines.
#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
  OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
  }
#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,      \
                               LCK_ID, GOMP_FLAG)                              \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
  OP_CRITICAL_REV(OP, LCK_ID)                                                  \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// RHS=float8
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8

// RHS=float16 (deprecated, to be removed when we are sure the compiler does not
// use them)
#if KMP_HAVE_QUAD
ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp

ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp

ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_add_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_add_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_div_fp

ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp

ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp

ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp

// float10 (80-bit x87 long double) cannot be handled by cmpxchg; use the
// critical-section form.
ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_add_fp
ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_sub_fp
ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_mul_fp
ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_div_fp

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Reverse operations
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp

ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_sub_rev_fp
ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_div_rev_fp
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#endif // KMP_HAVE_QUAD

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
// Same shape as ATOMIC_CMPXCHG_MIX but routed through
// OP_CMPXCHG_WORKAROUND (defined earlier in this file).
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG)                         \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                       \
  }
// end of the second part of the workaround for C78287
#else
// Plain compare-and-swap variant when the C78287 workaround is disabled.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG)                         \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  OP_CMPXCHG(TYPE, BITS, OP)                                                  \
  }
#endif // USE_CMPXCHG_FIX
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
// Aligned addresses use cmpxchg; unaligned fall back to a critical section.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG)                         \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */        \
  }                                                                           \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8

// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Atomic READ routines

// ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
// Opens the body of an atomic-read entry point; the closing brace is
// supplied by the macros that use this one.
#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                     \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,       \
                                             TYPE *loc) {                     \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store_ret" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
// TODO: check if it is still necessary
// Return old value regardless of the result of "compare & swap" operation.
// The CAS is issued with identical old/new values, so it only serves to
// read *loc atomically; the union reinterprets the integer bits as TYPE.
#define OP_CMPXCHG_READ(TYPE, BITS, OP)                                       \
  {                                                                           \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                        \
    union f_i_union {                                                         \
      TYPE f_val;                                                             \
      kmp_int##BITS i_val;                                                    \
    };                                                                        \
    union f_i_union old_value;                                                \
    temp_val = *loc;                                                          \
    old_value.f_val = temp_val;                                               \
    old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                        \
        (kmp_int##BITS *)loc,                                                 \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                    \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                   \
    new_value = old_value.f_val;                                              \
    return new_value;                                                         \
  }

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_READ(OP, LCK_ID)                                          \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  new_value = (*loc);                                                         \
                                                                              \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
// GOMP compatibility: when running in GOMP atomic mode, reads go through
// the shared lock 0 instead of the per-type lock.
#define OP_GOMP_CRITICAL_READ(OP, FLAG)                                       \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL_READ(OP, 0);                                                  \
    return new_value;                                                         \
  }
#else
#define OP_GOMP_CRITICAL_READ(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
// Integer read implemented as an atomic fetch-and-add of OP 0 (i.e. +0),
// which returns the current value without modifying it.
#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                               \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                    \
  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                             \
  return new_value;                                                           \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                               \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                    \
  OP_CMPXCHG_READ(TYPE, BITS, OP)                                             \
  }
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)     \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                               \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */              \
  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */                          \
  return new_value;                                                           \
  }

// ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
// value doesn't work.
// Let's return the read value through the additional parameter.
#if (KMP_OS_WINDOWS)

#define OP_CRITICAL_READ_WRK(OP, LCK_ID)                                      \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  (*out) = (*loc);                                                            \
                                                                              \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)                                   \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL_READ_WRK(OP, 0);                                              \
  }
#else
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
// Note the different signature: result is returned through *out, not by
// value (the extra first parameter is the workaround).
#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                           \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid,\
                                         TYPE *loc) {                         \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                                 \
  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */          \
  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */                      \
  }

#endif // KMP_OS_WINDOWS

// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
                    KMP_ARCH_X86) // __kmpc_atomic_float4_rd
ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
                    KMP_ARCH_X86) // __kmpc_atomic_float8_rd

// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd

ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
                     1) // __kmpc_atomic_float10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
                     1) // __kmpc_atomic_float16_rd
#endif // KMP_HAVE_QUAD

// Fix for CQ220361 on Windows* OS
#if (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
                         1) // __kmpc_atomic_cmplx4_rd
#else
ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
                     1) // __kmpc_atomic_cmplx4_rd
#endif // (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
                     1) // __kmpc_atomic_cmplx8_rd
ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
                     1) // __kmpc_atomic_cmplx10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
                     1) // __kmpc_atomic_cmplx16_rd
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
                     1) // __kmpc_atomic_float16_a16_rd
ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
                     1) // __kmpc_atomic_cmplx16_a16_rd
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Atomic WRITE routines

// Integer write via atomic exchange (old value discarded).
#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                             \
  KMP_XCHG_FIXED##BITS(lhs, rhs);                                             \
  }
// ------------------------------------------------------------------------
// Floating-point write via atomic exchange.
#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)       \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                             \
  KMP_XCHG_REAL##BITS(lhs, rhs);                                              \
  }

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_WR(TYPE, BITS, OP)                                         \
  {                                                                           \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                        \
    TYPE old_value, new_value;                                                \
    temp_val = *lhs;                                                          \
    old_value = temp_val;                                                     \
    new_value = rhs;                                                          \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,    \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                       \
      KMP_CPU_PAUSE();                                                        \
                                                                              \
      temp_val = *lhs;                                                        \
      old_value = temp_val;                                                   \
      new_value = rhs;                                                        \
    }                                                                         \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                             \
  OP_CMPXCHG_WR(TYPE, BITS, OP)                                               \
  }

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */                       \
  OP_CRITICAL(OP, LCK_ID) /* send assignment */                               \
  }
// -------------------------------------------------------------------------

ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
// On 32-bit x86 an 8-byte exchange is not a single instruction; use the
// cmpxchg8b-based loop instead.
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#else
ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#else
ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
                   1) // __kmpc_atomic_float10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
                   1) // __kmpc_atomic_float16_wr
#endif // KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
                   1) // __kmpc_atomic_cmplx8_wr
ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
                   1) // __kmpc_atomic_cmplx10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
                   1) // __kmpc_atomic_cmplx16_wr
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
                   1) // __kmpc_atomic_float16_a16_wr
ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
                   1) // __kmpc_atomic_cmplx16_a16_wr
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Atomic CAPTURE routines

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
// `flag` selects whether the value captured is after (non-zero) or before
// (zero) the update.
#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,       \
                                             TYPE *lhs, TYPE rhs, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_CPT(OP, LCK_ID)                                           \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  if (flag) {                                                                 \
    (*lhs) OP rhs;                                                            \
    new_value = (*lhs);                                                       \
  } else {                                                                    \
    new_value = (*lhs);                                                       \
    (*lhs) OP rhs;                                                            \
  }                                                                           \
                                                                              \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT(OP, FLAG)                                        \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL_CPT(OP## =, 0);                                               \
  }
#else
#define OP_GOMP_CRITICAL_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT(TYPE, BITS, OP)                                        \
  {                                                                           \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                        \
    TYPE old_value, new_value;                                                \
    temp_val = *lhs;                                                          \
    old_value = temp_val;                                                     \
    new_value = old_value OP rhs;                                             \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,    \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                       \
      KMP_CPU_PAUSE();                                                        \
                                                                              \
      temp_val = *lhs;                                                        \
      old_value = temp_val;                                                   \
      new_value = old_value OP rhs;                                           \
    }                                                                         \
    if (flag) {                                                               \
      return new_value;                                                       \
    } else                                                                    \
      return old_value;                                                       \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                         \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                              \
  }

// -------------------------------------------------------------------------
// add/sub capture via hardware fetch-and-add; the captured "after" value
// is reconstructed from the returned old value.
#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)       \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE old_value, new_value;                                                  \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                         \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */           \
  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                           \
  if (flag) {                                                                 \
    return old_value OP rhs;                                                  \
  } else                                                                      \
    return old_value;                                                         \
  }
// -------------------------------------------------------------------------

ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
                     0) // __kmpc_atomic_fixed4_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
                     0) // __kmpc_atomic_fixed4_sub_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt

ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
                   0) // __kmpc_atomic_fixed1_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
                   0) // __kmpc_atomic_fixed1_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
                   0) // __kmpc_atomic_fixed1_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
                   0) // __kmpc_atomic_fixed2_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
                   0) // __kmpc_atomic_fixed2_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
                   0) // __kmpc_atomic_fixed2_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
                   0) // __kmpc_atomic_fixed4_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
                   0) // __kmpc_atomic_fixed4_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
                   0) // __kmpc_atomic_fixed4_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG

// CAPTURE routines for mixed types RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
// Emits the signature and entry trace of a mixed-type capture routine:
//   TYPE __kmpc_atomic_<TYPE_ID>_<OP_ID>_<RTYPE_ID>(...)
// where the RHS operand has type RTYPE (here always _Quad).
#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)           \
  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                        \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) {            \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100,                                                             \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
// Lock-free mixed-type capture built on a BITS-wide compare-and-swap loop
// (OP_CMPXCHG_CPT), with optional GOMP critical-section fallback.
#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,      \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG)                \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                 \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                         \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                              \
  }

// -------------------------------------------------------------------------
// Critical-section mixed-type capture (used below for long double, which is
// too wide for the CAS path).
#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,    \
                                LCK_ID, GOMP_FLAG)                            \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                 \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */                   \
  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */                       \
  }

ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp

ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_add_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_mul_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_fp

#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Routines for C/C++ Reduction operators && and ||

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
// NOTE: OP is passed as "= *lhs <op>" (see ATOMIC_CMPX_L_CPT below), so
// "new_value OP rhs" expands to "new_value = *lhs <op> rhs".
#define OP_CRITICAL_L_CPT(OP, LCK_ID)                                         \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  if (flag) {                                                                 \
    new_value OP rhs;                                                         \
  } else                                                                      \
    new_value = (*lhs);                                                       \
                                                                              \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)                                      \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL_L_CPT(OP, 0);                                                 \
    return new_value;                                                         \
  }
#else
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG)                                \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                              \
  }

ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
                  0) // __kmpc_atomic_fixed4_andl_cpt
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
                  0) // __kmpc_atomic_fixed4_orl_cpt
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt

// -------------------------------------------------------------------------
// Routines for Fortran operators that matched no one in C:
// MAX, MIN, .EQV., .NEQV.
// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - operator to check if we need any actions?
// Locked min/max update with capture; returns the pre- or post-update value
// depending on "flag". The condition is re-checked under the lock because it
// may have changed since the caller's unlocked test.
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                      \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  if (*lhs OP rhs) { /* still need actions? */                                \
    old_value = *lhs;                                                         \
    *lhs = rhs;                                                               \
    if (flag)                                                                 \
      new_value = rhs;                                                        \
    else                                                                      \
      new_value = old_value;                                                  \
  } else {                                                                    \
    new_value = *lhs;                                                         \
  }                                                                           \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
  return new_value;

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)                                   \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    MIN_MAX_CRITSECT_CPT(OP, 0);                                              \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
// CAS-based min/max update with capture; loops until the store succeeds or
// the update is no longer needed (the condition re-tested on each retry).
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                   \
  {                                                                           \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                        \
    /*TYPE old_value; */                                                      \
    temp_val = *lhs;                                                          \
    old_value = temp_val;                                                     \
    while (old_value OP rhs && /* still need actions? */                      \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
               (kmp_int##BITS *)lhs,                                          \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                   \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                      \
      KMP_CPU_PAUSE();                                                        \
      temp_val = *lhs;                                                        \
      old_value = temp_val;                                                   \
    }                                                                         \
    if (flag)                                                                 \
      return rhs;                                                             \
    else                                                                      \
      return old_value;                                                       \
  }

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
// Note: the helpers above return directly when they take action; the final
// "return *lhs" only runs when the initial unlocked test saw no update
// needed.
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)     \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE new_value, old_value;                                                  \
  if (*lhs OP rhs) { /* need actions? */                                      \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                  \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                          \
  }                                                                           \
  return *lhs;                                                                \
  }

#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)       \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE new_value, old_value;                                                  \
  if (*lhs OP rhs) {                                                          \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                  \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                       \
  }                                                                           \
  return *lhs;                                                                \
  }

MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
                     0) // __kmpc_atomic_fixed4_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
                     0) // __kmpc_atomic_fixed4_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
                     1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
                     1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
                     1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                    \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL_CPT(OP, 0);                                                   \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
// .EQV. capture: "x = x ^~ rhs" (equivalence is xor of the complement).
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */             \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                              \
  }

// ------------------------------------------------------------------------

ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */                   \
  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */                       \
  }

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
// Locked update-with-capture that writes the captured value to *out instead
// of returning it: flag != 0 captures the post-update value, flag == 0 the
// pre-update value.
#define OP_CRITICAL_CPT_WRK(OP, LCK_ID)                                       \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  if (flag) {                                                                 \
    (*lhs) OP rhs;                                                            \
    (*out) = (*lhs);                                                          \
  } else {                                                                    \
    (*out) = (*lhs);                                                          \
    (*lhs) OP rhs;                                                            \
  }                                                                           \
                                                                              \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)                                    \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL_CPT_WRK(OP## =, 0);                                           \
  }
#else
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

// Entry emitter for the void-returning ("workaround") capture routines: the
// result goes through the extra TYPE *out parameter.
#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
                                         TYPE rhs, TYPE *out, int flag) {     \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                      \
  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG)                                     \
  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID)                                         \
  }
// The end of workaround for cmplx4

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
                    1) // __kmpc_atomic_float10_add_cpt
ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_cpt
ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
                    1) // __kmpc_atomic_float10_mul_cpt
ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_cpt
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
                    1) // __kmpc_atomic_float16_add_cpt
ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_cpt
ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
                    1) // __kmpc_atomic_float16_mul_cpt
ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_cpt
#if (KMP_ARCH_X86)
// 16-byte-aligned _Quad variants (Quad_a16_t)
ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
                    1) // __kmpc_atomic_float16_add_a16_cpt
ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
                    1) // __kmpc_atomic_float16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
                        1) // __kmpc_atomic_cmplx4_add_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
                        1) // __kmpc_atomic_cmplx4_sub_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
                        1) // __kmpc_atomic_cmplx4_mul_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
                        1) // __kmpc_atomic_cmplx4_div_cpt

ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
                    1) // __kmpc_atomic_cmplx8_add_cpt
ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
                    1) // __kmpc_atomic_cmplx8_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_cpt
ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
                    1) // __kmpc_atomic_cmplx10_add_cpt
ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
                    1) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_cpt
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_cpt
#if (KMP_ARCH_X86)
// 16-byte-aligned complex-quad variants
ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr
// binop x; v = x; } for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
// Reversed-operand capture: computes "rhs OP *lhs" (not "*lhs OP rhs") and
// returns the post- or pre-update value per "flag".
#define OP_CRITICAL_CPT_REV(OP, LCK_ID)                                       \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  if (flag) {                                                                 \
    /*temp_val = (*lhs);*/                                                    \
    (*lhs) = (rhs)OP(*lhs);                                                   \
    new_value = (*lhs);                                                       \
  } else {                                                                    \
    new_value = (*lhs);                                                       \
    (*lhs) = (rhs)OP(*lhs);                                                   \
  }                                                                           \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)                                    \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL_CPT_REV(OP, 0);                                               \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                    \
  {                                                                           \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                        \
    TYPE old_value, new_value;                                                \
    temp_val = *lhs;                                                          \
    old_value = temp_val;                                                     \
    new_value = rhs OP old_value;                                             \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,    \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                       \
      KMP_CPU_PAUSE();                                                        \
                                                                              \
      temp_val = *lhs;                                                        \
      old_value = temp_val;                                                   \
      new_value = rhs OP old_value;                                           \
    }                                                                         \
    if (flag) {                                                               \
      return new_value;                                                       \
    } else                                                                    \
      return old_value;                                                       \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)     \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE new_value;                                                             \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                     \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                          \
  }

ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE new_value;                                                             \
  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/                  \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                     \
  OP_CRITICAL_CPT_REV(OP, LCK_ID)                                             \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
                        1) // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
                        1) // __kmpc_atomic_float16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
                        1) // __kmpc_atomic_float16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
                        1) // __kmpc_atomic_float16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
// Reversed-operand capture writing the captured value through *out (void
// return); flag selects post- vs pre-update capture, as above.
#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                   \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  if (flag) {                                                                 \
    (*lhs) = (rhs)OP(*lhs);                                                   \
    (*out) = (*lhs);                                                          \
  } else {                                                                    \
    (*out) = (*lhs);                                                          \
    (*lhs) = (rhs)OP(*lhs);                                                   \
  }                                                                           \
                                                                              \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)                                \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL_CPT_REV_WRK(OP, 0);                                           \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID,         \
                                    GOMP_FLAG)                                \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                      \
  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG)                                 \
  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                         \
  }
// The end of workaround for cmplx4

// !!!
TODO: check if we need to return void for cmplx4 routines 3040// cmplx4 routines to return void 3041ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 3042 1) // __kmpc_atomic_cmplx4_sub_cpt_rev 3043ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 3044 1) // __kmpc_atomic_cmplx4_div_cpt_rev 3045 3046ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 3047 1) // __kmpc_atomic_cmplx8_sub_cpt_rev 3048ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 3049 1) // __kmpc_atomic_cmplx8_div_cpt_rev 3050ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 3051 1) // __kmpc_atomic_cmplx10_sub_cpt_rev 3052ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 3053 1) // __kmpc_atomic_cmplx10_div_cpt_rev 3054#if KMP_HAVE_QUAD 3055ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 3056 1) // __kmpc_atomic_cmplx16_sub_cpt_rev 3057ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 3058 1) // __kmpc_atomic_cmplx16_div_cpt_rev 3059#if (KMP_ARCH_X86) 3060ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 3061 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev 3062ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 3063 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev 3064#endif // (KMP_ARCH_X86) 3065#endif // KMP_HAVE_QUAD 3066 3067// Capture reverse for mixed type: RHS=float16 3068#if KMP_HAVE_QUAD 3069 3070// Beginning of a definition (provides name, parameters, gebug trace) 3071// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 3072// fixed) 3073// OP_ID - operation identifier (add, sub, mul, ...) 
// TYPE - operands' type
// -------------------------------------------------------------------------
// Lock-free mixed-type capture-reverse: LHS of TYPE, RHS of RTYPE (_Quad),
// implemented via a BITS-wide compare-and-swap retry loop
// (OP_CMPXCHG_CPT_REV, defined earlier).
// NOTE(review): this expansion uses only BITS; LCK_ID and MASK appear to be
// accepted solely for signature symmetry with the critical-section variant.
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
  }

// -------------------------------------------------------------------------
// Critical-section mixed-type capture-reverse, for types too wide for a
// native compare-and-swap (long double LHS with _Quad RHS below).
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG)                         \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */                \
  OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */                        \
  }

// Instantiations: sub/div capture-reverse with a _Quad RHS ("_fp" suffix)
// for every fixed-size integer and float LHS.
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp

ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp

#endif // KMP_HAVE_QUAD

// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

// Function header for TYPE __kmpc_atomic_<TYPE_ID>_swp(...): returns the
// value of *lhs that was replaced by rhs.
#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                        \
  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
                                     TYPE rhs) {                               \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

// Lock-based swap: read old value, store rhs, all under the LCK_ID lock.
#define CRITICAL_SWP(LCK_ID)                                                   \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  old_value = (*lhs);                                                          \
  (*lhs) = rhs;                                                                \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return old_value;

// ------------------------------------------------------------------------
// GOMP compatibility: in __kmp_atomic_mode == 2 the swap is serialized on
// the single global lock (LCK_ID 0); no-op when KMP_GOMP_COMPAT is off.
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG)                                                \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    CRITICAL_SWP(0);                                                           \
  }
#else
#define GOMP_CRITICAL_SWP(FLAG)
#endif /* KMP_GOMP_COMPAT */

// Swap via native integer exchange (KMP_XCHG_FIXED8/16/32/64).
#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                        \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs);                                  \
  return old_value;                                                            \
  }
// ------------------------------------------------------------------------
// Swap via native floating-point exchange (KMP_XCHG_REAL32/64).
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                  \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  old_value = KMP_XCHG_REAL##BITS(lhs, rhs);                                   \
  return old_value;                                                            \
  }

// ------------------------------------------------------------------------
// Swap via a BITS-wide compare-and-swap retry loop; old/new values are
// reinterpreted as kmp_int<BITS> for the CAS.
#define CMPXCHG_SWP(TYPE, BITS)                                                \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = rhs;                                                           \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      KMP_CPU_PAUSE();                                                         \
                                                                               \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = rhs;                                                         \
    }                                                                          \
    return old_value;                                                          \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                     \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  CMPXCHG_SWP(TYPE, BITS)                                                      \
  }

ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp

ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
                      KMP_ARCH_X86) // __kmpc_atomic_float4_swp

// 8-byte swap: IA-32 has no native 8-byte exchange, so fall back to the
// cmpxchg8b-based loop there; 64-bit targets use the direct exchange.
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#else
ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
                      KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#endif // (KMP_ARCH_X86)

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)                  \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  CRITICAL_SWP(LCK_ID)                                                         \
  }

// ------------------------------------------------------------------------
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.

// Void-returning swap header: the replaced value is written through *out.
#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                    \
  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
                                     TYPE rhs, TYPE *out) {                    \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP_WRK(LCK_ID)                                               \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  tmp = (*lhs);                                                                \
  (*lhs) = (rhs);                                                              \
  (*out) = tmp;                                                                \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG)                                            \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    CRITICAL_SWP_WRK(0);                                                       \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)              \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                          \
  TYPE tmp;                                                                    \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                             \
  CRITICAL_SWP_WRK(LCK_ID)                                                     \
  }
// The end of workaround for cmplx4

ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif // KMP_HAVE_QUAD
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
// __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
                    1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
                    1) // __kmpc_atomic_cmplx16_a16_swp
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// End of OpenMP 4.0 Capture

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

#undef OP_CRITICAL

/* ------------------------------------------------------------------------ */
/* Generic atomic routines */

// Generic atomic update for 1-byte operands.  The caller-supplied callback
// f(dst, op1, op2) computes the desired operation; here it is invoked as
// f(&new, &old, rhs) on the fast path and f(lhs, lhs, rhs) under a lock.
// Fast path: 1-byte compare-and-swap retry loop.  Slow path (IA-32 with
// GOMP compatibility): the global __kmp_atomic_lock when
// __kmp_atomic_mode == 2, otherwise the 1-byte lock __kmp_atomic_lock_1i.
void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      ) {
    kmp_int8 old_value, new_value;

    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
                                       *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      // Lost the race: reload the current value and recompute.
      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
// All 1-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}

// Generic atomic update for 2-byte operands; same scheme as
// __kmpc_atomic_1, with an additional alignment check on targets that
// cannot CAS a misaligned 2-byte cell.
// NOTE(review): unlike __kmpc_atomic_1/4/8 this routine has no
// KMP_DEBUG_ASSERT(__kmp_init_serial) -- confirm whether intentional.
void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      ) {
    kmp_int16 old_value, new_value;

    old_value = *(kmp_int16 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ16(
        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int16 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
// All 2-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
  }
}

// Generic atomic update for 4-byte operands (CAS loop or 4-byte lock).
void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
// Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
      ) {
    kmp_int32 old_value, new_value;

    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
// Use __kmp_atomic_lock_4i for all 4-byte data,
// even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}

// Generic atomic update for 8-byte operands (CAS loop or 8-byte lock).
void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (

#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      ) {
    kmp_int64 old_value, new_value;

    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);
    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
// Use __kmp_atomic_lock_8i for all 8-byte data,
// even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}

// Generic atomic update for 10-byte operands (80-bit long double):
// always lock-based, using the 10r lock (or the global lock in GOMP mode).
void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}

// Generic atomic update for 16-byte operands: lock-based via the 16-byte
// (complex) lock __kmp_atomic_lock_16c.
void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}

// Generic atomic update for 20-byte operands (80-bit complex): lock-based.
void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}

// Generic atomic update for 32-byte operands (128-bit complex): lock-based.
void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}

// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated in order to not use 3-party names in pure Intel code
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
// Acquires the single global atomic lock (unstructured critical region
// around an atomic that could not be inlined).
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

// Releases the global atomic lock taken by __kmpc_atomic_start.
void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}

/*!
@}
*/

// end of file