1/* Copyright (C) 2005-2015 Free Software Foundation, Inc. 2 Contributed by Richard Henderson <rth@redhat.com>. 3 4 This file is part of the GNU Offloading and Multi Processing Library 5 (libgomp). 6 7 Libgomp is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 more details. 16 17 Under Section 7 of GPL version 3, you are granted additional 18 permissions described in the GCC Runtime Library Exception, version 19 3.1, as published by the Free Software Foundation. 20 21 You should have received a copy of the GNU General Public License and 22 a copy of the GCC Runtime Library Exception along with this program; 23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 <http://www.gnu.org/licenses/>. */ 25 26/* This file handles the LOOP (FOR/DO) construct. */ 27 28#include <limits.h> 29#include <stdlib.h> 30#include "libgomp.h" 31 32typedef unsigned long long gomp_ull; 33 34/* Initialize the given work share construct from the given arguments. */ 35 36static inline void 37gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start, 38 gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched, 39 gomp_ull chunk_size) 40{ 41 ws->sched = sched; 42 ws->chunk_size_ull = chunk_size; 43 /* Canonicalize loops that have zero iterations to ->next == ->end. */ 44 ws->end_ull = ((up && start > end) || (!up && start < end)) 45 ? start : end; 46 ws->incr_ull = incr; 47 ws->next_ull = start; 48 ws->mode = 0; 49 if (sched == GFS_DYNAMIC) 50 { 51 ws->chunk_size_ull *= incr; 52 53#if defined HAVE_SYNC_BUILTINS && defined __LP64__ 54 { 55 /* For dynamic scheduling prepare things to make each iteration 56 faster. */ 57 struct gomp_thread *thr = gomp_thread (); 58 struct gomp_team *team = thr->ts.team; 59 long nthreads = team ? team->nthreads : 1; 60 61 if (__builtin_expect (up, 1)) 62 { 63 /* Cheap overflow protection. */ 64 if (__builtin_expect ((nthreads | ws->chunk_size_ull) 65 < 1ULL << (sizeof (gomp_ull) 66 * __CHAR_BIT__ / 2 - 1), 1)) 67 ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1 68 - (nthreads + 1) * ws->chunk_size_ull); 69 } 70 /* Cheap overflow protection. */ 71 else if (__builtin_expect ((nthreads | -ws->chunk_size_ull) 72 < 1ULL << (sizeof (gomp_ull) 73 * __CHAR_BIT__ / 2 - 1), 1)) 74 ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull 75 - (__LONG_LONG_MAX__ * 2ULL + 1)); 76 } 77#endif 78 } 79 if (!up) 80 ws->mode |= 2; 81} 82 83/* The *_start routines are called when first encountering a loop construct 84 that is not bound directly to a parallel construct. The first thread 85 that arrives will create the work-share construct; subsequent threads 86 will see the construct exists and allocate work from it. 87 88 START, END, INCR are the bounds of the loop; due to the restrictions of 89 OpenMP, these values must be the same in every thread. This is not 90 verified (nor is it entirely verifiable, since START is not necessarily 91 retained intact in the work-share data structure). CHUNK_SIZE is the 92 scheduling parameter; again this must be identical in all threads. 93 94 Returns true if there's any work for this thread to perform. If so, 95 *ISTART and *IEND are filled with the bounds of the iteration block 96 allocated to this thread. Returns false if all work was assigned to 97 other threads prior to this thread's arrival. */ 98 99static bool 100gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end, 101 gomp_ull incr, gomp_ull chunk_size, 102 gomp_ull *istart, gomp_ull *iend) 103{ 104 struct gomp_thread *thr = gomp_thread (); 105 106 thr->ts.static_trip = 0; 107 if (gomp_work_share_start (false)) 108 { 109 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 110 GFS_STATIC, chunk_size); 111 gomp_work_share_init_done (); 112 } 113 114 return !gomp_iter_ull_static_next (istart, iend); 115} 116 117static bool 118gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end, 119 gomp_ull incr, gomp_ull chunk_size, 120 gomp_ull *istart, gomp_ull *iend) 121{ 122 struct gomp_thread *thr = gomp_thread (); 123 bool ret; 124 125 if (gomp_work_share_start (false)) 126 { 127 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 128 GFS_DYNAMIC, chunk_size); 129 gomp_work_share_init_done (); 130 } 131 132#if defined HAVE_SYNC_BUILTINS && defined __LP64__ 133 ret = gomp_iter_ull_dynamic_next (istart, iend); 134#else 135 gomp_mutex_lock (&thr->ts.work_share->lock); 136 ret = gomp_iter_ull_dynamic_next_locked (istart, iend); 137 gomp_mutex_unlock (&thr->ts.work_share->lock); 138#endif 139 140 return ret; 141} 142 143static bool 144gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end, 145 gomp_ull incr, gomp_ull chunk_size, 146 gomp_ull *istart, gomp_ull *iend) 147{ 148 struct gomp_thread *thr = gomp_thread (); 149 bool ret; 150 151 if (gomp_work_share_start (false)) 152 { 153 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 154 GFS_GUIDED, chunk_size); 155 gomp_work_share_init_done (); 156 } 157 158#if defined HAVE_SYNC_BUILTINS && defined __LP64__ 159 ret = gomp_iter_ull_guided_next (istart, iend); 160#else 161 gomp_mutex_lock (&thr->ts.work_share->lock); 162 ret = gomp_iter_ull_guided_next_locked (istart, iend); 163 gomp_mutex_unlock (&thr->ts.work_share->lock); 164#endif 165 166 return ret; 167} 168 169bool 170GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end, 171 gomp_ull incr, gomp_ull *istart, gomp_ull *iend) 172{ 173 struct gomp_task_icv *icv = gomp_icv (false); 174 switch (icv->run_sched_var) 175 { 176 case GFS_STATIC: 177 return gomp_loop_ull_static_start (up, start, end, incr, 178 icv->run_sched_modifier, 179 istart, iend); 180 case GFS_DYNAMIC: 181 return gomp_loop_ull_dynamic_start (up, start, end, incr, 182 icv->run_sched_modifier, 183 istart, iend); 184 case GFS_GUIDED: 185 return gomp_loop_ull_guided_start (up, start, end, incr, 186 icv->run_sched_modifier, 187 istart, iend); 188 case GFS_AUTO: 189 /* For now map to schedule(static), later on we could play with feedback 190 driven choice. */ 191 return gomp_loop_ull_static_start (up, start, end, incr, 192 0, istart, iend); 193 default: 194 abort (); 195 } 196} 197 198/* The *_ordered_*_start routines are similar. The only difference is that 199 this work-share construct is initialized to expect an ORDERED section. */ 200 201static bool 202gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, 203 gomp_ull incr, gomp_ull chunk_size, 204 gomp_ull *istart, gomp_ull *iend) 205{ 206 struct gomp_thread *thr = gomp_thread (); 207 208 thr->ts.static_trip = 0; 209 if (gomp_work_share_start (true)) 210 { 211 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 212 GFS_STATIC, chunk_size); 213 gomp_ordered_static_init (); 214 gomp_work_share_init_done (); 215 } 216 217 return !gomp_iter_ull_static_next (istart, iend); 218} 219 220static bool 221gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end, 222 gomp_ull incr, gomp_ull chunk_size, 223 gomp_ull *istart, gomp_ull *iend) 224{ 225 struct gomp_thread *thr = gomp_thread (); 226 bool ret; 227 228 if (gomp_work_share_start (true)) 229 { 230 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 231 GFS_DYNAMIC, chunk_size); 232 gomp_mutex_lock (&thr->ts.work_share->lock); 233 gomp_work_share_init_done (); 234 } 235 else 236 gomp_mutex_lock (&thr->ts.work_share->lock); 237 238 ret = gomp_iter_ull_dynamic_next_locked (istart, iend); 239 if (ret) 240 gomp_ordered_first (); 241 gomp_mutex_unlock (&thr->ts.work_share->lock); 242 243 return ret; 244} 245 246static bool 247gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end, 248 gomp_ull incr, gomp_ull chunk_size, 249 gomp_ull *istart, gomp_ull *iend) 250{ 251 struct gomp_thread *thr = gomp_thread (); 252 bool ret; 253 254 if (gomp_work_share_start (true)) 255 { 256 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 257 GFS_GUIDED, chunk_size); 258 gomp_mutex_lock (&thr->ts.work_share->lock); 259 gomp_work_share_init_done (); 260 } 261 else 262 gomp_mutex_lock (&thr->ts.work_share->lock); 263 264 ret = gomp_iter_ull_guided_next_locked (istart, iend); 265 if (ret) 266 gomp_ordered_first (); 267 gomp_mutex_unlock (&thr->ts.work_share->lock); 268 269 return ret; 270} 271 272bool 273GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end, 274 gomp_ull incr, gomp_ull *istart, 275 gomp_ull *iend) 276{ 277 struct gomp_task_icv *icv = gomp_icv (false); 278 switch (icv->run_sched_var) 279 { 280 case GFS_STATIC: 281 return gomp_loop_ull_ordered_static_start (up, start, end, incr, 282 icv->run_sched_modifier, 283 istart, iend); 284 case GFS_DYNAMIC: 285 return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, 286 icv->run_sched_modifier, 287 istart, iend); 288 case GFS_GUIDED: 289 return gomp_loop_ull_ordered_guided_start (up, start, end, incr, 290 icv->run_sched_modifier, 291 istart, iend); 292 case GFS_AUTO: 293 /* For now map to schedule(static), later on we could play with feedback 294 driven choice. */ 295 return gomp_loop_ull_ordered_static_start (up, start, end, incr, 296 0, istart, iend); 297 default: 298 abort (); 299 } 300} 301 302/* The *_next routines are called when the thread completes processing of 303 the iteration block currently assigned to it. If the work-share 304 construct is bound directly to a parallel construct, then the iteration 305 bounds may have been set up before the parallel. In which case, this 306 may be the first iteration for the thread. 307 308 Returns true if there is work remaining to be performed; *ISTART and 309 *IEND are filled with a new iteration block. Returns false if all work 310 has been assigned. */ 311 312static bool 313gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend) 314{ 315 return !gomp_iter_ull_static_next (istart, iend); 316} 317 318static bool 319gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend) 320{ 321 bool ret; 322 323#if defined HAVE_SYNC_BUILTINS && defined __LP64__ 324 ret = gomp_iter_ull_dynamic_next (istart, iend); 325#else 326 struct gomp_thread *thr = gomp_thread (); 327 gomp_mutex_lock (&thr->ts.work_share->lock); 328 ret = gomp_iter_ull_dynamic_next_locked (istart, iend); 329 gomp_mutex_unlock (&thr->ts.work_share->lock); 330#endif 331 332 return ret; 333} 334 335static bool 336gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend) 337{ 338 bool ret; 339 340#if defined HAVE_SYNC_BUILTINS && defined __LP64__ 341 ret = gomp_iter_ull_guided_next (istart, iend); 342#else 343 struct gomp_thread *thr = gomp_thread (); 344 gomp_mutex_lock (&thr->ts.work_share->lock); 345 ret = gomp_iter_ull_guided_next_locked (istart, iend); 346 gomp_mutex_unlock (&thr->ts.work_share->lock); 347#endif 348 349 return ret; 350} 351 352bool 353GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend) 354{ 355 struct gomp_thread *thr = gomp_thread (); 356 357 switch (thr->ts.work_share->sched) 358 { 359 case GFS_STATIC: 360 case GFS_AUTO: 361 return gomp_loop_ull_static_next (istart, iend); 362 case GFS_DYNAMIC: 363 return gomp_loop_ull_dynamic_next (istart, iend); 364 case GFS_GUIDED: 365 return gomp_loop_ull_guided_next (istart, iend); 366 default: 367 abort (); 368 } 369} 370 371/* The *_ordered_*_next routines are called when the thread completes 372 processing of the iteration block currently assigned to it. 373 374 Returns true if there is work remaining to be performed; *ISTART and 375 *IEND are filled with a new iteration block. Returns false if all work 376 has been assigned. */ 377 378static bool 379gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend) 380{ 381 struct gomp_thread *thr = gomp_thread (); 382 int test; 383 384 gomp_ordered_sync (); 385 gomp_mutex_lock (&thr->ts.work_share->lock); 386 test = gomp_iter_ull_static_next (istart, iend); 387 if (test >= 0) 388 gomp_ordered_static_next (); 389 gomp_mutex_unlock (&thr->ts.work_share->lock); 390 391 return test == 0; 392} 393 394static bool 395gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend) 396{ 397 struct gomp_thread *thr = gomp_thread (); 398 bool ret; 399 400 gomp_ordered_sync (); 401 gomp_mutex_lock (&thr->ts.work_share->lock); 402 ret = gomp_iter_ull_dynamic_next_locked (istart, iend); 403 if (ret) 404 gomp_ordered_next (); 405 else 406 gomp_ordered_last (); 407 gomp_mutex_unlock (&thr->ts.work_share->lock); 408 409 return ret; 410} 411 412static bool 413gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend) 414{ 415 struct gomp_thread *thr = gomp_thread (); 416 bool ret; 417 418 gomp_ordered_sync (); 419 gomp_mutex_lock (&thr->ts.work_share->lock); 420 ret = gomp_iter_ull_guided_next_locked (istart, iend); 421 if (ret) 422 gomp_ordered_next (); 423 else 424 gomp_ordered_last (); 425 gomp_mutex_unlock (&thr->ts.work_share->lock); 426 427 return ret; 428} 429 430bool 431GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend) 432{ 433 struct gomp_thread *thr = gomp_thread (); 434 435 switch (thr->ts.work_share->sched) 436 { 437 case GFS_STATIC: 438 case GFS_AUTO: 439 return gomp_loop_ull_ordered_static_next (istart, iend); 440 case GFS_DYNAMIC: 441 return gomp_loop_ull_ordered_dynamic_next (istart, iend); 442 case GFS_GUIDED: 443 return gomp_loop_ull_ordered_guided_next (istart, iend); 444 default: 445 abort (); 446 } 447} 448 449/* We use static functions above so that we're sure that the "runtime" 450 function can defer to the proper routine without interposition. We 451 export the static function with a strong alias when possible, or with 452 a wrapper function otherwise. */ 453 454#ifdef HAVE_ATTRIBUTE_ALIAS 455extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start 456 __attribute__((alias ("gomp_loop_ull_static_start"))); 457extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start 458 __attribute__((alias ("gomp_loop_ull_dynamic_start"))); 459extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start 460 __attribute__((alias ("gomp_loop_ull_guided_start"))); 461 462extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start 463 __attribute__((alias ("gomp_loop_ull_ordered_static_start"))); 464extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start 465 __attribute__((alias ("gomp_loop_ull_ordered_dynamic_start"))); 466extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start 467 __attribute__((alias ("gomp_loop_ull_ordered_guided_start"))); 468 469extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next 470 __attribute__((alias ("gomp_loop_ull_static_next"))); 471extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next 472 __attribute__((alias ("gomp_loop_ull_dynamic_next"))); 473extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next 474 __attribute__((alias ("gomp_loop_ull_guided_next"))); 475 476extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next 477 __attribute__((alias ("gomp_loop_ull_ordered_static_next"))); 478extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next 479 __attribute__((alias ("gomp_loop_ull_ordered_dynamic_next"))); 480extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next 481 __attribute__((alias ("gomp_loop_ull_ordered_guided_next"))); 482#else 483bool 484GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end, 485 gomp_ull incr, gomp_ull chunk_size, 486 gomp_ull *istart, gomp_ull *iend) 487{ 488 return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart, 489 iend); 490} 491 492bool 493GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end, 494 gomp_ull incr, gomp_ull chunk_size, 495 gomp_ull *istart, gomp_ull *iend) 496{ 497 return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart, 498 iend); 499} 500 501bool 502GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end, 503 gomp_ull incr, gomp_ull chunk_size, 504 gomp_ull *istart, gomp_ull *iend) 505{ 506 return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart, 507 iend); 508} 509 510bool 511GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, 512 gomp_ull incr, gomp_ull chunk_size, 513 gomp_ull *istart, gomp_ull *iend) 514{ 515 return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size, 516 istart, iend); 517} 518 519bool 520GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end, 521 gomp_ull incr, gomp_ull chunk_size, 522 gomp_ull *istart, gomp_ull *iend) 523{ 524 return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size, 525 istart, iend); 526} 527 528bool 529GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end, 530 gomp_ull incr, gomp_ull chunk_size, 531 gomp_ull *istart, gomp_ull *iend) 532{ 533 return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size, 534 istart, iend); 535} 536 537bool 538GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend) 539{ 540 return gomp_loop_ull_static_next (istart, iend); 541} 542 543bool 544GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend) 545{ 546 return gomp_loop_ull_dynamic_next (istart, iend); 547} 548 549bool 550GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend) 551{ 552 return gomp_loop_ull_guided_next (istart, iend); 553} 554 555bool 556GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend) 557{ 558 return gomp_loop_ull_ordered_static_next (istart, iend); 559} 560 561bool 562GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend) 563{ 564 return gomp_loop_ull_ordered_dynamic_next (istart, iend); 565} 566 567bool 568GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend) 569{ 570 return gomp_loop_ull_ordered_guided_next (istart, iend); 571} 572#endif 573