1/* Copyright (C) 2005-2015 Free Software Foundation, Inc. 2 Contributed by Richard Henderson <rth@redhat.com>. 3 4 This file is part of the GNU Offloading and Multi Processing Library 5 (libgomp). 6 7 Libgomp is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 more details. 16 17 Under Section 7 of GPL version 3, you are granted additional 18 permissions described in the GCC Runtime Library Exception, version 19 3.1, as published by the Free Software Foundation. 20 21 You should have received a copy of the GNU General Public License and 22 a copy of the GCC Runtime Library Exception along with this program; 23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 <http://www.gnu.org/licenses/>. */ 25 26/* This file handles the maintainence of threads in response to team 27 creation and termination. */ 28 29#include "libgomp.h" 30#include <stdlib.h> 31#include <string.h> 32 33/* This attribute contains PTHREAD_CREATE_DETACHED. */ 34pthread_attr_t gomp_thread_attr; 35 36/* This key is for the thread destructor. */ 37pthread_key_t gomp_thread_destructor; 38 39 40/* This is the libgomp per-thread data structure. */ 41#if defined HAVE_TLS || defined USE_EMUTLS 42__thread struct gomp_thread gomp_tls_data; 43#else 44pthread_key_t gomp_tls_key; 45#endif 46 47 48/* This structure is used to communicate across pthread_create. 
 */

struct gomp_thread_start_data
{
  /* Outlined parallel-region body and its argument, run by the new thread.  */
  void (*fn) (void *);
  void *fn_data;
  /* Initial team state copied into the new thread's TLS.  */
  struct gomp_team_state ts;
  /* Implicit task for the new thread.  */
  struct gomp_task *task;
  /* Pool the thread should dock in when idle (non-nested case).  */
  struct gomp_thread_pool *thread_pool;
  /* 1-based place number the thread is bound to, 0 if unbound.  */
  unsigned int place;
  /* True if this thread belongs to a nested team; nested threads exit
     after one region instead of returning to the dock.  */
  bool nested;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  /* No TLS support: keep the per-thread structure on this thread's stack
     and publish it through the pthread key instead.  */
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  DATA lives on the creator's stack
     (gomp_alloca in gomp_team_start), so copy everything out before the
     first barrier releases the creator.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  /* Register our release semaphore so ordered sections can wake us.  */
  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      /* Nested team member: run exactly one region, then tear down.  */
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      /* Rendezvous with the master before starting work.  */
      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      /* Last barrier lets the master destroy the team safely.  */
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      /* Pool thread: register in the pool, then loop — dock between
         regions until woken with thr->fn == NULL, which means exit.  */
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          /* Dock until the master hands out the next region (or an exit
             request) by filling in thr->fn/thr->data and releasing us.  */
          gomp_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}


/* Create a new team data structure.  The team, its ordered_release
   array and its implicit_task array are carved out of one allocation;
   ordered_release is placed immediately after implicit_task[nthreads]
   (see the (void *) cast below).  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  size_t size;
  int i;

  /* One block: team header + NTHREADS trailing implicit tasks
     + NTHREADS ordered_release slots.  */
  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
                                      + sizeof (team->implicit_task[0]));
  team = gomp_malloc (size);

  /* The team embeds a cache of 8 work shares (work_share_chunk);
     work_shares[0] is pre-initialized, [1..7] go on the free list.  */
  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#else
  gomp_mutex_init (&team->work_share_list_free_lock);
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  gomp_sem_init (&team->master_release, 0);
  /* ordered_release lives right past the implicit_task array inside
     the same allocation.  */
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  gomp_mutex_init (&team->task_lock);
  team->task_queue = NULL;
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}


/* Free a team data structure.  Only destroys what gomp_new_team
   unconditionally initialized; callers must have already drained the
   work-share lists.  */

static void
free_team (struct gomp_team *team)
{
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  free (team);
}

/* Allocate and initialize a thread pool.  The threads array and the
   threads_dock barrier are set up lazily by gomp_team_start.  */

static struct gomp_thread_pool *gomp_new_thread_pool (void)
{
  struct gomp_thread_pool *pool
    = gomp_malloc (sizeof(struct gomp_thread_pool));
  pool->threads = NULL;
  pool->threads_size = 0;
  pool->threads_used = 0;
  pool->last_team = NULL;
  return pool;
}

/* Exit hook handed to docked pool threads via thr->fn: acknowledge on
   the dock barrier, tear down per-thread state, and terminate the
   thread.  Runs on the dying thread, not on the caller of
   gomp_free_thread.  */

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  Installed as the
   gomp_thread_destructor pthread-key destructor, hence the unused ARG.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          /* Point every docked thread (slots 1..used-1; slot 0 is us)
             at the exit helper.  */
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_barrier_destroy (&pool->threads_dock);

          /* Account for the threads we just retired (all but this one).  */
#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      free (pool->threads);
      if (pool->last_team)
        free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  Called by the master thread: saves its state, reuses
   idle pool threads where possible (honoring OMP_PLACES/proc_bind
   affinity when configured), creates any additional threads via
   pthread_create, and finally releases the whole team through a
   barrier.  FLAGS carries the proc_bind clause in its low 3 bits.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  /* Affinity bookkeeping: S = threads (or places) per subpartition,
     REST = remainder, P = current place (0-based), K = position within
     the current place/subpartition.  */
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
  if (__builtin_expect (thr->thread_pool == NULL, 0))
    {
      /* First parallel region on this thread: create its pool and make
         sure gomp_free_thread runs when the thread exits.  */
      thr->thread_pool = gomp_new_thread_pool ();
      thr->thread_pool->threads_busy = nthreads;
      pthread_setspecific (gomp_thread_destructor, thr);
    }
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  /* Resolve the ICVs to hand to the team's implicit tasks, honoring
     nesting-level-specific OMP_NUM_THREADS / OMP_PROC_BIND lists.  */
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  /* A proc_bind clause on the parallel directive (low 3 bits of FLAGS)
     overrides the ICV, unless binding is disabled entirely.  */
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  /* Team of one: the master is the whole team, nothing to launch.  */
  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REM threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          /* NOTE(review): "struct gomp_thread_data *" looks like a typo
             for "struct gomp_thread *"; harmless because all object
             pointers have the same size, but worth confirming upstream.  */
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread_data *));
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              /* Advance P/K/S to thread I's place, mirroring the scheme
                 chosen in the initial switch (bind) above.  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              /* If the pool thread in slot I isn't already on the right
                 place, fall into the slow path that permutes pool
                 threads by place via AFFINITY_THR buckets.  */
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      /* First mismatch: build a per-place bucket list of
                         the remaining old pool threads.  Small partitions
                         use alloca; large ones heap-allocate (freed at
                         the end of gomp_team_start).  */
                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              /* Chain threads of the same place through
                                 their (currently unused) data field.  */
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      /* proc_bind true: any place inside the partition
                         will do; scan for a non-empty bucket.  */
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  /* Pop a suitably-placed old thread from its bucket.  */
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          /* Hand the reused thread its team state, implicit task and
             the region to run; it picks these up when undocked.  */
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_barrier_reinit (&pool->threads_dock,
                                     nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

  /* Account for the threads about to be created (the master itself is
     only counted the first time, hence --diff when starting fresh).  */
  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* With places, a private attr is needed so each thread can get
         its own affinity set; inherit only the configured stack size.  */
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  /* Per-thread startup records; on the stack, valid until the barrier
     at do_release (new threads copy what they need before that).  */
  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads-i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          /* Same place-advancing state machine as in the reuse loop.  */
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          /* Slot already filled by a reused, correctly-placed thread.  */
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_places_list != NULL, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  /* Release the team: nested teams rendezvous on the team barrier,
     top-level teams on the pool dock.  */
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
     AFFINITY_COUNT if non-zero will be always at least
     OLD_THREADS_COUNT - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  /* Heap-allocated bucket array (small ones came from alloca).  */
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might get awaited count in
     team->barrier in a inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      /* Cancelled team: walk and finalize every work share that was
         handed out, self-linking the tail so the walk terminates.  */
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  /* Restore the master's pre-region state saved by gomp_team_start.  */
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
      /* Nested team is ending: its non-master threads terminate now.  */
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  /* Free work shares allocated beyond the team's embedded cache.  */
  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      /* Top-level team: cache it in the pool for reuse by the next
         parallel region; free the previously cached one.  */
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
    }
}


/* Constructors for this file.
 */

/* Library constructor: set up the per-thread key machinery before any
   OpenMP construct runs.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  /* No compiler TLS: emulate it with a pthread key; the initial
     thread's gomp_thread structure is this static object.  */
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  /* gomp_free_thread runs as this key's destructor when a registered
     thread exits, releasing its pool and implicit task.  */
  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

/* Library destructor: release the pthread key created above.  */

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

/* Allocate a fresh implicit task (with the global ICVs) for the current
   thread and register the thread for cleanup on exit.  Returns a
   pointer to the new task's ICV block.  The task is heap-allocated and
   freed by gomp_free_thread when the thread terminates.  */

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}