/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local. */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
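      /* Idle loop: run the work function we were handed, then dock on
	 pool->threads_dock until the master hands out new work through
	 thr->fn and thr->data; a NULL fn after the dock means exit.  */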
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  gomp_barrier_wait (&pool->threads_dock);

	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}


/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  size_t size;
  int i;

  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
				      + sizeof (team->implicit_task[0]));
  team = gomp_malloc (size);

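  /* Seed the inline work-share cache: work_shares[0] is initialized for
     immediate use below, while work_shares[1] through work_shares[7] are
     chained onto team->work_share_list_alloc for later allocations.  */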
  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#else
  gomp_mutex_init (&team->work_share_list_free_lock);
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  gomp_mutex_init (&team->task_lock);
  team->task_queue = NULL;
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  free (team);
}

/* Allocate and initialize a thread pool. */

static struct gomp_thread_pool *
gomp_new_thread_pool (void)
{
  struct gomp_thread_pool *pool
    = gomp_malloc (sizeof (struct gomp_thread_pool));
  pool->threads = NULL;
  pool->threads_size = 0;
  pool->threads_used = 0;
  pool->last_team = NULL;
  return pool;
}

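/* Work function handed to docked pool threads when the pool is being torn
   down: arrive at pool->threads_dock one last time so that gomp_free_thread
   can safely destroy the barrier, then release the per-thread resources
   and exit the thread.  */
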
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads. */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	}
      free (pool->threads);
      if (pool->last_team)
	free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
  if (__builtin_expect (thr->thread_pool == NULL, 0))
    {
      thr->thread_pool = gomp_new_thread_pool ();
      thr->thread_pool->threads_busy = nthreads;
      pthread_setspecific (gomp_thread_destructor, thr);
    }
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

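  /* Thread 0 is the master and has already been set up above; the
     remaining team slots are filled starting from index 1.  */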
  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on the chosen proc_bind model, set the subpartition
	 for the master thread and initialize helper variables
	 P and optionally S, K and/or REST used by the later place
	 computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
	{
	case omp_proc_bind_true:
	case omp_proc_bind_close:
	  if (nthreads > thr->ts.place_partition_len)
	    {
	      /* T > P.  S threads will be placed in each place,
		 and the final REST threads placed one by one
		 into the already occupied places.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	    }
	  else
	    s = 1;
	  k = 1;
	  break;
	case omp_proc_bind_master:
	  /* Each thread will be bound to master's place.  */
	  break;
	case omp_proc_bind_spread:
	  if (nthreads <= thr->ts.place_partition_len)
	    {
	      /* T <= P.  Each subpartition will have in between s
		 and s+1 places (subpartitions starting at or
		 after rest will have s places, earlier s+1 places),
		 each thread will be bound to the first place in
		 its subpartition (except for the master thread
		 that can be bound to another place in its
		 subpartition).  */
	      s = thr->ts.place_partition_len / nthreads;
	      rest = thr->ts.place_partition_len % nthreads;
	      rest = (s + 1) * rest + thr->ts.place_partition_off;
	      if (p < rest)
		{
		  p -= (p - thr->ts.place_partition_off) % (s + 1);
		  thr->ts.place_partition_len = s + 1;
		}
	      else
		{
		  p -= (p - rest) % s;
		  thr->ts.place_partition_len = s;
		}
	      thr->ts.place_partition_off = p;
	    }
	  else
	    {
	      /* T > P.  Each subpartition will have just a single
		 place and we'll place between s and s+1
		 threads into each subpartition.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	      thr->ts.place_partition_off = p;
	      thr->ts.place_partition_len = 1;
	      k = 1;
	    }
	  break;
	}
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
	n = nthreads;
      else if (old_threads_used == 0)
	{
	  n = 0;
	  gomp_barrier_init (&pool->threads_dock, nthreads);
	}
      else
	{
	  n = old_threads_used;

	  /* Increase the barrier threshold to make sure all new
	     threads arrive before the team is released.  */
	  gomp_barrier_reinit (&pool->threads_dock, nthreads);
	}

      /* Not true yet, but soon will be.  We're going to release all
	 threads from the dock, and those that aren't part of the
	 team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the pool->threads array.  It is
	 expected that changes in the number of threads are rare, thus we
	 make no effort to expand pool->threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
	{
	  pool->threads_size = nthreads + 1;
	  pool->threads
	    = gomp_realloc (pool->threads,
			    pool->threads_size
			    * sizeof (struct gomp_thread *));
	}

      /* Release existing idle threads.  */
      for (; i < n; ++i)
	{
	  unsigned int place_partition_off = thr->ts.place_partition_off;
	  unsigned int place_partition_len = thr->ts.place_partition_len;
	  unsigned int place = 0;
	  if (__builtin_expect (gomp_places_list != NULL, 0))
	    {
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  break;
		case omp_proc_bind_master:
		  break;
		case omp_proc_bind_spread:
		  if (k == 0)
		    {
		      /* T <= P.  */
		      if (p < rest)
			p += s + 1;
		      else
			p += s;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      place_partition_off = p;
		      if (p < rest)
			place_partition_len = s + 1;
		      else
			place_partition_len = s;
		    }
		  else
		    {
		      /* T > P.  */
		      if (k == s)
			{
			  ++p;
			  if (p == (team->prev_ts.place_partition_off
				    + team->prev_ts.place_partition_len))
			    p = team->prev_ts.place_partition_off;
			  k = 1;
			  if (i == nthreads - rest)
			    s = 1;
			}
		      else
			++k;
		      place_partition_off = p;
		      place_partition_len = 1;
		    }
		  break;
		}
	      if (affinity_thr != NULL
		  || (bind != omp_proc_bind_true
		      && pool->threads[i]->place != p + 1)
		  || pool->threads[i]->place <= place_partition_off
		  || pool->threads[i]->place > (place_partition_off
						+ place_partition_len))
		{
		  unsigned int l;
		  if (affinity_thr == NULL)
		    {
		      unsigned int j;

		      if (team->prev_ts.place_partition_len > 64)
			affinity_thr
			  = gomp_malloc (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      else
			affinity_thr
			  = gomp_alloca (team->prev_ts.place_partition_len
					 * sizeof (struct gomp_thread *));
		      memset (affinity_thr, '\0',
			      team->prev_ts.place_partition_len
			      * sizeof (struct gomp_thread *));
		      for (j = i; j < old_threads_used; j++)
			{
			  if (pool->threads[j]->place
			      > team->prev_ts.place_partition_off
			      && (pool->threads[j]->place
				  <= (team->prev_ts.place_partition_off
				      + team->prev_ts.place_partition_len)))
			    {
			      l = pool->threads[j]->place - 1
				  - team->prev_ts.place_partition_off;
			      pool->threads[j]->data = affinity_thr[l];
			      affinity_thr[l] = pool->threads[j];
			    }
			  pool->threads[j] = NULL;
			}
		      if (nthreads > old_threads_used)
			memset (&pool->threads[old_threads_used],
				'\0', ((nthreads - old_threads_used)
				       * sizeof (struct gomp_thread *)));
		      n = nthreads;
		      affinity_count = old_threads_used - i;
		    }
		  if (affinity_count == 0)
		    break;
		  l = p;
		  if (affinity_thr[l - team->prev_ts.place_partition_off]
		      == NULL)
		    {
		      if (bind != omp_proc_bind_true)
			continue;
		      for (l = place_partition_off;
			   l < place_partition_off + place_partition_len;
			   l++)
			if (affinity_thr[l - team->prev_ts.place_partition_off]
			    != NULL)
			  break;
		      if (l == place_partition_off + place_partition_len)
			continue;
		    }
		  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
		  affinity_thr[l - team->prev_ts.place_partition_off]
		    = (struct gomp_thread *) nthr->data;
		  affinity_count--;
		  pool->threads[i] = nthr;
		}
	      else
		nthr = pool->threads[i];
	      place = p + 1;
	    }
	  else
	    nthr = pool->threads[i];
	  nthr->ts.team = team;
	  nthr->ts.work_share = &team->work_shares[0];
	  nthr->ts.last_work_share = NULL;
	  nthr->ts.team_id = i;
	  nthr->ts.level = team->prev_ts.level + 1;
	  nthr->ts.active_level = thr->ts.active_level;
	  nthr->ts.place_partition_off = place_partition_off;
	  nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
	  nthr->ts.single_count = 0;
#endif
	  nthr->ts.static_trip = 0;
	  nthr->task = &team->implicit_task[i];
	  nthr->place = place;
	  gomp_init_task (nthr->task, task, icv);
	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
	  team->implicit_task[i].icv.bind_var = bind_var;
	  nthr->fn = fn;
	  nthr->data = data;
	  team->ordered_release[i] = &nthr->release;
	}

      if (__builtin_expect (affinity_thr != NULL, 0))
	{
	  /* If AFFINITY_THR is non-NULL just because we had to
	     permute some threads in the pool, but we've managed
	     to find exactly as many old threads as we'd find
	     without affinity, we don't need to handle this
	     specially anymore.  */
	  if (nthreads <= old_threads_used
	      ? (affinity_count == old_threads_used - nthreads)
	      : (i == old_threads_used))
	    {
	      if (team->prev_ts.place_partition_len > 64)
		free (affinity_thr);
	      affinity_thr = NULL;
	      affinity_count = 0;
	    }
	  else
	    {
	      i = 1;
	      /* We are going to compute the places/subpartitions
		 again from the beginning.  So, we need to reinitialize
		 vars modified by the switch (bind) above inside
		 of the loop, to the state they had after the initial
		 switch (bind).  */
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (nthreads > thr->ts.place_partition_len)
		    /* T > P.  S has been changed, so needs
		       to be recomputed.  */
		    s = nthreads / thr->ts.place_partition_len;
		  k = 1;
		  p = thr->place - 1;
		  break;
		case omp_proc_bind_master:
		  /* No vars have been changed.  */
		  break;
		case omp_proc_bind_spread:
		  p = thr->ts.place_partition_off;
		  if (k != 0)
		    {
		      /* T > P.  */
		      s = nthreads / team->prev_ts.place_partition_len;
		      k = 1;
		    }
		  break;
		}

	      /* Increase the barrier threshold to make sure all new
		 threads and all the threads we're going to let die
		 arrive before the team is released.  */
	      if (affinity_count)
		gomp_barrier_reinit (&pool->threads_dock,
				     nthreads + affinity_count);
	    }
	}

      if (i == nthreads)
	goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
	--diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
	pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
			    * (nthreads-i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
	{
	  switch (bind)
	    {
	    case omp_proc_bind_true:
	    case omp_proc_bind_close:
	      if (k == s)
		{
		  ++p;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  k = 1;
		  if (i == nthreads - rest)
		    s = 1;
		}
	      else
		++k;
	      break;
	    case omp_proc_bind_master:
	      break;
	    case omp_proc_bind_spread:
	      if (k == 0)
		{
		  /* T <= P.  */
		  if (p < rest)
		    p += s + 1;
		  else
		    p += s;
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  start_data->ts.place_partition_off = p;
		  if (p < rest)
		    start_data->ts.place_partition_len = s + 1;
		  else
		    start_data->ts.place_partition_len = s;
		}
	      else
		{
		  /* T > P.  */
		  if (k == s)
		    {
		      ++p;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      k = 1;
		      if (i == nthreads - rest)
			s = 1;
		    }
		  else
		    ++k;
		  start_data->ts.place_partition_off = p;
		  start_data->ts.place_partition_len = 1;
		}
	      break;
	    }
	  start_data->place = p + 1;
	  if (affinity_thr != NULL && pool->threads[i] != NULL)
	    continue;
	  gomp_init_thread_affinity (attr, p);
	}

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_places_list != NULL, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     never to be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
	diff = -affinity_count;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     Because #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
	{
	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
	  if (next_ws == NULL)
	    gomp_ptrlock_set (&ws->next_ws, ws);
	  gomp_fini_work_share (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
    }
}

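/* For reference, an illustrative and simplified sketch of how a caller such
   as GOMP_parallel (defined in parallel.c, not here) is expected to drive
   the functions above; the exact arguments to gomp_resolve_num_threads and
   the trailing cleanup call are assumptions of this sketch, not something
   this file defines:

     void
     GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads,
		    unsigned int flags)
     {
       num_threads = gomp_resolve_num_threads (num_threads, 0);
       gomp_team_start (fn, data, num_threads, flags,
			gomp_new_team (num_threads));
       fn (data);
       GOMP_parallel_end ();
     }  */
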

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

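/* Allocate and install a gomp_task initialized from the global ICVs for the
   calling thread, register the thread destructor for it, and return a
   pointer to the new task's ICV block.  */
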
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}
