1/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
2   Contributed by Richard Henderson <rth@redhat.com>.
3
4   This file is part of the GNU Offloading and Multi Processing Library
5   (libgomp).
6
7   Libgomp is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3, or (at your option)
10   any later version.
11
12   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15   more details.
16
17   Under Section 7 of GPL version 3, you are granted additional
18   permissions described in the GCC Runtime Library Exception, version
19   3.1, as published by the Free Software Foundation.
20
21   You should have received a copy of the GNU General Public License and
22   a copy of the GCC Runtime Library Exception along with this program;
23   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24   <http://www.gnu.org/licenses/>.  */
25
/* This file handles the maintenance of tasks in response to task
   creation and termination.  */
28
29#include "libgomp.h"
30#include <stdlib.h>
31#include <string.h>
32
33typedef struct gomp_task_depend_entry *hash_entry_type;
34
/* Allocation callback for the dependency hash table; forwards to the
   libgomp allocator, which aborts on out-of-memory.  */

static inline void *
htab_alloc (size_t size)
{
  void *mem = gomp_malloc (size);
  return mem;
}
40
/* Deallocation callback for the dependency hash table.  */

static inline void
htab_free (void *ptr)
{
  /* free accepts NULL, so no guard is required.  */
  free (ptr);
}
46
47#include "hashtab.h"
48
/* Hash callback for the dependency hash table: entries are keyed on
   the address named by the depend clause.  */

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer (element->addr);
}
54
/* Equality callback for the dependency hash table: two entries match
   iff they refer to the same address.  */

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x->addr == y->addr;
}
60
/* Create a new task data structure.  TASK starts out as an implicit,
   non-final, untied-to-nothing task with no children, no taskgroup and
   no depend clauses; it records PARENT_TASK as its parent and inherits
   a copy of the internal control variables from PREV_ICV.  */

void
gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
		struct gomp_task_icv *prev_icv)
{
  task->parent = parent_task;
  task->icv = *prev_icv;
  task->kind = GOMP_TASK_IMPLICIT;
  task->taskwait = NULL;
  task->in_tied_task = false;
  task->final_task = false;
  task->copy_ctors_done = false;
  task->parent_depends_on = false;
  task->children = NULL;
  task->taskgroup = NULL;
  task->dependers = NULL;
  task->depend_hash = NULL;
  task->depend_count = 0;
}
81
/* Clean up a task, after completing it.  Releases the current task's
   resources and pops it off the current thread, making its parent the
   thread's current task again.  */

void
gomp_end_task (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;

  gomp_finish_task (task);
  thr->task = task->parent;
}
93
94static inline void
95gomp_clear_parent (struct gomp_task *children)
96{
97  struct gomp_task *task = children;
98
99  if (task)
100    do
101      {
102	task->parent = NULL;
103	task = task->next_child;
104      }
105    while (task != children);
106}
107
108static void gomp_task_maybe_wait_for_dependencies (void **depend);
109
/* Called when encountering an explicit task directive.  If IF_CLAUSE is
   false, then we must not delay in executing the task.  If UNTIED is true,
   then the task may be executed by any member of the team.

   FN is the task body and DATA its firstprivate block of ARG_SIZE bytes
   with alignment ARG_ALIGN; CPYFN, when non-NULL, copy-constructs the
   block instead of memcpy.  FLAGS is a bitmask: bit 0 is the untied
   clause, bit 1 the final clause, bit 3 says DEPEND is valid.  DEPEND,
   when used, is laid out as { ndepend, nout, addr0, addr1, ... } with
   the out/inout addresses listed before the in addresses.  */

void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  if (flags & 1)
    flags &= ~1;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  /* Run the task undeferred (right here, on this thread) when if(0),
     when there is no team, inside a final task, or when too many tasks
     are queued already.  */
  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check if there isn't a dependency.  If there
	 is, we need to wait for them.  There is no need to handle
	 depend clauses for non-deferred tasks other than this, because
	 the parent task is suspended until the child task finishes and thus
	 it can't start further child tasks.  */
      if ((flags & 8) && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_IFFALSE;
      task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  /* Copy-construct the firstprivate block into a suitably
	     aligned stack buffer before invoking the task body.  */
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (task.children != NULL)
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (task.children);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & 8)
	depend_size = ((uintptr_t) depend[0]
		       * sizeof (struct gomp_task_depend_entry));
      /* Allocate the task descriptor, its depend entries and the
	 (aligned) copy of the firstprivate block in one chunk.  */
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->kind = GOMP_TASK_IFFALSE;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      /* Bit 1 of FLAGS is the final clause.  */
      task->final_task = (flags & 2) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
			     || (taskgroup && taskgroup->cancelled))
			    && !task->copy_ctors_done, 0))
	{
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return;
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  /* depend[0] is the number of depend clauses, depend[1] the
	     number of out/inout clauses (listed first among the
	     addresses that follow).  */
	  size_t ndepend = (uintptr_t) depend[0];
	  size_t nout = (uintptr_t) depend[1];
	  size_t i;
	  hash_entry_type ent;

	  task->depend_count = ndepend;
	  task->num_dependees = 0;
	  if (parent->depend_hash == NULL)
	    parent->depend_hash
	      = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
	  for (i = 0; i < ndepend; i++)
	    {
	      task->depend[i].addr = depend[2 + i];
	      task->depend[i].next = NULL;
	      task->depend[i].prev = NULL;
	      task->depend[i].task = task;
	      task->depend[i].is_in = i >= nout;
	      task->depend[i].redundant = false;
	      task->depend[i].redundant_out = false;

	      hash_entry_type *slot
		= htab_find_slot (&parent->depend_hash, &task->depend[i],
				  INSERT);
	      hash_entry_type out = NULL, last = NULL;
	      if (*slot)
		{
		  /* If multiple depends on the same task are the
		     same, all but the first one are redundant.
		     As inout/out come first, if any of them is
		     inout/out, it will win, which is the right
		     semantics.  */
		  if ((*slot)->task == task)
		    {
		      task->depend[i].redundant = true;
		      continue;
		    }
		  for (ent = *slot; ent; ent = ent->next)
		    {
		      if (ent->redundant_out)
			break;

		      last = ent;

		      /* depend(in:...) doesn't depend on earlier
			 depend(in:...).  */
		      if (i >= nout && ent->is_in)
			continue;

		      if (!ent->is_in)
			out = ent;

		      /* Record this task in TSK's dependers vector, so
			 it can be queued once TSK completes.  */
		      struct gomp_task *tsk = ent->task;
		      if (tsk->dependers == NULL)
			{
			  tsk->dependers
			    = gomp_malloc (sizeof (struct gomp_dependers_vec)
					   + 6 * sizeof (struct gomp_task *));
			  tsk->dependers->n_elem = 1;
			  tsk->dependers->allocated = 6;
			  tsk->dependers->elem[0] = task;
			  task->num_dependees++;
			  continue;
			}
		      /* We already have some other dependency on tsk
			 from earlier depend clause.  */
		      else if (tsk->dependers->n_elem
			       && (tsk->dependers->elem[tsk->dependers->n_elem
							- 1]
				   == task))
			continue;
		      else if (tsk->dependers->n_elem
			       == tsk->dependers->allocated)
			{
			  tsk->dependers->allocated
			    = tsk->dependers->allocated * 2 + 2;
			  tsk->dependers
			    = gomp_realloc (tsk->dependers,
					    sizeof (struct gomp_dependers_vec)
					    + (tsk->dependers->allocated
					       * sizeof (struct gomp_task *)));
			}
		      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
		      task->num_dependees++;
		    }
		  task->depend[i].next = *slot;
		  (*slot)->prev = &task->depend[i];
		}
	      *slot = &task->depend[i];

	      /* There is no need to store more than one depend({,in}out:)
		 task per address in the hash table chain for the purpose
		 of creation of deferred tasks, because each out
		 depends on all earlier outs, thus it is enough to record
		 just the last depend({,in}out:).  For depend(in:), we need
		 to keep all of the previous ones not terminated yet, because
		 a later depend({,in}out:) might need to depend on all of
		 them.  So, if the new task's clause is depend({,in}out:),
		 we know there is at most one other depend({,in}out:) clause
		 in the list (out).  For non-deferred tasks we want to see
		 all outs, so they are moved to the end of the chain,
		 after first redundant_out entry all following entries
		 should be redundant_out.  */
	      if (!task->depend[i].is_in && out)
		{
		  if (out != last)
		    {
		      out->next->prev = out->prev;
		      out->prev->next = out->next;
		      out->next = last->next;
		      out->prev = last;
		      last->next = out;
		      if (out->next)
			out->next->prev = out;
		    }
		  out->redundant_out = true;
		}
	    }
	  if (task->num_dependees)
	    {
	      /* The new task is blocked on earlier siblings; it will be
		 queued by gomp_task_run_post_handle_dependers when its
		 last dependency completes.  */
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}
      /* Link the task at the head of the parent's circular children
	 list.  */
      if (parent->children)
	{
	  task->next_child = parent->children;
	  task->prev_child = parent->children->prev_child;
	  task->next_child->prev_child = task;
	  task->prev_child->next_child = task;
	}
      else
	{
	  task->next_child = task;
	  task->prev_child = task;
	}
      parent->children = task;
      /* Likewise for the taskgroup's circular children list.  */
      if (taskgroup)
	{
	  if (taskgroup->children)
	    {
	      task->next_taskgroup = taskgroup->children;
	      task->prev_taskgroup = taskgroup->children->prev_taskgroup;
	      task->next_taskgroup->prev_taskgroup = task;
	      task->prev_taskgroup->next_taskgroup = task;
	    }
	  else
	    {
	      task->next_taskgroup = task;
	      task->prev_taskgroup = task;
	    }
	  taskgroup->children = task;
	}
      /* Insert at the tail of the team's task queue.  */
      if (team->task_queue)
	{
	  task->next_queue = team->task_queue;
	  task->prev_queue = team->task_queue->prev_queue;
	  task->next_queue->prev_queue = task;
	  task->prev_queue->next_queue = task;
	}
      else
	{
	  task->next_queue = task;
	  task->prev_queue = task;
	  team->task_queue = task;
	}
      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      /* Wake one thread if there is spare parallelism to run the new
	 task.  */
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}
414
/* Prepare CHILD_TASK for execution: advance PARENT's and TASKGROUP's
   list heads past it, take it off the team's task queue and mark it
   tied.  Returns true if the task should be discarded instead because
   its parallel region or taskgroup has been cancelled and no copy
   constructors were run for it.  Called with team->task_lock held.  */

static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
		   struct gomp_taskgroup *taskgroup, struct gomp_team *team)
{
  if (parent)
    {
      if (parent->children == child_task)
	parent->children = child_task->next_child;
      /* If the parent is waiting on dependencies and CHILD_TASK was the
	 last awaited task in its children list, shift that marker to the
	 previous awaited waiting child, or clear it.  */
      if (__builtin_expect (child_task->parent_depends_on, 0)
	  && parent->taskwait->last_parent_depends_on == child_task)
	{
	  if (child_task->prev_child->kind == GOMP_TASK_WAITING
	      && child_task->prev_child->parent_depends_on)
	    parent->taskwait->last_parent_depends_on = child_task->prev_child;
	  else
	    parent->taskwait->last_parent_depends_on = NULL;
	}
    }
  if (taskgroup && taskgroup->children == child_task)
    taskgroup->children = child_task->next_taskgroup;
  /* Unlink from the team's circular task queue.  */
  child_task->prev_queue->next_queue = child_task->next_queue;
  child_task->next_queue->prev_queue = child_task->prev_queue;
  if (team->task_queue == child_task)
    {
      if (child_task->next_queue != child_task)
	team->task_queue = child_task->next_queue;
      else
	team->task_queue = NULL;
    }
  child_task->kind = GOMP_TASK_TIED;
  if (--team->task_queued_count == 0)
    gomp_team_barrier_clear_task_pending (&team->barrier);
  if ((gomp_team_barrier_cancelled (&team->barrier)
       || (taskgroup && taskgroup->cancelled))
      && !child_task->copy_ctors_done)
    return true;
  return false;
}
453
/* Remove CHILD_TASK's depend entries from the parent's dependency hash
   table now that the task has completed.  Entries marked redundant were
   never linked into a chain and are skipped.  Called with
   team->task_lock held and the parent still alive.  */

static void
gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  size_t i;

  for (i = 0; i < child_task->depend_count; i++)
    if (!child_task->depend[i].redundant)
      {
	if (child_task->depend[i].next)
	  child_task->depend[i].next->prev = child_task->depend[i].prev;
	if (child_task->depend[i].prev)
	  child_task->depend[i].prev->next = child_task->depend[i].next;
	else
	  {
	    /* Entry is the chain head; replace the hash slot with the
	       next entry or clear the slot entirely.  */
	    hash_entry_type *slot
	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
				NO_INSERT);
	    if (*slot != &child_task->depend[i])
	      abort ();
	    if (child_task->depend[i].next)
	      *slot = child_task->depend[i].next;
	    else
	      htab_clear_slot (parent->depend_hash, slot);
	  }
      }
}
481
/* CHILD_TASK has completed; for every task recorded in its dependers
   vector, decrement the dependence count and, once it reaches zero,
   queue that task: link it into its parent's children list, its
   taskgroup's children list and the team's task queue, waking any
   waiters.  Returns the number of tasks made runnable.  Called with
   team->task_lock held.  */

static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
				     struct gomp_team *team)
{
  struct gomp_task *parent = child_task->parent;
  size_t i, count = child_task->dependers->n_elem, ret = 0;
  for (i = 0; i < count; i++)
    {
      struct gomp_task *task = child_task->dependers->elem[i];
      if (--task->num_dependees != 0)
	continue;

      /* TASK's last dependency just finished; queue it.  */
      struct gomp_taskgroup *taskgroup = task->taskgroup;
      if (parent)
	{
	  if (parent->children)
	    {
	      /* If parent is in gomp_task_maybe_wait_for_dependencies
		 and it doesn't need to wait for this task, put it after
		 all ready to run tasks it needs to wait for.  */
	      if (parent->taskwait && parent->taskwait->last_parent_depends_on
		  && !task->parent_depends_on)
		{
		  struct gomp_task *last_parent_depends_on
		    = parent->taskwait->last_parent_depends_on;
		  task->next_child = last_parent_depends_on->next_child;
		  task->prev_child = last_parent_depends_on;
		}
	      else
		{
		  task->next_child = parent->children;
		  task->prev_child = parent->children->prev_child;
		  parent->children = task;
		}
	      task->next_child->prev_child = task;
	      task->prev_child->next_child = task;
	    }
	  else
	    {
	      task->next_child = task;
	      task->prev_child = task;
	      parent->children = task;
	    }
	  /* Wake a parent sleeping in taskwait or dependency wait.  */
	  if (parent->taskwait)
	    {
	      if (parent->taskwait->in_taskwait)
		{
		  parent->taskwait->in_taskwait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	      else if (parent->taskwait->in_depend_wait)
		{
		  parent->taskwait->in_depend_wait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	      if (parent->taskwait->last_parent_depends_on == NULL
		  && task->parent_depends_on)
		parent->taskwait->last_parent_depends_on = task;
	    }
	}
      if (taskgroup)
	{
	  if (taskgroup->children)
	    {
	      task->next_taskgroup = taskgroup->children;
	      task->prev_taskgroup = taskgroup->children->prev_taskgroup;
	      task->next_taskgroup->prev_taskgroup = task;
	      task->prev_taskgroup->next_taskgroup = task;
	    }
	  else
	    {
	      task->next_taskgroup = task;
	      task->prev_taskgroup = task;
	    }
	  taskgroup->children = task;
	  if (taskgroup->in_taskgroup_wait)
	    {
	      taskgroup->in_taskgroup_wait = false;
	      gomp_sem_post (&taskgroup->taskgroup_sem);
	    }
	}
      /* Insert at the tail of the team's task queue.  */
      if (team->task_queue)
	{
	  task->next_queue = team->task_queue;
	  task->prev_queue = team->task_queue->prev_queue;
	  task->next_queue->prev_queue = task;
	  task->prev_queue->next_queue = task;
	}
      else
	{
	  task->next_queue = task;
	  task->prev_queue = task;
	  team->task_queue = task;
	}
      ++team->task_count;
      ++team->task_queued_count;
      ++ret;
    }
  free (child_task->dependers);
  child_task->dependers = NULL;
  if (ret > 1)
    gomp_team_barrier_set_task_pending (&team->barrier);
  return ret;
}
586
587static inline size_t
588gomp_task_run_post_handle_depend (struct gomp_task *child_task,
589				  struct gomp_team *team)
590{
591  if (child_task->depend_count == 0)
592    return 0;
593
594  /* If parent is gone already, the hash table is freed and nothing
595     will use the hash table anymore, no need to remove anything from it.  */
596  if (child_task->parent != NULL)
597    gomp_task_run_post_handle_depend_hash (child_task);
598
599  if (child_task->dependers == NULL)
600    return 0;
601
602  return gomp_task_run_post_handle_dependers (child_task, team);
603}
604
/* Unlink the completed CHILD_TASK from its parent's circular children
   list, waking the parent if it was blocked in taskwait or in a
   dependency wait on this task.  Called with team->task_lock held.  */

static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  if (parent == NULL)
    return;
  /* If the parent waits on dependencies and this was the last awaited
     one, wake it from the dependency wait.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && --parent->taskwait->n_depend == 0
      && parent->taskwait->in_depend_wait)
    {
      parent->taskwait->in_depend_wait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }
  child_task->prev_child->next_child = child_task->next_child;
  child_task->next_child->prev_child = child_task->prev_child;
  if (parent->children != child_task)
    return;
  if (child_task->next_child != child_task)
    parent->children = child_task->next_child;
  else
    {
      /* We access task->children in GOMP_taskwait
	 outside of the task lock mutex region, so
	 need a release barrier here to ensure memory
	 written by child_task->fn above is flushed
	 before the NULL is written.  */
      __atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE);
      if (parent->taskwait && parent->taskwait->in_taskwait)
	{
	  parent->taskwait->in_taskwait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
    }
}
639
/* Unlink the completed CHILD_TASK from its taskgroup's circular
   children list and drop the taskgroup's child count, waking a thread
   blocked in GOMP_taskgroup_end when the last child finishes.  Called
   with team->task_lock held.  */

static inline void
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
{
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup == NULL)
    return;
  child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup;
  child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup;
  if (taskgroup->num_children > 1)
    --taskgroup->num_children;
  else
    {
      /* We access taskgroup->num_children in GOMP_taskgroup_end
	 outside of the task lock mutex region, so
	 need a release barrier here to ensure memory
	 written by child_task->fn above is flushed
	 before the NULL is written.  */
      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
    }
  if (taskgroup->children != child_task)
    return;
  if (child_task->next_taskgroup != child_task)
    taskgroup->children = child_task->next_taskgroup;
  else
    {
      taskgroup->children = NULL;
      if (taskgroup->in_taskgroup_wait)
	{
	  taskgroup->in_taskgroup_wait = false;
	  gomp_sem_post (&taskgroup->taskgroup_sem);
	}
    }
}
673
/* Task scheduling loop run by a thread blocked at the team barrier:
   repeatedly take a task off the team's queue and run it until the
   team's task count drops to zero, then release the barrier and wake
   the other threads.  */

void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  /* Freeing of a finished task is deferred one iteration so it happens
     outside the critical section where possible.  */
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      if (team->task_count == 0)
	{
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  return;
	}
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  while (1)
    {
      bool cancelled = false;
      if (team->task_queue != NULL)
	{
	  child_task = team->task_queue;
	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
					 child_task->taskgroup, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	  team->task_running_count++;
	  child_task->in_tied_task = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  /* Run the task body with the child installed as this
	     thread's current task.  */
	  thr->task = child_task;
	  child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	return;
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (child_task->children);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  if (!cancelled)
	    team->task_running_count--;
	  /* If finishing this task released several dependent tasks,
	     wake enough idle threads to run them.  */
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	  if (--team->task_count == 0
	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
	    {
	      gomp_team_barrier_done (&team->barrier, state);
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_team_barrier_wake (&team->barrier, 0);
	      gomp_mutex_lock (&team->task_lock);
	    }
	}
    }
}
768
/* Called when encountering a taskwait directive.  Runs or waits for
   the current task's children until none remain: WAITING children are
   executed on this thread, and when all remaining children are already
   running elsewhere this thread sleeps on a semaphore until woken.  */

void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  /* Freeing of a finished child is deferred one iteration so it
     happens outside the critical section where possible.  */
  struct gomp_task *to_free = NULL;
  struct gomp_taskwait taskwait;
  int do_wake = 0;

  /* The acquire barrier on load of task->children here synchronizes
     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
     not necessary that we synchronize with other non-NULL writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskwait.  */
  if (task == NULL
      || __atomic_load_n (&task->children, MEMMODEL_ACQUIRE) == NULL)
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (task->children == NULL)
	{
	  /* All children finished; tear down the taskwait state if it
	     was ever installed and return.  */
	  bool destroy_taskwait = task->taskwait != NULL;
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  if (destroy_taskwait)
	    gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      if (task->children->kind == GOMP_TASK_WAITING)
	{
	  child_task = task->children;
	  cancelled
	    = gomp_task_run_pre (child_task, task, child_task->taskgroup,
				 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  /* All tasks we are waiting for are already running
	     in other threads.  Wait for them.  */
	  if (task->taskwait == NULL)
	    {
	      taskwait.in_depend_wait = false;
	      gomp_sem_init (&taskwait.taskwait_sem, 0);
	      task->taskwait = &taskwait;
	    }
	  taskwait.in_taskwait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  /* Run the child on this thread.  */
	  thr->task = child_task;
	  child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  /* Unlink the finished child from this task's children list.  */
	  child_task->prev_child->next_child = child_task->next_child;
	  child_task->next_child->prev_child = child_task->prev_child;
	  if (task->children == child_task)
	    {
	      if (child_task->next_child != child_task)
		task->children = child_task->next_child;
	      else
		task->children = NULL;
	    }
	  gomp_clear_parent (child_task->children);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  /* If finishing this child released several dependent tasks,
	     wake enough idle threads to run them.  */
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
890
/* This is like GOMP_taskwait, but we only wait for tasks that the
   upcoming task depends on.  DEPEND has the same layout as in
   GOMP_task: { ndepend, nout, addr0, addr1, ... } with the out/inout
   addresses listed before the in addresses.  Sibling tasks whose
   depend entries conflict are marked parent_depends_on and, when ready,
   moved to the front of the children list so they run first; this
   thread then runs or waits for them until all are done.  */

static void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  struct gomp_task_depend_entry elem, *ent = NULL;
  struct gomp_taskwait taskwait;
  struct gomp_task *last_parent_depends_on = NULL;
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  size_t num_awaited = 0;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  /* Scan the dependency hash table for sibling tasks whose depend
     clauses conflict with the new task's and mark them awaited.  */
  for (i = 0; i < ndepend; i++)
    {
      elem.addr = depend[i + 2];
      ent = htab_find (task->depend_hash, &elem);
      for (; ent; ent = ent->next)
	if (i >= nout && ent->is_in)
	  /* An in clause does not conflict with an earlier in clause.  */
	  continue;
	else
	  {
	    struct gomp_task *tsk = ent->task;
	    if (!tsk->parent_depends_on)
	      {
		tsk->parent_depends_on = true;
		++num_awaited;
		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
		  {
		    /* If a task we need to wait for is not already
		       running and is ready to be scheduled, move it
		       to front, so that we run it as soon as possible.  */
		    if (last_parent_depends_on)
		      {
			tsk->prev_child->next_child = tsk->next_child;
			tsk->next_child->prev_child = tsk->prev_child;
			tsk->prev_child = last_parent_depends_on;
			tsk->next_child = last_parent_depends_on->next_child;
			tsk->prev_child->next_child = tsk;
			tsk->next_child->prev_child = tsk;
		      }
		    else if (tsk != task->children)
		      {
			tsk->prev_child->next_child = tsk->next_child;
			tsk->next_child->prev_child = tsk->prev_child;
			tsk->prev_child = task->children;
			tsk->next_child = task->children->next_child;
			task->children = tsk;
			tsk->prev_child->next_child = tsk;
			tsk->next_child->prev_child = tsk;
		      }
		    last_parent_depends_on = tsk;
		  }
	      }
	  }
    }
  if (num_awaited == 0)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  memset (&taskwait, 0, sizeof (taskwait));
  taskwait.n_depend = num_awaited;
  taskwait.last_parent_depends_on = last_parent_depends_on;
  gomp_sem_init (&taskwait.taskwait_sem, 0);
  task->taskwait = &taskwait;

  while (1)
    {
      bool cancelled = false;
      if (taskwait.n_depend == 0)
	{
	  /* All awaited tasks have finished.  */
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      if (task->children->kind == GOMP_TASK_WAITING)
	{
	  child_task = task->children;
	  cancelled
	    = gomp_task_run_pre (child_task, task, child_task->taskgroup,
				 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	/* All tasks we are waiting for are already running
	   in other threads.  Wait for them.  */
	taskwait.in_depend_wait = true;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  /* Run the child on this thread.  */
	  thr->task = child_task;
	  child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  if (child_task->parent_depends_on)
	    --taskwait.n_depend;
	  /* Unlink the finished child from this task's children list.  */
	  child_task->prev_child->next_child = child_task->next_child;
	  child_task->next_child->prev_child = child_task->prev_child;
	  if (task->children == child_task)
	    {
	      if (child_task->next_child != child_task)
		task->children = child_task->next_child;
	      else
		task->children = NULL;
	    }
	  gomp_clear_parent (child_task->children);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  /* If finishing this child released several dependent tasks,
	     wake enough idle threads to run them.  */
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
1055
/* Called when encountering a taskyield directive.  The implementation
   currently does not switch away from the generating task, so this is
   deliberately a no-op.  */

void
GOMP_taskyield (void)
{
  /* Nothing at the moment.  */
}
1063
1064void
1065GOMP_taskgroup_start (void)
1066{
1067  struct gomp_thread *thr = gomp_thread ();
1068  struct gomp_team *team = thr->ts.team;
1069  struct gomp_task *task = thr->task;
1070  struct gomp_taskgroup *taskgroup;
1071
1072  /* If team is NULL, all tasks are executed as
1073     GOMP_TASK_IFFALSE tasks and thus all children tasks of
1074     taskgroup and their descendant tasks will be finished
1075     by the time GOMP_taskgroup_end is called.  */
1076  if (team == NULL)
1077    return;
1078  taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
1079  taskgroup->prev = task->taskgroup;
1080  taskgroup->children = NULL;
1081  taskgroup->in_taskgroup_wait = false;
1082  taskgroup->cancelled = false;
1083  taskgroup->num_children = 0;
1084  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1085  task->taskgroup = taskgroup;
1086}
1087
/* Called at the end of a taskgroup construct: wait until every task
   created in the taskgroup region (including descendant tasks) has
   completed, executing queued child tasks on this thread while
   waiting, then pop the taskgroup and free it.  */

void
GOMP_taskgroup_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;
  struct gomp_task *child_task = NULL;	/* Task selected to run here.  */
  struct gomp_task *to_free = NULL;	/* Finished task awaiting free.  */
  int do_wake = 0;			/* Count of threads to wake.  */

  /* With no team there is nothing to wait for; see the explanation in
     GOMP_taskgroup_start.  */
  if (team == NULL)
    return;
  taskgroup = task->taskgroup;

  /* The acquire barrier on load of taskgroup->num_children here
     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
     It is not necessary that we synchronize with other non-0 writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskgroup_end.  */
  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
    goto finish;

  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (taskgroup->children == NULL)
	{
	  if (taskgroup->num_children)
	    {
	      /* No taskgroup children are queued, yet some are still
		 outstanding.  Help out by running one of this task's
		 own queued children if it has any; otherwise sleep
		 until woken (do_wait).  */
	      if (task->children == NULL)
		goto do_wait;
	      child_task = task->children;
            }
          else
	    {
	      /* Every task of the taskgroup has completed; release any
		 pending finished task and leave the loop.  */
	      gomp_mutex_unlock (&team->task_lock);
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		}
	      goto finish;
	    }
	}
      else
	child_task = taskgroup->children;
      if (child_task->kind == GOMP_TASK_WAITING)
	{
	  cancelled
	    = gomp_task_run_pre (child_task, child_task->parent, taskgroup,
				 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      /* The task was cancelled; skip running its work function
		 but still do the post-run bookkeeping below.  */
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  child_task = NULL;
	 do_wait:
	  /* All tasks we are waiting for are already running
	     in other threads.  Wait for them.  */
	  taskgroup->in_taskgroup_wait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      /* Free the previously finished task outside the task lock.  */
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  /* Run the selected child task on this thread, temporarily
	     making it the thread's current task.  */
	  thr->task = child_task;
	  child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskgroup->taskgroup_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  /* Dependencies are resolved even for a cancelled task; this
	     may make further tasks runnable (new_tasks).  */
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (child_task->children);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      /* More than one task became runnable; wake idle threads
		 to help, but no more than the number of new tasks.  */
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }

 finish:
  /* Pop the taskgroup off the current task and release its storage.  */
  task->taskgroup = taskgroup->prev;
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
}
1208
1209int
1210omp_in_final (void)
1211{
1212  struct gomp_thread *thr = gomp_thread ();
1213  return thr->task && thr->task->final_task;
1214}
1215
1216ialias (omp_in_final)
1217