/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "hsa-common.h"
#include "stringpool.h"
#include "attribs.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};
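
/* For illustration only (not taken from a real dump), a nest such as

	#pragma omp parallel
	  #pragma omp for
	  for (i ...) ...

   yields a GIMPLE_OMP_PARALLEL region whose INNER field points to a
   GIMPLE_OMP_FOR region, with OUTER pointing back the other way;
   sibling directives at the same depth are chained through NEXT.  */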

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}
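
/* As a rough illustration of the tests above, a loop such as

	#pragma omp parallel for schedule (dynamic, 16)
	for (j = 0; j < 1024; j++) ...

   has invariant bounds, step and chunk size and so is safe to
   combine, whereas using 'n * 2' for the chunk size of a shared
   variable N would leave a computed temporary in WS_ENTRY_BB and
   make this predicate return false.  */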

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
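
/* For example (with made-up numbers), given a vectorization factor of
   8, a chunk size of 13 becomes (13 + 7) & -8 == 16: the chunk size
   is rounded up to a multiple of the vectorization factor so that
   SIMD lanes are never split across chunks.  */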

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  PAR_STMT is the enclosing parallel
   directive and WS_STMT is the workshare directive being expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
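
/* Schematically, for

	#pragma omp parallel for schedule (dynamic, 4)
	for (j = 0; j < n; j++) ...

   the vector built above is roughly { (long) 0, (long) n, (long) 1,
   (long) 4 } and is spliced into the GOMP_parallel_loop_dynamic call,
   while a sections region with three GIMPLE_OMP_SECTION children
   yields just { 3u }.  */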

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel: while they are
     implementable, adding another big set of APIs or slowing down
     the normal paths is not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at BB inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}
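
/* E.g. gimple_build_cond_empty (build2 (LT_EXPR, boolean_type_node,
   iv, limit)) yields 'if (iv < limit) goto <?>; else goto <?>;' with
   both destinations still unset; callers are expected to wire up the
   outgoing edges afterwards (IV and LIMIT are illustrative).  */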

/* Return true if a parallel REGION is within a declare target function or
   within a target region and is not a part of a gridified target.  */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
	indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
	{
	  gomp_target *tgt_stmt
	    = as_a <gomp_target *> (last_stmt (region->entry));

	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_))
	    return indirect;
	  else
	    return true;
	}
    }

  if (lookup_attribute ("omp declare target",
			DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc. to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct; it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
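  /* For example, '#pragma omp parallel if (x) num_threads (4)' takes
     the second form and computes roughly 'tmp = x ? 4u : 1u' out of
     explicit control flow below, so a false IF clause forces a team
     of exactly one thread (example for illustration only).  */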
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}
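
/* After this expansion, a plain '#pragma omp parallel' whose body was
   outlined into foo._omp_fn.0 becomes, schematically,

	GOMP_parallel (foo._omp_fn.0, &.omp_data_o.1, 0, 0);

   where the trailing zeros are the default NUM_THREADS and FLAGS
   arguments (the identifier names here are illustrative).  */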

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
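
/* Schematically, a plain '#pragma omp task' therefore becomes

	GOMP_task (foo._omp_fn.1, &.omp_data_o.2, cpyfn, arg_size,
		   arg_align, cond, flags, depend, priority);

   with COND folded from the IF clause (boolean_true_node without one)
   and DEPEND either the depend vector or a null pointer (the
   identifier names here are illustrative).  */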

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  ENTRY_STMT is the teams
   statement being expanded.  BB is the block where to insert the
   code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
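
/* E.g. a host '#pragma omp teams num_teams (8) thread_limit (16)'
   expands to roughly

	GOMP_teams_reg (foo._omp_fn.3, &.omp_data_o.4, 8, 16, 0);

   with the final zero being the flags argument reserved above for
   future extensibility (identifier names are illustrative).  */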

/* Chain all the DECLs in V by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}
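
/* For instance, in

	#pragma omp parallel
	{
	  #pragma omp for
	  for (i ...) ...
	}

   the implicit barrier ending the loop is immediately followed by the
   implicit barrier ending the parallel, so, absent addressable locals
   that queued tasks might still reference, the workshare's
   GIMPLE_OMP_RETURN is marked nowait here.  */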

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree, arrange for the child function to fix up its loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree diff_type,
			   tree bound_type, location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}
1652
1653/* Emit initializers for collapsed loop members.  INNER is true if
1654   this is for the element loop of a TILE.  IVAR is the outer
1655   loop iteration variable, from which collapsed loop iteration values
1656   are  calculated.  COUNTS array has been initialized by
1657   expand_oacc_collapse_inits.  */
1658
1659static void
1660expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1661			   gimple_stmt_iterator *gsi,
1662			   const oacc_collapse *counts, tree ivar,
1663			   tree diff_type)
1664{
1665  tree ivar_type = TREE_TYPE (ivar);
1666
1667  /*  The most rapidly changing iteration variable is the innermost
1668      one.  */
1669  for (int ix = fd->collapse; ix--;)
1670    {
1671      const omp_for_data_loop *loop = &fd->loops[ix];
1672      const oacc_collapse *collapse = &counts[ix];
1673      tree v = inner ? loop->v : collapse->outer;
1674      tree iter_type = TREE_TYPE (v);
1675      tree plus_type = iter_type;
1676      enum tree_code plus_code = PLUS_EXPR;
1677      tree expr;
1678
1679      if (POINTER_TYPE_P (iter_type))
1680	{
1681	  plus_code = POINTER_PLUS_EXPR;
1682	  plus_type = sizetype;
1683	}
1684
1685      expr = ivar;
1686      if (ix)
1687	{
1688	  tree mod = fold_convert (ivar_type, collapse->iters);
1689	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1690	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1691	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1692					   true, GSI_SAME_STMT);
1693	}
1694
1695      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1696			  fold_convert (diff_type, collapse->step));
1697      expr = fold_build2 (plus_code, iter_type,
1698			  inner ? collapse->outer : collapse->base,
1699			  fold_convert (plus_type, expr));
1700      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1701				       true, GSI_SAME_STMT);
1702      gassign *ass = gimple_build_assign (v, expr);
1703      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1704    }
1705}
1706
1707/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
1708   of the combined collapse > 1 loop constructs, generate code like:
1709	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1710	if (cond3 is <)
1711	  adj = STEP3 - 1;
1712	else
1713	  adj = STEP3 + 1;
1714	count3 = (adj + N32 - N31) / STEP3;
1715	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1716	if (cond2 is <)
1717	  adj = STEP2 - 1;
1718	else
1719	  adj = STEP2 + 1;
1720	count2 = (adj + N22 - N21) / STEP2;
1721	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1722	if (cond1 is <)
1723	  adj = STEP1 - 1;
1724	else
1725	  adj = STEP1 + 1;
1726	count1 = (adj + N12 - N11) / STEP1;
1727	count = count1 * count2 * count3;
1728   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1729	count = 0;
1730   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
1731   of the combined loop constructs, just initialize COUNTS array
1732   from the _looptemp_ clauses.  */
1733
1734/* NOTE: It *could* be better to moosh all of the BBs together,
1735   creating one larger BB with all the computation and the unexpected
1736   jump at the end.  I.e.
1737
1738   bool zero3, zero2, zero1, zero;
1739
1740   zero3 = N32 c3 N31;
1741   count3 = (N32 - N31) /[cl] STEP3;
1742   zero2 = N22 c2 N21;
1743   count2 = (N22 - N21) /[cl] STEP2;
1744   zero1 = N12 c1 N11;
1745   count1 = (N12 - N11) /[cl] STEP1;
1746   zero = zero3 || zero2 || zero1;
1747   count = count1 * count2 * count3;
1748   if (__builtin_expect(zero, false)) goto zero_iter_bb;
1749
1750   After all, we expect the zero=false, and thus we expect to have to
1751   evaluate all of the comparison expressions, so short-circuiting
1752   oughtn't be a win.  Since the condition isn't protecting a
1753   denominator, we're not concerned about divide-by-zero, so we can
1754   fully evaluate count even if a numerator turned out to be wrong.
1755
1756   It seems like putting this all together would create much better
1757   scheduling opportunities, and less pressure on the chip's branch
1758   predictor.  */
1759
1760static void
1761expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1762			    basic_block &entry_bb, tree *counts,
1763			    basic_block &zero_iter1_bb, int &first_zero_iter1,
1764			    basic_block &zero_iter2_bb, int &first_zero_iter2,
1765			    basic_block &l2_dom_bb)
1766{
1767  tree t, type = TREE_TYPE (fd->loop.v);
1768  edge e, ne;
1769  int i;
1770
1771  /* Collapsed loops need work for expansion into SSA form.  */
1772  gcc_assert (!gimple_in_ssa_p (cfun));
1773
1774  if (gimple_omp_for_combined_into_p (fd->for_stmt)
1775      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1776    {
1777      gcc_assert (fd->ordered == 0);
1778      /* First two _looptemp_ clauses are for istart/iend, counts[0]
1779	 isn't supposed to be handled, as the inner loop doesn't
1780	 use it.  */
1781      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1782				     OMP_CLAUSE__LOOPTEMP_);
1783      gcc_assert (innerc);
1784      for (i = 0; i < fd->collapse; i++)
1785	{
1786	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1787				    OMP_CLAUSE__LOOPTEMP_);
1788	  gcc_assert (innerc);
1789	  if (i)
1790	    counts[i] = OMP_CLAUSE_DECL (innerc);
1791	  else
1792	    counts[0] = NULL_TREE;
1793	}
1794      return;
1795    }
1796
1797  for (i = fd->collapse; i < fd->ordered; i++)
1798    {
1799      tree itype = TREE_TYPE (fd->loops[i].v);
1800      counts[i] = NULL_TREE;
1801      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1802		       fold_convert (itype, fd->loops[i].n1),
1803		       fold_convert (itype, fd->loops[i].n2));
1804      if (t && integer_zerop (t))
1805	{
1806	  for (i = fd->collapse; i < fd->ordered; i++)
1807	    counts[i] = build_int_cst (type, 0);
1808	  break;
1809	}
1810    }
1811  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1812    {
1813      tree itype = TREE_TYPE (fd->loops[i].v);
1814
1815      if (i >= fd->collapse && counts[i])
1816	continue;
1817      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1818	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1819				fold_convert (itype, fd->loops[i].n1),
1820				fold_convert (itype, fd->loops[i].n2)))
1821	      == NULL_TREE || !integer_onep (t)))
1822	{
1823	  gcond *cond_stmt;
1824	  tree n1, n2;
1825	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1826	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1827					 true, GSI_SAME_STMT);
1828	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1829	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1830					 true, GSI_SAME_STMT);
1831	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1832					 NULL_TREE, NULL_TREE);
1833	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1834	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1835			 expand_omp_regimplify_p, NULL, NULL)
1836	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1837			    expand_omp_regimplify_p, NULL, NULL))
1838	    {
1839	      *gsi = gsi_for_stmt (cond_stmt);
1840	      gimple_regimplify_operands (cond_stmt, gsi);
1841	    }
1842	  e = split_block (entry_bb, cond_stmt);
1843	  basic_block &zero_iter_bb
1844	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1845	  int &first_zero_iter
1846	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1847	  if (zero_iter_bb == NULL)
1848	    {
1849	      gassign *assign_stmt;
1850	      first_zero_iter = i;
1851	      zero_iter_bb = create_empty_bb (entry_bb);
1852	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1853	      *gsi = gsi_after_labels (zero_iter_bb);
1854	      if (i < fd->collapse)
1855		assign_stmt = gimple_build_assign (fd->loop.n2,
1856						   build_zero_cst (type));
1857	      else
1858		{
1859		  counts[i] = create_tmp_reg (type, ".count");
1860		  assign_stmt
1861		    = gimple_build_assign (counts[i], build_zero_cst (type));
1862		}
1863	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1864	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1865				       entry_bb);
1866	    }
1867	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1868	  ne->probability = profile_probability::very_unlikely ();
1869	  e->flags = EDGE_TRUE_VALUE;
1870	  e->probability = ne->probability.invert ();
1871	  if (l2_dom_bb == NULL)
1872	    l2_dom_bb = entry_bb;
1873	  entry_bb = e->dest;
1874	  *gsi = gsi_last_nondebug_bb (entry_bb);
1875	}
1876
1877      if (POINTER_TYPE_P (itype))
1878	itype = signed_type_for (itype);
1879      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1880				 ? -1 : 1));
1881      t = fold_build2 (PLUS_EXPR, itype,
1882		       fold_convert (itype, fd->loops[i].step), t);
1883      t = fold_build2 (PLUS_EXPR, itype, t,
1884		       fold_convert (itype, fd->loops[i].n2));
1885      t = fold_build2 (MINUS_EXPR, itype, t,
1886		       fold_convert (itype, fd->loops[i].n1));
1887      /* ?? We could probably use CEIL_DIV_EXPR instead of
1888	 TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
1889	 generate the same code in the end because generically we
1890	 don't know that the values involved must be negative for
1891	 GT??  */
1892      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1893	t = fold_build2 (TRUNC_DIV_EXPR, itype,
1894			 fold_build1 (NEGATE_EXPR, itype, t),
1895			 fold_build1 (NEGATE_EXPR, itype,
1896				      fold_convert (itype,
1897						    fd->loops[i].step)));
1898      else
1899	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1900			 fold_convert (itype, fd->loops[i].step));
1901      t = fold_convert (type, t);
1902      if (TREE_CODE (t) == INTEGER_CST)
1903	counts[i] = t;
1904      else
1905	{
1906	  if (i < fd->collapse || i != first_zero_iter2)
1907	    counts[i] = create_tmp_reg (type, ".count");
1908	  expand_omp_build_assign (gsi, counts[i], t);
1909	}
1910      if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1911	{
1912	  if (i == 0)
1913	    t = counts[0];
1914	  else
1915	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1916	  expand_omp_build_assign (gsi, fd->loop.n2, t);
1917	}
1918    }
1919}
1920
1921/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
1922	T = V;
1923	V3 = N31 + (T % count3) * STEP3;
1924	T = T / count3;
1925	V2 = N21 + (T % count2) * STEP2;
1926	T = T / count2;
1927	V1 = N11 + T * STEP1;
1928   if this loop doesn't have an inner loop construct combined with it.
1929   If it does have an inner loop construct combined with it and the
1930   iteration count isn't known constant, store values from counts array
1931   into its _looptemp_ temporaries instead.  */
1932
1933static void
1934expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1935			  tree *counts, gimple *inner_stmt, tree startvar)
1936{
1937  int i;
1938  if (gimple_omp_for_combined_p (fd->for_stmt))
1939    {
1940      /* If fd->loop.n2 is constant, then no propagation of the counts
1941	 is needed, they are constant.  */
1942      if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1943	return;
1944
1945      tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1946		     ? gimple_omp_taskreg_clauses (inner_stmt)
1947		     : gimple_omp_for_clauses (inner_stmt);
1948      /* First two _looptemp_ clauses are for istart/iend, counts[0]
1949	 isn't supposed to be handled, as the inner loop doesn't
1950	 use it.  */
1951      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1952      gcc_assert (innerc);
1953      for (i = 0; i < fd->collapse; i++)
1954	{
1955	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1956				    OMP_CLAUSE__LOOPTEMP_);
1957	  gcc_assert (innerc);
1958	  if (i)
1959	    {
1960	      tree tem = OMP_CLAUSE_DECL (innerc);
1961	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1962	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1963					    false, GSI_CONTINUE_LINKING);
1964	      gassign *stmt = gimple_build_assign (tem, t);
1965	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1966	    }
1967	}
1968      return;
1969    }
1970
1971  tree type = TREE_TYPE (fd->loop.v);
1972  tree tem = create_tmp_reg (type, ".tem");
1973  gassign *stmt = gimple_build_assign (tem, startvar);
1974  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1975
1976  for (i = fd->collapse - 1; i >= 0; i--)
1977    {
1978      tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1979      itype = vtype;
1980      if (POINTER_TYPE_P (vtype))
1981	itype = signed_type_for (vtype);
1982      if (i != 0)
1983	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1984      else
1985	t = tem;
1986      t = fold_convert (itype, t);
1987      t = fold_build2 (MULT_EXPR, itype, t,
1988		       fold_convert (itype, fd->loops[i].step));
1989      if (POINTER_TYPE_P (vtype))
1990	t = fold_build_pointer_plus (fd->loops[i].n1, t);
1991      else
1992	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1993      t = force_gimple_operand_gsi (gsi, t,
1994				    DECL_P (fd->loops[i].v)
1995				    && TREE_ADDRESSABLE (fd->loops[i].v),
1996				    NULL_TREE, false,
1997				    GSI_CONTINUE_LINKING);
1998      stmt = gimple_build_assign (fd->loops[i].v, t);
1999      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2000      if (i != 0)
2001	{
2002	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2003	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2004					false, GSI_CONTINUE_LINKING);
2005	  stmt = gimple_build_assign (tem, t);
2006	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2007	}
2008    }
2009}
2010
2011/* Helper function for expand_omp_for_*.  Generate code like:
2012    L10:
2013	V3 += STEP3;
2014	if (V3 cond3 N32) goto BODY_BB; else goto L11;
2015    L11:
2016	V3 = N31;
2017	V2 += STEP2;
2018	if (V2 cond2 N22) goto BODY_BB; else goto L12;
2019    L12:
2020	V2 = N21;
2021	V1 += STEP1;
2022	goto BODY_BB;  */
2023
2024static basic_block
2025extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2026			     basic_block body_bb)
2027{
2028  basic_block last_bb, bb, collapse_bb = NULL;
2029  int i;
2030  gimple_stmt_iterator gsi;
2031  edge e;
2032  tree t;
2033  gimple *stmt;
2034
2035  last_bb = cont_bb;
2036  for (i = fd->collapse - 1; i >= 0; i--)
2037    {
2038      tree vtype = TREE_TYPE (fd->loops[i].v);
2039
2040      bb = create_empty_bb (last_bb);
2041      add_bb_to_loop (bb, last_bb->loop_father);
2042      gsi = gsi_start_bb (bb);
2043
2044      if (i < fd->collapse - 1)
2045	{
2046	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2047	  e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2048
2049	  t = fd->loops[i + 1].n1;
2050	  t = force_gimple_operand_gsi (&gsi, t,
2051					DECL_P (fd->loops[i + 1].v)
2052					&& TREE_ADDRESSABLE (fd->loops[i
2053								       + 1].v),
2054					NULL_TREE, false,
2055					GSI_CONTINUE_LINKING);
2056	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2057	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2058	}
2059      else
2060	collapse_bb = bb;
2061
2062      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2063
2064      if (POINTER_TYPE_P (vtype))
2065	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2066      else
2067	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2068      t = force_gimple_operand_gsi (&gsi, t,
2069				    DECL_P (fd->loops[i].v)
2070				    && TREE_ADDRESSABLE (fd->loops[i].v),
2071				    NULL_TREE, false, GSI_CONTINUE_LINKING);
2072      stmt = gimple_build_assign (fd->loops[i].v, t);
2073      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2074
2075      if (i > 0)
2076	{
2077	  t = fd->loops[i].n2;
2078	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2079					false, GSI_CONTINUE_LINKING);
2080	  tree v = fd->loops[i].v;
2081	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
2082	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2083					  false, GSI_CONTINUE_LINKING);
2084	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2085	  stmt = gimple_build_cond_empty (t);
2086	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2087	  if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2088			 expand_omp_regimplify_p, NULL, NULL)
2089	      || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2090			    expand_omp_regimplify_p, NULL, NULL))
2091	    gimple_regimplify_operands (stmt, &gsi);
2092	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2093	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2094	}
2095      else
2096	make_edge (bb, body_bb, EDGE_FALLTHRU);
2097      last_bb = bb;
2098    }
2099
2100  return collapse_bb;
2101}
2102
2103/* Expand #pragma omp ordered depend(source).  */
2104
2105static void
2106expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2107			   tree *counts, location_t loc)
2108{
2109  enum built_in_function source_ix
2110    = fd->iter_type == long_integer_type_node
2111      ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2112  gimple *g
2113    = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2114			 build_fold_addr_expr (counts[fd->ordered]));
2115  gimple_set_location (g, loc);
2116  gsi_insert_before (gsi, g, GSI_SAME_STMT);
2117}
2118
2119/* Expand a single depend from #pragma omp ordered depend(sink:...).  */
2120
2121static void
2122expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2123			 tree *counts, tree c, location_t loc)
2124{
2125  auto_vec<tree, 10> args;
2126  enum built_in_function sink_ix
2127    = fd->iter_type == long_integer_type_node
2128      ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2129  tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2130  int i;
2131  gimple_stmt_iterator gsi2 = *gsi;
2132  bool warned_step = false;
2133
2134  for (i = 0; i < fd->ordered; i++)
2135    {
2136      tree step = NULL_TREE;
2137      off = TREE_PURPOSE (deps);
2138      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2139	{
2140	  step = TREE_OPERAND (off, 1);
2141	  off = TREE_OPERAND (off, 0);
2142	}
2143      if (!integer_zerop (off))
2144	{
2145	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2146		      || fd->loops[i].cond_code == GT_EXPR);
2147	  bool forward = fd->loops[i].cond_code == LT_EXPR;
2148	  if (step)
2149	    {
2150	      /* Non-simple Fortran DO loops.  If step is variable,
2151		 we don't know at compile even the direction, so can't
2152		 warn.  */
2153	      if (TREE_CODE (step) != INTEGER_CST)
2154		break;
2155	      forward = tree_int_cst_sgn (step) != -1;
2156	    }
2157	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2158	    warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2159				"waiting for lexically later iteration");
2160	  break;
2161	}
2162      deps = TREE_CHAIN (deps);
2163    }
2164  /* If all offsets corresponding to the collapsed loops are zero,
2165     this depend clause can be ignored.  FIXME: but there is still a
2166     flush needed.  We need to emit one __sync_synchronize () for it
2167     though (perhaps conditionally)?  Solve this together with the
2168     conservative dependence folding optimization.
2169  if (i >= fd->collapse)
2170    return;  */
2171
2172  deps = OMP_CLAUSE_DECL (c);
2173  gsi_prev (&gsi2);
2174  edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2175  edge e2 = split_block_after_labels (e1->dest);
2176
2177  gsi2 = gsi_after_labels (e1->dest);
2178  *gsi = gsi_last_bb (e1->src);
2179  for (i = 0; i < fd->ordered; i++)
2180    {
2181      tree itype = TREE_TYPE (fd->loops[i].v);
2182      tree step = NULL_TREE;
2183      tree orig_off = NULL_TREE;
2184      if (POINTER_TYPE_P (itype))
2185	itype = sizetype;
2186      if (i)
2187	deps = TREE_CHAIN (deps);
2188      off = TREE_PURPOSE (deps);
2189      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2190	{
2191	  step = TREE_OPERAND (off, 1);
2192	  off = TREE_OPERAND (off, 0);
2193	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2194		      && integer_onep (fd->loops[i].step)
2195		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2196	}
2197      tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2198      if (step)
2199	{
2200	  off = fold_convert_loc (loc, itype, off);
2201	  orig_off = off;
2202	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2203	}
2204
2205      if (integer_zerop (off))
2206	t = boolean_true_node;
2207      else
2208	{
2209	  tree a;
2210	  tree co = fold_convert_loc (loc, itype, off);
2211	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2212	    {
2213	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2214		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2215	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2216				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2217				   co);
2218	    }
2219	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2220	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2221				 fd->loops[i].v, co);
2222	  else
2223	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2224				 fd->loops[i].v, co);
2225	  if (step)
2226	    {
2227	      tree t1, t2;
2228	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2229		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2230				      fd->loops[i].n1);
2231	      else
2232		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2233				      fd->loops[i].n2);
2234	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2235		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2236				      fd->loops[i].n2);
2237	      else
2238		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2239				      fd->loops[i].n1);
2240	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2241				   step, build_int_cst (TREE_TYPE (step), 0));
2242	      if (TREE_CODE (step) != INTEGER_CST)
2243		{
2244		  t1 = unshare_expr (t1);
2245		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2246						 false, GSI_CONTINUE_LINKING);
2247		  t2 = unshare_expr (t2);
2248		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2249						 false, GSI_CONTINUE_LINKING);
2250		}
2251	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2252				   t, t2, t1);
2253	    }
2254	  else if (fd->loops[i].cond_code == LT_EXPR)
2255	    {
2256	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2257		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2258				     fd->loops[i].n1);
2259	      else
2260		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2261				     fd->loops[i].n2);
2262	    }
2263	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2264	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2265				 fd->loops[i].n2);
2266	  else
2267	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2268				 fd->loops[i].n1);
2269	}
2270      if (cond)
2271	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2272      else
2273	cond = t;
2274
2275      off = fold_convert_loc (loc, itype, off);
2276
2277      if (step
2278	  || (fd->loops[i].cond_code == LT_EXPR
2279	      ? !integer_onep (fd->loops[i].step)
2280	      : !integer_minus_onep (fd->loops[i].step)))
2281	{
2282	  if (step == NULL_TREE
2283	      && TYPE_UNSIGNED (itype)
2284	      && fd->loops[i].cond_code == GT_EXPR)
2285	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2286				 fold_build1_loc (loc, NEGATE_EXPR, itype,
2287						  s));
2288	  else
2289	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2290				 orig_off ? orig_off : off, s);
2291	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2292			       build_int_cst (itype, 0));
2293	  if (integer_zerop (t) && !warned_step)
2294	    {
2295	      warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2296				  "refers to iteration never in the iteration "
2297				  "space");
2298	      warned_step = true;
2299	    }
2300	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2301				  cond, t);
2302	}
2303
2304      if (i <= fd->collapse - 1 && fd->collapse > 1)
2305	t = fd->loop.v;
2306      else if (counts[i])
2307	t = counts[i];
2308      else
2309	{
2310	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2311			       fd->loops[i].v, fd->loops[i].n1);
2312	  t = fold_convert_loc (loc, fd->iter_type, t);
2313	}
2314      if (step)
2315	/* We have divided off by step already earlier.  */;
2316      else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2317	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2318			       fold_build1_loc (loc, NEGATE_EXPR, itype,
2319						s));
2320      else
2321	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2322      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2323	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2324      off = fold_convert_loc (loc, fd->iter_type, off);
2325      if (i <= fd->collapse - 1 && fd->collapse > 1)
2326	{
2327	  if (i)
2328	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2329				   off);
2330	  if (i < fd->collapse - 1)
2331	    {
2332	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2333				      counts[i]);
2334	      continue;
2335	    }
2336	}
2337      off = unshare_expr (off);
2338      t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2339      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2340				    true, GSI_SAME_STMT);
2341      args.safe_push (t);
2342    }
2343  gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2344  gimple_set_location (g, loc);
2345  gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2346
2347  cond = unshare_expr (cond);
2348  cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2349				   GSI_CONTINUE_LINKING);
2350  gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2351  edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2352  e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2353  e1->probability = e3->probability.invert ();
2354  e1->flags = EDGE_TRUE_VALUE;
2355  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2356
2357  *gsi = gsi_after_labels (e2->dest);
2358}
2359
2360/* Expand all #pragma omp ordered depend(source) and
2361   #pragma omp ordered depend(sink:...) constructs in the current
2362   #pragma omp for ordered(n) region.  */
2363
2364static void
2365expand_omp_ordered_source_sink (struct omp_region *region,
2366				struct omp_for_data *fd, tree *counts,
2367				basic_block cont_bb)
2368{
2369  struct omp_region *inner;
2370  int i;
2371  for (i = fd->collapse - 1; i < fd->ordered; i++)
2372    if (i == fd->collapse - 1 && fd->collapse > 1)
2373      counts[i] = NULL_TREE;
2374    else if (i >= fd->collapse && !cont_bb)
2375      counts[i] = build_zero_cst (fd->iter_type);
2376    else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2377	     && integer_onep (fd->loops[i].step))
2378      counts[i] = NULL_TREE;
2379    else
2380      counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2381  tree atype
2382    = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2383  counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2384  TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2385
2386  for (inner = region->inner; inner; inner = inner->next)
2387    if (inner->type == GIMPLE_OMP_ORDERED)
2388      {
2389	gomp_ordered *ord_stmt = inner->ord_stmt;
2390	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2391	location_t loc = gimple_location (ord_stmt);
2392	tree c;
2393	for (c = gimple_omp_ordered_clauses (ord_stmt);
2394	     c; c = OMP_CLAUSE_CHAIN (c))
2395	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2396	    break;
2397	if (c)
2398	  expand_omp_ordered_source (&gsi, fd, counts, loc);
2399	for (c = gimple_omp_ordered_clauses (ord_stmt);
2400	     c; c = OMP_CLAUSE_CHAIN (c))
2401	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2402	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2403	gsi_remove (&gsi, true);
2404      }
2405}
2406
2407/* Wrap the body into fd->ordered - fd->collapse loops that aren't
2408   collapsed.  */
2409
2410static basic_block
2411expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2412			      basic_block cont_bb, basic_block body_bb,
2413			      basic_block l0_bb, bool ordered_lastprivate)
2414{
2415  if (fd->ordered == fd->collapse)
2416    return cont_bb;
2417
2418  if (!cont_bb)
2419    {
2420      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2421      for (int i = fd->collapse; i < fd->ordered; i++)
2422	{
2423	  tree type = TREE_TYPE (fd->loops[i].v);
2424	  tree n1 = fold_convert (type, fd->loops[i].n1);
2425	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2426	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2427			      size_int (i - fd->collapse + 1),
2428			      NULL_TREE, NULL_TREE);
2429	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2430	}
2431      return NULL;
2432    }
2433
2434  for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2435    {
2436      tree t, type = TREE_TYPE (fd->loops[i].v);
2437      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2438      expand_omp_build_assign (&gsi, fd->loops[i].v,
2439			       fold_convert (type, fd->loops[i].n1));
2440      if (counts[i])
2441	expand_omp_build_assign (&gsi, counts[i],
2442				 build_zero_cst (fd->iter_type));
2443      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2444			  size_int (i - fd->collapse + 1),
2445			  NULL_TREE, NULL_TREE);
2446      expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2447      if (!gsi_end_p (gsi))
2448	gsi_prev (&gsi);
2449      else
2450	gsi = gsi_last_bb (body_bb);
2451      edge e1 = split_block (body_bb, gsi_stmt (gsi));
2452      basic_block new_body = e1->dest;
2453      if (body_bb == cont_bb)
2454	cont_bb = new_body;
2455      edge e2 = NULL;
2456      basic_block new_header;
2457      if (EDGE_COUNT (cont_bb->preds) > 0)
2458	{
2459	  gsi = gsi_last_bb (cont_bb);
2460	  if (POINTER_TYPE_P (type))
2461	    t = fold_build_pointer_plus (fd->loops[i].v,
2462					 fold_convert (sizetype,
2463						       fd->loops[i].step));
2464	  else
2465	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2466			     fold_convert (type, fd->loops[i].step));
2467	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2468	  if (counts[i])
2469	    {
2470	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2471			       build_int_cst (fd->iter_type, 1));
2472	      expand_omp_build_assign (&gsi, counts[i], t);
2473	      t = counts[i];
2474	    }
2475	  else
2476	    {
2477	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2478			       fd->loops[i].v, fd->loops[i].n1);
2479	      t = fold_convert (fd->iter_type, t);
2480	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2481					    true, GSI_SAME_STMT);
2482	    }
2483	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2484			 size_int (i - fd->collapse + 1),
2485			 NULL_TREE, NULL_TREE);
2486	  expand_omp_build_assign (&gsi, aref, t);
2487	  gsi_prev (&gsi);
2488	  e2 = split_block (cont_bb, gsi_stmt (gsi));
2489	  new_header = e2->dest;
2490	}
2491      else
2492	new_header = cont_bb;
2493      gsi = gsi_after_labels (new_header);
2494      tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2495					 true, GSI_SAME_STMT);
2496      tree n2
2497	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2498				    true, NULL_TREE, true, GSI_SAME_STMT);
2499      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2500      gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2501      edge e3 = split_block (new_header, gsi_stmt (gsi));
2502      cont_bb = e3->dest;
2503      remove_edge (e1);
2504      make_edge (body_bb, new_header, EDGE_FALLTHRU);
2505      e3->flags = EDGE_FALSE_VALUE;
2506      e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2507      e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2508      e1->probability = e3->probability.invert ();
2509
2510      set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2511      set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2512
2513      if (e2)
2514	{
2515	  class loop *loop = alloc_loop ();
2516	  loop->header = new_header;
2517	  loop->latch = e2->src;
2518	  add_loop (loop, l0_bb->loop_father);
2519	}
2520    }
2521
2522  /* If there are any lastprivate clauses and it is possible some loops
2523     might have zero iterations, ensure all the decls are initialized,
2524     otherwise we could crash evaluating C++ class iterators with lastprivate
2525     clauses.  */
2526  bool need_inits = false;
2527  for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2528    if (need_inits)
2529      {
2530	tree type = TREE_TYPE (fd->loops[i].v);
2531	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2532	expand_omp_build_assign (&gsi, fd->loops[i].v,
2533				 fold_convert (type, fd->loops[i].n1));
2534      }
2535    else
2536      {
2537	tree type = TREE_TYPE (fd->loops[i].v);
2538	tree this_cond = fold_build2 (fd->loops[i].cond_code,
2539				      boolean_type_node,
2540				      fold_convert (type, fd->loops[i].n1),
2541				      fold_convert (type, fd->loops[i].n2));
2542	if (!integer_onep (this_cond))
2543	  need_inits = true;
2544      }
2545
2546  return cont_bb;
2547}
2548
2549/* A subroutine of expand_omp_for.  Generate code for a parallel
2550   loop with any schedule.  Given parameters:
2551
2552	for (V = N1; V cond N2; V += STEP) BODY;
2553
2554   where COND is "<" or ">", we generate pseudocode
2555
2556	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2557	if (more) goto L0; else goto L3;
2558    L0:
2559	V = istart0;
2560	iend = iend0;
2561    L1:
2562	BODY;
2563	V += STEP;
2564	if (V cond iend) goto L1; else goto L2;
2565    L2:
2566	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2567    L3:
2568
2569    If this is a combined omp parallel loop, instead of the call to
2570    GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2571    If this is gimple_omp_for_combined_p loop, then instead of assigning
2572    V and iend in L0 we assign the first two _looptemp_ clause decls of the
2573    inner GIMPLE_OMP_FOR and V += STEP; and
2574    if (V cond iend) goto L1; else goto L2; are removed.
2575
2576    For collapsed loops, given parameters:
2577      collapse(3)
2578      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2579	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2580	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2581	    BODY;
2582
2583    we generate pseudocode
2584
2585	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2586	if (cond3 is <)
2587	  adj = STEP3 - 1;
2588	else
2589	  adj = STEP3 + 1;
2590	count3 = (adj + N32 - N31) / STEP3;
2591	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2592	if (cond2 is <)
2593	  adj = STEP2 - 1;
2594	else
2595	  adj = STEP2 + 1;
2596	count2 = (adj + N22 - N21) / STEP2;
2597	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2598	if (cond1 is <)
2599	  adj = STEP1 - 1;
2600	else
2601	  adj = STEP1 + 1;
2602	count1 = (adj + N12 - N11) / STEP1;
2603	count = count1 * count2 * count3;
2604	goto Z1;
2605    Z0:
2606	count = 0;
2607    Z1:
2608	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2609	if (more) goto L0; else goto L3;
2610    L0:
2611	V = istart0;
2612	T = V;
2613	V3 = N31 + (T % count3) * STEP3;
2614	T = T / count3;
2615	V2 = N21 + (T % count2) * STEP2;
2616	T = T / count2;
2617	V1 = N11 + T * STEP1;
2618	iend = iend0;
2619    L1:
2620	BODY;
2621	V += 1;
2622	if (V < iend) goto L10; else goto L2;
2623    L10:
2624	V3 += STEP3;
2625	if (V3 cond3 N32) goto L1; else goto L11;
2626    L11:
2627	V3 = N31;
2628	V2 += STEP2;
2629	if (V2 cond2 N22) goto L1; else goto L12;
2630    L12:
2631	V2 = N21;
2632	V1 += STEP1;
2633	goto L1;
2634    L2:
2635	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2636    L3:
2637
2638      */
2639
2640static void
2641expand_omp_for_generic (struct omp_region *region,
2642			struct omp_for_data *fd,
2643			enum built_in_function start_fn,
2644			enum built_in_function next_fn,
2645			tree sched_arg,
2646			gimple *inner_stmt)
2647{
2648  tree type, istart0, iend0, iend;
2649  tree t, vmain, vback, bias = NULL_TREE;
2650  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2651  basic_block l2_bb = NULL, l3_bb = NULL;
2652  gimple_stmt_iterator gsi;
2653  gassign *assign_stmt;
2654  bool in_combined_parallel = is_combined_parallel (region);
2655  bool broken_loop = region->cont == NULL;
2656  edge e, ne;
2657  tree *counts = NULL;
2658  int i;
2659  bool ordered_lastprivate = false;
2660
2661  gcc_assert (!broken_loop || !in_combined_parallel);
2662  gcc_assert (fd->iter_type == long_integer_type_node
2663	      || !in_combined_parallel);
2664
2665  entry_bb = region->entry;
2666  cont_bb = region->cont;
2667  collapse_bb = NULL;
2668  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2669  gcc_assert (broken_loop
2670	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2671  l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2672  l1_bb = single_succ (l0_bb);
2673  if (!broken_loop)
2674    {
2675      l2_bb = create_empty_bb (cont_bb);
2676      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2677		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2678		      == l1_bb));
2679      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2680    }
2681  else
2682    l2_bb = NULL;
2683  l3_bb = BRANCH_EDGE (entry_bb)->dest;
2684  exit_bb = region->exit;
2685
2686  gsi = gsi_last_nondebug_bb (entry_bb);
2687
2688  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2689  if (fd->ordered
2690      && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2691			  OMP_CLAUSE_LASTPRIVATE))
2692    ordered_lastprivate = false;
2693  tree reductions = NULL_TREE;
2694  tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
2695  tree memv = NULL_TREE;
2696  if (fd->lastprivate_conditional)
2697    {
2698      tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2699				OMP_CLAUSE__CONDTEMP_);
2700      if (fd->have_pointer_condtemp)
2701	condtemp = OMP_CLAUSE_DECL (c);
2702      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
2703      cond_var = OMP_CLAUSE_DECL (c);
2704    }
2705  if (sched_arg)
2706    {
2707      if (fd->have_reductemp)
2708	{
2709	  tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2710				    OMP_CLAUSE__REDUCTEMP_);
2711	  reductions = OMP_CLAUSE_DECL (c);
2712	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2713	  gimple *g = SSA_NAME_DEF_STMT (reductions);
2714	  reductions = gimple_assign_rhs1 (g);
2715	  OMP_CLAUSE_DECL (c) = reductions;
2716	  entry_bb = gimple_bb (g);
2717	  edge e = split_block (entry_bb, g);
2718	  if (region->entry == entry_bb)
2719	    region->entry = e->dest;
2720	  gsi = gsi_last_bb (entry_bb);
2721	}
2722      else
2723	reductions = null_pointer_node;
2724      if (fd->have_pointer_condtemp)
2725	{
2726	  tree type = TREE_TYPE (condtemp);
2727	  memv = create_tmp_var (type);
2728	  TREE_ADDRESSABLE (memv) = 1;
2729	  unsigned HOST_WIDE_INT sz
2730	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
2731	  sz *= fd->lastprivate_conditional;
2732	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
2733				   false);
2734	  mem = build_fold_addr_expr (memv);
2735	}
2736      else
2737	mem = null_pointer_node;
2738    }
2739  if (fd->collapse > 1 || fd->ordered)
2740    {
2741      int first_zero_iter1 = -1, first_zero_iter2 = -1;
2742      basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2743
2744      counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2745      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2746				  zero_iter1_bb, first_zero_iter1,
2747				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2748
2749      if (zero_iter1_bb)
2750	{
2751	  /* Some counts[i] vars might be uninitialized if
2752	     some loop has zero iterations.  But the body shouldn't
2753	     be executed in that case, so just avoid uninit warnings.  */
2754	  for (i = first_zero_iter1;
2755	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2756	    if (SSA_VAR_P (counts[i]))
2757	      TREE_NO_WARNING (counts[i]) = 1;
2758	  gsi_prev (&gsi);
2759	  e = split_block (entry_bb, gsi_stmt (gsi));
2760	  entry_bb = e->dest;
2761	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2762	  gsi = gsi_last_nondebug_bb (entry_bb);
2763	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2764				   get_immediate_dominator (CDI_DOMINATORS,
2765							    zero_iter1_bb));
2766	}
2767      if (zero_iter2_bb)
2768	{
2769	  /* Some counts[i] vars might be uninitialized if
2770	     some loop has zero iterations.  But the body shouldn't
2771	     be executed in that case, so just avoid uninit warnings.  */
2772	  for (i = first_zero_iter2; i < fd->ordered; i++)
2773	    if (SSA_VAR_P (counts[i]))
2774	      TREE_NO_WARNING (counts[i]) = 1;
2775	  if (zero_iter1_bb)
2776	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2777	  else
2778	    {
2779	      gsi_prev (&gsi);
2780	      e = split_block (entry_bb, gsi_stmt (gsi));
2781	      entry_bb = e->dest;
2782	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2783	      gsi = gsi_last_nondebug_bb (entry_bb);
2784	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2785				       get_immediate_dominator
2786					 (CDI_DOMINATORS, zero_iter2_bb));
2787	    }
2788	}
2789      if (fd->collapse == 1)
2790	{
2791	  counts[0] = fd->loop.n2;
2792	  fd->loop = fd->loops[0];
2793	}
2794    }
2795
2796  type = TREE_TYPE (fd->loop.v);
2797  istart0 = create_tmp_var (fd->iter_type, ".istart0");
2798  iend0 = create_tmp_var (fd->iter_type, ".iend0");
2799  TREE_ADDRESSABLE (istart0) = 1;
2800  TREE_ADDRESSABLE (iend0) = 1;
2801
2802  /* See if we need to bias by LLONG_MIN.  */
2803  if (fd->iter_type == long_long_unsigned_type_node
2804      && TREE_CODE (type) == INTEGER_TYPE
2805      && !TYPE_UNSIGNED (type)
2806      && fd->ordered == 0)
2807    {
2808      tree n1, n2;
2809
2810      if (fd->loop.cond_code == LT_EXPR)
2811	{
2812	  n1 = fd->loop.n1;
2813	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2814	}
2815      else
2816	{
2817	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2818	  n2 = fd->loop.n1;
2819	}
2820      if (TREE_CODE (n1) != INTEGER_CST
2821	  || TREE_CODE (n2) != INTEGER_CST
2822	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2823	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2824    }
2825
2826  gimple_stmt_iterator gsif = gsi;
2827  gsi_prev (&gsif);
2828
2829  tree arr = NULL_TREE;
2830  if (in_combined_parallel)
2831    {
2832      gcc_assert (fd->ordered == 0);
2833      /* In a combined parallel loop, emit a call to
2834	 GOMP_loop_foo_next.  */
2835      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2836			   build_fold_addr_expr (istart0),
2837			   build_fold_addr_expr (iend0));
2838    }
2839  else
2840    {
2841      tree t0, t1, t2, t3, t4;
2842      /* If this is not a combined parallel loop, emit a call to
2843	 GOMP_loop_foo_start in ENTRY_BB.  */
2844      t4 = build_fold_addr_expr (iend0);
2845      t3 = build_fold_addr_expr (istart0);
2846      if (fd->ordered)
2847	{
2848	  t0 = build_int_cst (unsigned_type_node,
2849			      fd->ordered - fd->collapse + 1);
2850	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2851							fd->ordered
2852							- fd->collapse + 1),
2853				".omp_counts");
2854	  DECL_NAMELESS (arr) = 1;
2855	  TREE_ADDRESSABLE (arr) = 1;
2856	  TREE_STATIC (arr) = 1;
2857	  vec<constructor_elt, va_gc> *v;
2858	  vec_alloc (v, fd->ordered - fd->collapse + 1);
2859	  int idx;
2860
2861	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2862	    {
2863	      tree c;
2864	      if (idx == 0 && fd->collapse > 1)
2865		c = fd->loop.n2;
2866	      else
2867		c = counts[idx + fd->collapse - 1];
2868	      tree purpose = size_int (idx);
2869	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2870	      if (TREE_CODE (c) != INTEGER_CST)
2871		TREE_STATIC (arr) = 0;
2872	    }
2873
2874	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2875	  if (!TREE_STATIC (arr))
2876	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2877						    void_type_node, arr),
2878				      true, NULL_TREE, true, GSI_SAME_STMT);
2879	  t1 = build_fold_addr_expr (arr);
2880	  t2 = NULL_TREE;
2881	}
2882      else
2883	{
2884	  t2 = fold_convert (fd->iter_type, fd->loop.step);
2885	  t1 = fd->loop.n2;
2886	  t0 = fd->loop.n1;
2887	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
2888	    {
2889	      tree innerc
2890		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2891				   OMP_CLAUSE__LOOPTEMP_);
2892	      gcc_assert (innerc);
2893	      t0 = OMP_CLAUSE_DECL (innerc);
2894	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2895					OMP_CLAUSE__LOOPTEMP_);
2896	      gcc_assert (innerc);
2897	      t1 = OMP_CLAUSE_DECL (innerc);
2898	    }
2899	  if (POINTER_TYPE_P (TREE_TYPE (t0))
2900	      && TYPE_PRECISION (TREE_TYPE (t0))
2901		 != TYPE_PRECISION (fd->iter_type))
2902	    {
2903	      /* Avoid casting pointers to integer of a different size.  */
2904	      tree itype = signed_type_for (type);
2905	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2906	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2907	    }
2908	  else
2909	    {
2910	      t1 = fold_convert (fd->iter_type, t1);
2911	      t0 = fold_convert (fd->iter_type, t0);
2912	    }
2913	  if (bias)
2914	    {
2915	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2916	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2917	    }
2918	}
2919      if (fd->iter_type == long_integer_type_node || fd->ordered)
2920	{
2921	  if (fd->chunk_size)
2922	    {
2923	      t = fold_convert (fd->iter_type, fd->chunk_size);
2924	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2925	      if (sched_arg)
2926		{
2927		  if (fd->ordered)
2928		    t = build_call_expr (builtin_decl_explicit (start_fn),
2929					 8, t0, t1, sched_arg, t, t3, t4,
2930					 reductions, mem);
2931		  else
2932		    t = build_call_expr (builtin_decl_explicit (start_fn),
2933					 9, t0, t1, t2, sched_arg, t, t3, t4,
2934					 reductions, mem);
2935		}
2936	      else if (fd->ordered)
2937		t = build_call_expr (builtin_decl_explicit (start_fn),
2938				     5, t0, t1, t, t3, t4);
2939	      else
2940		t = build_call_expr (builtin_decl_explicit (start_fn),
2941				     6, t0, t1, t2, t, t3, t4);
2942	    }
2943	  else if (fd->ordered)
2944	    t = build_call_expr (builtin_decl_explicit (start_fn),
2945				 4, t0, t1, t3, t4);
2946	  else
2947	    t = build_call_expr (builtin_decl_explicit (start_fn),
2948				 5, t0, t1, t2, t3, t4);
2949	}
2950      else
2951	{
2952	  tree t5;
2953	  tree c_bool_type;
2954	  tree bfn_decl;
2955
2956	  /* The GOMP_loop_ull_*start functions have additional boolean
2957	     argument, true for < loops and false for > loops.
2958	     In Fortran, the C bool type can be different from
2959	     boolean_type_node.  */
2960	  bfn_decl = builtin_decl_explicit (start_fn);
2961	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2962	  t5 = build_int_cst (c_bool_type,
2963			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
2964	  if (fd->chunk_size)
2965	    {
2966	      tree bfn_decl = builtin_decl_explicit (start_fn);
2967	      t = fold_convert (fd->iter_type, fd->chunk_size);
2968	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2969	      if (sched_arg)
2970		t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2971				     t, t3, t4, reductions, mem);
2972	      else
2973		t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2974	    }
2975	  else
2976	    t = build_call_expr (builtin_decl_explicit (start_fn),
2977				 6, t5, t0, t1, t2, t3, t4);
2978	}
2979    }
2980  if (TREE_TYPE (t) != boolean_type_node)
2981    t = fold_build2 (NE_EXPR, boolean_type_node,
2982		     t, build_int_cst (TREE_TYPE (t), 0));
2983  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2984				true, GSI_SAME_STMT);
2985  if (arr && !TREE_STATIC (arr))
2986    {
2987      tree clobber = build_clobber (TREE_TYPE (arr));
2988      gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2989			 GSI_SAME_STMT);
2990    }
2991  if (fd->have_pointer_condtemp)
2992    expand_omp_build_assign (&gsi, condtemp, memv, false);
2993  if (fd->have_reductemp)
2994    {
2995      gimple *g = gsi_stmt (gsi);
2996      gsi_remove (&gsi, true);
2997      release_ssa_name (gimple_assign_lhs (g));
2998
2999      entry_bb = region->entry;
3000      gsi = gsi_last_nondebug_bb (entry_bb);
3001
3002      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3003    }
3004  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3005
3006  /* Remove the GIMPLE_OMP_FOR statement.  */
3007  gsi_remove (&gsi, true);
3008
3009  if (gsi_end_p (gsif))
3010    gsif = gsi_after_labels (gsi_bb (gsif));
3011  gsi_next (&gsif);
3012
3013  /* Iteration setup for sequential loop goes in L0_BB.  */
3014  tree startvar = fd->loop.v;
3015  tree endvar = NULL_TREE;
3016
3017  if (gimple_omp_for_combined_p (fd->for_stmt))
3018    {
3019      gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3020		  && gimple_omp_for_kind (inner_stmt)
3021		     == GF_OMP_FOR_KIND_SIMD);
3022      tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3023				     OMP_CLAUSE__LOOPTEMP_);
3024      gcc_assert (innerc);
3025      startvar = OMP_CLAUSE_DECL (innerc);
3026      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3027				OMP_CLAUSE__LOOPTEMP_);
3028      gcc_assert (innerc);
3029      endvar = OMP_CLAUSE_DECL (innerc);
3030    }
3031
3032  gsi = gsi_start_bb (l0_bb);
3033  t = istart0;
3034  if (fd->ordered && fd->collapse == 1)
3035    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3036		     fold_convert (fd->iter_type, fd->loop.step));
3037  else if (bias)
3038    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3039  if (fd->ordered && fd->collapse == 1)
3040    {
3041      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3042	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3043			 fd->loop.n1, fold_convert (sizetype, t));
3044      else
3045	{
3046	  t = fold_convert (TREE_TYPE (startvar), t);
3047	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3048			   fd->loop.n1, t);
3049	}
3050    }
3051  else
3052    {
3053      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3054	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3055      t = fold_convert (TREE_TYPE (startvar), t);
3056    }
3057  t = force_gimple_operand_gsi (&gsi, t,
3058				DECL_P (startvar)
3059				&& TREE_ADDRESSABLE (startvar),
3060				NULL_TREE, false, GSI_CONTINUE_LINKING);
3061  assign_stmt = gimple_build_assign (startvar, t);
3062  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3063  if (cond_var)
3064    {
3065      tree itype = TREE_TYPE (cond_var);
3066      /* For lastprivate(conditional:) itervar, we need some iteration
3067	 counter that starts at unsigned non-zero and increases.
3068	 Prefer as few IVs as possible, so if we can use startvar
3069	 itself, use that, or startvar + constant (those would be
3070	 incremented with step), and as last resort use the s0 + 1
3071	 incremented by 1.  */
3072      if ((fd->ordered && fd->collapse == 1)
3073	  || bias
3074	  || POINTER_TYPE_P (type)
3075	  || TREE_CODE (fd->loop.n1) != INTEGER_CST
3076	  || fd->loop.cond_code != LT_EXPR)
3077	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3078			 build_int_cst (itype, 1));
3079      else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3080	t = fold_convert (itype, t);
3081      else
3082	{
3083	  tree c = fold_convert (itype, fd->loop.n1);
3084	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3085	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3086	}
3087      t = force_gimple_operand_gsi (&gsi, t, false,
3088				    NULL_TREE, false, GSI_CONTINUE_LINKING);
3089      assign_stmt = gimple_build_assign (cond_var, t);
3090      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3091    }
3092
3093  t = iend0;
3094  if (fd->ordered && fd->collapse == 1)
3095    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3096		     fold_convert (fd->iter_type, fd->loop.step));
3097  else if (bias)
3098    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3099  if (fd->ordered && fd->collapse == 1)
3100    {
3101      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3102	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3103			 fd->loop.n1, fold_convert (sizetype, t));
3104      else
3105	{
3106	  t = fold_convert (TREE_TYPE (startvar), t);
3107	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3108			   fd->loop.n1, t);
3109	}
3110    }
3111  else
3112    {
3113      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3114	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3115      t = fold_convert (TREE_TYPE (startvar), t);
3116    }
3117  iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3118				   false, GSI_CONTINUE_LINKING);
3119  if (endvar)
3120    {
3121      assign_stmt = gimple_build_assign (endvar, iend);
3122      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3123      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3124	assign_stmt = gimple_build_assign (fd->loop.v, iend);
3125      else
3126	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3127      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3128    }
3129  /* Handle linear clause adjustments.  */
3130  tree itercnt = NULL_TREE;
3131  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3132    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3133	 c; c = OMP_CLAUSE_CHAIN (c))
3134      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3135	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3136	{
3137	  tree d = OMP_CLAUSE_DECL (c);
3138	  bool is_ref = omp_is_reference (d);
3139	  tree t = d, a, dest;
3140	  if (is_ref)
3141	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3142	  tree type = TREE_TYPE (t);
3143	  if (POINTER_TYPE_P (type))
3144	    type = sizetype;
3145	  dest = unshare_expr (t);
3146	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
3147	  expand_omp_build_assign (&gsif, v, t);
3148	  if (itercnt == NULL_TREE)
3149	    {
3150	      itercnt = startvar;
3151	      tree n1 = fd->loop.n1;
3152	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3153		{
3154		  itercnt
3155		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3156				    itercnt);
3157		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
3158		}
3159	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3160				     itercnt, n1);
3161	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3162				     itercnt, fd->loop.step);
3163	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3164						  NULL_TREE, false,
3165						  GSI_CONTINUE_LINKING);
3166	    }
3167	  a = fold_build2 (MULT_EXPR, type,
3168			   fold_convert (type, itercnt),
3169			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3170	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3171			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3172	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3173					false, GSI_CONTINUE_LINKING);
3174	  assign_stmt = gimple_build_assign (dest, t);
3175	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3176	}
3177  if (fd->collapse > 1)
3178    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3179
3180  if (fd->ordered)
3181    {
      /* Until now, the counts array contained the number of iterations
	 (or a variable containing it) for the ith loop.  From now on,
	 we need those counts only for the collapsed loops, and only
	 for the 2nd till the last collapsed one.  Move those one
	 element earlier; we'll use counts[fd->collapse - 1] for the
	 first source/sink iteration counter and so on, and
	 counts[fd->ordered] as the array holding the current counter
	 values for depend(source).  */
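      /* E.g. (a purely illustrative configuration), with fd->collapse
	 == 3 and fd->ordered == 5 the counts of the 2nd and 3rd
	 collapsed loops move to counts[0] and counts[1], counts[2]
	 till counts[4] become the source/sink iteration counters and
	 counts[5] is the depend(source) array.  */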
      if (fd->collapse > 1)
	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
      if (broken_loop)
	{
	  int i;
	  for (i = fd->collapse; i < fd->ordered; i++)
	    {
	      tree type = TREE_TYPE (fd->loops[i].v);
	      tree this_cond
		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
			       fold_convert (type, fd->loops[i].n1),
			       fold_convert (type, fd->loops[i].n2));
	      if (!integer_onep (this_cond))
		break;
	    }
	  if (i < fd->ordered)
	    {
	      if (entry_bb->loop_father != l0_bb->loop_father)
		{
		  remove_bb_from_loops (l0_bb);
		  add_bb_to_loop (l0_bb, entry_bb->loop_father);
		  gcc_assert (single_succ (l0_bb) == l1_bb);
		}
	      cont_bb
		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
	      add_bb_to_loop (cont_bb, l0_bb->loop_father);
	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
	      make_edge (cont_bb, l1_bb, 0);
	      l2_bb = create_empty_bb (cont_bb);
	      broken_loop = false;
	    }
	}
      expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
      cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
					      l0_bb, ordered_lastprivate);
      if (counts[fd->collapse - 1])
	{
	  gcc_assert (fd->collapse == 1);
	  gsi = gsi_last_bb (l0_bb);
	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
				   istart0, true);
	  if (cont_bb)
	    {
	      gsi = gsi_last_bb (cont_bb);
	      t = fold_build2 (PLUS_EXPR, fd->iter_type,
			       counts[fd->collapse - 1],
			       build_int_cst (fd->iter_type, 1));
	      expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
	      tree aref = build4 (ARRAY_REF, fd->iter_type,
				  counts[fd->ordered], size_zero_node,
				  NULL_TREE, NULL_TREE);
	      expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
	    }
	  t = counts[fd->collapse - 1];
	}
      else if (fd->collapse > 1)
	t = fd->loop.v;
      else
	{
	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
			   fd->loops[0].v, fd->loops[0].n1);
	  t = fold_convert (fd->iter_type, t);
	}
      gsi = gsi_last_bb (l0_bb);
      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			  size_zero_node, NULL_TREE, NULL_TREE);
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				    false, GSI_CONTINUE_LINKING);
      expand_omp_build_assign (&gsi, aref, t, true);
    }

  if (!broken_loop)
    {
      /* Code to control the increment and predicate for the sequential
	 loop goes in the CONT_BB.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (cond_var)
	{
	  tree itype = TREE_TYPE (cond_var);
	  tree t2;
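	  /* Bump the conditional-lastprivate counter: by STEP when it
	     can mirror the main IV (a plain integer loop with constant
	     N1 and "<" condition), otherwise by 1.  */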
	  if ((fd->ordered && fd->collapse == 1)
	       || bias
	       || POINTER_TYPE_P (type)
	       || TREE_CODE (fd->loop.n1) != INTEGER_CST
	       || fd->loop.cond_code != LT_EXPR)
	    t2 = build_int_cst (itype, 1);
	  else
	    t2 = fold_convert (itype, fd->loop.step);
	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
					 NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (cond_var, t2);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, fd->loop.step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
	    {
	      tree tem;
	      if (fd->collapse > 1)
		tem = fd->loop.v;
	      else
		{
		  tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
				     fd->loops[0].v, fd->loops[0].n1);
		  tem = fold_convert (fd->iter_type, tem);
		}
	      tree aref = build4 (ARRAY_REF, fd->iter_type,
				  counts[fd->ordered], size_zero_node,
				  NULL_TREE, NULL_TREE);
	      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
					      true, GSI_SAME_STMT);
	      expand_omp_build_assign (&gsi, aref, tem);
	    }

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
		      iend);
	  gcond *cond_stmt = gimple_build_cond_empty (t);
	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);

      /* Emit code to get the next parallel iteration in L2_BB.  */
      gsi = gsi_start_bb (l2_bb);

      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
			   build_fold_addr_expr (istart0),
			   build_fold_addr_expr (iend0));
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				    false, GSI_CONTINUE_LINKING);
      if (TREE_TYPE (t) != boolean_type_node)
	t = fold_build2 (NE_EXPR, boolean_type_node,
			 t, build_int_cst (TREE_TYPE (t), 0));
      gcond *cond_stmt = gimple_build_cond_empty (t);
      gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
    }

  /* Add the loop cleanup function.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
  gcall *call_stmt = gimple_build_call (t, 0);
  if (fd->ordered)
    {
      tree arr = counts[fd->ordered];
      tree clobber = build_clobber (TREE_TYPE (arr));
      gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
			GSI_SAME_STMT);
    }
  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
    {
      gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
      if (fd->have_reductemp)
	{
	  gimple *g = gimple_build_assign (reductions, NOP_EXPR,
					   gimple_call_lhs (call_stmt));
	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
	}
    }
  gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
  find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;

  if (!broken_loop)
    {
      gimple_seq phis;

      e = find_edge (cont_bb, l3_bb);
      ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);

      phis = phi_nodes (l3_bb);
      for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *phi = gsi_stmt (gsi);
	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
		   PHI_ARG_DEF_FROM_EDGE (phi, e));
	}
      remove_edge (e);

      make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
      e = find_edge (cont_bb, l1_bb);
      if (e == NULL)
	{
	  e = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (e->dest) == l1_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (e);
	  e = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (e);
	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	e->flags = EDGE_TRUE_VALUE;
      if (e)
	{
	  e->probability
	    = profile_probability::guessed_always ().apply_scale (7, 8);
	  find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
	}
      else
	{
	  e = find_edge (cont_bb, l2_bb);
	  e->flags = EDGE_FALLTHRU;
	}
      make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);

      if (gimple_in_ssa_p (cfun))
	{
	  /* Add phis to the outer loop that connect to the phis in the inner,
	     original loop, and move the loop entry value of the inner phi to
	     the loop entry value of the outer phi.  */
	  gphi_iterator psi;
	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
	    {
	      location_t locus;
	      gphi *nphi;
	      gphi *exit_phi = psi.phi ();

	      if (virtual_operand_p (gimple_phi_result (exit_phi)))
		continue;

	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);

	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
	      edge latch_to_l1 = find_edge (latch, l1_bb);
	      gphi *inner_phi
		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);

	      tree t = gimple_phi_result (exit_phi);
	      tree new_res = copy_ssa_name (t, NULL);
	      nphi = create_phi_node (new_res, l0_bb);

	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
	      add_phi_arg (nphi, t, entry_to_l0, locus);

	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);

	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
	    }
	}

      set_immediate_dominator (CDI_DOMINATORS, l2_bb,
			       recompute_dominator (CDI_DOMINATORS, l2_bb));
      set_immediate_dominator (CDI_DOMINATORS, l3_bb,
			       recompute_dominator (CDI_DOMINATORS, l3_bb));
      set_immediate_dominator (CDI_DOMINATORS, l0_bb,
			       recompute_dominator (CDI_DOMINATORS, l0_bb));
      set_immediate_dominator (CDI_DOMINATORS, l1_bb,
			       recompute_dominator (CDI_DOMINATORS, l1_bb));

      /* We enter expand_omp_for_generic with a loop.  This original loop may
	 have its own loop struct, or it may be part of an outer loop struct
	 (which may be the fake loop).  */
      class loop *outer_loop = entry_bb->loop_father;
      bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;

      add_bb_to_loop (l2_bb, outer_loop);

      /* We've added a new loop around the original loop.  Allocate the
	 corresponding loop struct.  */
      class loop *new_loop = alloc_loop ();
      new_loop->header = l0_bb;
      new_loop->latch = l2_bb;
      add_loop (new_loop, outer_loop);

      /* Allocate a loop structure for the original loop unless we already
	 had one.  */
      if (!orig_loop_has_loop_struct
	  && !gimple_omp_for_combined_p (fd->for_stmt))
	{
	  class loop *orig_loop = alloc_loop ();
	  orig_loop->header = l1_bb;
	  /* The loop may have multiple latches.  */
	  add_loop (orig_loop, new_loop);
	}
    }
}

/* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
   compute the needed allocation size.  With !ALLOC this handles the
   per-team allocations, with ALLOC the per-thread ones.  SZ is the
   initial size needed for other purposes, ALLOC_ALIGN the guaranteed
   alignment of the allocation in bytes and CNT the number of elements
   of each array (omp_get_num_threads () for !ALLOC, the number of
   iterations handled by the current thread for ALLOC).  If PTR is
   non-NULL, it is the start of the allocation and this routine emits
   code assigning pointers to the corresponding arrays to the
   OMP_CLAUSE_DECLs of the _scantemp_ clauses.  */
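
/* As a purely illustrative example (the clause types and sizes below
   are assumptions, not taken from any testcase): for two non-control
   _scantemp_ clauses whose pointee types are long long (8 bytes) and
   char (1 byte), a first call with PTR == NULL_TREE, SZ == 4 and
   ALLOC_ALIGN == 8 pads SZ to 8 for the long long array and returns
   8 + CNT * 9 as the number of bytes to allocate, while a second call
   with PTR pointing at that allocation emits code storing PTR + 8 into
   the first clause's OMP_CLAUSE_DECL and PTR + 8 + CNT * 8 into the
   second one's.  */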

static tree
expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
			   unsigned HOST_WIDE_INT alloc_align, tree cnt,
			   gimple_stmt_iterator *gsi, bool alloc)
{
  tree eltsz = NULL_TREE;
  unsigned HOST_WIDE_INT preval = 0;
  if (ptr && sz)
    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
		       ptr, size_int (sz));
  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	&& !OMP_CLAUSE__SCANTEMP__CONTROL (c)
	&& (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
      {
	tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
	unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
	  {
	    unsigned HOST_WIDE_INT szl
	      = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
	    szl = least_bit_hwi (szl);
	    if (szl)
	      al = MIN (al, szl);
	  }
	if (ptr == NULL_TREE)
	  {
	    if (eltsz == NULL_TREE)
	      eltsz = TYPE_SIZE_UNIT (pointee_type);
	    else
	      eltsz = size_binop (PLUS_EXPR, eltsz,
				  TYPE_SIZE_UNIT (pointee_type));
	  }
	if (preval == 0 && al <= alloc_align)
	  {
	    unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
	    sz += diff;
	    if (diff && ptr)
	      ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
				 ptr, size_int (diff));
	  }
	else if (al > preval)
	  {
	    if (ptr)
	      {
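		/* Round PTR up to an AL-aligned boundary, i.e. compute
		   (ptr + al - 1) & -al in a pointer-sized integer.  */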
		ptr = fold_convert (pointer_sized_int_node, ptr);
		ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
				   build_int_cst (pointer_sized_int_node,
						  al - 1));
		ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
				   build_int_cst (pointer_sized_int_node,
						  -(HOST_WIDE_INT) al));
		ptr = fold_convert (ptr_type_node, ptr);
	      }
	    else
	      sz += al - 1;
	  }
	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
	  preval = al;
	else
	  preval = 1;
	if (ptr)
	  {
	    expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
	    ptr = OMP_CLAUSE_DECL (c);
	    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
			       size_binop (MULT_EXPR, cnt,
					   TYPE_SIZE_UNIT (pointee_type)));
	  }
      }

  if (ptr == NULL_TREE)
    {
      eltsz = size_binop (MULT_EXPR, eltsz, cnt);
      if (sz)
	eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
      return eltsz;
    }
  else
    return ptr;
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and no specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	q = n / nthreads;
	tt = n % nthreads;
	if (threadid < tt) goto L3; else goto L4;
    L3:
	tt = 0;
	q = q + 1;
    L4:
	s0 = q * threadid + tt;
	e0 = s0 + q;
	V = s0 * STEP + N1;
	if (s0 >= e0) goto L2; else goto L0;
    L0:
	e = e0 * STEP + N1;
    L1:
	BODY;
	V += STEP;
	if (V cond e) goto L1;
    L2:
*/
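
/* For instance (purely illustrative numbers), with N1 = 0, N2 = 10,
   STEP = 1, cond "<" and nthreads = 4 this computes n = 10, q = 2 and
   tt = 2, so threads 0 and 1 take q = 3 iterations each while threads
   2 and 3 take 2, i.e. the ranges [0, 3), [3, 6), [6, 8) and [8, 10).  */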

static void
expand_omp_for_static_nochunk (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
  tree type, itype, vmain, vback;
  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
  basic_block body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
  basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
  gimple_stmt_iterator gsi, gsip;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;
  tree reductions = NULL_TREE;
  tree cond_var = NULL_TREE, condtemp = NULL_TREE;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  gsip = gsi;
  gsi_prev (&gsip);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
					    NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
      if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
		     expand_omp_regimplify_p, NULL, NULL)
	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			expand_omp_regimplify_p, NULL, NULL))
	{
	  gsi = gsi_for_stmt (cond_stmt);
	  gimple_regimplify_operands (cond_stmt, &gsi);
	}
      ep = split_block (entry_bb, cond_stmt);
      ep->flags = EDGE_TRUE_VALUE;
      entry_bb = ep->dest;
      ep->probability = profile_probability::very_likely ();
      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
	{
	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   ep, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  if (fd->lastprivate_conditional)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
      if (fd->have_pointer_condtemp)
	condtemp = OMP_CLAUSE_DECL (c);
      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
      cond_var = OMP_CLAUSE_DECL (c);
    }
  if (fd->have_reductemp
      /* For scan, we don't want to reinitialize condtemp before the
	 second loop.  */
      || (fd->have_pointer_condtemp && !fd->have_scantemp)
      || fd->have_nonctrl_scantemp)
    {
      tree t1 = build_int_cst (long_integer_type_node, 0);
      tree t2 = build_int_cst (long_integer_type_node, 1);
      tree t3 = build_int_cstu (long_integer_type_node,
				(HOST_WIDE_INT_1U << 31) + 1);
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      gimple_stmt_iterator gsi2 = gsi_none ();
      gimple *g = NULL;
      tree mem = null_pointer_node, memv = NULL_TREE;
      unsigned HOST_WIDE_INT condtemp_sz = 0;
      unsigned HOST_WIDE_INT alloc_align = 0;
      if (fd->have_reductemp)
	{
	  gcc_assert (!fd->have_nonctrl_scantemp);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
	  reductions = OMP_CLAUSE_DECL (c);
	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
	  g = SSA_NAME_DEF_STMT (reductions);
	  reductions = gimple_assign_rhs1 (g);
	  OMP_CLAUSE_DECL (c) = reductions;
	  gsi2 = gsi_for_stmt (g);
	}
      else
	{
	  if (gsi_end_p (gsip))
	    gsi2 = gsi_after_labels (region->entry);
	  else
	    gsi2 = gsip;
	  reductions = null_pointer_node;
	}
      if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
	{
	  tree type;
	  if (fd->have_pointer_condtemp)
	    type = TREE_TYPE (condtemp);
	  else
	    type = ptr_type_node;
	  memv = create_tmp_var (type);
	  TREE_ADDRESSABLE (memv) = 1;
	  unsigned HOST_WIDE_INT sz = 0;
	  tree size = NULL_TREE;
	  if (fd->have_pointer_condtemp)
	    {
	      sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
	      sz *= fd->lastprivate_conditional;
	      condtemp_sz = sz;
	    }
	  if (fd->have_nonctrl_scantemp)
	    {
	      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	      gimple *g = gimple_build_call (nthreads, 0);
	      nthreads = create_tmp_var (integer_type_node);
	      gimple_call_set_lhs (g, nthreads);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      nthreads = fold_convert (sizetype, nthreads);
	      alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
	      size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
						alloc_align, nthreads, NULL,
						false);
	      size = fold_convert (type, size);
	    }
	  else
	    size = build_int_cst (type, sz);
	  expand_omp_build_assign (&gsi2, memv, size, false);
	  mem = build_fold_addr_expr (memv);
	}
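      /* Emit a GOMP_loop_start call just to initialize the reduction
	 and/or condtemp/scantemp buffers; the static schedule itself
	 is computed inline below, so the start/end/incr/chunk
	 arguments are dummies and istart/iend are NULL.  */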
      tree t
	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
			   9, t1, t2, t2, t3, t1, null_pointer_node,
			   null_pointer_node, reductions, mem);
      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				true, GSI_SAME_STMT);
      if (fd->have_pointer_condtemp)
	expand_omp_build_assign (&gsi2, condtemp, memv, false);
      if (fd->have_nonctrl_scantemp)
	{
	  tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
	  expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
				     alloc_align, nthreads, &gsi2, false);
	}
      if (fd->have_reductemp)
	{
	  gsi_remove (&gsi2, true);
	  release_ssa_name (gimple_assign_lhs (g));
	}
    }
  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  q = create_tmp_reg (itype, "q");
  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);

  tt = create_tmp_reg (itype, "tt");
  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);

  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
  gcond *cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);

  second_bb = split_block (entry_bb, cond_stmt)->dest;
  gsi = gsi_last_nondebug_bb (second_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
		     GSI_SAME_STMT);
  gassign *assign_stmt
    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  third_bb = split_block (second_bb, assign_stmt)->dest;
  gsi = gsi_last_nondebug_bb (third_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->have_nonctrl_scantemp)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree controlp = NULL_TREE, controlb = NULL_TREE;
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
	  {
	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
	      controlb = OMP_CLAUSE_DECL (c);
	    else
	      controlp = OMP_CLAUSE_DECL (c);
	    if (controlb && controlp)
	      break;
	  }
      gcc_assert (controlp && controlb);
      tree cnt = create_tmp_var (sizetype);
      gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
      tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
					   alloc_align, cnt, NULL, true);
      tree size = create_tmp_var (sizetype);
      expand_omp_build_assign (&gsi, size, sz, false);
      tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
			      size, size_int (16384));
      expand_omp_build_assign (&gsi, controlb, cmp);
      g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
			     NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      fourth_bb = split_block (third_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (fourth_bb);
      /* FIXME: Once we have allocators, this should use allocator.  */
      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
      gimple_call_set_lhs (g, controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
				 &gsi, true);
      gsi_prev (&gsi);
      g = gsi_stmt (gsi);
      fifth_bb = split_block (fourth_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (fifth_bb);

      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
      gimple_call_set_lhs (g, controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__ALLOC (c))
	  {
	    tree tmp = create_tmp_var (sizetype);
	    tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
	    g = gimple_build_assign (tmp, MULT_EXPR, cnt,
				     TYPE_SIZE_UNIT (pointee_type));
	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	    g = gimple_build_call (alloca_decl, 2, tmp,
				   size_int (TYPE_ALIGN (pointee_type)));
	    gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	  }

      sixth_bb = split_block (fifth_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (sixth_bb);
    }

  t = build2 (MULT_EXPR, itype, q, threadid);
  t = build2 (PLUS_EXPR, itype, t, tt);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = fold_build2 (PLUS_EXPR, itype, s0, q);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
		     ? gimple_omp_parallel_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  int i;
	  for (i = 1; i < fd->collapse; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	    }
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }
  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (cond_var)
    {
      tree itype = TREE_TYPE (cond_var);
      /* For a lastprivate(conditional:) iteration variable, we need an
	 iteration counter that starts at a non-zero unsigned value and
	 increases.  Prefer as few IVs as possible, so if we can use
	 startvar itself, use that, or startvar + constant (those would
	 be incremented with step), and as a last resort use s0 + 1,
	 incremented by 1.  */
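      /* For instance (illustrative numbers only), with N1 = 5 and
	 cond "<" the startvar value itself can serve as the counter,
	 while with N1 = -3 the counter is startvar + 4, which starts
	 at 1.  */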
      if (POINTER_TYPE_P (type)
	  || TREE_CODE (n1) != INTEGER_CST
	  || fd->loop.cond_code != LT_EXPR)
	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
			 build_int_cst (itype, 1));
      else if (tree_int_cst_sgn (n1) == 1)
	t = fold_convert (itype, t);
      else
	{
	  tree c = fold_convert (itype, n1);
	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
	}
      t = force_gimple_operand_gsi (&gsi, t, false,
				    NULL_TREE, false, GSI_CONTINUE_LINKING);
      assign_stmt = gimple_build_assign (cond_var, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
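  /* Unlike in expand_omp_for_generic above, the iteration count used
     for the linear adjustment here is simply S0, or, when the loop is
     combined into an outer construct, (N1 - fd->loop.n1) / STEP + S0.  */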
  tree itercnt = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  bool is_ref = omp_is_reference (d);
	  tree t = d, a, dest;
	  if (is_ref)
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  itercnt = fold_build2 (MINUS_EXPR, itype,
					 fold_convert (itype, n1),
					 fold_convert (itype, fd->loop.n1));
		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  dest = unshare_expr (t);
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (dest, t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (cond_var)
	{
	  tree itype = TREE_TYPE (cond_var);
	  tree t2;
	  if (POINTER_TYPE_P (type)
	      || TREE_CODE (n1) != INTEGER_CST
	      || fd->loop.cond_code != LT_EXPR)
	    t2 = build_int_cst (itype, 1);
	  else
	    t2 = fold_convert (itype, step);
	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
					 NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (cond_var, t2);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      if (fd->have_reductemp
	  || ((fd->have_pointer_condtemp || fd->have_scantemp)
	      && !fd->have_nonctrl_scantemp))
	{
	  tree fn;
	  if (t)
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
	  else
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
	  gcall *g = gimple_build_call (fn, 0);
	  if (t)
	    {
	      gimple_call_set_lhs (g, t);
	      if (fd->have_reductemp)
		gsi_insert_after (&gsi, gimple_build_assign (reductions,
							     NOP_EXPR, t),
				  GSI_SAME_STMT);
	    }
	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
	}
      else
	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  else if ((fd->have_pointer_condtemp || fd->have_scantemp)
	   && !fd->have_nonctrl_scantemp)
    {
      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
      gcall *g = gimple_build_call (fn, 0);
      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
    }
  if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree controlp = NULL_TREE, controlb = NULL_TREE;
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
	  {
	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
	      controlb = OMP_CLAUSE_DECL (c);
	    else
	      controlp = OMP_CLAUSE_DECL (c);
	    if (controlb && controlp)
	      break;
	  }
      gcc_assert (controlp && controlb);
      gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
				     NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit1_bb = split_block (exit_bb, g)->dest;
      gsi = gsi_after_labels (exit1_bb);
      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
			     controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit2_bb = split_block (exit1_bb, g)->dest;
      gsi = gsi_after_labels (exit2_bb);
      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
			     controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit3_bb = split_block (exit2_bb, g)->dest;
      gsi = gsi_after_labels (exit3_bb);
    }
  gsi_remove (&gsi, true);

  /* Connect all the blocks.  */
  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
  ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
  ep = find_edge (entry_bb, second_bb);
  ep->flags = EDGE_TRUE_VALUE;
  ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
  if (fourth_bb)
    {
      ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
      ep->probability
	= profile_probability::guessed_always ().apply_scale (1, 2);
      ep = find_edge (third_bb, fourth_bb);
      ep->flags = EDGE_TRUE_VALUE;
      ep->probability
	= profile_probability::guessed_always ().apply_scale (1, 2);
      ep = find_edge (fourth_bb, fifth_bb);
      redirect_edge_and_branch (ep, sixth_bb);
    }
  else
    sixth_bb = third_bb;
  find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
  find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
  if (exit1_bb)
    {
      ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
      ep->probability
	= profile_probability::guessed_always ().apply_scale (1, 2);
      ep = find_edge (exit_bb, exit1_bb);
      ep->flags = EDGE_TRUE_VALUE;
      ep->probability
	= profile_probability::guessed_always ().apply_scale (1, 2);
      ep = find_edge (exit1_bb, exit2_bb);
      redirect_edge_and_branch (ep, exit3_bb);
    }

  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (ep == NULL)
	{
	  ep = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (ep->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
  if (fourth_bb)
    {
      set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
      set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
    }
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));
  if (exit1_bb)
    {
      set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
      set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
    }

  class loop *loop = body_bb->loop_father;
  if (loop != entry_bb->loop_father)
    {
      gcc_assert (broken_loop || loop->header == body_bb);
      gcc_assert (broken_loop
		  || loop->latch == region->cont
		  || single_pred (loop->latch) == region->cont);
      return;
    }

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}

/* Return phi in E->DEST with ARG on edge E.  */

static gphi *
find_phi_with_arg_on_edge (tree arg, edge e)
{
  basic_block bb = e->dest;

  for (gphi_iterator gpi = gsi_start_phis (bb);
       !gsi_end_p (gpi);
       gsi_next (&gpi))
    {
      gphi *phi = gpi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
	return phi;
    }

  return NULL;
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and a specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	trip = 0;
	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
					      here so that V is defined
					      if the loop is not entered
    L0:
	s0 = (trip * nthreads + threadid) * CHUNK;
	e0 = min (s0 + CHUNK, n);
	if (s0 < n) goto L1; else goto L4;
    L1:
	V = s0 * STEP + N1;
	e = e0 * STEP + N1;
    L2:
	BODY;
	V += STEP;
	if (V cond e) goto L2; else goto L3;
    L3:
	trip += 1;
	goto L0;
    L4:
*/
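
/* For instance (purely illustrative numbers), with N1 = 0, N2 = 10,
   STEP = 1, cond "<", CHUNK = 2 and nthreads = 4: on trip 0 the four
   threads get s0 = 0, 2, 4 and 6, i.e. the ranges [0, 2), [2, 4),
   [4, 6) and [6, 8); on trip 1 only thread 0 still satisfies
   s0 = 8 < n = 10 and runs [8, 10), the others branch to L4.  */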

static void
expand_omp_for_static_chunk (struct omp_region *region,
			     struct omp_for_data *fd, gimple *inner_stmt)
{
  tree n, s0, e0, e, t;
  tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
  tree type, itype, vmain, vback, vextra;
  basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
  basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
  gimple_stmt_iterator gsi, gsip;
  edge se;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;
  tree reductions = NULL_TREE;
  tree cond_var = NULL_TREE, condtemp = NULL_TREE;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  se = split_block (entry_bb, last_stmt (entry_bb));
  entry_bb = se->src;
  iter_part_bb = se->dest;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
  gcc_assert (broken_loop
	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
  seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
    }
  exit_bb = region->exit;

  /* Trip and adjustment setup goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  gsip = gsi;
  gsi_prev (&gsip);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
					    NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
      if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
		     expand_omp_regimplify_p, NULL, NULL)
	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			expand_omp_regimplify_p, NULL, NULL))
	{
	  gsi = gsi_for_stmt (cond_stmt);
	  gimple_regimplify_operands (cond_stmt, &gsi);
	}
      se = split_block (entry_bb, cond_stmt);
      se->flags = EDGE_TRUE_VALUE;
      entry_bb = se->dest;
      se->probability = profile_probability::very_likely ();
      se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
      se->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
	{
	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   se, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  if (fd->lastprivate_conditional)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
      if (fd->have_pointer_condtemp)
	condtemp = OMP_CLAUSE_DECL (c);
      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
      cond_var = OMP_CLAUSE_DECL (c);
    }
  if (fd->have_reductemp || fd->have_pointer_condtemp)
    {
      tree t1 = build_int_cst (long_integer_type_node, 0);
      tree t2 = build_int_cst (long_integer_type_node, 1);
      tree t3 = build_int_cstu (long_integer_type_node,
				(HOST_WIDE_INT_1U << 31) + 1);
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      gimple_stmt_iterator gsi2 = gsi_none ();
      gimple *g = NULL;
      tree mem = null_pointer_node, memv = NULL_TREE;
      if (fd->have_reductemp)
	{
	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
	  reductions = OMP_CLAUSE_DECL (c);
	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
	  g = SSA_NAME_DEF_STMT (reductions);
	  reductions = gimple_assign_rhs1 (g);
	  OMP_CLAUSE_DECL (c) = reductions;
	  gsi2 = gsi_for_stmt (g);
	}
      else
	{
	  if (gsi_end_p (gsip))
	    gsi2 = gsi_after_labels (region->entry);
	  else
	    gsi2 = gsip;
	  reductions = null_pointer_node;
	}
      if (fd->have_pointer_condtemp)
	{
	  tree type = TREE_TYPE (condtemp);
	  memv = create_tmp_var (type);
	  TREE_ADDRESSABLE (memv) = 1;
	  unsigned HOST_WIDE_INT sz
	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
	  sz *= fd->lastprivate_conditional;
	  expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
				   false);
	  mem = build_fold_addr_expr (memv);
	}
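      /* As above in expand_omp_for_static_nochunk, this GOMP_loop_start
	 call only sets up the reduction and/or condtemp buffers.  */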
4595      tree t
4596	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4597			   9, t1, t2, t2, t3, t1, null_pointer_node,
4598			   null_pointer_node, reductions, mem);
4599      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4600				true, GSI_SAME_STMT);
4601      if (fd->have_pointer_condtemp)
4602	expand_omp_build_assign (&gsi2, condtemp, memv, false);
4603      if (fd->have_reductemp)
4604	{
4605	  gsi_remove (&gsi2, true);
4606	  release_ssa_name (gimple_assign_lhs (g));
4607	}
4608    }
4609  switch (gimple_omp_for_kind (fd->for_stmt))
4610    {
4611    case GF_OMP_FOR_KIND_FOR:
4612      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4613      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4614      break;
4615    case GF_OMP_FOR_KIND_DISTRIBUTE:
4616      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4617      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4618      break;
4619    default:
4620      gcc_unreachable ();
4621    }
4622  nthreads = build_call_expr (nthreads, 0);
4623  nthreads = fold_convert (itype, nthreads);
4624  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4625				       true, GSI_SAME_STMT);
4626  threadid = build_call_expr (threadid, 0);
4627  threadid = fold_convert (itype, threadid);
4628  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4629				       true, GSI_SAME_STMT);
4630
4631  n1 = fd->loop.n1;
4632  n2 = fd->loop.n2;
4633  step = fd->loop.step;
4634  if (gimple_omp_for_combined_into_p (fd->for_stmt))
4635    {
4636      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4637				     OMP_CLAUSE__LOOPTEMP_);
4638      gcc_assert (innerc);
4639      n1 = OMP_CLAUSE_DECL (innerc);
4640      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4641				OMP_CLAUSE__LOOPTEMP_);
4642      gcc_assert (innerc);
4643      n2 = OMP_CLAUSE_DECL (innerc);
4644    }
4645  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4646				 true, NULL_TREE, true, GSI_SAME_STMT);
4647  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4648				 true, NULL_TREE, true, GSI_SAME_STMT);
4649  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4650				   true, NULL_TREE, true, GSI_SAME_STMT);
4651  tree chunk_size = fold_convert (itype, fd->chunk_size);
4652  chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4653  chunk_size
4654    = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4655				GSI_SAME_STMT);
4656
4657  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4658  t = fold_build2 (PLUS_EXPR, itype, step, t);
4659  t = fold_build2 (PLUS_EXPR, itype, t, n2);
4660  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4661  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4662    t = fold_build2 (TRUNC_DIV_EXPR, itype,
4663		     fold_build1 (NEGATE_EXPR, itype, t),
4664		     fold_build1 (NEGATE_EXPR, itype, step));
4665  else
4666    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4667  t = fold_convert (itype, t);
4668  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4669				true, GSI_SAME_STMT);
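  /* E.g. (an illustrative sketch): for (V = 0; V < 10; V += 3) this
     computes n = (10 - 0 + 3 - 1) / 3 = 4 iterations (V = 0, 3, 6, 9);
     for unsigned ITYPE with a '>' condition both operands are negated
     first, so the truncating division still rounds towards zero.  */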
4670
4671  trip_var = create_tmp_reg (itype, ".trip");
4672  if (gimple_in_ssa_p (cfun))
4673    {
4674      trip_init = make_ssa_name (trip_var);
4675      trip_main = make_ssa_name (trip_var);
4676      trip_back = make_ssa_name (trip_var);
4677    }
4678  else
4679    {
4680      trip_init = trip_var;
4681      trip_main = trip_var;
4682      trip_back = trip_var;
4683    }
4684
4685  gassign *assign_stmt
4686    = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4687  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4688
4689  t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4690  t = fold_build2 (MULT_EXPR, itype, t, step);
4691  if (POINTER_TYPE_P (type))
4692    t = fold_build_pointer_plus (n1, t);
4693  else
4694    t = fold_build2 (PLUS_EXPR, type, t, n1);
4695  vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4696				     true, GSI_SAME_STMT);
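  /* E.g. (a sketch): with chunk_size 8 and step 1, thread 2 gets
     VEXTRA = n1 + 2 * 8 * 1, i.e. the first iteration of its first
     chunk; this mirrors the S0 computation in ITER_PART_BB below for
     trip 0.  */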
4697
4698  /* Remove the GIMPLE_OMP_FOR.  */
4699  gsi_remove (&gsi, true);
4700
4701  gimple_stmt_iterator gsif = gsi;
4702
4703  /* Iteration space partitioning goes in ITER_PART_BB.  */
4704  gsi = gsi_last_bb (iter_part_bb);
4705
4706  t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4707  t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4708  t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4709  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4710				 false, GSI_CONTINUE_LINKING);
4711
4712  t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4713  t = fold_build2 (MIN_EXPR, itype, t, n);
4714  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4715				 false, GSI_CONTINUE_LINKING);
4716
4717  t = build2 (LT_EXPR, boolean_type_node, s0, n);
4718  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
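  /* E.g. (illustrative): with 4 threads, chunk_size 8, threadid 2 and
     trip_main 1, S0 = (1 * 4 + 2) * 8 = 48 and E0 = MIN (48 + 8, n);
     chunks are thus handed out to the threads in round-robin order.  */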
4719
4720  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
4721  gsi = gsi_start_bb (seq_start_bb);
4722
4723  tree startvar = fd->loop.v;
4724  tree endvar = NULL_TREE;
4725
4726  if (gimple_omp_for_combined_p (fd->for_stmt))
4727    {
4728      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4729		     ? gimple_omp_parallel_clauses (inner_stmt)
4730		     : gimple_omp_for_clauses (inner_stmt);
4731      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4732      gcc_assert (innerc);
4733      startvar = OMP_CLAUSE_DECL (innerc);
4734      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4735				OMP_CLAUSE__LOOPTEMP_);
4736      gcc_assert (innerc);
4737      endvar = OMP_CLAUSE_DECL (innerc);
4738      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4739	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4740	{
4741	  int i;
4742	  for (i = 1; i < fd->collapse; i++)
4743	    {
4744	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4745					OMP_CLAUSE__LOOPTEMP_);
4746	      gcc_assert (innerc);
4747	    }
4748	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4749				    OMP_CLAUSE__LOOPTEMP_);
4750	  if (innerc)
4751	    {
4752	      /* If needed (distribute parallel for with lastprivate),
4753		 propagate down the total number of iterations.  */
4754	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4755				     fd->loop.n2);
4756	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4757					    GSI_CONTINUE_LINKING);
4758	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4759	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4760	    }
4761	}
4762    }
4763
4764  t = fold_convert (itype, s0);
4765  t = fold_build2 (MULT_EXPR, itype, t, step);
4766  if (POINTER_TYPE_P (type))
4767    {
4768      t = fold_build_pointer_plus (n1, t);
4769      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4770	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4771	t = fold_convert (signed_type_for (type), t);
4772    }
4773  else
4774    t = fold_build2 (PLUS_EXPR, type, t, n1);
4775  t = fold_convert (TREE_TYPE (startvar), t);
4776  t = force_gimple_operand_gsi (&gsi, t,
4777				DECL_P (startvar)
4778				&& TREE_ADDRESSABLE (startvar),
4779				NULL_TREE, false, GSI_CONTINUE_LINKING);
4780  assign_stmt = gimple_build_assign (startvar, t);
4781  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4782  if (cond_var)
4783    {
4784      tree itype = TREE_TYPE (cond_var);
4785      /* For the lastprivate(conditional:) itervar, we need an iteration
4786	 counter that starts at a non-zero unsigned value and increases.
4787	 Prefer as few IVs as possible, so if we can use startvar
4788	 itself, use that, or startvar + constant (those would be
4789	 incremented with step), and as a last resort use s0 + 1
4790	 incremented by 1.  */
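      /* E.g. (an illustrative sketch): for (i = 5; i < n; i++) the
	 counter can be the startvar value itself (5, 6, ...); for
	 (i = -3; i < n; i++) it is that value shifted by 1 - (-3) = 4
	 (giving 1, 2, ...); pointer or non-constant bounds fall back
	 to s0 + 1.  */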
4791      if (POINTER_TYPE_P (type)
4792	  || TREE_CODE (n1) != INTEGER_CST
4793	  || fd->loop.cond_code != LT_EXPR)
4794	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4795			 build_int_cst (itype, 1));
4796      else if (tree_int_cst_sgn (n1) == 1)
4797	t = fold_convert (itype, t);
4798      else
4799	{
4800	  tree c = fold_convert (itype, n1);
4801	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4802	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4803	}
4804      t = force_gimple_operand_gsi (&gsi, t, false,
4805				    NULL_TREE, false, GSI_CONTINUE_LINKING);
4806      assign_stmt = gimple_build_assign (cond_var, t);
4807      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4808    }
4809
4810  t = fold_convert (itype, e0);
4811  t = fold_build2 (MULT_EXPR, itype, t, step);
4812  if (POINTER_TYPE_P (type))
4813    {
4814      t = fold_build_pointer_plus (n1, t);
4815      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4816	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4817	t = fold_convert (signed_type_for (type), t);
4818    }
4819  else
4820    t = fold_build2 (PLUS_EXPR, type, t, n1);
4821  t = fold_convert (TREE_TYPE (startvar), t);
4822  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4823				false, GSI_CONTINUE_LINKING);
4824  if (endvar)
4825    {
4826      assign_stmt = gimple_build_assign (endvar, e);
4827      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4828      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4829	assign_stmt = gimple_build_assign (fd->loop.v, e);
4830      else
4831	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4832      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4833    }
4834  /* Handle linear clause adjustments.  */
4835  tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4836  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4837    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4838	 c; c = OMP_CLAUSE_CHAIN (c))
4839      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4840	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4841	{
4842	  tree d = OMP_CLAUSE_DECL (c);
4843	  bool is_ref = omp_is_reference (d);
4844	  tree t = d, a, dest;
4845	  if (is_ref)
4846	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4847	  tree type = TREE_TYPE (t);
4848	  if (POINTER_TYPE_P (type))
4849	    type = sizetype;
4850	  dest = unshare_expr (t);
4851	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
4852	  expand_omp_build_assign (&gsif, v, t);
4853	  if (itercnt == NULL_TREE)
4854	    {
4855	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
4856		{
4857		  itercntbias
4858		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4859				   fold_convert (itype, fd->loop.n1));
4860		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4861					     itercntbias, step);
4862		  itercntbias
4863		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
4864						NULL_TREE, true,
4865						GSI_SAME_STMT);
4866		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4867		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4868						      NULL_TREE, false,
4869						      GSI_CONTINUE_LINKING);
4870		}
4871	      else
4872		itercnt = s0;
4873	    }
4874	  a = fold_build2 (MULT_EXPR, type,
4875			   fold_convert (type, itercnt),
4876			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4877	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4878			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4879	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4880					false, GSI_CONTINUE_LINKING);
4881	  assign_stmt = gimple_build_assign (dest, t);
4882	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4883	}
4884  if (fd->collapse > 1)
4885    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4886
4887  if (!broken_loop)
4888    {
4889      /* The code controlling the sequential loop goes in CONT_BB,
4890	 replacing the GIMPLE_OMP_CONTINUE.  */
4891      gsi = gsi_last_nondebug_bb (cont_bb);
4892      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4893      vmain = gimple_omp_continue_control_use (cont_stmt);
4894      vback = gimple_omp_continue_control_def (cont_stmt);
4895
4896      if (cond_var)
4897	{
4898	  tree itype = TREE_TYPE (cond_var);
4899	  tree t2;
4900	  if (POINTER_TYPE_P (type)
4901	      || TREE_CODE (n1) != INTEGER_CST
4902	      || fd->loop.cond_code != LT_EXPR)
4903	    t2 = build_int_cst (itype, 1);
4904	  else
4905	    t2 = fold_convert (itype, step);
4906	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4907	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
4908					 NULL_TREE, true, GSI_SAME_STMT);
4909	  assign_stmt = gimple_build_assign (cond_var, t2);
4910	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4911	}
4912
4913      if (!gimple_omp_for_combined_p (fd->for_stmt))
4914	{
4915	  if (POINTER_TYPE_P (type))
4916	    t = fold_build_pointer_plus (vmain, step);
4917	  else
4918	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
4919	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4920	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4921					  true, GSI_SAME_STMT);
4922	  assign_stmt = gimple_build_assign (vback, t);
4923	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4924
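	  /* With a chunk size of 1 each chunk contains exactly one
	     iteration, so the loop-back condition can never be true;
	     emit a constant-false comparison instead.  */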
4925	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4926	    t = build2 (EQ_EXPR, boolean_type_node,
4927			build_int_cst (itype, 0),
4928			build_int_cst (itype, 1));
4929	  else
4930	    t = build2 (fd->loop.cond_code, boolean_type_node,
4931			DECL_P (vback) && TREE_ADDRESSABLE (vback)
4932			? t : vback, e);
4933	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4934	}
4935
4936      /* Remove GIMPLE_OMP_CONTINUE.  */
4937      gsi_remove (&gsi, true);
4938
4939      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4940	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4941
4942      /* Trip update code goes into TRIP_UPDATE_BB.  */
4943      gsi = gsi_start_bb (trip_update_bb);
4944
4945      t = build_int_cst (itype, 1);
4946      t = build2 (PLUS_EXPR, itype, trip_main, t);
4947      assign_stmt = gimple_build_assign (trip_back, t);
4948      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4949    }
4950
4951  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
4952  gsi = gsi_last_nondebug_bb (exit_bb);
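  /* If reduction or conditional-lastprivate temporaries are in use, the
     runtime loop state must be released through GOMP_loop_end{,_cancel,
     _nowait} rather than through a bare barrier.  */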
4953  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4954    {
4955      t = gimple_omp_return_lhs (gsi_stmt (gsi));
4956      if (fd->have_reductemp || fd->have_pointer_condtemp)
4957	{
4958	  tree fn;
4959	  if (t)
4960	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4961	  else
4962	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4963	  gcall *g = gimple_build_call (fn, 0);
4964	  if (t)
4965	    {
4966	      gimple_call_set_lhs (g, t);
4967	      if (fd->have_reductemp)
4968		gsi_insert_after (&gsi, gimple_build_assign (reductions,
4969							     NOP_EXPR, t),
4970				  GSI_SAME_STMT);
4971	    }
4972	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4973	}
4974      else
4975	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4976    }
4977  else if (fd->have_pointer_condtemp)
4978    {
4979      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4980      gcall *g = gimple_build_call (fn, 0);
4981      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4982    }
4983  gsi_remove (&gsi, true);
4984
4985  /* Connect the new blocks.  */
4986  find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4987  find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4988
4989  if (!broken_loop)
4990    {
4991      se = find_edge (cont_bb, body_bb);
4992      if (se == NULL)
4993	{
4994	  se = BRANCH_EDGE (cont_bb);
4995	  gcc_assert (single_succ (se->dest) == body_bb);
4996	}
4997      if (gimple_omp_for_combined_p (fd->for_stmt))
4998	{
4999	  remove_edge (se);
5000	  se = NULL;
5001	}
5002      else if (fd->collapse > 1)
5003	{
5004	  remove_edge (se);
5005	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5006	}
5007      else
5008	se->flags = EDGE_TRUE_VALUE;
5009      find_edge (cont_bb, trip_update_bb)->flags
5010	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5011
5012      redirect_edge_and_branch (single_succ_edge (trip_update_bb),
5013				iter_part_bb);
5014    }
5015
5016  if (gimple_in_ssa_p (cfun))
5017    {
5018      gphi_iterator psi;
5019      gphi *phi;
5020      edge re, ene;
5021      edge_var_map *vm;
5022      size_t i;
5023
5024      gcc_assert (fd->collapse == 1 && !broken_loop);
5025
5026      /* When we redirect the edge from trip_update_bb to iter_part_bb, we
5027	 remove arguments of the phi nodes in fin_bb.  We need to create
5028	 appropriate phi nodes in iter_part_bb instead.  */
5029      se = find_edge (iter_part_bb, fin_bb);
5030      re = single_succ_edge (trip_update_bb);
5031      vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
5032      ene = single_succ_edge (entry_bb);
5033
5034      psi = gsi_start_phis (fin_bb);
5035      for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
5036	   gsi_next (&psi), ++i)
5037	{
5038	  gphi *nphi;
5039	  location_t locus;
5040
5041	  phi = psi.phi ();
5042	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
5043			       redirect_edge_var_map_def (vm), 0))
5044	    continue;
5045
5046	  t = gimple_phi_result (phi);
5047	  gcc_assert (t == redirect_edge_var_map_result (vm));
5048
5049	  if (!single_pred_p (fin_bb))
5050	    t = copy_ssa_name (t, phi);
5051
5052	  nphi = create_phi_node (t, iter_part_bb);
5053
5054	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
5055	  locus = gimple_phi_arg_location_from_edge (phi, se);
5056
5057	  /* A special case -- fd->loop.v is not yet computed in
5058	     iter_part_bb, so we need to use vextra instead.  */
5059	  if (t == fd->loop.v)
5060	    t = vextra;
5061	  add_phi_arg (nphi, t, ene, locus);
5062	  locus = redirect_edge_var_map_location (vm);
5063	  tree back_arg = redirect_edge_var_map_def (vm);
5064	  add_phi_arg (nphi, back_arg, re, locus);
5065	  edge ce = find_edge (cont_bb, body_bb);
5066	  if (ce == NULL)
5067	    {
5068	      ce = BRANCH_EDGE (cont_bb);
5069	      gcc_assert (single_succ (ce->dest) == body_bb);
5070	      ce = single_succ_edge (ce->dest);
5071	    }
5072	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
5073	  gcc_assert (inner_loop_phi != NULL);
5074	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
5075		       find_edge (seq_start_bb, body_bb), locus);
5076
5077	  if (!single_pred_p (fin_bb))
5078	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
5079	}
5080      gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
5081      redirect_edge_var_map_clear (re);
5082      if (single_pred_p (fin_bb))
5083	while (1)
5084	  {
5085	    psi = gsi_start_phis (fin_bb);
5086	    if (gsi_end_p (psi))
5087	      break;
5088	    remove_phi_node (&psi, false);
5089	  }
5090
5091      /* Make phi node for trip.  */
5092      phi = create_phi_node (trip_main, iter_part_bb);
5093      add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
5094		   UNKNOWN_LOCATION);
5095      add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
5096		   UNKNOWN_LOCATION);
5097    }
5098
5099  if (!broken_loop)
5100    set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
5101  set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
5102			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
5103  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5104			   recompute_dominator (CDI_DOMINATORS, fin_bb));
5105  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
5106			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
5107  set_immediate_dominator (CDI_DOMINATORS, body_bb,
5108			   recompute_dominator (CDI_DOMINATORS, body_bb));
5109
5110  if (!broken_loop)
5111    {
5112      class loop *loop = body_bb->loop_father;
5113      class loop *trip_loop = alloc_loop ();
5114      trip_loop->header = iter_part_bb;
5115      trip_loop->latch = trip_update_bb;
5116      add_loop (trip_loop, iter_part_bb->loop_father);
5117
5118      if (loop != entry_bb->loop_father)
5119	{
5120	  gcc_assert (loop->header == body_bb);
5121	  gcc_assert (loop->latch == region->cont
5122		      || single_pred (loop->latch) == region->cont);
5123	  trip_loop->inner = loop;
5124	  return;
5125	}
5126
5127      if (!gimple_omp_for_combined_p (fd->for_stmt))
5128	{
5129	  loop = alloc_loop ();
5130	  loop->header = body_bb;
5131	  if (collapse_bb == NULL)
5132	    loop->latch = cont_bb;
5133	  add_loop (loop, trip_loop);
5134	}
5135    }
5136}
5137
5138/* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
5139   loop.  Given parameters:
5140
5141	for (V = N1; V cond N2; V += STEP) BODY;
5142
5143   where COND is "<" or ">", we generate pseudocode
5144
5145	V = N1;
5146	goto L1;
5147    L0:
5148	BODY;
5149	V += STEP;
5150    L1:
5151	if (V cond N2) goto L0; else goto L2;
5152    L2:
5153
5154    For collapsed loops, given parameters:
5155      collapse(3)
5156      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
5157	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
5158	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
5159	    BODY;
5160
5161    we generate pseudocode
5162
5163	if (cond3 is <)
5164	  adj = STEP3 - 1;
5165	else
5166	  adj = STEP3 + 1;
5167	count3 = (adj + N32 - N31) / STEP3;
5168	if (cond2 is <)
5169	  adj = STEP2 - 1;
5170	else
5171	  adj = STEP2 + 1;
5172	count2 = (adj + N22 - N21) / STEP2;
5173	if (cond1 is <)
5174	  adj = STEP1 - 1;
5175	else
5176	  adj = STEP1 + 1;
5177	count1 = (adj + N12 - N11) / STEP1;
5178	count = count1 * count2 * count3;
5179	V = 0;
5180	V1 = N11;
5181	V2 = N21;
5182	V3 = N31;
5183	goto L1;
5184    L0:
5185	BODY;
5186	V += 1;
5187	V3 += STEP3;
5188	V2 += (V3 cond3 N32) ? 0 : STEP2;
5189	V3 = (V3 cond3 N32) ? V3 : N31;
5190	V1 += (V2 cond2 N22) ? 0 : STEP1;
5191	V2 = (V2 cond2 N22) ? V2 : N21;
5192    L1:
5193	if (V < count) goto L0; else goto L2;
5194    L2:
5195
5196      */
5197
5198static void
5199expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
5200{
5201  tree type, t;
5202  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
5203  gimple_stmt_iterator gsi;
5204  gimple *stmt;
5205  gcond *cond_stmt;
5206  bool broken_loop = region->cont == NULL;
5207  edge e, ne;
5208  tree *counts = NULL;
5209  int i;
5210  int safelen_int = INT_MAX;
5211  bool dont_vectorize = false;
5212  tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5213				  OMP_CLAUSE_SAFELEN);
5214  tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5215				  OMP_CLAUSE__SIMDUID_);
5216  tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5217			      OMP_CLAUSE_IF);
5218  tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5219				  OMP_CLAUSE_SIMDLEN);
5220  tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5221				   OMP_CLAUSE__CONDTEMP_);
5222  tree n1, n2;
5223  tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
5224
5225  if (safelen)
5226    {
5227      poly_uint64 val;
5228      safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
5229      if (!poly_int_tree_p (safelen, &val))
5230	safelen_int = 0;
5231      else
5232	safelen_int = MIN (constant_lower_bound (val), INT_MAX);
5233      if (safelen_int == 1)
5234	safelen_int = 0;
5235    }
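  /* E.g. (illustrative): safelen(8) yields loop->safelen == 8 further
     below, while safelen(1) or a non-constant argument degenerates to
     0, i.e. no vectorization benefit.  */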
5236  if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
5237      || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
5238    {
5239      safelen_int = 0;
5240      dont_vectorize = true;
5241    }
5242  type = TREE_TYPE (fd->loop.v);
5243  entry_bb = region->entry;
5244  cont_bb = region->cont;
5245  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5246  gcc_assert (broken_loop
5247	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5248  l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
5249  if (!broken_loop)
5250    {
5251      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
5252      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5253      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
5254      l2_bb = BRANCH_EDGE (entry_bb)->dest;
5255    }
5256  else
5257    {
5258      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
5259      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
5260      l2_bb = single_succ (l1_bb);
5261    }
5262  exit_bb = region->exit;
5263  l2_dom_bb = NULL;
5264
5265  gsi = gsi_last_nondebug_bb (entry_bb);
5266
5267  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5268  /* Not needed in SSA form right now.  */
5269  gcc_assert (!gimple_in_ssa_p (cfun));
5270  if (fd->collapse > 1)
5271    {
5272      int first_zero_iter = -1, dummy = -1;
5273      basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
5274
5275      counts = XALLOCAVEC (tree, fd->collapse);
5276      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5277				  zero_iter_bb, first_zero_iter,
5278				  dummy_bb, dummy, l2_dom_bb);
5279    }
5280  if (l2_dom_bb == NULL)
5281    l2_dom_bb = l1_bb;
5282
5283  n1 = fd->loop.n1;
5284  n2 = fd->loop.n2;
5285  if (gimple_omp_for_combined_into_p (fd->for_stmt))
5286    {
5287      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5288				     OMP_CLAUSE__LOOPTEMP_);
5289      gcc_assert (innerc);
5290      n1 = OMP_CLAUSE_DECL (innerc);
5291      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5292				OMP_CLAUSE__LOOPTEMP_);
5293      gcc_assert (innerc);
5294      n2 = OMP_CLAUSE_DECL (innerc);
5295    }
5296  tree step = fd->loop.step;
5297
5298  bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5299				  OMP_CLAUSE__SIMT_);
5300  if (is_simt)
5301    {
5302      cfun->curr_properties &= ~PROP_gimple_lomp_dev;
5303      is_simt = safelen_int > 1;
5304    }
5305  tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
5306  if (is_simt)
5307    {
5308      simt_lane = create_tmp_var (unsigned_type_node);
5309      gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
5310      gimple_call_set_lhs (g, simt_lane);
5311      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5312      tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
5313				 fold_convert (TREE_TYPE (step), simt_lane));
5314      n1 = fold_convert (type, n1);
5315      if (POINTER_TYPE_P (type))
5316	n1 = fold_build_pointer_plus (n1, offset);
5317      else
5318	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
5319
5320      /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
5321      if (fd->collapse > 1)
5322	simt_maxlane = build_one_cst (unsigned_type_node);
5323      else if (safelen_int < omp_max_simt_vf ())
5324	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
5325      tree vf
5326	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
5327					unsigned_type_node, 0);
5328      if (simt_maxlane)
5329	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
5330      vf = fold_convert (TREE_TYPE (step), vf);
5331      step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
5332    }
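  /* E.g. (a sketch): on a SIMT target with 16 lanes, lane L starts at
     N1 + L * STEP and advances by STEP * GOMP_SIMT_VF (), so the lanes
     interleave the iteration space; SIMT_MAXLANE caps the lane count
     when safelen or collapse limits it.  */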
5333
5334  expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
5335  if (fd->collapse > 1)
5336    {
5337      if (gimple_omp_for_combined_into_p (fd->for_stmt))
5338	{
5339	  gsi_prev (&gsi);
5340	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
5341	  gsi_next (&gsi);
5342	}
5343      else
5344	for (i = 0; i < fd->collapse; i++)
5345	  {
5346	    tree itype = TREE_TYPE (fd->loops[i].v);
5347	    if (POINTER_TYPE_P (itype))
5348	      itype = signed_type_for (itype);
5349	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
5350	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5351	  }
5352    }
5353  if (cond_var)
5354    {
5355      if (POINTER_TYPE_P (type)
5356	  || TREE_CODE (n1) != INTEGER_CST
5357	  || fd->loop.cond_code != LT_EXPR
5358	  || tree_int_cst_sgn (n1) != 1)
5359	expand_omp_build_assign (&gsi, cond_var,
5360				 build_one_cst (TREE_TYPE (cond_var)));
5361      else
5362	expand_omp_build_assign (&gsi, cond_var,
5363				 fold_convert (TREE_TYPE (cond_var), n1));
5364    }
5365
5366  /* Remove the GIMPLE_OMP_FOR statement.  */
5367  gsi_remove (&gsi, true);
5368
5369  if (!broken_loop)
5370    {
5371      /* Code to control the increment goes in the CONT_BB.  */
5372      gsi = gsi_last_nondebug_bb (cont_bb);
5373      stmt = gsi_stmt (gsi);
5374      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
5375
5376      if (POINTER_TYPE_P (type))
5377	t = fold_build_pointer_plus (fd->loop.v, step);
5378      else
5379	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5380      expand_omp_build_assign (&gsi, fd->loop.v, t);
5381
5382      if (fd->collapse > 1)
5383	{
5384	  i = fd->collapse - 1;
5385	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
5386	    {
5387	      t = fold_convert (sizetype, fd->loops[i].step);
5388	      t = fold_build_pointer_plus (fd->loops[i].v, t);
5389	    }
5390	  else
5391	    {
5392	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
5393				fd->loops[i].step);
5394	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
5395			       fd->loops[i].v, t);
5396	    }
5397	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5398
5399	  for (i = fd->collapse - 1; i > 0; i--)
5400	    {
5401	      tree itype = TREE_TYPE (fd->loops[i].v);
5402	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
5403	      if (POINTER_TYPE_P (itype2))
5404		itype2 = signed_type_for (itype2);
5405	      t = fold_convert (itype2, fd->loops[i - 1].step);
5406	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5407					    GSI_SAME_STMT);
5408	      t = build3 (COND_EXPR, itype2,
5409			  build2 (fd->loops[i].cond_code, boolean_type_node,
5410				  fd->loops[i].v,
5411				  fold_convert (itype, fd->loops[i].n2)),
5412			  build_int_cst (itype2, 0), t);
5413	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
5414		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
5415	      else
5416		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
5417	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
5418
5419	      t = fold_convert (itype, fd->loops[i].n1);
5420	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5421					    GSI_SAME_STMT);
5422	      t = build3 (COND_EXPR, itype,
5423			  build2 (fd->loops[i].cond_code, boolean_type_node,
5424				  fd->loops[i].v,
5425				  fold_convert (itype, fd->loops[i].n2)),
5426			  fd->loops[i].v, t);
5427	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5428	    }
5429	}
5430      if (cond_var)
5431	{
5432	  if (POINTER_TYPE_P (type)
5433	      || TREE_CODE (n1) != INTEGER_CST
5434	      || fd->loop.cond_code != LT_EXPR
5435	      || tree_int_cst_sgn (n1) != 1)
5436	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5437			     build_one_cst (TREE_TYPE (cond_var)));
5438	  else
5439	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5440			     fold_convert (TREE_TYPE (cond_var), step));
5441	  expand_omp_build_assign (&gsi, cond_var, t);
5442	}
5443
5444      /* Remove GIMPLE_OMP_CONTINUE.  */
5445      gsi_remove (&gsi, true);
5446    }
5447
5448  /* Emit the condition in L1_BB.  */
5449  gsi = gsi_start_bb (l1_bb);
5450
5451  t = fold_convert (type, n2);
5452  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5453				false, GSI_CONTINUE_LINKING);
5454  tree v = fd->loop.v;
5455  if (DECL_P (v) && TREE_ADDRESSABLE (v))
5456    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
5457				  false, GSI_CONTINUE_LINKING);
5458  t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
5459  cond_stmt = gimple_build_cond_empty (t);
5460  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
5461  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
5462		 NULL, NULL)
5463      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
5464		    NULL, NULL))
5465    {
5466      gsi = gsi_for_stmt (cond_stmt);
5467      gimple_regimplify_operands (cond_stmt, &gsi);
5468    }
5469
5470  /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
5471  if (is_simt)
5472    {
5473      gsi = gsi_start_bb (l2_bb);
5474      step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
5475      if (POINTER_TYPE_P (type))
5476	t = fold_build_pointer_plus (fd->loop.v, step);
5477      else
5478	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5479      expand_omp_build_assign (&gsi, fd->loop.v, t);
5480    }
5481
5482  /* Remove GIMPLE_OMP_RETURN.  */
5483  gsi = gsi_last_nondebug_bb (exit_bb);
5484  gsi_remove (&gsi, true);
5485
5486  /* Connect the new blocks.  */
5487  remove_edge (FALLTHRU_EDGE (entry_bb));
5488
5489  if (!broken_loop)
5490    {
5491      remove_edge (BRANCH_EDGE (entry_bb));
5492      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
5493
5494      e = BRANCH_EDGE (l1_bb);
5495      ne = FALLTHRU_EDGE (l1_bb);
5496      e->flags = EDGE_TRUE_VALUE;
5497    }
5498  else
5499    {
5500      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5501
5502      ne = single_succ_edge (l1_bb);
5503      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
5505    }
5506  ne->flags = EDGE_FALSE_VALUE;
5507  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
5508  ne->probability = e->probability.invert ();
5509
5510  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
5511  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
5512
5513  if (simt_maxlane)
5514    {
5515      cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
5516				     NULL_TREE, NULL_TREE);
5517      gsi = gsi_last_bb (entry_bb);
5518      gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
5519      make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
5520      FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
5521      FALLTHRU_EDGE (entry_bb)->probability
5522	 = profile_probability::guessed_always ().apply_scale (7, 8);
5523      BRANCH_EDGE (entry_bb)->probability
5524	 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
5525      l2_dom_bb = entry_bb;
5526    }
5527  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
5528
5529  if (!broken_loop)
5530    {
5531      class loop *loop = alloc_loop ();
5532      loop->header = l1_bb;
5533      loop->latch = cont_bb;
5534      add_loop (loop, l1_bb->loop_father);
5535      loop->safelen = safelen_int;
5536      if (simduid)
5537	{
5538	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5539	  cfun->has_simduid_loops = true;
5540	}
5541      /* Unless -ftree-loop-vectorize was explicitly disabled, hint that
5542	 we want to vectorize the loop.  */
5543      if ((flag_tree_loop_vectorize
5544	   || !global_options_set.x_flag_tree_loop_vectorize)
5545	  && flag_tree_loop_optimize
5546	  && loop->safelen > 1)
5547	{
5548	  loop->force_vectorize = true;
5549	  if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
5550	    {
5551	      unsigned HOST_WIDE_INT v
5552		= tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
5553	      if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
5554		loop->simdlen = v;
5555	    }
5556	  cfun->has_force_vectorize_loops = true;
5557	}
5558      else if (dont_vectorize)
5559	loop->dont_vectorize = true;
5560    }
5561  else if (simduid)
5562    cfun->has_simduid_loops = true;
5563}
5564
5565/* The taskloop construct is represented after gimplification as two
5566   GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched between
5567   them.  This routine expands the outer GIMPLE_OMP_FOR, which should
5568   just compute all the loop temporaries needed by the
5569   GIMPLE_OMP_TASK.  */
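/* An illustrative sketch of the (simplified) representation:

     #pragma omp taskloop
     for (V = N1; V < N2; V++) BODY;

   becomes

     GIMPLE_OMP_FOR       <- outer; expanded here, computes only the
       GIMPLE_OMP_TASK       _LOOPTEMP_ values N1 and N2
         GIMPLE_OMP_FOR   <- inner; expanded by
           BODY              expand_omp_taskloop_for_inner.  */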
5570
5571static void
5572expand_omp_taskloop_for_outer (struct omp_region *region,
5573			       struct omp_for_data *fd,
5574			       gimple *inner_stmt)
5575{
5576  tree type, bias = NULL_TREE;
5577  basic_block entry_bb, cont_bb, exit_bb;
5578  gimple_stmt_iterator gsi;
5579  gassign *assign_stmt;
5580  tree *counts = NULL;
5581  int i;
5582
5583  gcc_assert (inner_stmt);
5584  gcc_assert (region->cont);
5585  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
5586	      && gimple_omp_task_taskloop_p (inner_stmt));
5587  type = TREE_TYPE (fd->loop.v);
5588
5589  /* See if we need to bias by LLONG_MIN.  */
5590  if (fd->iter_type == long_long_unsigned_type_node
5591      && TREE_CODE (type) == INTEGER_TYPE
5592      && !TYPE_UNSIGNED (type))
5593    {
5594      tree n1, n2;
5595
5596      if (fd->loop.cond_code == LT_EXPR)
5597	{
5598	  n1 = fd->loop.n1;
5599	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5600	}
5601      else
5602	{
5603	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5604	  n2 = fd->loop.n1;
5605	}
5606      if (TREE_CODE (n1) != INTEGER_CST
5607	  || TREE_CODE (n2) != INTEGER_CST
5608	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5609	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5610    }
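  /* A worked sketch of the bias: for signed 64-bit bounds n1 = -4,
     n2 = 10 the runtime iterates in unsigned long long, so both bounds
     are biased by LLONG_MIN: -4 becomes 0x7ffffffffffffffc and 10
     becomes 0x800000000000000a, which compare in the same order as
     unsigned values, since x -> x + LLONG_MIN is monotonic over the
     signed range.  */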
5611
5612  entry_bb = region->entry;
5613  cont_bb = region->cont;
5614  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5615  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5616  exit_bb = region->exit;
5617
5618  gsi = gsi_last_nondebug_bb (entry_bb);
5619  gimple *for_stmt = gsi_stmt (gsi);
5620  gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5621  if (fd->collapse > 1)
5622    {
5623      int first_zero_iter = -1, dummy = -1;
5624      basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5625
5626      counts = XALLOCAVEC (tree, fd->collapse);
5627      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5628				  zero_iter_bb, first_zero_iter,
5629				  dummy_bb, dummy, l2_dom_bb);
5630
5631      if (zero_iter_bb)
5632	{
5633	  /* Some counts[i] vars might be uninitialized if
5634	     some loop has zero iterations.  But the body shouldn't
5635	     be executed in that case, so just avoid uninit warnings.  */
5636	  for (i = first_zero_iter; i < fd->collapse; i++)
5637	    if (SSA_VAR_P (counts[i]))
5638	      TREE_NO_WARNING (counts[i]) = 1;
5639	  gsi_prev (&gsi);
5640	  edge e = split_block (entry_bb, gsi_stmt (gsi));
5641	  entry_bb = e->dest;
5642	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5643	  gsi = gsi_last_bb (entry_bb);
5644	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5645				   get_immediate_dominator (CDI_DOMINATORS,
5646							    zero_iter_bb));
5647	}
5648    }
5649
5650  tree t0, t1;
5651  t1 = fd->loop.n2;
5652  t0 = fd->loop.n1;
5653  if (POINTER_TYPE_P (TREE_TYPE (t0))
5654      && TYPE_PRECISION (TREE_TYPE (t0))
5655	 != TYPE_PRECISION (fd->iter_type))
5656    {
5657      /* Avoid casting pointers to integer of a different size.  */
5658      tree itype = signed_type_for (type);
5659      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5660      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5661    }
5662  else
5663    {
5664      t1 = fold_convert (fd->iter_type, t1);
5665      t0 = fold_convert (fd->iter_type, t0);
5666    }
5667  if (bias)
5668    {
5669      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5670      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5671    }
5672
5673  tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5674				 OMP_CLAUSE__LOOPTEMP_);
5675  gcc_assert (innerc);
5676  tree startvar = OMP_CLAUSE_DECL (innerc);
5677  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5678  gcc_assert (innerc);
5679  tree endvar = OMP_CLAUSE_DECL (innerc);
5680  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5681    {
5682      gcc_assert (innerc);
5683      for (i = 1; i < fd->collapse; i++)
5684	{
5685	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5686				    OMP_CLAUSE__LOOPTEMP_);
5687	  gcc_assert (innerc);
5688	}
5689      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5690				OMP_CLAUSE__LOOPTEMP_);
5691      if (innerc)
5692	{
5693	  /* If needed (inner taskloop has lastprivate clause), propagate
5694	     down the total number of iterations.  */
5695	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5696					     NULL_TREE, false,
5697					     GSI_CONTINUE_LINKING);
5698	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5699	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5700	}
5701    }
5702
5703  t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5704				 GSI_CONTINUE_LINKING);
5705  assign_stmt = gimple_build_assign (startvar, t0);
5706  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5707
5708  t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5709				 GSI_CONTINUE_LINKING);
5710  assign_stmt = gimple_build_assign (endvar, t1);
5711  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5712  if (fd->collapse > 1)
5713    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5714
5715  /* Remove the GIMPLE_OMP_FOR statement.  */
5716  gsi = gsi_for_stmt (for_stmt);
5717  gsi_remove (&gsi, true);
5718
5719  gsi = gsi_last_nondebug_bb (cont_bb);
5720  gsi_remove (&gsi, true);
5721
5722  gsi = gsi_last_nondebug_bb (exit_bb);
5723  gsi_remove (&gsi, true);
5724
5725  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5726  remove_edge (BRANCH_EDGE (entry_bb));
5727  FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5728  remove_edge (BRANCH_EDGE (cont_bb));
5729  set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5730  set_immediate_dominator (CDI_DOMINATORS, region->entry,
5731			   recompute_dominator (CDI_DOMINATORS, region->entry));
5732}
5733
5734/* The taskloop construct is represented after gimplification as two
5735   GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched between
5736   them.  This routine expands the inner GIMPLE_OMP_FOR; the
5737   GOMP_taskloop{,_ull} function arranges for each task to be given
5738   just a single range of iterations.  */
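/* A sketch of the resulting per-task loop (assumed shape): the two
   _LOOPTEMP_ clauses receive the START and END of the subrange that
   GOMP_taskloop{,_ull} assigned to this task, so the expansion below
   reduces to

     for (V = START; V cond END; V += STEP) BODY;  */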
5739
5740static void
5741expand_omp_taskloop_for_inner (struct omp_region *region,
5742			       struct omp_for_data *fd,
5743			       gimple *inner_stmt)
5744{
5745  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5746  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5747  basic_block fin_bb;
5748  gimple_stmt_iterator gsi;
5749  edge ep;
5750  bool broken_loop = region->cont == NULL;
5751  tree *counts = NULL;
5752  tree n1, n2, step;
5753
5754  itype = type = TREE_TYPE (fd->loop.v);
5755  if (POINTER_TYPE_P (type))
5756    itype = signed_type_for (type);
5757
5758  /* See if we need to bias by LLONG_MIN.  */
5759  if (fd->iter_type == long_long_unsigned_type_node
5760      && TREE_CODE (type) == INTEGER_TYPE
5761      && !TYPE_UNSIGNED (type))
5762    {
5763      tree n1, n2;
5764
5765      if (fd->loop.cond_code == LT_EXPR)
5766	{
5767	  n1 = fd->loop.n1;
5768	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5769	}
5770      else
5771	{
5772	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5773	  n2 = fd->loop.n1;
5774	}
5775      if (TREE_CODE (n1) != INTEGER_CST
5776	  || TREE_CODE (n2) != INTEGER_CST
5777	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5778	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5779    }
5780
5781  entry_bb = region->entry;
5782  cont_bb = region->cont;
5783  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5784  fin_bb = BRANCH_EDGE (entry_bb)->dest;
5785  gcc_assert (broken_loop
5786	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5787  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5788  if (!broken_loop)
5789    {
5790      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5791      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5792    }
5793  exit_bb = region->exit;
5794
5795  /* Iteration space partitioning goes in ENTRY_BB.  */
5796  gsi = gsi_last_nondebug_bb (entry_bb);
5797  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5798
5799  if (fd->collapse > 1)
5800    {
5801      int first_zero_iter = -1, dummy = -1;
5802      basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5803
5804      counts = XALLOCAVEC (tree, fd->collapse);
5805      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5806				  fin_bb, first_zero_iter,
5807				  dummy_bb, dummy, l2_dom_bb);
5808      t = NULL_TREE;
5809    }
5810  else
5811    t = integer_one_node;
5812
5813  step = fd->loop.step;
5814  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5815				 OMP_CLAUSE__LOOPTEMP_);
5816  gcc_assert (innerc);
5817  n1 = OMP_CLAUSE_DECL (innerc);
5818  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5819  gcc_assert (innerc);
5820  n2 = OMP_CLAUSE_DECL (innerc);
5821  if (bias)
5822    {
5823      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5824      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5825    }
5826  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5827				 true, NULL_TREE, true, GSI_SAME_STMT);
5828  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5829				 true, NULL_TREE, true, GSI_SAME_STMT);
5830  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5831				   true, NULL_TREE, true, GSI_SAME_STMT);
5832
5833  tree startvar = fd->loop.v;
5834  tree endvar = NULL_TREE;
5835
5836  if (gimple_omp_for_combined_p (fd->for_stmt))
5837    {
5838      tree clauses = gimple_omp_for_clauses (inner_stmt);
5839      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5840      gcc_assert (innerc);
5841      startvar = OMP_CLAUSE_DECL (innerc);
5842      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5843				OMP_CLAUSE__LOOPTEMP_);
5844      gcc_assert (innerc);
5845      endvar = OMP_CLAUSE_DECL (innerc);
5846    }
5847  t = fold_convert (TREE_TYPE (startvar), n1);
5848  t = force_gimple_operand_gsi (&gsi, t,
5849				DECL_P (startvar)
5850				&& TREE_ADDRESSABLE (startvar),
5851				NULL_TREE, false, GSI_CONTINUE_LINKING);
5852  gimple *assign_stmt = gimple_build_assign (startvar, t);
5853  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5854
5855  t = fold_convert (TREE_TYPE (startvar), n2);
5856  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5857				false, GSI_CONTINUE_LINKING);
5858  if (endvar)
5859    {
5860      assign_stmt = gimple_build_assign (endvar, e);
5861      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5862      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5863	assign_stmt = gimple_build_assign (fd->loop.v, e);
5864      else
5865	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5866      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5867    }
5868  if (fd->collapse > 1)
5869    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5870
5871  if (!broken_loop)
5872    {
5873      /* The code controlling the sequential loop replaces the
5874	 GIMPLE_OMP_CONTINUE.  */
5875      gsi = gsi_last_nondebug_bb (cont_bb);
5876      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5877      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5878      vmain = gimple_omp_continue_control_use (cont_stmt);
5879      vback = gimple_omp_continue_control_def (cont_stmt);
5880
5881      if (!gimple_omp_for_combined_p (fd->for_stmt))
5882	{
5883	  if (POINTER_TYPE_P (type))
5884	    t = fold_build_pointer_plus (vmain, step);
5885	  else
5886	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
5887	  t = force_gimple_operand_gsi (&gsi, t,
5888					DECL_P (vback)
5889					&& TREE_ADDRESSABLE (vback),
5890					NULL_TREE, true, GSI_SAME_STMT);
5891	  assign_stmt = gimple_build_assign (vback, t);
5892	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5893
5894	  t = build2 (fd->loop.cond_code, boolean_type_node,
5895		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
5896		      ? t : vback, e);
5897	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5898	}
5899
5900      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
5901      gsi_remove (&gsi, true);
5902
5903      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5904	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5905    }
5906
5907  /* Remove the GIMPLE_OMP_FOR statement.  */
5908  gsi = gsi_for_stmt (fd->for_stmt);
5909  gsi_remove (&gsi, true);
5910
5911  /* Remove the GIMPLE_OMP_RETURN statement.  */
5912  gsi = gsi_last_nondebug_bb (exit_bb);
5913  gsi_remove (&gsi, true);
5914
5915  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5916  if (!broken_loop)
5917    remove_edge (BRANCH_EDGE (entry_bb));
5918  else
5919    {
5920      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5921      region->outer->cont = NULL;
5922    }
5923
5924  /* Connect all the blocks.  */
5925  if (!broken_loop)
5926    {
5927      ep = find_edge (cont_bb, body_bb);
5928      if (gimple_omp_for_combined_p (fd->for_stmt))
5929	{
5930	  remove_edge (ep);
5931	  ep = NULL;
5932	}
5933      else if (fd->collapse > 1)
5934	{
5935	  remove_edge (ep);
5936	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5937	}
5938      else
5939	ep->flags = EDGE_TRUE_VALUE;
5940      find_edge (cont_bb, fin_bb)->flags
5941	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5942    }
5943
5944  set_immediate_dominator (CDI_DOMINATORS, body_bb,
5945			   recompute_dominator (CDI_DOMINATORS, body_bb));
5946  if (!broken_loop)
5947    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5948			     recompute_dominator (CDI_DOMINATORS, fin_bb));
5949
5950  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5951    {
5952      class loop *loop = alloc_loop ();
5953      loop->header = body_bb;
5954      if (collapse_bb == NULL)
5955	loop->latch = cont_bb;
5956      add_loop (loop, body_bb->loop_father);
5957    }
5958}
5959
5960/* A subroutine of expand_omp_for.  Generate code for an OpenACC
5961   partitioned loop.  The lowering here is abstracted, in that the
5962   loop parameters are passed through internal functions, which are
5963   further lowered by oacc_device_lower, once we get to the target
5964   compiler.  The loop is of the form:
5965
5966   for (V = B; V LTGT E; V += S) {BODY}
5967
5968   where LTGT is < or >.  We may have a specified chunk size, CHUNK_SIZE
5969   (constant 0 for no chunking), and we will have a GWV partitioning
5970   mask, specifying dimensions over which the loop is to be
5971   partitioned (see note below).  We generate code that looks like
5972   (this ignores tiling):
5973
5974   <entry_bb> [incoming FALL->body, BRANCH->exit]
5975     typedef signedintify (typeof (V)) T;  // underlying signed integral type
5976     T range = E - B;
5977     T chunk_no = 0;
5978     T DIR = LTGT == '<' ? +1 : -1;
5979     T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5980     T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5981
5982   <head_bb> [created by splitting end of entry_bb]
5983     T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5984     T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5985     if (!(offset LTGT bound)) goto bottom_bb;
5986
5987   <body_bb> [incoming]
5988     V = B + offset;
5989     {BODY}
5990
5991   <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5992     offset += step;
5993     if (offset LTGT bound) goto body_bb; [*]
5994
5995   <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5996     chunk_no++;
5997     if (chunk_no < chunk_max) goto head_bb;
5998
5999   <exit_bb> [incoming]
6000     V = B + ((range -/+ 1) / S +/- 1) * S [*]
6001
6002   [*] Needed if V is live at the end of the loop.  */
6003
6004static void
6005expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
6006{
6007  bool is_oacc_kernels_parallelized
6008    = (lookup_attribute ("oacc kernels parallelized",
6009			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
6010  {
6011    bool is_oacc_kernels
6012      = (lookup_attribute ("oacc kernels",
6013			   DECL_ATTRIBUTES (current_function_decl)) != NULL);
6014    if (is_oacc_kernels_parallelized)
6015      gcc_checking_assert (is_oacc_kernels);
6016  }
6017  gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
6018  /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
6019     for SSA specifics, and some are for 'parloops' OpenACC
6020     'kernels'-parallelized specifics.  */
6021
6022  tree v = fd->loop.v;
6023  enum tree_code cond_code = fd->loop.cond_code;
6024  enum tree_code plus_code = PLUS_EXPR;
6025
6026  tree chunk_size = integer_minus_one_node;
6027  tree gwv = integer_zero_node;
6028  tree iter_type = TREE_TYPE (v);
6029  tree diff_type = iter_type;
6030  tree plus_type = iter_type;
6031  struct oacc_collapse *counts = NULL;
6032
6033  gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
6034		       == GF_OMP_FOR_KIND_OACC_LOOP);
6035  gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
6036  gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
6037
6038  if (POINTER_TYPE_P (iter_type))
6039    {
6040      plus_code = POINTER_PLUS_EXPR;
6041      plus_type = sizetype;
6042    }
6043  for (int ix = fd->collapse; ix--;)
6044    {
6045      tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
6046      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
6047	diff_type = diff_type2;
6048    }
6049  if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
6050    diff_type = signed_type_for (diff_type);
6051  if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
6052    diff_type = integer_type_node;
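  /* E.g. (illustrative): for a short or pointer iteration variable
     DIFF_TYPE is widened to plain int; for an unsigned 64-bit variable
     it becomes the corresponding signed 64-bit type.  */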
6053
6054  basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
6055  basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
6056  basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
6057  basic_block bottom_bb = NULL;
6058
6059  /* entry_bb has two successors; the branch edge leads to the exit
6060     block, the fallthrough edge to the body.  */
6061  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
6062	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);
6063
6064  /* If cont_bb is non-NULL, it has 2 successors.  The branch successor
6065     leads to body_bb, or to a block whose only successor is body_bb.
6066     Its fallthrough successor is the final block (the same as the
6067     branch successor of entry_bb).  */
6068  if (cont_bb)
6069    {
6070      basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
6071      basic_block bed = BRANCH_EDGE (cont_bb)->dest;
6072
6073      gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
6074      gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
6075    }
6076  else
6077    gcc_assert (!gimple_in_ssa_p (cfun));
6078
6079  /* The exit block only has entry_bb and cont_bb as predecessors.  */
6080  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
6081
6082  tree chunk_no;
6083  tree chunk_max = NULL_TREE;
6084  tree bound, offset;
6085  tree step = create_tmp_var (diff_type, ".step");
6086  bool up = cond_code == LT_EXPR;
6087  tree dir = build_int_cst (diff_type, up ? +1 : -1);
6088  bool chunking = !gimple_in_ssa_p (cfun);
6089  bool negating;
6090
6091  /* Tiling vars.  */
6092  tree tile_size = NULL_TREE;
6093  tree element_s = NULL_TREE;
6094  tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
6095  basic_block elem_body_bb = NULL;
6096  basic_block elem_cont_bb = NULL;
6097
6098  /* SSA instances.  */
6099  tree offset_incr = NULL_TREE;
6100  tree offset_init = NULL_TREE;
6101
6102  gimple_stmt_iterator gsi;
6103  gassign *ass;
6104  gcall *call;
6105  gimple *stmt;
6106  tree expr;
6107  location_t loc;
6108  edge split, be, fte;
6109
6110  /* Split the end of entry_bb to create head_bb.  */
6111  split = split_block (entry_bb, last_stmt (entry_bb));
6112  basic_block head_bb = split->dest;
6113  entry_bb = split->src;
6114
6115  /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
6116  gsi = gsi_last_nondebug_bb (entry_bb);
6117  gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
6118  loc = gimple_location (for_stmt);
6119
6120  if (gimple_in_ssa_p (cfun))
6121    {
6122      offset_init = gimple_omp_for_index (for_stmt, 0);
6123      gcc_assert (integer_zerop (fd->loop.n1));
6124      /* The SSA parallelizer does gang parallelism.  */
6125      gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
6126    }
6127
6128  if (fd->collapse > 1 || fd->tiling)
6129    {
6130      gcc_assert (!gimple_in_ssa_p (cfun) && up);
6131      counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
6132      tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
6133					      TREE_TYPE (fd->loop.n2), loc);
6134
6135      if (SSA_VAR_P (fd->loop.n2))
6136	{
6137	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
6138					    true, GSI_SAME_STMT);
6139	  ass = gimple_build_assign (fd->loop.n2, total);
6140	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6141	}
6142    }
6143
6144  tree b = fd->loop.n1;
6145  tree e = fd->loop.n2;
6146  tree s = fd->loop.step;
6147
6148  b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
6149  e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
6150
6151  /* Convert the step, avoiding possible unsigned->signed overflow.  */
6152  negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
6153  if (negating)
6154    s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
6155  s = fold_convert (diff_type, s);
6156  if (negating)
6157    s = fold_build1 (NEGATE_EXPR, diff_type, s);
6158  s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
6159
6160  if (!chunking)
6161    chunk_size = integer_zero_node;
6162  expr = fold_convert (diff_type, chunk_size);
6163  chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
6164					 NULL_TREE, true, GSI_SAME_STMT);
6165
6166  if (fd->tiling)
6167    {
6168      /* Determine the tile size and element step,
6169	 modify the outer loop step size.  */
6170      tile_size = create_tmp_var (diff_type, ".tile_size");
6171      expr = build_int_cst (diff_type, 1);
6172      for (int ix = 0; ix < fd->collapse; ix++)
6173	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
6174      expr = force_gimple_operand_gsi (&gsi, expr, true,
6175				       NULL_TREE, true, GSI_SAME_STMT);
6176      ass = gimple_build_assign (tile_size, expr);
6177      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6178
6179      element_s = create_tmp_var (diff_type, ".element_s");
6180      ass = gimple_build_assign (element_s, s);
6181      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6182
6183      expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
6184      s = force_gimple_operand_gsi (&gsi, expr, true,
6185				    NULL_TREE, true, GSI_SAME_STMT);
6186    }
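      /* E.g. (a sketch): with 'tile (2, 3)' TILE_SIZE is 2 * 3 = 6; the
	 element loop keeps the original step S while the outer loop now
	 advances by S * 6 per tile.  */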
6187
6188  /* Determine the range, avoiding possible unsigned->signed overflow.  */
6189  negating = !up && TYPE_UNSIGNED (iter_type);
6190  expr = fold_build2 (MINUS_EXPR, plus_type,
6191		      fold_convert (plus_type, negating ? b : e),
6192		      fold_convert (plus_type, negating ? e : b));
6193  expr = fold_convert (diff_type, expr);
6194  if (negating)
6195    expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
6196  tree range = force_gimple_operand_gsi (&gsi, expr, true,
6197					 NULL_TREE, true, GSI_SAME_STMT);
6198
6199  chunk_no = build_int_cst (diff_type, 0);
6200  if (chunking)
6201    {
6202      gcc_assert (!gimple_in_ssa_p (cfun));
6203
6204      expr = chunk_no;
6205      chunk_max = create_tmp_var (diff_type, ".chunk_max");
6206      chunk_no = create_tmp_var (diff_type, ".chunk_no");
6207
6208      ass = gimple_build_assign (chunk_no, expr);
6209      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6210
6211      call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6212					 build_int_cst (integer_type_node,
6213							IFN_GOACC_LOOP_CHUNKS),
6214					 dir, range, s, chunk_size, gwv);
6215      gimple_call_set_lhs (call, chunk_max);
6216      gimple_set_location (call, loc);
6217      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6218    }
6219  else
6220    chunk_size = chunk_no;
6221
6222  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6223				     build_int_cst (integer_type_node,
6224						    IFN_GOACC_LOOP_STEP),
6225				     dir, range, s, chunk_size, gwv);
6226  gimple_call_set_lhs (call, step);
6227  gimple_set_location (call, loc);
6228  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6229
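  /* The IFN_GOACC_LOOP calls built here (CHUNKS and STEP above, OFFSET
     and BOUND below) deliberately stay abstract; the later device
     lowering (see oacc_xform_loop in omp-offload.c) maps them onto a
     particular gang/worker/vector partitioning.  */
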
6230  /* Remove the GIMPLE_OMP_FOR.  */
6231  gsi_remove (&gsi, true);
6232
6233  /* Fixup edges from head_bb.  */
6234  be = BRANCH_EDGE (head_bb);
6235  fte = FALLTHRU_EDGE (head_bb);
6236  be->flags |= EDGE_FALSE_VALUE;
6237  fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6238
6239  basic_block body_bb = fte->dest;
6240
6241  if (gimple_in_ssa_p (cfun))
6242    {
6243      gsi = gsi_last_nondebug_bb (cont_bb);
6244      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6245
6246      offset = gimple_omp_continue_control_use (cont_stmt);
6247      offset_incr = gimple_omp_continue_control_def (cont_stmt);
6248    }
6249  else
6250    {
6251      offset = create_tmp_var (diff_type, ".offset");
6252      offset_init = offset_incr = offset;
6253    }
6254  bound = create_tmp_var (TREE_TYPE (offset), ".bound");
6255
6256  /* Loop offset & bound go into head_bb.  */
6257  gsi = gsi_start_bb (head_bb);
6258
6259  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6260				     build_int_cst (integer_type_node,
6261						    IFN_GOACC_LOOP_OFFSET),
6262				     dir, range, s,
6263				     chunk_size, gwv, chunk_no);
6264  gimple_call_set_lhs (call, offset_init);
6265  gimple_set_location (call, loc);
6266  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6267
6268  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6269				     build_int_cst (integer_type_node,
6270						    IFN_GOACC_LOOP_BOUND),
6271				     dir, range, s,
6272				     chunk_size, gwv, offset_init);
6273  gimple_call_set_lhs (call, bound);
6274  gimple_set_location (call, loc);
6275  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6276
6277  expr = build2 (cond_code, boolean_type_node, offset_init, bound);
6278  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6279		    GSI_CONTINUE_LINKING);
6280
6281  /* V assignment goes into body_bb.  */
6282  if (!gimple_in_ssa_p (cfun))
6283    {
6284      gsi = gsi_start_bb (body_bb);
6285
6286      expr = build2 (plus_code, iter_type, b,
6287		     fold_convert (plus_type, offset));
6288      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6289				       true, GSI_SAME_STMT);
6290      ass = gimple_build_assign (v, expr);
6291      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6292
6293      if (fd->collapse > 1 || fd->tiling)
6294	expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
6295
6296      if (fd->tiling)
6297	{
6298	  /* Determine the range of the element loop -- usually simply
6299	     the tile_size, but could be smaller if the final
6300	     iteration of the outer loop is a partial tile.  */
6301	  tree e_range = create_tmp_var (diff_type, ".e_range");
6302
6303	  expr = build2 (MIN_EXPR, diff_type,
6304			 build2 (MINUS_EXPR, diff_type, bound, offset),
6305			 build2 (MULT_EXPR, diff_type, tile_size,
6306				 element_s));
6307	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6308					   true, GSI_SAME_STMT);
6309	  ass = gimple_build_assign (e_range, expr);
6310	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6311
	  /* Determine bound, offset & step of the inner loop.  */
6313	  e_bound = create_tmp_var (diff_type, ".e_bound");
6314	  e_offset = create_tmp_var (diff_type, ".e_offset");
6315	  e_step = create_tmp_var (diff_type, ".e_step");
6316
6317	  /* Mark these as element loops.  */
6318	  tree t, e_gwv = integer_minus_one_node;
6319	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
6320
6321	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
6322	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6323					     element_s, chunk, e_gwv, chunk);
6324	  gimple_call_set_lhs (call, e_offset);
6325	  gimple_set_location (call, loc);
6326	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6327
6328	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
6329	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6330					     element_s, chunk, e_gwv, e_offset);
6331	  gimple_call_set_lhs (call, e_bound);
6332	  gimple_set_location (call, loc);
6333	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6334
6335	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
6336	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
6337					     element_s, chunk, e_gwv);
6338	  gimple_call_set_lhs (call, e_step);
6339	  gimple_set_location (call, loc);
6340	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6341
6342	  /* Add test and split block.  */
6343	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6344	  stmt = gimple_build_cond_empty (expr);
6345	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6346	  split = split_block (body_bb, stmt);
6347	  elem_body_bb = split->dest;
6348	  if (cont_bb == body_bb)
6349	    cont_bb = elem_body_bb;
6350	  body_bb = split->src;
6351
6352	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6353
6354	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
6355	  if (cont_bb == NULL)
6356	    {
6357	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
6358	      e->probability = profile_probability::even ();
6359	      split->probability = profile_probability::even ();
6360	    }
6361
6362	  /* Initialize the user's loop vars.  */
6363	  gsi = gsi_start_bb (elem_body_bb);
6364	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
6365				     diff_type);
6366	}
6367    }
6368
  /* Loop increment goes into cont_bb.  If this is not a loop, we
     will have spawned threads as if it were, and each one will
     execute one iteration.  The specification is not explicit about
     whether such constructs are ill-formed or not, and they can
     occur, especially when noreturn routines are involved.  */
6374  if (cont_bb)
6375    {
6376      gsi = gsi_last_nondebug_bb (cont_bb);
6377      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6378      loc = gimple_location (cont_stmt);
6379
6380      if (fd->tiling)
6381	{
6382	  /* Insert element loop increment and test.  */
6383	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
6384	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6385					   true, GSI_SAME_STMT);
6386	  ass = gimple_build_assign (e_offset, expr);
6387	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6388	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6389
6390	  stmt = gimple_build_cond_empty (expr);
6391	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6392	  split = split_block (cont_bb, stmt);
6393	  elem_cont_bb = split->src;
6394	  cont_bb = split->dest;
6395
6396	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6397	  split->probability = profile_probability::unlikely ().guessed ();
6398	  edge latch_edge
6399	    = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
6400	  latch_edge->probability = profile_probability::likely ().guessed ();
6401
6402	  edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
6403	  skip_edge->probability = profile_probability::unlikely ().guessed ();
6404	  edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
6405	  loop_entry_edge->probability
6406	    = profile_probability::likely ().guessed ();
6407
6408	  gsi = gsi_for_stmt (cont_stmt);
6409	}
6410
6411      /* Increment offset.  */
6412      if (gimple_in_ssa_p (cfun))
6413	expr = build2 (plus_code, iter_type, offset,
6414		       fold_convert (plus_type, step));
6415      else
6416	expr = build2 (PLUS_EXPR, diff_type, offset, step);
6417      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6418				       true, GSI_SAME_STMT);
6419      ass = gimple_build_assign (offset_incr, expr);
6420      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6421      expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
6422      gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
6423
      /* Remove the GIMPLE_OMP_CONTINUE.  */
6425      gsi_remove (&gsi, true);
6426
6427      /* Fixup edges from cont_bb.  */
6428      be = BRANCH_EDGE (cont_bb);
6429      fte = FALLTHRU_EDGE (cont_bb);
6430      be->flags |= EDGE_TRUE_VALUE;
6431      fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6432
6433      if (chunking)
6434	{
6435	  /* Split the beginning of exit_bb to make bottom_bb.  We
6436	     need to insert a nop at the start, because splitting is
6437	     after a stmt, not before.  */
6438	  gsi = gsi_start_bb (exit_bb);
6439	  stmt = gimple_build_nop ();
6440	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6441	  split = split_block (exit_bb, stmt);
6442	  bottom_bb = split->src;
6443	  exit_bb = split->dest;
6444	  gsi = gsi_last_bb (bottom_bb);
6445
	  /* Chunk increment and test go into bottom_bb.  */
6447	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
6448			 build_int_cst (diff_type, 1));
6449	  ass = gimple_build_assign (chunk_no, expr);
6450	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
6451
6452	  /* Chunk test at end of bottom_bb.  */
6453	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
6454	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6455			    GSI_CONTINUE_LINKING);
6456
6457	  /* Fixup edges from bottom_bb.  */
6458	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6459	  split->probability = profile_probability::unlikely ().guessed ();
6460	  edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
6461	  latch_edge->probability = profile_probability::likely ().guessed ();
6462	}
6463    }
6464
6465  gsi = gsi_last_nondebug_bb (exit_bb);
6466  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6467  loc = gimple_location (gsi_stmt (gsi));
6468
6469  if (!gimple_in_ssa_p (cfun))
6470    {
6471      /* Insert the final value of V, in case it is live.  This is the
6472	 value for the only thread that survives past the join.  */
6473      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
6474      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
6475      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
6476      expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
6477      expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
6478      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6479				       true, GSI_SAME_STMT);
6480      ass = gimple_build_assign (v, expr);
6481      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6482    }
6483
6484  /* Remove the OMP_RETURN.  */
6485  gsi_remove (&gsi, true);
6486
6487  if (cont_bb)
6488    {
6489      /* We now have one, two or three nested loops.  Update the loop
6490	 structures.  */
6491      class loop *parent = entry_bb->loop_father;
6492      class loop *body = body_bb->loop_father;
6493
6494      if (chunking)
6495	{
6496	  class loop *chunk_loop = alloc_loop ();
6497	  chunk_loop->header = head_bb;
6498	  chunk_loop->latch = bottom_bb;
6499	  add_loop (chunk_loop, parent);
6500	  parent = chunk_loop;
6501	}
6502      else if (parent != body)
6503	{
6504	  gcc_assert (body->header == body_bb);
6505	  gcc_assert (body->latch == cont_bb
6506		      || single_pred (body->latch) == cont_bb);
6507	  parent = NULL;
6508	}
6509
6510      if (parent)
6511	{
6512	  class loop *body_loop = alloc_loop ();
6513	  body_loop->header = body_bb;
6514	  body_loop->latch = cont_bb;
6515	  add_loop (body_loop, parent);
6516
6517	  if (fd->tiling)
6518	    {
6519	      /* Insert tiling's element loop.  */
6520	      class loop *inner_loop = alloc_loop ();
6521	      inner_loop->header = elem_body_bb;
6522	      inner_loop->latch = elem_cont_bb;
6523	      add_loop (inner_loop, body_loop);
6524	    }
6525	}
6526    }
6527}
6528
6529/* Expand the OMP loop defined by REGION.  */
6530
6531static void
6532expand_omp_for (struct omp_region *region, gimple *inner_stmt)
6533{
6534  struct omp_for_data fd;
6535  struct omp_for_data_loop *loops;
6536
6537  loops
6538    = (struct omp_for_data_loop *)
6539      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
6540	      * sizeof (struct omp_for_data_loop));
6541  omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
6542			&fd, loops);
6543  region->sched_kind = fd.sched_kind;
6544  region->sched_modifiers = fd.sched_modifiers;
6545  region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
6546
6547  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
6548  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6549  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6550  if (region->cont)
6551    {
6552      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
6553      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6554      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6555    }
6556  else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
6560    loops_state_set (LOOPS_NEED_FIXUP);
6561
6562  if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
6563    expand_omp_simd (region, &fd);
6564  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
6565    {
6566      gcc_assert (!inner_stmt);
6567      expand_oacc_for (region, &fd);
6568    }
6569  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
6570    {
6571      if (gimple_omp_for_combined_into_p (fd.for_stmt))
6572	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
6573      else
6574	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
6575    }
6576  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
6577	   && !fd.have_ordered)
6578    {
6579      if (fd.chunk_size == NULL)
6580	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
6581      else
6582	expand_omp_for_static_chunk (region, &fd, inner_stmt);
6583    }
6584  else
6585    {
6586      int fn_index, start_ix, next_ix;
6587      unsigned HOST_WIDE_INT sched = 0;
6588      tree sched_arg = NULL_TREE;
6589
6590      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
6591		  == GF_OMP_FOR_KIND_FOR);
6592      if (fd.chunk_size == NULL
6593	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
6594	fd.chunk_size = integer_zero_node;
6595      switch (fd.sched_kind)
6596	{
6597	case OMP_CLAUSE_SCHEDULE_RUNTIME:
6598	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
6599	      && fd.lastprivate_conditional == 0)
6600	    {
6601	      gcc_assert (!fd.have_ordered);
6602	      fn_index = 6;
6603	      sched = 4;
6604	    }
6605	  else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6606		   && !fd.have_ordered
6607		   && fd.lastprivate_conditional == 0)
6608	    fn_index = 7;
6609	  else
6610	    {
6611	      fn_index = 3;
6612	      sched = (HOST_WIDE_INT_1U << 31);
6613	    }
6614	  break;
6615	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6616	case OMP_CLAUSE_SCHEDULE_GUIDED:
6617	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6618	      && !fd.have_ordered
6619	      && fd.lastprivate_conditional == 0)
6620	    {
6621	      fn_index = 3 + fd.sched_kind;
6622	      sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6623	      break;
6624	    }
6625	  fn_index = fd.sched_kind;
6626	  sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6627	  sched += (HOST_WIDE_INT_1U << 31);
6628	  break;
6629	case OMP_CLAUSE_SCHEDULE_STATIC:
6630	  gcc_assert (fd.have_ordered);
6631	  fn_index = 0;
6632	  sched = (HOST_WIDE_INT_1U << 31) + 1;
6633	  break;
6634	default:
6635	  gcc_unreachable ();
6636	}
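      /* A worked example (a sketch): plain schedule(dynamic) carries no
	 monotonic modifier and no ordered clause, so the case above
	 yields fn_index = 3 + OMP_CLAUSE_SCHEDULE_DYNAMIC, selecting
	 GOMP_loop_nonmonotonic_dynamic_start and
	 GOMP_loop_nonmonotonic_dynamic_next below.  */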
6637      if (!fd.ordered)
6638	fn_index += fd.have_ordered * 8;
6639      if (fd.ordered)
6640	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6641      else
6642	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6643      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6644      if (fd.have_reductemp || fd.have_pointer_condtemp)
6645	{
6646	  if (fd.ordered)
6647	    start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6648	  else if (fd.have_ordered)
6649	    start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6650	  else
6651	    start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6652	  sched_arg = build_int_cstu (long_integer_type_node, sched);
6653	  if (!fd.chunk_size)
6654	    fd.chunk_size = integer_zero_node;
6655	}
6656      if (fd.iter_type == long_long_unsigned_type_node)
6657	{
6658	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6659			- (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6660	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6661		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6662	}
6663      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6664			      (enum built_in_function) next_ix, sched_arg,
6665			      inner_stmt);
6666    }
6667
6668  if (gimple_in_ssa_p (cfun))
6669    update_ssa (TODO_update_ssa_only_virtuals);
6670}
6671
6672/* Expand code for an OpenMP sections directive.  In pseudo code, we generate
6673
6674	v = GOMP_sections_start (n);
6675    L0:
6676	switch (v)
6677	  {
6678	  case 0:
6679	    goto L2;
6680	  case 1:
6681	    section 1;
6682	    goto L1;
6683	  case 2:
6684	    ...
6685	  case n:
6686	    ...
6687	  default:
6688	    abort ();
6689	  }
6690    L1:
6691	v = GOMP_sections_next ();
6692	goto L0;
6693    L2:
6694	reduction;
6695
    If this is a combined parallel sections region, replace the call to
    GOMP_sections_start with a call to GOMP_sections_next.  */
6698
6699static void
6700expand_omp_sections (struct omp_region *region)
6701{
6702  tree t, u, vin = NULL, vmain, vnext, l2;
6703  unsigned len;
6704  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6705  gimple_stmt_iterator si, switch_si;
6706  gomp_sections *sections_stmt;
6707  gimple *stmt;
6708  gomp_continue *cont;
6709  edge_iterator ei;
6710  edge e;
6711  struct omp_region *inner;
6712  unsigned i, casei;
6713  bool exit_reachable = region->cont != NULL;
6714
6715  gcc_assert (region->exit != NULL);
6716  entry_bb = region->entry;
6717  l0_bb = single_succ (entry_bb);
6718  l1_bb = region->cont;
6719  l2_bb = region->exit;
6720  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6721    l2 = gimple_block_label (l2_bb);
6722  else
6723    {
6724      /* This can happen if there are reductions.  */
6725      len = EDGE_COUNT (l0_bb->succs);
6726      gcc_assert (len > 0);
6727      e = EDGE_SUCC (l0_bb, len - 1);
6728      si = gsi_last_nondebug_bb (e->dest);
6729      l2 = NULL_TREE;
6730      if (gsi_end_p (si)
6731	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6732	l2 = gimple_block_label (e->dest);
6733      else
6734	FOR_EACH_EDGE (e, ei, l0_bb->succs)
6735	  {
6736	    si = gsi_last_nondebug_bb (e->dest);
6737	    if (gsi_end_p (si)
6738		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6739	      {
6740		l2 = gimple_block_label (e->dest);
6741		break;
6742	      }
6743	  }
6744    }
6745  if (exit_reachable)
6746    default_bb = create_empty_bb (l1_bb->prev_bb);
6747  else
6748    default_bb = create_empty_bb (l0_bb);
6749
  /* We will build a switch() with enough cases for all the
     GIMPLE_OMP_SECTION regions, a '0' case taken when there is no
     more work, and a default case to abort if something goes wrong.  */
6753  len = EDGE_COUNT (l0_bb->succs);
6754
6755  /* Use vec::quick_push on label_vec throughout, since we know the size
6756     in advance.  */
6757  auto_vec<tree> label_vec (len);
6758
6759  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6760     GIMPLE_OMP_SECTIONS statement.  */
6761  si = gsi_last_nondebug_bb (entry_bb);
6762  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6763  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6764  vin = gimple_omp_sections_control (sections_stmt);
6765  tree clauses = gimple_omp_sections_clauses (sections_stmt);
6766  tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6767  tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
6768  tree cond_var = NULL_TREE;
6769  if (reductmp || condtmp)
6770    {
6771      tree reductions = null_pointer_node, mem = null_pointer_node;
6772      tree memv = NULL_TREE, condtemp = NULL_TREE;
6773      gimple_stmt_iterator gsi = gsi_none ();
6774      gimple *g = NULL;
6775      if (reductmp)
6776	{
6777	  reductions = OMP_CLAUSE_DECL (reductmp);
6778	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6779	  g = SSA_NAME_DEF_STMT (reductions);
6780	  reductions = gimple_assign_rhs1 (g);
6781	  OMP_CLAUSE_DECL (reductmp) = reductions;
6782	  gsi = gsi_for_stmt (g);
6783	}
6784      else
6785	gsi = si;
6786      if (condtmp)
6787	{
6788	  condtemp = OMP_CLAUSE_DECL (condtmp);
6789	  tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
6790				    OMP_CLAUSE__CONDTEMP_);
6791	  cond_var = OMP_CLAUSE_DECL (c);
6792	  tree type = TREE_TYPE (condtemp);
6793	  memv = create_tmp_var (type);
6794	  TREE_ADDRESSABLE (memv) = 1;
6795	  unsigned cnt = 0;
6796	  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6797	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
6798		&& OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
6799	      ++cnt;
6800	  unsigned HOST_WIDE_INT sz
6801	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
6802	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
6803				   false);
6804	  mem = build_fold_addr_expr (memv);
6805	}
6806      t = build_int_cst (unsigned_type_node, len - 1);
6807      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6808      stmt = gimple_build_call (u, 3, t, reductions, mem);
6809      gimple_call_set_lhs (stmt, vin);
6810      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6811      if (condtmp)
6812	{
6813	  expand_omp_build_assign (&gsi, condtemp, memv, false);
6814	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6815			   vin, build_one_cst (TREE_TYPE (cond_var)));
6816	  expand_omp_build_assign (&gsi, cond_var, t, false);
6817	}
6818      if (reductmp)
6819	{
6820	  gsi_remove (&gsi, true);
6821	  release_ssa_name (gimple_assign_lhs (g));
6822	}
6823    }
6824  else if (!is_combined_parallel (region))
6825    {
6826      /* If we are not inside a combined parallel+sections region,
6827	 call GOMP_sections_start.  */
6828      t = build_int_cst (unsigned_type_node, len - 1);
6829      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6830      stmt = gimple_build_call (u, 1, t);
6831    }
6832  else
6833    {
6834      /* Otherwise, call GOMP_sections_next.  */
6835      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6836      stmt = gimple_build_call (u, 0);
6837    }
6838  if (!reductmp && !condtmp)
6839    {
6840      gimple_call_set_lhs (stmt, vin);
6841      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6842    }
6843  gsi_remove (&si, true);
6844
6845  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6846     L0_BB.  */
6847  switch_si = gsi_last_nondebug_bb (l0_bb);
6848  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6849  if (exit_reachable)
6850    {
6851      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6852      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6853      vmain = gimple_omp_continue_control_use (cont);
6854      vnext = gimple_omp_continue_control_def (cont);
6855    }
6856  else
6857    {
6858      vmain = vin;
6859      vnext = NULL_TREE;
6860    }
6861
6862  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6863  label_vec.quick_push (t);
6864  i = 1;
6865
6866  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
6867  for (inner = region->inner, casei = 1;
6868       inner;
6869       inner = inner->next, i++, casei++)
6870    {
6871      basic_block s_entry_bb, s_exit_bb;
6872
6873      /* Skip optional reduction region.  */
6874      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6875	{
6876	  --i;
6877	  --casei;
6878	  continue;
6879	}
6880
6881      s_entry_bb = inner->entry;
6882      s_exit_bb = inner->exit;
6883
6884      t = gimple_block_label (s_entry_bb);
6885      u = build_int_cst (unsigned_type_node, casei);
6886      u = build_case_label (u, NULL, t);
6887      label_vec.quick_push (u);
6888
6889      si = gsi_last_nondebug_bb (s_entry_bb);
6890      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6891      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6892      gsi_remove (&si, true);
6893      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6894
6895      if (s_exit_bb == NULL)
6896	continue;
6897
6898      si = gsi_last_nondebug_bb (s_exit_bb);
6899      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6900      gsi_remove (&si, true);
6901
6902      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6903    }
6904
6905  /* Error handling code goes in DEFAULT_BB.  */
6906  t = gimple_block_label (default_bb);
6907  u = build_case_label (NULL, NULL, t);
6908  make_edge (l0_bb, default_bb, 0);
6909  add_bb_to_loop (default_bb, current_loops->tree_root);
6910
6911  stmt = gimple_build_switch (vmain, u, label_vec);
6912  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6913  gsi_remove (&switch_si, true);
6914
6915  si = gsi_start_bb (default_bb);
6916  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6917  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6918
6919  if (exit_reachable)
6920    {
6921      tree bfn_decl;
6922
6923      /* Code to get the next section goes in L1_BB.  */
6924      si = gsi_last_nondebug_bb (l1_bb);
6925      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6926
6927      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6928      stmt = gimple_build_call (bfn_decl, 0);
6929      gimple_call_set_lhs (stmt, vnext);
6930      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6931      if (cond_var)
6932	{
6933	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6934			   vnext, build_one_cst (TREE_TYPE (cond_var)));
6935	  expand_omp_build_assign (&si, cond_var, t, false);
6936	}
6937      gsi_remove (&si, true);
6938
6939      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6940    }
6941
6942  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
6943  si = gsi_last_nondebug_bb (l2_bb);
6944  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6945    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6946  else if (gimple_omp_return_lhs (gsi_stmt (si)))
6947    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6948  else
6949    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6950  stmt = gimple_build_call (t, 0);
6951  if (gimple_omp_return_lhs (gsi_stmt (si)))
6952    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6953  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6954  gsi_remove (&si, true);
6955
6956  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6957}
6958
/* Expand code for an OpenMP single directive.  We've already expanded
   much of the code; here we simply place the GOMP_barrier call.  */
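
/* For instance (a sketch): after lowering,

       #pragma omp single
       body;

   leaves just the bracketing GIMPLE_OMP_SINGLE / GIMPLE_OMP_RETURN
   markers to remove here, plus the barrier call (GOMP_barrier, or
   GOMP_barrier_cancel in cancellable regions) unless a nowait clause
   discarded the implicit barrier.  */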
6961
6962static void
6963expand_omp_single (struct omp_region *region)
6964{
6965  basic_block entry_bb, exit_bb;
6966  gimple_stmt_iterator si;
6967
6968  entry_bb = region->entry;
6969  exit_bb = region->exit;
6970
6971  si = gsi_last_nondebug_bb (entry_bb);
6972  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6973  gsi_remove (&si, true);
6974  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6975
6976  si = gsi_last_nondebug_bb (exit_bb);
6977  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6978    {
6979      tree t = gimple_omp_return_lhs (gsi_stmt (si));
6980      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6981    }
6982  gsi_remove (&si, true);
6983  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6984}
6985
/* Generic expansion for OpenMP synchronization directives: master,
   taskgroup, ordered and critical.  All we need to do here is remove
   the entry and exit markers for REGION.  (Host teams regions are
   dispatched to expand_omp_taskreg instead.)  */
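
/* For example (a sketch): a lowered

       #pragma omp critical

   region already carries its GOMP_critical_start/GOMP_critical_end
   calls from omp-low, so expansion only has to delete the
   GIMPLE_OMP_CRITICAL and GIMPLE_OMP_RETURN markers around it.  */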
6989
6990static void
6991expand_omp_synch (struct omp_region *region)
6992{
6993  basic_block entry_bb, exit_bb;
6994  gimple_stmt_iterator si;
6995
6996  entry_bb = region->entry;
6997  exit_bb = region->exit;
6998
6999  si = gsi_last_nondebug_bb (entry_bb);
7000  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
7001	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
7002	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
7003	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
7004	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
7005	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
7006  if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
7007      && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
7008    {
7009      expand_omp_taskreg (region);
7010      return;
7011    }
7012  gsi_remove (&si, true);
7013  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7014
7015  if (exit_bb)
7016    {
7017      si = gsi_last_nondebug_bb (exit_bb);
7018      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
7019      gsi_remove (&si, true);
7020      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
7021    }
7022}
7023
/* Translate enum omp_memory_order to enum memmodel.  The two enums
   use different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED is 0.  */
7027
7028static enum memmodel
7029omp_memory_order_to_memmodel (enum omp_memory_order mo)
7030{
7031  switch (mo)
7032    {
7033    case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
7034    case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
7035    case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
7036    case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
7037    case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
7038    default: gcc_unreachable ();
7039    }
7040}
7041
7042/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
7043   operation as a normal volatile load.  */
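
/* For instance (a sketch, assuming a 4-byte type so INDEX == 2):

       #pragma omp atomic read
       v = *addr;

   becomes a call to the sized atomic load builtin,

       v = __atomic_load_4 (addr, mo);

   with MO the memory model requested by the directive.  */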
7044
7045static bool
7046expand_omp_atomic_load (basic_block load_bb, tree addr,
7047			tree loaded_val, int index)
7048{
7049  enum built_in_function tmpbase;
7050  gimple_stmt_iterator gsi;
7051  basic_block store_bb;
7052  location_t loc;
7053  gimple *stmt;
7054  tree decl, call, type, itype;
7055
7056  gsi = gsi_last_nondebug_bb (load_bb);
7057  stmt = gsi_stmt (gsi);
7058  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7059  loc = gimple_location (stmt);
7060
7061  /* ??? If the target does not implement atomic_load_optab[mode], and mode
7062     is smaller than word size, then expand_atomic_load assumes that the load
7063     is atomic.  We could avoid the builtin entirely in this case.  */
7064
7065  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7066  decl = builtin_decl_explicit (tmpbase);
7067  if (decl == NULL_TREE)
7068    return false;
7069
7070  type = TREE_TYPE (loaded_val);
7071  itype = TREE_TYPE (TREE_TYPE (decl));
7072
7073  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7074  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7075  call = build_call_expr_loc (loc, decl, 2, addr, mo);
7076  if (!useless_type_conversion_p (type, itype))
7077    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7078  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7079
7080  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7081  gsi_remove (&gsi, true);
7082
7083  store_bb = single_succ (load_bb);
7084  gsi = gsi_last_nondebug_bb (store_bb);
7085  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7086  gsi_remove (&gsi, true);
7087
7088  if (gimple_in_ssa_p (cfun))
7089    update_ssa (TODO_update_ssa_no_phi);
7090
7091  return true;
7092}
7093
7094/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
7095   operation as a normal volatile store.  */
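
/* For instance (a sketch, again assuming INDEX == 2): a plain

       #pragma omp atomic write
       *addr = x;

   maps onto __atomic_store_4 (addr, x, mo), while a capture of the
   old value turns it into __atomic_exchange_4 instead.  */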
7096
7097static bool
7098expand_omp_atomic_store (basic_block load_bb, tree addr,
7099			 tree loaded_val, tree stored_val, int index)
7100{
7101  enum built_in_function tmpbase;
7102  gimple_stmt_iterator gsi;
7103  basic_block store_bb = single_succ (load_bb);
7104  location_t loc;
7105  gimple *stmt;
7106  tree decl, call, type, itype;
7107  machine_mode imode;
7108  bool exchange;
7109
7110  gsi = gsi_last_nondebug_bb (load_bb);
7111  stmt = gsi_stmt (gsi);
7112  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7113
7114  /* If the load value is needed, then this isn't a store but an exchange.  */
7115  exchange = gimple_omp_atomic_need_value_p (stmt);
7116
7117  gsi = gsi_last_nondebug_bb (store_bb);
7118  stmt = gsi_stmt (gsi);
7119  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
7120  loc = gimple_location (stmt);
7121
7122  /* ??? If the target does not implement atomic_store_optab[mode], and mode
7123     is smaller than word size, then expand_atomic_store assumes that the store
7124     is atomic.  We could avoid the builtin entirely in this case.  */
7125
7126  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
7127  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
7128  decl = builtin_decl_explicit (tmpbase);
7129  if (decl == NULL_TREE)
7130    return false;
7131
7132  type = TREE_TYPE (stored_val);
7133
7134  /* Dig out the type of the function's second argument.  */
7135  itype = TREE_TYPE (decl);
7136  itype = TYPE_ARG_TYPES (itype);
7137  itype = TREE_CHAIN (itype);
7138  itype = TREE_VALUE (itype);
7139  imode = TYPE_MODE (itype);
7140
7141  if (exchange && !can_atomic_exchange_p (imode, true))
7142    return false;
7143
7144  if (!useless_type_conversion_p (itype, type))
7145    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
7146  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7147  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7148  call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
7149  if (exchange)
7150    {
7151      if (!useless_type_conversion_p (type, itype))
7152	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7153      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7154    }
7155
7156  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7157  gsi_remove (&gsi, true);
7158
7159  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
7160  gsi = gsi_last_nondebug_bb (load_bb);
7161  gsi_remove (&gsi, true);
7162
7163  if (gimple_in_ssa_p (cfun))
7164    update_ssa (TODO_update_ssa_no_phi);
7165
7166  return true;
7167}
7168
7169/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
7170   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
7171   size of the data type, and thus usable to find the index of the builtin
7172   decl.  Returns false if the expression is not of the proper form.  */
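
/* For instance (a sketch, assuming a 4-byte int so INDEX == 2):

       #pragma omp atomic
       *addr += x;

   matches the PLUS_EXPR case below and becomes

       __atomic_fetch_add_4 (addr, x, mo);

   (or the __atomic_add_fetch_4 form when the new value is needed),
   with MO the memory model requested by the directive.  */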
7173
7174static bool
7175expand_omp_atomic_fetch_op (basic_block load_bb,
7176			    tree addr, tree loaded_val,
7177			    tree stored_val, int index)
7178{
7179  enum built_in_function oldbase, newbase, tmpbase;
7180  tree decl, itype, call;
7181  tree lhs, rhs;
7182  basic_block store_bb = single_succ (load_bb);
7183  gimple_stmt_iterator gsi;
7184  gimple *stmt;
7185  location_t loc;
7186  enum tree_code code;
7187  bool need_old, need_new;
7188  machine_mode imode;
7189
7190  /* We expect to find the following sequences:
7191
7192   load_bb:
7193       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
7194
7195   store_bb:
7196       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_ATOMIC_STORE (val)
7198
7199  ???FIXME: Allow a more flexible sequence.
7200  Perhaps use data flow to pick the statements.
7201
7202  */
7203
7204  gsi = gsi_after_labels (store_bb);
7205  stmt = gsi_stmt (gsi);
7206  if (is_gimple_debug (stmt))
7207    {
7208      gsi_next_nondebug (&gsi);
7209      if (gsi_end_p (gsi))
7210	return false;
7211      stmt = gsi_stmt (gsi);
7212    }
7213  loc = gimple_location (stmt);
7214  if (!is_gimple_assign (stmt))
7215    return false;
7216  gsi_next_nondebug (&gsi);
7217  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
7218    return false;
7219  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
7220  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
7221  enum omp_memory_order omo
7222    = gimple_omp_atomic_memory_order (last_stmt (load_bb));
7223  enum memmodel mo = omp_memory_order_to_memmodel (omo);
7224  gcc_checking_assert (!need_old || !need_new);
7225
7226  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
7227    return false;
7228
7229  /* Check for one of the supported fetch-op operations.  */
7230  code = gimple_assign_rhs_code (stmt);
7231  switch (code)
7232    {
7233    case PLUS_EXPR:
7234    case POINTER_PLUS_EXPR:
7235      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
7236      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
7237      break;
7238    case MINUS_EXPR:
7239      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
7240      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
7241      break;
7242    case BIT_AND_EXPR:
7243      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
7244      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
7245      break;
7246    case BIT_IOR_EXPR:
7247      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
7248      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
7249      break;
7250    case BIT_XOR_EXPR:
7251      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
7252      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
7253      break;
7254    default:
7255      return false;
7256    }
7257
7258  /* Make sure the expression is of the proper form.  */
7259  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
7260    rhs = gimple_assign_rhs2 (stmt);
7261  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
7262	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
7263    rhs = gimple_assign_rhs1 (stmt);
7264  else
7265    return false;
7266
7267  tmpbase = ((enum built_in_function)
7268	     ((need_new ? newbase : oldbase) + index + 1));
7269  decl = builtin_decl_explicit (tmpbase);
7270  if (decl == NULL_TREE)
7271    return false;
7272  itype = TREE_TYPE (TREE_TYPE (decl));
7273  imode = TYPE_MODE (itype);
7274
  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implement compare-and-swap.
     Let optabs.c take care of expanding any compare-and-swap loop.  */
7279  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
7280    return false;
7281
7282  gsi = gsi_last_nondebug_bb (load_bb);
7283  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
7284
  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically, so the
     memory model requested by the directive (RELAXED unless specified
     otherwise) is sufficient.  */
7288  call = build_call_expr_loc (loc, decl, 3, addr,
7289			      fold_convert_loc (loc, itype, rhs),
7290			      build_int_cst (NULL, mo));
7291
7292  if (need_old || need_new)
7293    {
7294      lhs = need_old ? loaded_val : stored_val;
7295      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
7296      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
7297    }
7298  else
7299    call = fold_convert_loc (loc, void_type_node, call);
7300  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7301  gsi_remove (&gsi, true);
7302
7303  gsi = gsi_last_nondebug_bb (store_bb);
7304  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7305  gsi_remove (&gsi, true);
7306  gsi = gsi_last_nondebug_bb (store_bb);
7307  stmt = gsi_stmt (gsi);
7308  gsi_remove (&gsi, true);
7309
7310  if (gimple_in_ssa_p (cfun))
7311    {
7312      release_defs (stmt);
7313      update_ssa (TODO_update_ssa_no_phi);
7314    }
7315
7316  return true;
7317}
7318
7319/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
7320
7321      oldval = *addr;
7322      repeat:
7323	newval = rhs;	 // with oldval replacing *addr in rhs
7324	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
7325	if (oldval != newval)
7326	  goto repeat;
7327
7328   INDEX is log2 of the size of the data type, and thus usable to find the
7329   index of the builtin decl.  */
7330
7331static bool
7332expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
7333			    tree addr, tree loaded_val, tree stored_val,
7334			    int index)
7335{
7336  tree loadedi, storedi, initial, new_storedi, old_vali;
7337  tree type, itype, cmpxchg, iaddr, atype;
7338  gimple_stmt_iterator si;
7339  basic_block loop_header = single_succ (load_bb);
7340  gimple *phi, *stmt;
7341  edge e;
7342  enum built_in_function fncode;
7343
7344  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
7345     order to use the RELAXED memory model effectively.  */
7346  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
7347				    + index + 1);
7348  cmpxchg = builtin_decl_explicit (fncode);
7349  if (cmpxchg == NULL_TREE)
7350    return false;
7351  type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7352  atype = type;
7353  itype = TREE_TYPE (TREE_TYPE (cmpxchg));
7354
7355  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
7356      || !can_atomic_load_p (TYPE_MODE (itype)))
7357    return false;
7358
7359  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
7360  si = gsi_last_nondebug_bb (load_bb);
7361  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7362
7363  /* For floating-point values, we'll need to view-convert them to integers
7364     so that we can perform the atomic compare and swap.  Simplify the
7365     following code by always setting up the "i"ntegral variables.  */
7366  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
7367    {
7368      tree iaddr_val;
7369
7370      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
7371							   true));
7372      atype = itype;
7373      iaddr_val
7374	= force_gimple_operand_gsi (&si,
7375				    fold_convert (TREE_TYPE (iaddr), addr),
7376				    false, NULL_TREE, true, GSI_SAME_STMT);
7377      stmt = gimple_build_assign (iaddr, iaddr_val);
7378      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7379      loadedi = create_tmp_var (itype);
7380      if (gimple_in_ssa_p (cfun))
7381	loadedi = make_ssa_name (loadedi);
7382    }
7383  else
7384    {
7385      iaddr = addr;
7386      loadedi = loaded_val;
7387    }
7388
7389  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7390  tree loaddecl = builtin_decl_explicit (fncode);
7391  if (loaddecl)
7392    initial
7393      = fold_convert (atype,
7394		      build_call_expr (loaddecl, 2, iaddr,
7395				       build_int_cst (NULL_TREE,
7396						      MEMMODEL_RELAXED)));
7397  else
7398    {
7399      tree off
7400	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
7401						      true), 0);
7402      initial = build2 (MEM_REF, atype, iaddr, off);
7403    }
7404
7405  initial
7406    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
7407				GSI_SAME_STMT);
7408
7409  /* Move the value to the LOADEDI temporary.  */
7410  if (gimple_in_ssa_p (cfun))
7411    {
7412      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
7413      phi = create_phi_node (loadedi, loop_header);
7414      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
7415	       initial);
7416    }
7417  else
7418    gsi_insert_before (&si,
7419		       gimple_build_assign (loadedi, initial),
7420		       GSI_SAME_STMT);
7421  if (loadedi != loaded_val)
7422    {
7423      gimple_stmt_iterator gsi2;
7424      tree x;
7425
7426      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
7427      gsi2 = gsi_start_bb (loop_header);
7428      if (gimple_in_ssa_p (cfun))
7429	{
7430	  gassign *stmt;
7431	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7432					true, GSI_SAME_STMT);
7433	  stmt = gimple_build_assign (loaded_val, x);
7434	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
7435	}
7436      else
7437	{
7438	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
7439	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7440				    true, GSI_SAME_STMT);
7441	}
7442    }
7443  gsi_remove (&si, true);
7444
7445  si = gsi_last_nondebug_bb (store_bb);
7446  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7447
7448  if (iaddr == addr)
7449    storedi = stored_val;
7450  else
7451    storedi
7452      = force_gimple_operand_gsi (&si,
7453				  build1 (VIEW_CONVERT_EXPR, itype,
7454					  stored_val), true, NULL_TREE, true,
7455				  GSI_SAME_STMT);
7456
7457  /* Build the compare&swap statement.  */
7458  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
7459  new_storedi = force_gimple_operand_gsi (&si,
7460					  fold_convert (TREE_TYPE (loadedi),
7461							new_storedi),
7462					  true, NULL_TREE,
7463					  true, GSI_SAME_STMT);
7464
7465  if (gimple_in_ssa_p (cfun))
7466    old_vali = loadedi;
7467  else
7468    {
7469      old_vali = create_tmp_var (TREE_TYPE (loadedi));
7470      stmt = gimple_build_assign (old_vali, loadedi);
7471      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7472
7473      stmt = gimple_build_assign (loadedi, new_storedi);
7474      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7475    }
7476
7477  /* Note that we always perform the comparison as an integer, even for
7478     floating point.  This allows the atomic operation to properly
7479     succeed even with NaNs and -0.0.  */
7480  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
7481  stmt = gimple_build_cond_empty (ne);
7482  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7483
7484  /* Update cfg.  */
7485  e = single_succ_edge (store_bb);
7486  e->flags &= ~EDGE_FALLTHRU;
7487  e->flags |= EDGE_FALSE_VALUE;
7488  /* Expect no looping.  */
7489  e->probability = profile_probability::guessed_always ();
7490
7491  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
7492  e->probability = profile_probability::guessed_never ();
7493
7494  /* Copy the new value to loadedi (we already did that before the condition
7495     if we are not in SSA).  */
7496  if (gimple_in_ssa_p (cfun))
7497    {
7498      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
7499      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
7500    }
7501
7502  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
7503  gsi_remove (&si, true);
7504
7505  class loop *loop = alloc_loop ();
7506  loop->header = loop_header;
7507  loop->latch = store_bb;
7508  add_loop (loop, loop_header->loop_father);
7509
7510  if (gimple_in_ssa_p (cfun))
7511    update_ssa (TODO_update_ssa_no_phi);
7512
7513  return true;
7514}
7515
7516/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
7517
7518				  GOMP_atomic_start ();
7519				  *addr = rhs;
7520				  GOMP_atomic_end ();
7521
7522   The result is not globally atomic, but works so long as all parallel
7523   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, this appears to be within
   spec, which makes sense since that's how several other compilers
   handle this situation as well.
7527   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
7528   expanding.  STORED_VAL is the operand of the matching
7529   GIMPLE_OMP_ATOMIC_STORE.
7530
7531   We replace
7532   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
7533   loaded_val = *addr;
7534
7535   and replace
7536   GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
7537   *addr = stored_val;
7538*/
7539
7540static bool
7541expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
7542			 tree addr, tree loaded_val, tree stored_val)
7543{
7544  gimple_stmt_iterator si;
7545  gassign *stmt;
7546  tree t;
7547
7548  si = gsi_last_nondebug_bb (load_bb);
7549  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7550
7551  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
7552  t = build_call_expr (t, 0);
7553  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7554
7555  tree mem = build_simple_mem_ref (addr);
7556  TREE_TYPE (mem) = TREE_TYPE (loaded_val);
7557  TREE_OPERAND (mem, 1)
7558    = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
7559						 true),
7560		    TREE_OPERAND (mem, 1));
7561  stmt = gimple_build_assign (loaded_val, mem);
7562  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7563  gsi_remove (&si, true);
7564
7565  si = gsi_last_nondebug_bb (store_bb);
7566  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7567
7568  stmt = gimple_build_assign (unshare_expr (mem), stored_val);
7569  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7570
7571  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
7572  t = build_call_expr (t, 0);
7573  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7574  gsi_remove (&si, true);
7575
7576  if (gimple_in_ssa_p (cfun))
7577    update_ssa (TODO_update_ssa_no_phi);
7578  return true;
7579}
7580
/* Expand a GIMPLE_OMP_ATOMIC statement.  We first try to expand it
   using expand_omp_atomic_fetch_op.  If that fails, we try
   expand_omp_atomic_pipeline, and if that fails too, the ultimate
   fallback is wrapping the operation in a mutex
   (expand_omp_atomic_mutex).  REGION is the atomic region built
   by build_omp_regions_1().  */
7587
7588static void
7589expand_omp_atomic (struct omp_region *region)
7590{
7591  basic_block load_bb = region->entry, store_bb = region->exit;
7592  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
7593  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
7594  tree loaded_val = gimple_omp_atomic_load_lhs (load);
7595  tree addr = gimple_omp_atomic_load_rhs (load);
7596  tree stored_val = gimple_omp_atomic_store_val (store);
7597  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7598  HOST_WIDE_INT index;
7599
7600  /* Make sure the type is one of the supported sizes.  */
7601  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
7602  index = exact_log2 (index);
7603  if (index >= 0 && index <= 4)
7604    {
7605      unsigned int align = TYPE_ALIGN_UNIT (type);
7606
7607      /* __sync builtins require strict data alignment.  */
7608      if (exact_log2 (align) >= index)
7609	{
7610	  /* Atomic load.  */
7611	  scalar_mode smode;
7612	  if (loaded_val == stored_val
7613	      && (is_int_mode (TYPE_MODE (type), &smode)
7614		  || is_float_mode (TYPE_MODE (type), &smode))
7615	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7616	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
7617	    return;
7618
7619	  /* Atomic store.  */
7620	  if ((is_int_mode (TYPE_MODE (type), &smode)
7621	       || is_float_mode (TYPE_MODE (type), &smode))
7622	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7623	      && store_bb == single_succ (load_bb)
7624	      && first_stmt (store_bb) == store
7625	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
7626					  stored_val, index))
7627	    return;
7628
7629	  /* When possible, use specialized atomic update functions.  */
7630	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
7631	      && store_bb == single_succ (load_bb)
7632	      && expand_omp_atomic_fetch_op (load_bb, addr,
7633					     loaded_val, stored_val, index))
7634	    return;
7635
	  /* If we don't have specialized __sync builtins, try to
	     implement it as a compare and swap loop.  */
7638	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
7639					  loaded_val, stored_val, index))
7640	    return;
7641	}
7642    }
7643
7644  /* The ultimate fallback is wrapping the operation in a mutex.  */
7645  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
7646}
7647
7648/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
7649   at REGION_EXIT.  */
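
/* For example (a sketch): a kernels region whose body is the single nest

       for (i ...)	<-- the one outer loop
	 for (j ...)	<-- an inner loop with no siblings

   gets every loop on that spine marked, whereas sibling loops at any
   level leave the region unmarked.  */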
7650
7651static void
7652mark_loops_in_oacc_kernels_region (basic_block region_entry,
7653				   basic_block region_exit)
7654{
7655  class loop *outer = region_entry->loop_father;
7656  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7657
7658  /* Don't parallelize the kernels region if it contains more than one outer
7659     loop.  */
7660  unsigned int nr_outer_loops = 0;
7661  class loop *single_outer = NULL;
7662  for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
7663    {
7664      gcc_assert (loop_outer (loop) == outer);
7665
7666      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7667	continue;
7668
7669      if (region_exit != NULL
7670	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7671	continue;
7672
7673      nr_outer_loops++;
7674      single_outer = loop;
7675    }
7676  if (nr_outer_loops != 1)
7677    return;
7678
7679  for (class loop *loop = single_outer->inner;
7680       loop != NULL;
7681       loop = loop->inner)
7682    if (loop->next)
7683      return;
7684
7685  /* Mark the loops in the region.  */
7686  for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
7687    loop->in_oacc_kernels_region = true;
7688}
7689
/* Types used to pass grid and work-group sizes to kernel invocation.  */
7691
7692struct GTY(()) grid_launch_attributes_trees
7693{
7694  tree kernel_dim_array_type;
7695  tree kernel_lattrs_dimnum_decl;
7696  tree kernel_lattrs_grid_decl;
7697  tree kernel_lattrs_group_decl;
7698  tree kernel_launch_attributes_type;
7699};
7700
7701static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7702
7703/* Create types used to pass kernel launch attributes to target.  */
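
/* The record built below corresponds to (a sketch in C terms):

       struct __gomp_kernel_launch_attributes
       {
	 uint32_t ndim;
	 uint32_t grid_size[3];
	 uint32_t group_size[3];
       };  */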
7704
7705static void
7706grid_create_kernel_launch_attr_types (void)
7707{
7708  if (grid_attr_trees)
7709    return;
7710  grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7711
7712  tree dim_arr_index_type
7713    = build_index_type (build_int_cst (integer_type_node, 2));
7714  grid_attr_trees->kernel_dim_array_type
7715    = build_array_type (uint32_type_node, dim_arr_index_type);
7716
7717  grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7718  grid_attr_trees->kernel_lattrs_dimnum_decl
7719    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7720		  uint32_type_node);
7721  DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7722
7723  grid_attr_trees->kernel_lattrs_grid_decl
7724    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7725		  grid_attr_trees->kernel_dim_array_type);
7726  DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7727    = grid_attr_trees->kernel_lattrs_dimnum_decl;
7728  grid_attr_trees->kernel_lattrs_group_decl
7729    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7730		  grid_attr_trees->kernel_dim_array_type);
7731  DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7732    = grid_attr_trees->kernel_lattrs_grid_decl;
7733  finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7734			 "__gomp_kernel_launch_attributes",
7735			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7736}
7737
7738/* Insert before the current statement in GSI a store of VALUE to INDEX of
7739   array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
7740   of type uint32_type_node.  */
7741
7742static void
7743grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7744			     tree fld_decl, int index, tree value)
7745{
7746  tree ref = build4 (ARRAY_REF, uint32_type_node,
7747		     build3 (COMPONENT_REF,
7748			     grid_attr_trees->kernel_dim_array_type,
7749			     range_var, fld_decl, NULL_TREE),
7750		     build_int_cst (integer_type_node, index),
7751		     NULL_TREE, NULL_TREE);
7752  gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7753}
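
/* For instance (illustration only), with FLD_DECL naming the "grid_size"
   field, INDEX 0 and VALUE a constant 64, the inserted store corresponds to
   the C assignment

     range_var.grid_size[0] = 64;  */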

/* Return a tree representation of a pointer to a structure with grid and
   work-group size information.  Statements filling that information will be
   inserted before GSI; TGT_STMT is the target statement which has the
   necessary information in it.  */

static tree
grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
				   gomp_target *tgt_stmt)
{
  grid_create_kernel_launch_attr_types ();
  tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
				"__kernel_launch_attrs");

  unsigned max_dim = 0;
  for (tree clause = gimple_omp_target_clauses (tgt_stmt);
       clause;
       clause = OMP_CLAUSE_CHAIN (clause))
    {
      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
	continue;

      unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
      max_dim = MAX (dim, max_dim);

      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_grid_decl,
				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_group_decl,
				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
    }

  tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
  gcc_checking_assert (max_dim <= 2);
  tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
  gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (lattrs) = 1;
  return build_fold_addr_expr (lattrs);
}

/* Build a target argument identifier from the DEVICE identifier, value
   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */

static tree
get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
{
  tree t = build_int_cst (integer_type_node, device);
  if (subsequent_param)
    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		     build_int_cst (integer_type_node,
				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   build_int_cst (integer_type_node, id));
  return t;
}
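
/* Worked example, assuming the encoding constants as defined in
   gomp-constants.h at the time of writing: DEVICE GOMP_TARGET_ARG_DEVICE_ALL
   (0), ID GOMP_TARGET_ARG_NUM_TEAMS (1 << 8) and SUBSEQUENT_PARAM set
   (1 << 7) combine to 0 | (1 << 7) | (1 << 8), i.e. 0x180.  */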

/* Like the above, but return the identifier in a type that can be directly
   stored as an element of the argument array.  */

static tree
get_target_argument_identifier (int device, bool subsequent_param, int id)
{
  tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
  return fold_convert (ptr_type_node, t);
}

/* Return a target argument consisting of DEVICE identifier, value identifier
   ID, and the actual VALUE.  */

static tree
get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
			   tree value)
{
  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
			fold_convert (integer_type_node, value),
			build_int_cst (unsigned_type_node,
				       GOMP_TARGET_ARG_VALUE_SHIFT));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   get_target_argument_identifier_1 (device, false, id));
  t = fold_convert (ptr_type_node, t);
  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
}

/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
   push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
   otherwise push an identifier (with DEVICE and ID) and the VALUE in two
   separate arguments.  */

static void
push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
					 int id, tree value, vec <tree> *args)
{
  if (tree_fits_shwi_p (value)
      && tree_to_shwi (value) > -(1 << 15)
      && tree_to_shwi (value) < (1 << 15))
    args->quick_push (get_target_argument_value (gsi, device, id, value));
  else
    {
      args->quick_push (get_target_argument_identifier (device, true, id));
      value = fold_convert (ptr_type_node, value);
      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
					GSI_SAME_STMT);
      args->quick_push (value);
    }
}
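
/* Two illustrative outcomes (pseudo code, not compiled): a small constant
   such as 4 is packed into the single argument

     (4 << GOMP_TARGET_ARG_VALUE_SHIFT) | DEVICE | ID

   while a run-time value V becomes the two arguments

     DEVICE | ID | GOMP_TARGET_ARG_SUBSEQUENT_PARAM
     (void *) V  */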

/* Create an array of arguments that is then passed to GOMP_target.  */

static tree
get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
{
  auto_vec <tree, 6> args;
  tree clauses = gimple_omp_target_clauses (tgt_stmt);
  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (c)
    t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);

  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (c)
    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
					   &args);

  /* Add HSA-specific grid sizes, if available.  */
  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
		       OMP_CLAUSE__GRIDDIM_))
    {
      int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
      t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
      args.quick_push (t);
      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
    }

  /* Produce more, perhaps device specific, arguments here.  */

  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
							  args.length () + 1),
				  ".omp_target_args");
  for (unsigned i = 0; i < args.length (); i++)
    {
      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
			 build_int_cst (integer_type_node, i),
			 NULL_TREE, NULL_TREE);
      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
			 GSI_SAME_STMT);
    }
  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
		     build_int_cst (integer_type_node, args.length ()),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (argarray) = 1;
  return build_fold_addr_expr (argarray);
}
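
/* As an illustration, for a bare "#pragma omp target" without num_teams and
   thread_limit clauses, the array built above contains roughly (pseudo
   code):

     .omp_target_args[0] = <-1 packed with GOMP_TARGET_ARG_NUM_TEAMS>;
     .omp_target_args[1] = <-1 packed with GOMP_TARGET_ARG_THREAD_LIMIT>;
     .omp_target_args[2] = NULL;	/* Terminator.  */  */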

/* Expand the GIMPLE_OMP_TARGET starting at REGION.  */

static void
expand_omp_target (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gomp_target *entry_stmt;
  gimple *stmt;
  edge e;
  bool offloaded;
  int target_kind;

  entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
  target_kind = gimple_omp_target_kind (entry_stmt);
  new_bb = region->entry;

  offloaded = is_gimple_omp_offloaded (entry_stmt);
  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_REGION:
    case GF_OMP_TARGET_KIND_UPDATE:
    case GF_OMP_TARGET_KIND_ENTER_DATA:
    case GF_OMP_TARGET_KIND_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
    case GF_OMP_TARGET_KIND_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      break;
    default:
      gcc_unreachable ();
    }

  child_fn = NULL_TREE;
  child_cfun = NULL;
  if (offloaded)
    {
      child_fn = gimple_omp_target_child_fn (entry_stmt);
      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
    }

  /* Supported by expand_omp_taskreg, but not here.  */
  if (child_cfun != NULL)
    gcc_checking_assert (!child_cfun->cfg);
  gcc_checking_assert (!gimple_in_ssa_p (cfun));

  entry_bb = region->entry;
  exit_bb = region->exit;

  if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
    mark_loops_in_oacc_kernels_region (region->entry, region->exit);

  /* Going on, all OpenACC compute constructs are mapped to
     'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
     To distinguish between them, we attach attributes.  */
  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc parallel"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc kernels"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc serial"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    default:
      /* Make sure we don't miss any.  */
      gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
			     && is_gimple_omp_offloaded (entry_stmt)));
      break;
    }

  if (offloaded)
    {
      unsigned srcidx, dstidx, num;

      /* If the offloading region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the offloading body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable, in which case we need to keep the assignment.  */
      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
      if (data_arg)
	{
	  basic_block entry_succ_bb = single_succ (entry_bb);
	  gimple_stmt_iterator gsi;
	  tree arg;
	  gimple *tgtcopy_stmt = NULL;
	  tree sender = TREE_VEC_ELT (data_arg, 0);

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && TREE_OPERAND (arg, 0) == sender)
		    {
		      tgtcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (tgtcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
	  gsi_remove (&gsi, true);
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in the offloading block
	 rather than in the containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_*,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt
		  && gimple_code (stmt) == gimple_code (entry_stmt));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the offloading region into CHILD_CFUN.  */

      block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree, arrange for the child function to fix up loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Add the new function to the offload table.  */
      if (ENABLE_OFFLOADING)
	{
	  if (in_lto_p)
	    DECL_PRESERVE_P (child_fn) = 1;
	  vec_safe_push (offload_funcs, child_fn);
	}

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}

      adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
    }

  /* Emit a library call to launch the offloading region, or do data
     transfers.  */
  tree t1, t2, t3, t4, depend, c, clauses;
  enum built_in_function start_ix;
  unsigned int flags_i = 0;

  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
      start_ix = BUILT_IN_GOMP_TARGET;
      break;
    case GF_OMP_TARGET_KIND_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_DATA;
      break;
    case GF_OMP_TARGET_KIND_UPDATE:
      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_ENTER_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_EXIT_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
      start_ix = BUILT_IN_GOACC_PARALLEL;
      break;
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      start_ix = BUILT_IN_GOACC_DATA_START;
      break;
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
      start_ix = BUILT_IN_GOACC_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
      start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      start_ix = BUILT_IN_GOACC_DECLARE;
      break;
    default:
      gcc_unreachable ();
    }

  clauses = gimple_omp_target_clauses (entry_stmt);

  tree device = NULL_TREE;
  location_t device_loc = UNKNOWN_LOCATION;
  tree goacc_flags = NULL_TREE;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      /* By default, no GOACC_FLAGs are set.  */
      goacc_flags = integer_zero_node;
    }
  else
    {
      c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
      if (c)
	{
	  device = OMP_CLAUSE_DEVICE_ID (c);
	  device_loc = OMP_CLAUSE_LOCATION (c);
	}
      else
	{
	  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
	     library choose).  */
	  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
	  device_loc = gimple_location (entry_stmt);
	}

      c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
      if (c)
	flags_i |= GOMP_TARGET_FLAG_NOWAIT;
    }

  /* By default, there is no conditional.  */
  tree cond = NULL_TREE;
  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);
  /* If we found the clause 'if (cond)', build:
     OpenACC: goacc_flags = (cond ? goacc_flags
			     : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
     OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK)  */
  if (cond)
    {
      tree *tp;
      if (is_gimple_omp_oacc (entry_stmt))
	tp = &goacc_flags;
      else
	{
	  /* Ensure 'device' is of the correct type.  */
	  device = fold_convert_loc (device_loc, integer_type_node, device);

	  tp = &device;
	}

      cond = gimple_boolify (cond);

      basic_block cond_bb, then_bb, else_bb;
      edge e;
      tree tmp_var;

      tmp_var = create_tmp_var (TREE_TYPE (*tp));
      if (offloaded)
	e = split_block_after_labels (new_bb);
      else
	{
	  gsi = gsi_last_nondebug_bb (new_bb);
	  gsi_prev (&gsi);
	  e = split_block (new_bb, gsi_stmt (gsi));
	}
      cond_bb = e->src;
      new_bb = e->dest;
      remove_edge (e);

      then_bb = create_empty_bb (cond_bb);
      else_bb = create_empty_bb (then_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

      stmt = gimple_build_cond_empty (cond);
      gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (then_bb);
      stmt = gimple_build_assign (tmp_var, *tp);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (else_bb);
      if (is_gimple_omp_oacc (entry_stmt))
	stmt = gimple_build_assign (tmp_var,
				    BIT_IOR_EXPR,
				    *tp,
				    build_int_cst (integer_type_node,
						   GOACC_FLAG_HOST_FALLBACK));
      else
	stmt = gimple_build_assign (tmp_var,
				    build_int_cst (integer_type_node,
						   GOMP_DEVICE_HOST_FALLBACK));
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      *tp = tmp_var;

      gsi = gsi_last_nondebug_bb (new_bb);
    }
  else
    {
      gsi = gsi_last_nondebug_bb (new_bb);

      if (device != NULL_TREE)
	device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
					   true, GSI_SAME_STMT);
    }

  t = gimple_omp_target_data_arg (entry_stmt);
  if (t == NULL)
    {
      t1 = size_zero_node;
      t2 = build_zero_cst (ptr_type_node);
      t3 = t2;
      t4 = t2;
    }
  else
    {
      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
    }

  gimple *g;
  bool tagging = false;
  /* The maximum number used by any start_ix, without varargs.  */
  auto_vec<tree, 11> args;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
					TREE_TYPE (goacc_flags), goacc_flags);
      goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
						NULL_TREE, true,
						GSI_SAME_STMT);
      args.quick_push (goacc_flags_m);
    }
  else
    args.quick_push (device);
  if (offloaded)
    args.quick_push (build_fold_addr_expr (child_fn));
  args.quick_push (t1);
  args.quick_push (t2);
  args.quick_push (t3);
  args.quick_push (t4);
  switch (start_ix)
    {
    case BUILT_IN_GOACC_DATA_START:
    case BUILT_IN_GOACC_DECLARE:
    case BUILT_IN_GOMP_TARGET_DATA:
      break;
    case BUILT_IN_GOMP_TARGET:
    case BUILT_IN_GOMP_TARGET_UPDATE:
    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
      if (c)
	depend = OMP_CLAUSE_DECL (c);
      else
	depend = build_int_cst (ptr_type_node, 0);
      args.quick_push (depend);
      if (start_ix == BUILT_IN_GOMP_TARGET)
	args.quick_push (get_target_arguments (&gsi, entry_stmt));
      break;
    case BUILT_IN_GOACC_PARALLEL:
      if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
	{
	  tree dims = NULL_TREE;
	  unsigned int ix;

	  /* For serial constructs we set all dimensions to 1.  */
	  for (ix = GOMP_DIM_MAX; ix--;)
	    dims = tree_cons (NULL_TREE, integer_one_node, dims);
	  oacc_replace_fn_attrib (child_fn, dims);
	}
      else
	oacc_set_fn_attrib (child_fn, clauses, &args);
      tagging = true;
      /* FALLTHRU */
    case BUILT_IN_GOACC_ENTER_EXIT_DATA:
    case BUILT_IN_GOACC_UPDATE:
      {
	tree t_async = NULL_TREE;

	/* If present, use the value specified by the respective
	   clause, making sure that it is of the correct type.  */
	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
	if (c)
	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
				      integer_type_node,
				      OMP_CLAUSE_ASYNC_EXPR (c));
	else if (!tagging)
	  /* Default value for t_async.  */
	  t_async = fold_convert_loc (gimple_location (entry_stmt),
				      integer_type_node,
				      build_int_cst (integer_type_node,
						     GOMP_ASYNC_SYNC));
	if (tagging && t_async)
	  {
	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;

	    if (TREE_CODE (t_async) == INTEGER_CST)
	      {
		/* See if we can pack the async arg in to the tag's
		   operand.  */
		i_async = TREE_INT_CST_LOW (t_async);
		if (i_async < GOMP_LAUNCH_OP_MAX)
		  t_async = NULL_TREE;
		else
		  i_async = GOMP_LAUNCH_OP_MAX;
	      }
	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
					      i_async));
	  }
	if (t_async)
	  args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
						    NULL_TREE, true,
						    GSI_SAME_STMT));

	/* Save the argument index, and ... */
	unsigned t_wait_idx = args.length ();
	unsigned num_waits = 0;
	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
	if (!tagging || c)
	  /* ... push a placeholder.  */
	  args.safe_push (integer_zero_node);

	for (; c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
	    {
	      tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
					   integer_type_node,
					   OMP_CLAUSE_WAIT_EXPR (c));
	      arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
					      GSI_SAME_STMT);
	      args.safe_push (arg);
	      num_waits++;
	    }

	if (!tagging || num_waits)
	  {
	    tree len;

	    /* Now that we know the number, update the placeholder.  */
	    if (tagging)
	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
	    else
	      len = build_int_cst (integer_type_node, num_waits);
	    len = fold_convert_loc (gimple_location (entry_stmt),
				    unsigned_type_node, len);
	    args[t_wait_idx] = len;
	  }
      }
      break;
    default:
      gcc_unreachable ();
    }
  if (tagging)
    /* Push terminal marker - zero.  */
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));

  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  if (!offloaded)
    {
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
}
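
/* End-to-end sketch (illustration only; see libgomp for the authoritative
   prototypes, and note the .omp_data_* names are merely the conventional
   temporaries produced by target lowering): for an offloaded
   "#pragma omp target" region mapping one variable, the call emitted above
   has the shape

     GOMP_target_ext (device, child_fn, 1, &.omp_data_arr,
		      &.omp_data_sizes, &.omp_data_kinds, flags,
		      depend, .omp_target_args);  */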

/* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with the
   iteration variable derived from the thread number.  INTRA_GROUP means this
   is an expansion of a loop iterating over work-items within a separate
   iteration over groups.  */

static void
grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
{
  gimple_stmt_iterator gsi;
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_checking_assert (gimple_omp_for_kind (for_stmt)
		       == GF_OMP_FOR_KIND_GRID_LOOP);
  size_t collapse = gimple_omp_for_collapse (for_stmt);
  struct omp_for_data_loop *loops
    = XALLOCAVEC (struct omp_for_data_loop,
		  gimple_omp_for_collapse (for_stmt));
  struct omp_for_data fd;

  remove_edge (BRANCH_EDGE (kfor->entry));
  basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;

  gcc_assert (kfor->cont);
  omp_extract_for_data (for_stmt, &fd, loops);

  gsi = gsi_start_bb (body_bb);

  for (size_t dim = 0; dim < collapse; dim++)
    {
      tree type, itype;
      itype = type = TREE_TYPE (fd.loops[dim].v);
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);

      tree n1 = fd.loops[dim].n1;
      tree step = fd.loops[dim].step;
      n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				     true, NULL_TREE, true, GSI_SAME_STMT);
      step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				       true, NULL_TREE, true, GSI_SAME_STMT);
      tree threadid;
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  gcc_checking_assert (!intra_group);
	  threadid = build_call_expr (builtin_decl_explicit
				      (BUILT_IN_HSA_WORKGROUPID), 1,
				      build_int_cstu (unsigned_type_node, dim));
	}
      else if (intra_group)
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      else
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMABSID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      threadid = fold_convert (itype, threadid);
      threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
					   true, GSI_SAME_STMT);

      tree startvar = fd.loops[dim].v;
      tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (n1, t);
      else
	t = fold_build2 (PLUS_EXPR, type, t, n1);
      t = fold_convert (type, t);
      t = force_gimple_operand_gsi (&gsi, t,
				    DECL_P (startvar)
				    && TREE_ADDRESSABLE (startvar),
				    NULL_TREE, true, GSI_SAME_STMT);
      gassign *assign_stmt = gimple_build_assign (startvar, t);
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }
  /* Remove the omp for statement.  */
  gsi = gsi_last_nondebug_bb (kfor->entry);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
  gsi = gsi_last_nondebug_bb (kfor->cont);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
  gsi_remove (&gsi, true);

  /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
  gsi = gsi_last_nondebug_bb (kfor->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  if (intra_group)
    gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Fixup the much simpler CFG.  */
  remove_edge (find_edge (kfor->cont, body_bb));

  if (kfor->cont != body_bb)
    set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
  set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
}
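
/* Sketch of the transformation (illustrative pseudo code): a gridified loop

     for (i = n1; i < n2; i += step)
       body;

   loses its back edge and exit test entirely; every HSA work-item executes

     i = n1 + threadid * step;
     body;

   exactly once, with threadid taken from the workitem/workgroup builtins
   selected above.  */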

/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
   argument decls.  */

struct grid_arg_decl_map
{
  tree old_arg;
  tree new_arg;
};

/* Invoked through walk_gimple_op.  Remaps all uses of the original PARM_DECL
   to the one pertaining to the kernel function.  */

static tree
grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
  tree t = *tp;

  if (t == adm->old_arg)
    *tp = adm->new_arg;
  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* If the TARGET region contains a kernel body FOR loop, remove its region
   from the TARGET and expand it in HSA gridified kernel fashion.  */

static void
grid_expand_target_grid_body (struct omp_region *target)
{
  if (!hsa_gen_requested_p ())
    return;

  gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
  struct omp_region **pp;

  for (pp = &target->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
      break;

  struct omp_region *gpukernel = *pp;

  tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
  if (!gpukernel)
    {
      /* HSA cannot handle OACC stuff.  */
      if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
	return;
      gcc_checking_assert (orig_child_fndecl);
      gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
				    OMP_CLAUSE__GRIDDIM_));
      cgraph_node *n = cgraph_node::get (orig_child_fndecl);

      hsa_register_kernel (n);
      return;
    }

  gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_));
  tree inside_block
    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
  *pp = gpukernel->next;
  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_FOR)
      break;

  struct omp_region *kfor = *pp;
  gcc_assert (kfor);
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
  *pp = kfor->next;
  if (kfor->inner)
    {
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  struct omp_region **next_pp;
	  for (pp = &kfor->inner; *pp; pp = next_pp)
	    {
	      next_pp = &(*pp)->next;
	      if ((*pp)->type != GIMPLE_OMP_FOR)
		continue;
	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
	      gcc_assert (gimple_omp_for_kind (inner)
			  == GF_OMP_FOR_KIND_GRID_LOOP);
	      grid_expand_omp_for_loop (*pp, true);
	      *pp = (*pp)->next;
	      next_pp = pp;
	    }
	}
      expand_omp (kfor->inner);
    }
  if (gpukernel->inner)
    expand_omp (gpukernel->inner);

  tree kern_fndecl = copy_node (orig_child_fndecl);
  DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
							  "kernel");
  SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
  tree tgtblock = gimple_block (tgt_stmt);
  tree fniniblock = make_node (BLOCK);
  BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
  BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
  BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
  BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
  DECL_INITIAL (kern_fndecl) = fniniblock;
  push_struct_function (kern_fndecl);
  cfun->function_end_locus = gimple_location (tgt_stmt);
  init_tree_ssa (cfun);
  pop_cfun ();

  tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
  gcc_assert (!DECL_CHAIN (old_parm_decl));
  tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
  DECL_CONTEXT (new_parm_decl) = kern_fndecl;
  DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
  gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
  DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
  DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
  struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
  kern_cfun->curr_properties = cfun->curr_properties;

  grid_expand_omp_for_loop (kfor, false);

  /* Remove the omp for statement.  */
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
  gsi_remove (&gsi, true);
  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
     return.  */
  gsi = gsi_last_nondebug_bb (gpukernel->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gimple *ret_stmt = gimple_build_return (NULL);
  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Statements in the first BB in the target construct have been produced by
     target lowering and must be copied inside the GPUKERNEL, with the two
     exceptions of the first OMP statement and the OMP_DATA assignment
     statement.  */
  gsi = gsi_start_bb (single_succ (gpukernel->entry));
  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
       !gsi_end_p (tsi); gsi_next (&tsi))
    {
      gimple *stmt = gsi_stmt (tsi);
      if (is_gimple_omp (stmt))
	break;
      if (sender
	  && is_gimple_assign (stmt)
	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
	continue;
      gimple *copy = gimple_copy (stmt);
      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
      gimple_set_block (copy, fniniblock);
    }

  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
			  gpukernel->exit, inside_block);

  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
  kcn->mark_force_output ();
  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);

  hsa_register_kernel (kcn, orig_child);

  cgraph_node::add_new_function (kern_fndecl, true);
  push_cfun (kern_cfun);
  cgraph_edge::rebuild_edges ();

  /* Re-map any mention of the PARM_DECL of the original function to the
     PARM_DECL of the new one.

     TODO: It would be great if lowering produced references into the GPU
     kernel decl straight away and we did not have to do this.  */
  struct grid_arg_decl_map adm;
  adm.old_arg = old_parm_decl;
  adm.new_arg = new_parm_decl;
  basic_block bb;
  FOR_EACH_BB_FN (bb, kern_cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  struct walk_stmt_info wi;
	  memset (&wi, 0, sizeof (wi));
	  wi.info = &adm;
	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
	}
    }
  pop_cfun ();
}

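/* Illustration (hypothetical input): a parallel loop such as

     #pragma omp parallel for
     for (i = 0; i < n; i++) ...

   yields a GIMPLE_OMP_PARALLEL region with a GIMPLE_OMP_FOR child; the FOR
   region is expanded first, so that outlining the parallel body into a new
   function never encounters an unexpanded OMP construct.  */
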
/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */

static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);
      else if (region->type == GIMPLE_OMP_TARGET)
	grid_expand_target_grid_body (region);

      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
	inner_stmt = last_stmt (region->inner->entry);

      if (region->inner)
	expand_omp (region->inner);

      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
	input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_SINGLE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_stmt (region->entry));
	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
				 OMP_CLAUSE_DEPEND))
	      {
		/* We'll expand these when expanding the corresponding
		   worksharing region with an ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}

/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, a whole
   forest of OMP constructs may be built).  */

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.  */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_SERIAL:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...
		     To be precise, target data isn't stand-alone, but
		     the gimplifier puts the end API call into a try/finally
		     block for it, so omp expansion can treat it as such.  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && omp_find_clause (gimple_omp_ordered_clauses
					 (as_a <gomp_ordered *> (stmt)),
				       OMP_CLAUSE_DEPEND))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASK
		   && gimple_omp_task_taskwait_p (stmt))
	    /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASKGROUP)
	    /* #pragma omp taskgroup isn't a stand-alone directive, but
	       the gimplifier puts the end API call into a try/finally
	       block for it, so omp expansion can treat it as such.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}

/* Builds the tree of OMP regions rooted at ROOT, storing it to
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands the omp construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the whole CFG and build a tree of OMP regions, storing the result
   in root_omp_region.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}

/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      bool gate = ((flag_openacc != 0 || flag_openmp != 0
		    || flag_openmp_simd != 0)
		   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
	 But often, there is nothing to do.  */
      if (!gate)
	return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASKGROUP:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_task_taskwait_p (last))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_SERIAL:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     succs edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i ; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}

#include "gt-omp-expand.h"