/* Loop autoparallelization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "options.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "predict.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "stor-layout.h"
#include "tree-nested.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
#include "omp-low.h"
#include "tree-nested.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"

/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether its
   iterations are independent, and if that is the case (and some additional
   conditions regarding profitability and correctness are satisfied), we
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let the omp expansion
   machinery do its job.

   Most of the complexity is in bringing the code into the shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
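
/* As a schematic example of the basic transformation (GIMPLE-like
   pseudocode; the data-passing structure and clause details are
   simplified here), a plain loop without reductions such as

     for (i = 0; i < N; i++)
       a[i] = 2 * i;

   becomes, roughly,

     #pragma omp parallel num_threads(4)
     #pragma omp for schedule(static)
     for (i = 0; i < N; i++)
       a[i] = 2 * i;

   after which the omp expansion machinery outlines the body into a
   separate function.  The reduction case is worked through in detail
   below.  */
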
/*
  Reduction handling:
  Currently we use vect_force_simple_reduction () to detect reduction patterns.
  The code transformation is illustrated by the example below.


parloop
{
  int sum = 1;

  for (i = 0; i < N; i++)
   {
    x[i] = i + 3;
    sum += x[i];
   }
}

gimple-like code:
header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;


exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


after reduction transformation (only relevant parts):

parloop
{

....


  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc., replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result ().  #
  # sum.27_56 = PHI <sum.27_11, 0>
  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1 ().  #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # Collecting the result after the join of the threads is done at
  create_loads_for_reductions ().
  The value computed by the threads is loaded from the
  shared struct.  #


  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);

...

}

*/

/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
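
/* For instance (an illustrative consequence of the run-time check
   emitted by gen_parallel_loop below): with 4 threads, the generated
   code enters the parallel version only when the loop performs at
   least 4 * MIN_PER_THREAD = 400 iterations, and branches to the
   original serial loop otherwise.  */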

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* Reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* Code of the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* The field of the parloop data structure intended for this reduction.  */
  tree init;			/* Reduction initialization value.  */
  gphi *new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};

/* Reduction info hashtable helpers.  */

struct reduction_hasher : typed_free_remove <reduction_info>
{
  typedef reduction_info value_type;
  typedef reduction_info compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const value_type *a, const compare_type *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const value_type *a)
{
  return a->reduc_version;
}

typedef hash_table<reduction_hasher> reduction_info_table_type;


/* Returns the reduction information for the reduction phi PHI from
   REDUCTION_LIST, or NULL if PHI is not a known reduction phi.  */

static struct reduction_info *
reduction_phi (reduction_info_table_type *reduction_list, gimple phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list->elements () == 0 || phi == NULL)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list->find (&tmpred);

  return red;
}

/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};

/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
{
  typedef name_to_copy_elt value_type;
  typedef name_to_copy_elt compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const value_type *a, const compare_type *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const value_type *a)
{
  return (hashval_t) a->version;
}

typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;

/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)

/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
			 struct obstack * lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}

/* Multiply a vector VEC by a matrix MATRIX.
   MATRIX is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST, which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
			   lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}

/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read dependences (a.k.a. input
	 dependences): these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
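
/* A worked example for intuition (not code used by the pass): for the
   loop-interchange matrix T = [[0, 1], [1, 0]] and the distance vector
   d = (1, -1), lambda_matrix_vector_mult yields T.d = (-1, 1), which is
   not lexicographically positive, so the interchange is rejected; for
   d = (1, 1) it yields T.d = (1, 1), which is accepted.  */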

/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
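
/* To spell out the trick used above: loop reversal is the 1x1 matrix
   [-1], so it is legal iff every distance vector d maps to a
   lexicographically non-negative -d.  As the recorded distance vectors
   are themselves lexicographically non-negative, only zero distances
   survive, i.e. the loop carries no dependence between different
   iterations -- which is exactly the condition for executing them in
   parallel.  */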

/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
	name = make_ssa_name (TREE_TYPE (addr));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj, current_function_decl),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}

/* Callback for htab_traverse.  Create the initialization statement
   for the reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");

  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value, stored in the reduction's INITIAL_VALUE field,
     to be taken into consideration after the parallel computation
     is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}

/* Data passed to the tree walk in eliminate_local_variables_1.  */

struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type *decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address is not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}

/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      unlink_stmt_vdef (stmt);
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking the address of a local variable -- these are moved out of the
   region (and a temporary variable is created to hold the address if
   necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
   references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, &decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}
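
/* For illustration (a hypothetical fragment): if 'a' is a local
   addressable variable used in the region, the pass hoists

     addr_N = &a;

   onto the ENTRY edge and rewrites in-region occurrences such as

     a = t1;
     t2 = a + 1;

   into

     *addr_N = t1;
     t2 = *addr_N + 1;

   while an invariant &a used directly as a gimple operand becomes the
   ssa name addr_N itself.  */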

/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  return false;
}

/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES; if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies->find_slot_with_hash (&elt, idx,
					   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
         it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}

/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   the region) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
  {
    name = DEF_FROM_PTR (def);
    gcc_assert (TREE_CODE (name) == SSA_NAME);
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					  false);
    gcc_assert (copy == name);
  }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
  {
    name = USE_FROM_PTR (use);
    if (TREE_CODE (name) != SSA_NAME)
      continue;

    copy_name_p = expr_invariant_in_region_p (entry, exit, name);
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					  copy_name_p);
    SET_USE (use, copy);
  }
}

/* Like separate_decls_in_region_stmt, but for a debug stmt STMT:
   rewrites the variable of STMT and the ssa names it uses with the
   duplicates stored in DECL_COPIES and NAME_COPIES, resetting the
   value of a debug bind whose uses have no duplicate.  Returns true
   when STMT should instead be removed, because its variable cannot
   be rewritten.  */

static bool
separate_decls_in_region_debug (gimple stmt,
				name_to_copy_table_type *name_copies,
				int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
  {
    name = USE_FROM_PTR (use);
    if (TREE_CODE (name) != SSA_NAME)
      continue;

    elt.version = SSA_NAME_VERSION (name);
    slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
    if (!slot)
      {
	gimple_debug_bind_reset_value (stmt);
	update_stmt (stmt);
	break;
      }

    SET_USE (use, (*slot)->new_name);
  }

  return false;
}

/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = gimple_assign_lhs (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}

/* Callback for htab_traverse.  Adds a field corresponding to an ssa name
   described in SLOT.  The type is passed in DATA.  */

int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
			   TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}

/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in the NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop), and another coming
     from a block preceding the loop, when no iterations are executed
     (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt));
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}

/* Bookkeeping for the shared-data structure: the variable STORE that
   values are stored to (in block STORE_BB), and the pointer LOAD that
   they are loaded through (in block LOAD_BB).  */

struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};

/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}

/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure that the shared data
   is stored in and loaded from.  */

static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type *reduction_list,
			   struct clsn_data *ld_st_data)
{
  reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}

/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}

/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */

static void
create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
}

/* Callback for htab_traverse.  Store the initial value given by the
   user for the reduction, saved in the INITIAL_VALUE field, into the
   reduction's field of the shared data structure.  The reduction is
   specified in SLOT.  The store information is passed in DATA.  */

int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}

/* Callback for htab_traverse.  Creates a load from a field of LOAD in
   LOAD_BB and a store to a field of STORE in STORE_BB for the ssa name
   and its duplicate specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
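
/* For instance (with illustrative SSA names, in the spirit of the
   example at the top of the file), for a name n_4 passed into the
   region, the callback above emits a store into the shared struct in
   STORE_BB and a matching load in LOAD_BB:

     .paral_data_store.32.n = n_4;
     n.30_33 = .paral_data_load.33_51->n;  */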

/* Moves all the variables used in the region between ENTRY and EXIT and
   defined outside of it (including the initial values of loop phi nodes)
   to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to *NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)
{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list->elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
         loop-carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && reduction_list->elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
		  (ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && reduction_list->elements () > 0)
	{
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}

/* Returns true if FNDECL was created to run in parallel.  */

bool
parallelized_function_p (tree fndecl)
{
  cgraph_node *node = cgraph_node::get (fndecl);
  gcc_assert (node != NULL);
  return node->parallelized_function;
}

/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}

/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type *reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gphi *phi, *nphi;
  gassign *stmt;
  gcond *cond_stmt, *cond_nit;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gphi_iterator gsi = gsi_start_phis (loop->header);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gphi_iterator gsi = gsi_start_phis (ex_bb);
       !gsi_end_p (gsi); )
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of a reduction.  If it is,
         keep the phi in the reduction's KEEP_RES field.  The
         PHI_RESULT of this phi is the resulting value of the reduction
         variable when exiting the loop.  */

      if (reduction_list->elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to the number of iterations
     according to the rhs of the exit condition.  */
  gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
  cond_nit = as_a <gcond *> (last_stmt (exit->src));
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name), nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}
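
/* Schematically (a simplified sketch -- the code above also takes care
   of phis, virtual operands and dominance info): a loop of the shape

     loop { BODY; if (COND) break; }

   is rewritten so that the exit test is executed first,

     loop { if (COND') break; BODY; }

   and the part of the final iteration that the new test placement
   disables is duplicated at the loop exit, initialized from NIT.  This
   produces the exit-first form that the omp expanders expect (see the
   overview comment at the top of the file).  */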

/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing the GIMPLE_OMP_PARALLEL statement.  */

1617static basic_block
1618create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1619		      tree new_data, unsigned n_threads, location_t loc)
1620{
1621  gimple_stmt_iterator gsi;
1622  basic_block bb, paral_bb, for_bb, ex_bb;
1623  tree t, param;
1624  gomp_parallel *omp_par_stmt;
1625  gimple omp_return_stmt1, omp_return_stmt2;
1626  gimple phi;
1627  gcond *cond_stmt;
1628  gomp_for *for_stmt;
1629  gomp_continue *omp_cont_stmt;
1630  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1631  edge exit, nexit, guard, end, e;
1632
1633  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
1634  bb = loop_preheader_edge (loop)->src;
1635  paral_bb = single_pred (bb);
1636  gsi = gsi_last_bb (paral_bb);
1637
1638  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1639  OMP_CLAUSE_NUM_THREADS_EXPR (t)
1640    = build_int_cst (integer_type_node, n_threads);
1641  omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1642  gimple_set_location (omp_par_stmt, loc);
1643
1644  gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
1645
1646  /* Initialize NEW_DATA.  */
1647  if (data)
1648    {
1649      gassign *assign_stmt;
1650
1651      gsi = gsi_after_labels (bb);
1652
1653      param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
1654      assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1655      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
1656
1657      assign_stmt = gimple_build_assign (new_data,
1658				  fold_convert (TREE_TYPE (new_data), param));
1659      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
1660    }
1661
1662  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
1663  bb = split_loop_exit_edge (single_dom_exit (loop));
1664  gsi = gsi_last_bb (bb);
1665  omp_return_stmt1 = gimple_build_omp_return (false);
1666  gimple_set_location (omp_return_stmt1, loc);
1667  gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
1668
1669  /* Extract data for GIMPLE_OMP_FOR.  */
1670  gcc_assert (loop->header == single_dom_exit (loop)->src);
1671  cond_stmt = as_a <gcond *> (last_stmt (loop->header));
1672
1673  cvar = gimple_cond_lhs (cond_stmt);
1674  cvar_base = SSA_NAME_VAR (cvar);
1675  phi = SSA_NAME_DEF_STMT (cvar);
1676  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1677  initvar = copy_ssa_name (cvar);
1678  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1679	   initvar);
1680  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1681
1682  gsi = gsi_last_nondebug_bb (loop->latch);
1683  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1684  gsi_remove (&gsi, true);
1685
1686  /* Prepare cfg.  */
1687  for_bb = split_edge (loop_preheader_edge (loop));
1688  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1689  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1690  gcc_assert (exit == single_dom_exit (loop));
1691
1692  guard = make_edge (for_bb, ex_bb, 0);
1693  single_succ_edge (loop->latch)->flags = 0;
1694  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  for (gphi_iterator gpi = gsi_start_phis (ex_bb);
       !gsi_end_p (gpi); gsi_next (&gpi))
    {
      source_location locus;
      tree def;
      gphi *phi = gpi.phi ();
      gphi *stmt;

      stmt = as_a <gphi *> (
	       SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit)));

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

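  /* Schematically, the construct just built reads
       GIMPLE_OMP_FOR (initvar = cvar_init; cvar_base COND final;
		       cvar_base = cvar_base + 1) schedule (static)
     where COND and final are taken from the original exit test.  */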
  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (omp_cont_stmt, loc);
  gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  omp_return_stmt2 = gimple_build_omp_return (true);
  gimple_set_location (omp_return_stmt2, loc);
  gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);

  /* After the above, dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}

/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes the number of iterations of LOOP.
   REDUCTION_LIST describes the reductions present in LOOP.  */

static void
gen_parallel_loop (struct loop *loop,
		   reduction_info_table_type *reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;
  unsigned int m_p_thread = 2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
	 || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in the body of the loop
     to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn; the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

       for (i = 0; i < n; ++i)
	 for (j = 0; j < m; ++j)
	   ...

     we should compute nit = n * m, not nit = n.
     Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  if (loop->inner)
    m_p_thread = 2;
  else
    m_p_thread = MIN_PER_THREAD;

  many_iterations_cond
    = fold_build2 (GE_EXPR, boolean_type_node, nit,
		   build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();
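
  /* From here on, LOOP is the version guarded by MANY_ITERATIONS_COND;
     the copy made by loop_version above keeps the original form and
     handles the small-iteration-count and possibly-zero cases.  */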

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (reduction_list->elements () > 0)
    reduction_list->traverse <struct loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in the
     loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
			new_arg_struct, n_threads, loc);
  if (reduction_list->elements () > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);
}

/* Returns true when LOOP contains vector PHI nodes.  */

static bool
loop_has_vector_phi_nodes (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  gphi_iterator gsi;
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
	goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, and insert it into REDUCTION_LIST.  */
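
/* For illustration (SSA names hypothetical): given a sum reduction

     # sum_1 = PHI <0.0 (preheader), sum_2 (latch)>
     sum_2 = sum_1 + a_i;

   REDUC_STMT is the addition, PHI is the statement defining sum_1, and
   the recorded reduction_code is PLUS_EXPR.  */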

static void
build_new_reduction (reduction_info_table_type *reduction_list,
		     gimple reduc_stmt, gphi *phi)
{
  reduction_info **slot;
  struct reduction_info *new_reduction;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Detected reduction. reduction stmt is: \n");
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
  slot = reduction_list->find_slot (new_reduction, INSERT);
  *slot = new_reduction;
}

/* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */

int
set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
{
  struct reduction_info *const red = *slot;
  gimple_set_uid (red->reduc_phi, red->reduc_version);
  return 1;
}

/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
{
  gphi_iterator gsi;
  loop_vec_info simple_loop_info;

  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      if (!simple_iv (loop, loop, res, &iv, true)
	  && simple_loop_info)
	{
	  gimple reduc_stmt
	    = vect_force_simple_reduction (simple_loop_info, phi, true,
					   &double_reduc);
	  if (reduc_stmt && !double_reduc)
	    build_new_reduction (reduction_list, reduc_stmt, phi);
	}
    }
  destroy_loop_vec_info (simple_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
}

/* Try to initialize NITER for the code generation part.  */

static bool
try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
{
  edge exit = single_dom_exit (loop);

  gcc_assert (exit);

  /* We need to know # of iterations, and there should be no uses of values
     defined inside the loop outside of it, unless the values are invariants
     of the loop.  */
  if (!number_of_iterations_exit (loop, exit, niter, false))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: number of iterations not known\n");
      return false;
    }

  return true;
}

/* Try to initialize REDUCTION_LIST for the code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop,
			   reduction_info_table_type *reduction_list)
{
  edge exit = single_dom_exit (loop);
  gphi_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);

  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit:   value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it is a part of a reduction pattern:\n");
	    }
	  if (reduction_list->elements () == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of a reduction.\n");
	      return false;
	    }
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of a reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is  ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is  ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
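  /* For instance (illustrative): sum = sum + a[i] communicates between
     iterations but was recorded as a reduction above, whereas a PHI that
     is neither a simple induction variable nor a known reduction -- say
     x = x * a[i] + b[i] -- makes the check below fail.  */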
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }

  return true;
}

/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

static bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  source_location loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  reduction_info_table_type reduction_list (10);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (loop, 0)
    {
      reduction_list.empty ();
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
	  if (loop->inner)
	    fprintf (dump_file, "loop %d is not innermost\n", loop->num);
	  else
	    fprintf (dump_file, "loop %d is innermost\n", loop->num);
	}

      /* When autopar is used together with the graphite pass
	 (-floop-parallelize-all), reuse the dependency-checking results
	 graphite recorded on the loop.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is not parallel according to graphite\n");
	  continue;
	}

      if (!single_dom_exit (loop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is !single_dom_exit\n");

	  continue;
	}

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
	estimated = max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && ((estimated != -1
	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;
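
      /* For example, with -ftree-parallelize-loops=4 a loop whose
	 estimated statement-execution count is at most
	 4 * MIN_PER_THREAD is considered not worth the parallelization
	 overhead and is skipped by the check above.  */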

      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, &reduction_list))
	continue;

      if (!flag_loop_parallelize_all
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (loop->inner)
	    fprintf (dump_file, "parallelizing outer loop %d\n",
		     loop->header->index);
	  else
	    fprintf (dump_file, "parallelizing inner loop %d\n",
		     loop->header->index);
	  loop_loc = find_loop_location (loop);
	  if (loop_loc != UNKNOWN_LOCATION)
	    fprintf (dump_file, "\nloop at %s:%d: ",
		     LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
	}
      gen_parallel_loop (loop, &reduction_list,
			 n_threads, &niter_desc);
    }

  free_stmt_vec_info_vec ();
  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}

/* Parallelization.  */

namespace {

const pass_data pass_data_parallelize_loops =
{
  GIMPLE_PASS, /* type */
  "parloops", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_parallelize_loops : public gimple_opt_pass
{
public:
  pass_parallelize_loops (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
  {}

  /* opt_pass methods: */
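  /* The pass is gated on -ftree-parallelize-loops=N with N > 1; a value
     of 1 would request a single thread and so leaves nothing to do.  */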
  virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
  virtual unsigned int execute (function *);

}; // class pass_parallelize_loops

unsigned
pass_parallelize_loops::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  if (parallelize_loops ())
    {
      fun->curr_properties &= ~(PROP_gimple_eomp);
      return TODO_update_ssa;
    }

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_parallelize_loops (gcc::context *ctxt)
{
  return new pass_parallelize_loops (ctxt);
}
