1/* Predictive commoning.
2   Copyright (C) 2005-2015 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20/* This file implements the predictive commoning optimization.  Predictive
21   commoning can be viewed as CSE around a loop, and with some improvements,
   as generalized strength reduction -- i.e., reusing values computed in
23   earlier iterations of a loop in the later ones.  So far, the pass only
24   handles the most useful case, that is, reusing values of memory references.
25   If you think this is all just a special case of PRE, you are sort of right;
26   however, concentrating on loops is simpler, and makes it possible to
27   incorporate data dependence analysis to detect the opportunities, perform
28   loop unrolling to avoid copies together with renaming immediately,
29   and if needed, we could also take register pressure into account.
30
31   Let us demonstrate what is done on an example:
32
33   for (i = 0; i < 100; i++)
34     {
35       a[i+2] = a[i] + a[i+1];
36       b[10] = b[10] + i;
37       c[i] = c[99 - i];
38       d[i] = d[i + 1];
39     }
40
41   1) We find data references in the loop, and split them to mutually
42      independent groups (i.e., we find components of a data dependence
43      graph).  We ignore read-read dependences whose distance is not constant.
44      (TODO -- we could also ignore antidependences).  In this example, we
45      find the following groups:
46
47      a[i]{read}, a[i+1]{read}, a[i+2]{write}
48      b[10]{read}, b[10]{write}
49      c[99 - i]{read}, c[i]{write}
50      d[i + 1]{read}, d[i]{write}
51
52   2) Inside each of the group, we verify several conditions:
53      a) all the references must differ in indices only, and the indices
54	 must all have the same step
55      b) the references must dominate loop latch (and thus, they must be
56	 ordered by dominance relation).
57      c) the distance of the indices must be a small multiple of the step
58      We are then able to compute the difference of the references (# of
59      iterations before they point to the same place as the first of them).
60      Also, in case there are writes in the loop, we split the groups into
61      chains whose head is the write whose values are used by the reads in
62      the same chain.  The chains are then processed independently,
63      making the further transformations simpler.  Also, the shorter chains
64      need the same number of registers, but may require lower unrolling
65      factor in order to get rid of the copies on the loop latch.
66
67      In our example, we get the following chains (the chain for c is invalid).
68
69      a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70      b[10]{read,+0}, b[10]{write,+0}
71      d[i + 1]{read,+0}, d[i]{write,+1}
72
73   3) For each read, we determine the read or write whose value it reuses,
74      together with the distance of this reuse.  I.e. we take the last
75      reference before it with distance 0, or the last of the references
76      with the smallest positive distance to the read.  Then, we remove
77      the references that are not used in any of these chains, discard the
78      empty groups, and propagate all the links so that they point to the
79      single root reference of the chain (adjusting their distance
80      appropriately).  Some extra care needs to be taken for references with
81      step 0.  In our example (the numbers indicate the distance of the
82      reuse),
83
84      a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85      b[10] --> (*) 1, b[10] (*)
86
87   4) The chains are combined together if possible.  If the corresponding
88      elements of two chains are always combined together with the same
89      operator, we remember just the result of this combination, instead
90      of remembering the values separately.  We may need to perform
91      reassociation to enable combining, for example
92
93      e[i] + f[i+1] + e[i+1] + f[i]
94
95      can be reassociated as
96
97      (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99      and we can combine the chains for e and f into one chain.
100
101   5) For each root reference (end of the chain) R, let N be maximum distance
102      of a reference reusing its value.  Variables R0 up to RN are created,
103      together with phi nodes that transfer values from R1 .. RN to
104      R0 .. R(N-1).
105      Initial values are loaded to R0..R(N-1) (in case not all references
106      must necessarily be accessed and they may trap, we may fail here;
107      TODO sometimes, the loads could be guarded by a check for the number
108      of iterations).  Values loaded/stored in roots are also copied to
109      RN.  Other reads are replaced with the appropriate variable Ri.
110      Everything is put to SSA form.
111
112      As a small improvement, if R0 is dead after the root (i.e., all uses of
113      the value with the maximum distance dominate the root), we can avoid
114      creating RN and use R0 instead of it.
115
116      In our example, we get (only the parts concerning a and b are shown):
117      for (i = 0; i < 100; i++)
118	{
119	  f = phi (a[0], s);
120	  s = phi (a[1], f);
121	  x = phi (b[10], x);
122
123	  f = f + s;
124	  a[i+2] = f;
125	  x = x + i;
126	  b[10] = x;
127	}
128
129   6) Factor F for unrolling is determined as the smallest common multiple of
130      (N + 1) for each root reference (N for references for that we avoided
131      creating RN).  If F and the loop is small enough, loop is unrolled F
132      times.  The stores to RN (R0) in the copies of the loop body are
133      periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134      be coalesced and the copies can be eliminated.
135
136      TODO -- copy propagation and other optimizations may change the live
137      ranges of the temporary registers and prevent them from being coalesced;
138      this may increase the register pressure.
139
140      In our case, F = 2 and the (main loop of the) result is
141
142      for (i = 0; i < ...; i += 2)
143        {
144          f = phi (a[0], f);
145          s = phi (a[1], s);
146          x = phi (b[10], x);
147
148          f = f + s;
149          a[i+2] = f;
150          x = x + i;
151          b[10] = x;
152
153          s = s + f;
154          a[i+3] = s;
155          x = x + i;
156          b[10] = x;
157       }
158
159   TODO -- stores killing other stores can be taken into account, e.g.,
160   for (i = 0; i < n; i++)
161     {
162       a[i] = 1;
163       a[i+2] = 2;
164     }
165
166   can be replaced with
167
168   t0 = a[0];
169   t1 = a[1];
170   for (i = 0; i < n; i++)
171     {
172       a[i] = 1;
173       t2 = 2;
174       t0 = t1;
175       t1 = t2;
176     }
177   a[n] = t0;
178   a[n+1] = t1;
179
180   The interesting part is that this would generalize store motion; still, since
181   sm is performed elsewhere, it does not seem that important.
182
183   Predictive commoning can be generalized for arbitrary computations (not
184   just memory loads), and also nontrivial transfer functions (e.g., replacing
185   i * i with ii_last + 2 * i + 1), to generalize strength reduction.  */
186
187#include "config.h"
188#include "system.h"
189#include "coretypes.h"
190#include "tm.h"
191#include "hash-set.h"
192#include "machmode.h"
193#include "vec.h"
194#include "double-int.h"
195#include "input.h"
196#include "alias.h"
197#include "symtab.h"
198#include "wide-int.h"
199#include "inchash.h"
200#include "tree.h"
201#include "fold-const.h"
202#include "tm_p.h"
203#include "cfgloop.h"
204#include "predict.h"
205#include "hard-reg-set.h"
206#include "function.h"
207#include "dominance.h"
208#include "cfg.h"
209#include "basic-block.h"
210#include "tree-ssa-alias.h"
211#include "internal-fn.h"
212#include "tree-eh.h"
213#include "gimple-expr.h"
214#include "is-a.h"
215#include "gimple.h"
216#include "gimplify.h"
217#include "gimple-iterator.h"
218#include "gimplify-me.h"
219#include "gimple-ssa.h"
220#include "tree-phinodes.h"
221#include "ssa-iterators.h"
222#include "stringpool.h"
223#include "tree-ssanames.h"
224#include "tree-ssa-loop-ivopts.h"
225#include "tree-ssa-loop-manip.h"
226#include "tree-ssa-loop-niter.h"
227#include "tree-ssa-loop.h"
228#include "tree-into-ssa.h"
229#include "hashtab.h"
230#include "rtl.h"
231#include "flags.h"
232#include "statistics.h"
233#include "real.h"
234#include "fixed-value.h"
235#include "insn-config.h"
236#include "expmed.h"
237#include "dojump.h"
238#include "explow.h"
239#include "calls.h"
240#include "emit-rtl.h"
241#include "varasm.h"
242#include "stmt.h"
243#include "expr.h"
244#include "tree-dfa.h"
245#include "tree-ssa.h"
246#include "tree-data-ref.h"
247#include "tree-scalar-evolution.h"
248#include "tree-chrec.h"
249#include "params.h"
250#include "gimple-pretty-print.h"
251#include "tree-pass.h"
252#include "tree-affine.h"
253#include "tree-inline.h"
254#include "wide-int-print.h"
255
/* The maximum number of iterations between the considered memory
   references.  References whose distance from the root of their chain
   reaches this bound are dropped (see add_ref_to_chain).  Fewer slots
   are allowed on register-starved targets.  */

#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
260
/* Data references (or phi nodes that carry data reference values across
   loop iterations).  */

typedef struct dref_d
{
  /* The reference itself.  NULL for artificial references, i.e.,
     looparound phi nodes and combination references (see dump_dref).  */
  struct data_reference *ref;

  /* The statement in that the reference appears.  */
  gimple stmt;

  /* In case that STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop).  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  widest_int offset;

  /* Number of the reference in a component, in dominance ordering.  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered.  */
  unsigned always_accessed : 1;
} *dref;
291
292
/* Type of the chain of the references.  */

enum chain_type
{
  /* The addresses of the references in the chain are constant.  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* Root of the chain is store, the rest are loads.  */
  CT_STORE_LOAD,

  /* A combination of two chains (see step 4) of the comment at the top
     of this file; the operands are in the CH1/CH2 fields of struct
     chain).  */
  CT_COMBINATION
};
309
/* Chains of data references, as described in steps 2)-4) of the comment
   at the top of this file.  */

typedef struct chain
{
  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain.  The root reference is at index 0
     (see get_chain_root).  */
  vec<dref> refs;

  /* The maximum distance of the reference in the chain from the root.  */
  unsigned length;

  /* The variables used to copy the value throughout iterations.  */
  vec<tree> vars;

  /* Initializers for the variables.  */
  vec<tree> inits;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;
} *chain_p;
345
346
/* Describes the knowledge about the step of the memory references in
   the component.  Computed by suitable_reference_p.  */

enum ref_step_type
{
  /* The step is zero.  */
  RS_INVARIANT,

  /* The step is nonzero.  */
  RS_NONZERO,

  /* The step may or may not be nonzero.  */
  RS_ANY
};
361
/* Components of the data dependence graph.  */

struct component
{
  /* The references in the component.  Expected to be in dominance order
     (checked by suitable_component_p).  */
  vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* Next component in the list.  */
  struct component *next;
};
375
/* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */

static bitmap looparound_phis;

/* Cache used by tree_to_aff_combination_expand, shared across the whole
   pass (see aff_combination_dr_offset and determine_offset).  */

static hash_map<tree, name_expansion *> *name_expansions;
383
384/* Dumps data reference REF to FILE.  */
385
386extern void dump_dref (FILE *, dref);
387void
388dump_dref (FILE *file, dref ref)
389{
390  if (ref->ref)
391    {
392      fprintf (file, "    ");
393      print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
394      fprintf (file, " (id %u%s)\n", ref->pos,
395	       DR_IS_READ (ref->ref) ? "" : ", write");
396
397      fprintf (file, "      offset ");
398      print_decs (ref->offset, file);
399      fprintf (file, "\n");
400
401      fprintf (file, "      distance %u\n", ref->distance);
402    }
403  else
404    {
405      if (gimple_code (ref->stmt) == GIMPLE_PHI)
406	fprintf (file, "    looparound ref\n");
407      else
408	fprintf (file, "    combination ref\n");
409      fprintf (file, "      in statement ");
410      print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
411      fprintf (file, "\n");
412      fprintf (file, "      distance %u\n", ref->distance);
413    }
414
415}
416
417/* Dumps CHAIN to FILE.  */
418
419extern void dump_chain (FILE *, chain_p);
420void
421dump_chain (FILE *file, chain_p chain)
422{
423  dref a;
424  const char *chain_type;
425  unsigned i;
426  tree var;
427
428  switch (chain->type)
429    {
430    case CT_INVARIANT:
431      chain_type = "Load motion";
432      break;
433
434    case CT_LOAD:
435      chain_type = "Loads-only";
436      break;
437
438    case CT_STORE_LOAD:
439      chain_type = "Store-loads";
440      break;
441
442    case CT_COMBINATION:
443      chain_type = "Combination";
444      break;
445
446    default:
447      gcc_unreachable ();
448    }
449
450  fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
451	   chain->combined ? " (combined)" : "");
452  if (chain->type != CT_INVARIANT)
453    fprintf (file, "  max distance %u%s\n", chain->length,
454	     chain->has_max_use_after ? "" : ", may reuse first");
455
456  if (chain->type == CT_COMBINATION)
457    {
458      fprintf (file, "  equal to %p %s %p in type ",
459	       (void *) chain->ch1, op_symbol_code (chain->op),
460	       (void *) chain->ch2);
461      print_generic_expr (file, chain->rslt_type, TDF_SLIM);
462      fprintf (file, "\n");
463    }
464
465  if (chain->vars.exists ())
466    {
467      fprintf (file, "  vars");
468      FOR_EACH_VEC_ELT (chain->vars, i, var)
469	{
470	  fprintf (file, " ");
471	  print_generic_expr (file, var, TDF_SLIM);
472	}
473      fprintf (file, "\n");
474    }
475
476  if (chain->inits.exists ())
477    {
478      fprintf (file, "  inits");
479      FOR_EACH_VEC_ELT (chain->inits, i, var)
480	{
481	  fprintf (file, " ");
482	  print_generic_expr (file, var, TDF_SLIM);
483	}
484      fprintf (file, "\n");
485    }
486
487  fprintf (file, "  references:\n");
488  FOR_EACH_VEC_ELT (chain->refs, i, a)
489    dump_dref (file, a);
490
491  fprintf (file, "\n");
492}
493
494/* Dumps CHAINS to FILE.  */
495
496extern void dump_chains (FILE *, vec<chain_p> );
497void
498dump_chains (FILE *file, vec<chain_p> chains)
499{
500  chain_p chain;
501  unsigned i;
502
503  FOR_EACH_VEC_ELT (chains, i, chain)
504    dump_chain (file, chain);
505}
506
507/* Dumps COMP to FILE.  */
508
509extern void dump_component (FILE *, struct component *);
510void
511dump_component (FILE *file, struct component *comp)
512{
513  dref a;
514  unsigned i;
515
516  fprintf (file, "Component%s:\n",
517	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
518  FOR_EACH_VEC_ELT (comp->refs, i, a)
519    dump_dref (file, a);
520  fprintf (file, "\n");
521}
522
523/* Dumps COMPS to FILE.  */
524
525extern void dump_components (FILE *, struct component *);
526void
527dump_components (FILE *file, struct component *comps)
528{
529  struct component *comp;
530
531  for (comp = comps; comp; comp = comp->next)
532    dump_component (file, comp);
533}
534
535/* Frees a chain CHAIN.  */
536
537static void
538release_chain (chain_p chain)
539{
540  dref ref;
541  unsigned i;
542
543  if (chain == NULL)
544    return;
545
546  FOR_EACH_VEC_ELT (chain->refs, i, ref)
547    free (ref);
548
549  chain->refs.release ();
550  chain->vars.release ();
551  chain->inits.release ();
552
553  free (chain);
554}
555
556/* Frees CHAINS.  */
557
558static void
559release_chains (vec<chain_p> chains)
560{
561  unsigned i;
562  chain_p chain;
563
564  FOR_EACH_VEC_ELT (chains, i, chain)
565    release_chain (chain);
566  chains.release ();
567}
568
/* Frees a component COMP.  Only the reference vector and the component
   structure itself are freed; the dref structures are freed by the
   callers (e.g. filter_suitable_components) or owned by chains.  */

static void
release_component (struct component *comp)
{
  comp->refs.release ();
  free (comp);
}
577
578/* Frees list of components COMPS.  */
579
580static void
581release_components (struct component *comps)
582{
583  struct component *act, *next;
584
585  for (act = comps; act; act = next)
586    {
587      next = act->next;
588      release_component (act);
589    }
590}
591
/* Finds a root of tree given by FATHERS containing A, and performs path
   shortening.  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  unsigned root = a;

  /* Walk up to the representative of the component.  */
  while (fathers[root] != root)
    root = fathers[root];

  /* Path compression: make every node on the path point directly
     at ROOT.  */
  while (a != root)
    {
      unsigned next = fathers[a];
      fathers[a] = root;
      a = next;
    }

  return root;
}
611
612/* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
613   components, A and B are components to merge.  */
614
615static void
616merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
617{
618  unsigned ca = component_of (fathers, a);
619  unsigned cb = component_of (fathers, b);
620
621  if (ca == cb)
622    return;
623
624  if (sizes[ca] < sizes[cb])
625    {
626      sizes[cb] += sizes[ca];
627      fathers[ca] = cb;
628    }
629  else
630    {
631      sizes[ca] += sizes[cb];
632      fathers[cb] = ca;
633    }
634}
635
636/* Returns true if A is a reference that is suitable for predictive commoning
637   in the innermost loop that contains it.  REF_STEP is set according to the
638   step of the reference A.  */
639
640static bool
641suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
642{
643  tree ref = DR_REF (a), step = DR_STEP (a);
644
645  if (!step
646      || TREE_THIS_VOLATILE (ref)
647      || !is_gimple_reg_type (TREE_TYPE (ref))
648      || tree_could_throw_p (ref))
649    return false;
650
651  if (integer_zerop (step))
652    *ref_step = RS_INVARIANT;
653  else if (integer_nonzerop (step))
654    *ref_step = RS_NONZERO;
655  else
656    *ref_step = RS_ANY;
657
658  return true;
659}
660
/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */

static void
aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
{
  tree type = TREE_TYPE (DR_OFFSET (dr));
  aff_tree delta;

  /* Expand the symbolic part of the offset into an affine combination,
     using (and updating) the shared NAME_EXPANSIONS cache.  */
  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
				  &name_expansions);
  /* Then add the constant part of the address.  */
  aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
  aff_combination_add (offset, &delta);
}
674
/* Determines number of iterations of the innermost enclosing loop before B
   refers to exactly the same location as A and stores it to OFF.  If A and
   B do not have the same step, they never meet, or anything else fails,
   returns false, otherwise returns true.  Both A and B are assumed to
   satisfy suitable_reference_p.  */

static bool
determine_offset (struct data_reference *a, struct data_reference *b,
		  widest_int *off)
{
  aff_tree diff, baseb, step;
  tree typea, typeb;

  /* Check that both the references access the location in the same type.  */
  typea = TREE_TYPE (DR_REF (a));
  typeb = TREE_TYPE (DR_REF (b));
  if (!useless_type_conversion_p (typeb, typea))
    return false;

  /* Check whether the base address and the step of both references is the
     same.  */
  if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
      || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
    return false;

  if (integer_zerop (DR_STEP (a)))
    {
      /* If the references have loop invariant address, check that they access
	 exactly the same location.  */
      *off = 0;
      return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
    }

  /* Compare the offsets of the addresses, and check whether the difference
     is a multiple of step.  DIFF is set to offset (A) - offset (B).  */
  aff_combination_dr_offset (a, &diff);
  aff_combination_dr_offset (b, &baseb);
  aff_combination_scale (&baseb, -1);
  aff_combination_add (&diff, &baseb);

  /* OFF is then the value for which offset (B) + OFF * step == offset (A),
     i.e., the number of iterations before B meets A.  */
  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
				  &step, &name_expansions);
  return aff_combination_constant_multiple_p (&diff, &step, off);
}
720
721/* Returns the last basic block in LOOP for that we are sure that
722   it is executed whenever the loop is entered.  */
723
724static basic_block
725last_always_executed_block (struct loop *loop)
726{
727  unsigned i;
728  vec<edge> exits = get_loop_exit_edges (loop);
729  edge ex;
730  basic_block last = loop->latch;
731
732  FOR_EACH_VEC_ELT (exits, i, ex)
733    last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
734  exits.release ();
735
736  return last;
737}
738
/* Splits dependence graph on DATAREFS described by DEPENDS to components.
   Returns the list of components, or NULL on failure.  */

static struct component *
split_data_refs_to_components (struct loop *loop,
			       vec<data_reference_p> datarefs,
			       vec<ddr_p> depends)
{
  unsigned i, n = datarefs.length ();
  unsigned ca, ia, ib, bad;
  /* Union-find forest over the data references; the extra slot N
     represents the "bad" component of references we cannot handle.  */
  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
  struct component **comps;
  struct data_reference *dr, *dra, *drb;
  struct data_dependence_relation *ddr;
  struct component *comp_list = NULL, *comp;
  dref dataref;
  basic_block last_always_executed = last_always_executed_block (loop);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      if (!DR_REF (dr))
	{
	  /* A fake reference for call or asm_expr that may clobber memory;
	     just fail.  */
	  goto end;
	}
      /* predcom pass isn't prepared to handle calls with data references.  */
      if (is_gimple_call (DR_STMT (dr)))
	goto end;
      /* Remember the index of DR in DATAREFS in its AUX field.  */
      dr->aux = (void *) (size_t) i;
      comp_father[i] = i;
      comp_size[i] = 1;
    }

  /* A component reserved for the "bad" data references.  */
  comp_father[n] = n;
  comp_size[n] = 1;

  /* References not suitable for predictive commoning go straight to the
     "bad" component.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum ref_step_type dummy;

      if (!suitable_reference_p (dr, &dummy))
	{
	  ia = (unsigned) (size_t) dr->aux;
	  merge_comps (comp_father, comp_size, n, ia);
	}
    }

  /* Merge the components of references related by a dependence.  */
  FOR_EACH_VEC_ELT (depends, i, ddr)
    {
      widest_int dummy_off;

      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      dra = DDR_A (ddr);
      drb = DDR_B (ddr);
      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
      if (ia == ib)
	continue;

      bad = component_of (comp_father, n);

      /* If both A and B are reads, we may ignore unsuitable dependences.  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
	{
	  if (ia == bad || ib == bad
	      || !determine_offset (dra, drb, &dummy_off))
	    continue;
	}
      /* If A is read and B write or vice versa and there is unsuitable
	 dependence, instead of merging both components into a component
	 that will certainly not pass suitable_component_p, just put the
	 read into bad component, perhaps at least the write together with
	 all the other data refs in it's component will be optimizable.  */
      else if (DR_IS_READ (dra) && ib != bad)
	{
	  if (ia == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ia);
	      continue;
	    }
	}
      else if (DR_IS_READ (drb) && ia != bad)
	{
	  if (ib == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ib);
	      continue;
	    }
	}

      merge_comps (comp_father, comp_size, ia, ib);
    }

  /* Translate the union-find structure into the list of components,
     dropping everything that ended up in the "bad" component.  */
  comps = XCNEWVEC (struct component *, n);
  bad = component_of (comp_father, n);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      ia = (unsigned) (size_t) dr->aux;
      ca = component_of (comp_father, ia);
      if (ca == bad)
	continue;

      comp = comps[ca];
      if (!comp)
	{
	  comp = XCNEW (struct component);
	  comp->refs.create (comp_size[ca]);
	  comps[ca] = comp;
	}

      dataref = XCNEW (struct dref_d);
      dataref->ref = dr;
      dataref->stmt = DR_STMT (dr);
      dataref->offset = 0;
      dataref->distance = 0;

      dataref->always_accessed
	      = dominated_by_p (CDI_DOMINATORS, last_always_executed,
				gimple_bb (dataref->stmt));
      dataref->pos = comp->refs.length ();
      comp->refs.quick_push (dataref);
    }

  /* Chain the non-empty components into a list.  */
  for (i = 0; i < n; i++)
    {
      comp = comps[i];
      if (comp)
	{
	  comp->next = comp_list;
	  comp_list = comp;
	}
    }
  free (comps);

end:
  free (comp_father);
  free (comp_size);
  return comp_list;
}
886
/* Returns true if the component COMP satisfies the conditions
   described in 2) at the beginning of this file.  LOOP is the current
   loop.  */

static bool
suitable_component_p (struct loop *loop, struct component *comp)
{
  unsigned i;
  dref a, first;
  basic_block ba, bp = loop->header;
  bool ok, has_write = false;

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (a->stmt);

      /* Condition 2b): each reference must be executed exactly once per
	 iteration (thus dominate the loop latch).  */
      if (!just_once_each_iteration_p (loop, ba))
	return false;

      /* The references are expected to be sorted in dominance order.  */
      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
	has_write = true;
    }

  first = comp->refs[0];
  ok = suitable_reference_p (first->ref, &comp->comp_step);
  gcc_assert (ok);
  first->offset = 0;

  /* Conditions 2a) and 2c): compute the offset of each reference relative
     to the first one; determine_offset fails unless the references differ
     only in indices with the same step.  */
  for (i = 1; comp->refs.iterate (i, &a); i++)
    {
      if (!determine_offset (first->ref, a->ref, &a->offset))
	return false;

#ifdef ENABLE_CHECKING
      {
	enum ref_step_type a_step;
	ok = suitable_reference_p (a->ref, &a_step);
	gcc_assert (ok && a_step == comp->comp_step);
      }
#endif
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}
941
/* Check the conditions on references inside each of components COMPS,
   and remove the unsuitable components from the list.  The new list
   of components is returned.  The conditions are described in 2) at
   the beginning of this file.  LOOP is the current loop.  */

static struct component *
filter_suitable_components (struct loop *loop, struct component *comps)
{
  struct component **comp, *act;

  /* COMP walks the "next" links by address, so that an unsuitable
     component can be unlinked in place.  */
  for (comp = &comps; *comp; )
    {
      act = *comp;
      if (suitable_component_p (loop, act))
	comp = &act->next;
      else
	{
	  dref ref;
	  unsigned i;

	  /* Unlink ACT and free it together with its references.  */
	  *comp = act->next;
	  FOR_EACH_VEC_ELT (act->refs, i, ref)
	    free (ref);
	  release_component (act);
	}
    }

  return comps;
}
971
972/* Compares two drefs A and B by their offset and position.  Callback for
973   qsort.  */
974
975static int
976order_drefs (const void *a, const void *b)
977{
978  const dref *const da = (const dref *) a;
979  const dref *const db = (const dref *) b;
980  int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
981
982  if (offcmp != 0)
983    return offcmp;
984
985  return (*da)->pos - (*db)->pos;
986}
987
/* Returns root of the CHAIN.  The root is the first reference pushed to
   the chain (see make_rooted_chain).  */

static inline dref
get_chain_root (chain_p chain)
{
  return chain->refs[0];
}
995
/* Adds REF to the chain CHAIN.  REF is freed (not added) if it is too far
   from the root of the chain.  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);

  /* REF must not precede the root of the chain.  */
  gcc_assert (wi::les_p (root->offset, ref->offset));
  widest_int dist = ref->offset - root->offset;
  /* References too far from the root are not worth keeping.  */
  if (wi::leu_p (MAX_DISTANCE, dist))
    {
      free (ref);
      return;
    }
  gcc_assert (wi::fits_uhwi_p (dist));

  chain->refs.safe_push (ref);

  ref->distance = dist.to_uhwi ();

  /* Keep CHAIN->LENGTH the maximum distance in the chain; a new maximum
     invalidates HAS_MAX_USE_AFTER, which is recomputed below.  */
  if (ref->distance >= chain->length)
    {
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  /* Record a use with the maximal distance that comes after the root.  */
  if (ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}
1028
1029/* Returns the chain for invariant component COMP.  */
1030
1031static chain_p
1032make_invariant_chain (struct component *comp)
1033{
1034  chain_p chain = XCNEW (struct chain);
1035  unsigned i;
1036  dref ref;
1037
1038  chain->type = CT_INVARIANT;
1039
1040  chain->all_always_accessed = true;
1041
1042  FOR_EACH_VEC_ELT (comp->refs, i, ref)
1043    {
1044      chain->refs.safe_push (ref);
1045      chain->all_always_accessed &= ref->always_accessed;
1046    }
1047
1048  return chain;
1049}
1050
1051/* Make a new chain rooted at REF.  */
1052
1053static chain_p
1054make_rooted_chain (dref ref)
1055{
1056  chain_p chain = XCNEW (struct chain);
1057
1058  chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
1059
1060  chain->refs.safe_push (ref);
1061  chain->all_always_accessed = ref->always_accessed;
1062
1063  ref->distance = 0;
1064
1065  return chain;
1066}
1067
1068/* Returns true if CHAIN is not trivial.  */
1069
1070static bool
1071nontrivial_chain_p (chain_p chain)
1072{
1073  return chain != NULL && chain->refs.length () > 1;
1074}
1075
1076/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1077   is no such name.  */
1078
1079static tree
1080name_for_ref (dref ref)
1081{
1082  tree name;
1083
1084  if (is_gimple_assign (ref->stmt))
1085    {
1086      if (!ref->ref || DR_IS_READ (ref->ref))
1087	name = gimple_assign_lhs (ref->stmt);
1088      else
1089	name = gimple_assign_rhs1 (ref->stmt);
1090    }
1091  else
1092    name = PHI_RESULT (ref->stmt);
1093
1094  return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1095}
1096
/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  widest_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, -1);
  /* DIFF now holds the offset of ROOT minus the offset of REF.  */
  aff_combination_add (&diff, &base);

  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &name_expansions);
  /* The difference must be a constant multiple of the step ...  */
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  /* ... and that multiple must be exactly DISTANCE.  */
  if (off != distance)
    return false;

  return true;
}
1138
/* Finds looparound phi node of LOOP that copies the value of REF, and if its
   initial value is correct (equal to initial value of REF shifted by one
   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
   is the root of the current chain.  */

static gphi *
find_looparound_phi (struct loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gphi *phi = NULL;
  gimple init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gphi_iterator psi;

  /* Determine the ssa name holding the value of REF: the lhs of a load,
     the stored rhs of a store, or the phi result.  */
  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  /* Look for a phi node in the loop header whose latch argument is NAME,
     i.e., a phi that copies the value around the loop.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
	break;
    }

  if (gsi_end_p (psi))
    return NULL;

  /* The value on the entry edge must come from a plain assignment loading
     from memory ...  */
  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&init_dr, loop))
    return NULL;

  /* ... and it must access the location of REF one iteration earlier,
     i.e., at distance REF->DISTANCE + 1 from ROOT.  */
  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  return phi;
}
1202
1203/* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */
1204
1205static void
1206insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1207{
1208  dref nw = XCNEW (struct dref_d), aref;
1209  unsigned i;
1210
1211  nw->stmt = phi;
1212  nw->distance = ref->distance + 1;
1213  nw->always_accessed = 1;
1214
1215  FOR_EACH_VEC_ELT (chain->refs, i, aref)
1216    if (aref->distance >= nw->distance)
1217      break;
1218  chain->refs.safe_insert (i, nw);
1219
1220  if (nw->distance > chain->length)
1221    {
1222      chain->length = nw->distance;
1223      chain->has_max_use_after = false;
1224    }
1225}
1226
1227/* For references in CHAIN that are copied around the LOOP (created previously
1228   by PRE, or by user), add the results of such copies to the chain.  This
1229   enables us to remove the copies by unrolling, and may need less registers
1230   (also, it may allow us to combine chains together).  */
1231
1232static void
1233add_looparound_copies (struct loop *loop, chain_p chain)
1234{
1235  unsigned i;
1236  dref ref, root = get_chain_root (chain);
1237  gphi *phi;
1238
1239  FOR_EACH_VEC_ELT (chain->refs, i, ref)
1240    {
1241      phi = find_looparound_phi (loop, ref, root);
1242      if (!phi)
1243	continue;
1244
1245      bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1246      insert_looparound_copy (chain, ref, phi);
1247    }
1248}
1249
/* Find roots of the values and determine distances in the component COMP.
   The references are redistributed into CHAINS.  LOOP is the current
   loop.  */

static void
determine_roots_comp (struct loop *loop,
		      struct component *comp,
		      vec<chain_p> *chains)
{
  unsigned i;
  dref a;
  chain_p chain = NULL;
  widest_int last_ofs = 0;

  /* Invariants are handled specially.  */
  if (comp->comp_step == RS_INVARIANT)
    {
      chain = make_invariant_chain (comp);
      chains->safe_push (chain);
      return;
    }

  /* Process the references in the order of increasing offsets, so that each
     chain is rooted at the reference with the smallest offset.  */
  comp->refs.qsort (order_drefs);

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      /* Start a new chain at every write, and whenever the distance from
	 the current root reaches MAX_DISTANCE.  */
      if (!chain || DR_IS_WRITE (a->ref)
	  || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
	{
	  /* Keep the finished chain only if it contains more than one
	     reference; otherwise release it.  */
	  if (nontrivial_chain_p (chain))
	    {
	      add_looparound_copies (loop, chain);
	      chains->safe_push (chain);
	    }
	  else
	    release_chain (chain);
	  chain = make_rooted_chain (a);
	  last_ofs = a->offset;
	  continue;
	}

      add_ref_to_chain (chain, a);
    }

  /* Finish the last chain.  */
  if (nontrivial_chain_p (chain))
    {
      add_looparound_copies (loop, chain);
      chains->safe_push (chain);
    }
  else
    release_chain (chain);
}
1302
1303/* Find roots of the values and determine distances in components COMPS, and
1304   separates the references to CHAINS.  LOOP is the current loop.  */
1305
1306static void
1307determine_roots (struct loop *loop,
1308		 struct component *comps, vec<chain_p> *chains)
1309{
1310  struct component *comp;
1311
1312  for (comp = comps; comp; comp = comp->next)
1313    determine_roots_comp (loop, comp, chains);
1314}
1315
/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in rhs.  */

static void
replace_ref_with (gimple stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gassign *new_stmt;
  gimple_stmt_iterator bsi, psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* NOTE(review): phi statements are expected only as reads here
	 (looparound copies), so neither SET nor IN_LHS may hold.  */
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      bsi = gsi_after_labels (gimple_bb (stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as lhs or rhs of modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      /* Re-fetch the statement -- setting the rhs may have replaced it.  */
      stmt = gsi_stmt (bsi);
      update_stmt (stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

	 OLD = VAL

	 If OLD is a memory reference, then VAL is gimple_val, and we transform
	 this to

	 OLD = VAL
	 NEW = VAL

	 Otherwise, we are replacing a combination chain,
	 VAL is the expression that performs the combination, and OLD is an
	 SSA name.  In this case, we transform the assignment to

	 OLD = VAL
	 NEW = OLD

	 */

      val = gimple_assign_lhs (stmt);
      if (TREE_CODE (val) != SSA_NAME)
	{
	  val = gimple_assign_rhs1 (stmt);
	  gcc_assert (gimple_assign_single_p (stmt));
	  /* A clobber cannot serve as the initialization value; use the
	     default definition of the variable instead.  */
	  if (TREE_CLOBBER_P (val))
	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
	  else
	    gcc_assert (gimple_assign_copy_p (stmt));
	}
    }
  else
    {
      /* VAL = OLD

	 is transformed to

	 VAL = OLD
	 NEW = VAL  */

      val = gimple_assign_lhs (stmt);
    }

  /* Insert the assignment NEW = VAL right after STMT.  */
  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}
1406
/* Returns a memory reference to DR in the ITER-th iteration of
   the loop it was analyzed in.  Append init stmts to STMTS.  */

static tree
ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
{
  tree off = DR_OFFSET (dr);
  tree coff = DR_INIT (dr);
  if (iter == 0)
    ;
  else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
    /* A constant step can be folded into the constant offset part.  */
    coff = size_binop (PLUS_EXPR, coff,
		       size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  else
    /* Otherwise, add ITER * STEP to the variable offset part.  */
    off = size_binop (PLUS_EXPR, off,
		      size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  /* Build the address BASE + OFF and gimplify it into STMTS.  */
  tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
  addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
				 is_gimple_mem_ref_addr, NULL_TREE);
  /* The constant part becomes the MEM_REF offset operand, which also
     carries the alias pointer type.  */
  tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
  /* While data-ref analysis punts on bit offsets it still handles
     bitfield accesses at byte boundaries.  Cope with that.  Note that
     we cannot simply re-apply the outer COMPONENT_REF because the
     byte-granular portion of it is already applied via DR_INIT and
     DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
     start at offset zero.  */
  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
    {
      tree field = TREE_OPERAND (DR_REF (dr), 1);
      return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
		     build2 (MEM_REF, DECL_BIT_FIELD_TYPE (field),
			     addr, alias_ptr),
		     DECL_SIZE (field), bitsize_zero_node);
    }
  else
    return fold_build2 (MEM_REF, TREE_TYPE (DR_REF (dr)), addr, alias_ptr);
}
1445
1446/* Get the initialization expression for the INDEX-th temporary variable
1447   of CHAIN.  */
1448
1449static tree
1450get_init_expr (chain_p chain, unsigned index)
1451{
1452  if (chain->type == CT_COMBINATION)
1453    {
1454      tree e1 = get_init_expr (chain->ch1, index);
1455      tree e2 = get_init_expr (chain->ch2, index);
1456
1457      return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1458    }
1459  else
1460    return chain->inits[index];
1461}
1462
1463/* Returns a new temporary variable used for the I-th variable carrying
1464   value of REF.  The variable's uid is marked in TMP_VARS.  */
1465
1466static tree
1467predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1468{
1469  tree type = TREE_TYPE (ref);
1470  /* We never access the components of the temporary variable in predictive
1471     commoning.  */
1472  tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1473  bitmap_set_bit (tmp_vars, DECL_UID (var));
1474  return var;
1475}
1476
/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  /* If no use occurs at the maximal distance, the variable at that distance
     can be reused as the first variable of the next iteration.  */
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gphi *phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is an nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  /* The chain uses N + 1 variables (the last possibly shared with the
     first when REUSE_FIRST).  */
  chain->vars.create (n + 1);

  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (root->stmt);
  else
    ref = DR_REF (root->ref);

  /* Create the temporary variables ...  */
  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }
  if (reuse_first)
    chain->vars.quick_push (chain->vars[0]);

  /* ... and ssa names based on them.  */
  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  /* For each variable except the last, create a phi node: VAR_I gets the
     I-th initializer on the entry edge and VAR_{I+1} on the latch edge.  */
  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      next = chain->vars[i + 1];
      init = get_init_expr (chain, i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
}
1530
1531/* Create the variables and initialization statement for root of chain
1532   CHAIN.  Uids of the newly created temporary variables are marked
1533   in TMP_VARS.  */
1534
1535static void
1536initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
1537{
1538  dref root = get_chain_root (chain);
1539  bool in_lhs = (chain->type == CT_STORE_LOAD
1540		 || chain->type == CT_COMBINATION);
1541
1542  initialize_root_vars (loop, chain, tmp_vars);
1543  replace_ref_with (root->stmt,
1544		    chain->vars[chain->length],
1545		    true, in_lhs);
1546}
1547
/* Initializes a variable for load motion for ROOT and prepares phi nodes and
   initialization on entry to LOOP if necessary.  The ssa name for the variable
   is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
   around the loop is created.  Uid of the newly created temporary variable
   is marked in TMP_VARS.  INITS is the list containing the (single)
   initializer.  */

static void
initialize_root_vars_lm (struct loop *loop, dref root, bool written,
			 vec<tree> *vars, vec<tree> inits,
			 bitmap tmp_vars)
{
  unsigned i;
  tree ref = DR_REF (root->ref), init, var, next;
  gimple_seq stmts;
  gphi *phi;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* Find the initializer for the variable, and check that it cannot
     trap.  */
  init = inits[0];

  /* If the location is also written inside the loop, a second ssa name is
     needed for the value carried on the latch edge.  */
  vars->create (written ? 2 : 1);
  var = predcom_tmp_var (ref, 0, tmp_vars);
  vars->quick_push (var);
  if (written)
    vars->quick_push ((*vars)[0]);

  FOR_EACH_VEC_ELT (*vars, i, var)
    (*vars)[i] = make_ssa_name (var);

  var = (*vars)[0];

  /* Gimplify the initializer; resulting statements go to the preheader.  */
  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (entry, stmts);

  if (written)
    {
      /* Create the phi node that carries the value around the loop.  */
      next = (*vars)[1];
      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
  else
    {
      /* Otherwise, a plain initialization in the preheader suffices.  */
      gassign *init_stmt = gimple_build_assign (var, init);
      gsi_insert_on_edge_immediate (entry, init_stmt);
    }
}
1598
1599
/* Execute load motion for references in chain CHAIN.  Uids of the newly
   created temporary variables are marked in TMP_VARS.  */

static void
execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  auto_vec<tree> vars;
  dref a;
  unsigned n_writes = 0, ridx, i;
  tree var;

  gcc_assert (chain->type == CT_INVARIANT);
  gcc_assert (!chain->combined);
  /* Count the writes in the chain.  */
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    if (DR_IS_WRITE (a->ref))
      n_writes++;

  /* If there are no reads in the loop, there is nothing to do.  */
  if (n_writes == chain->refs.length ())
    return;

  initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
			   &vars, chain->inits, tmp_vars);

  /* While writes remain, references use VARS[0] (RIDX == 0); after the
     last write, reads use the looparound value in VARS[1] instead.  */
  ridx = 0;
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    {
      bool is_read = DR_IS_READ (a->ref);

      if (DR_IS_WRITE (a->ref))
	{
	  n_writes--;
	  if (n_writes)
	    {
	      /* Each write except the last one defines a fresh ssa name
		 for the variable.  */
	      var = vars[0];
	      var = make_ssa_name (SSA_NAME_VAR (var));
	      vars[0] = var;
	    }
	  else
	    ridx = 1;
	}

      replace_ref_with (a->stmt, vars[ridx],
			!is_read, !is_read);
    }
}
1646
1647/* Returns the single statement in that NAME is used, excepting
1648   the looparound phi nodes contained in one of the chains.  If there is no
1649   such statement, or more statements, NULL is returned.  */
1650
1651static gimple
1652single_nonlooparound_use (tree name)
1653{
1654  use_operand_p use;
1655  imm_use_iterator it;
1656  gimple stmt, ret = NULL;
1657
1658  FOR_EACH_IMM_USE_FAST (use, it, name)
1659    {
1660      stmt = USE_STMT (use);
1661
1662      if (gimple_code (stmt) == GIMPLE_PHI)
1663	{
1664	  /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
1665	     could not be processed anyway, so just fail for them.  */
1666	  if (bitmap_bit_p (looparound_phis,
1667			    SSA_NAME_VERSION (PHI_RESULT (stmt))))
1668	    continue;
1669
1670	  return NULL;
1671	}
1672      else if (is_gimple_debug (stmt))
1673	continue;
1674      else if (ret != NULL)
1675	return NULL;
1676      else
1677	ret = stmt;
1678    }
1679
1680  return ret;
1681}
1682
/* Remove statement STMT, as well as the chain of assignments in that it is
   used.  */

static void
remove_stmt (gimple stmt)
{
  tree name;
  gimple next;
  gimple_stmt_iterator psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* Find the single copy assignment fed by the phi before removing it,
	 so that the removal can continue there.  */
      name = PHI_RESULT (stmt);
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, true);

      /* Stop unless the single use is a plain copy of NAME.  */
      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }

  /* Walk down the chain of single-use ssa name copies, removing each
     statement in turn.  */
  while (1)
    {
      gimple_stmt_iterator bsi;

      bsi = gsi_for_stmt (stmt);

      name = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (name) == SSA_NAME);

      /* Determine the following statement before STMT is removed.  */
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);

      unlink_stmt_vdef (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);

      /* Stop once the use is not a plain copy of NAME.  */
      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }
}
1733
1734/* Perform the predictive commoning optimization for a chain CHAIN.
1735   Uids of the newly created temporary variables are marked in TMP_VARS.*/
1736
1737static void
1738execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1739			     bitmap tmp_vars)
1740{
1741  unsigned i;
1742  dref a;
1743  tree var;
1744
1745  if (chain->combined)
1746    {
1747      /* For combined chains, just remove the statements that are used to
1748	 compute the values of the expression (except for the root one).
1749	 We delay this until after all chains are processed.  */
1750    }
1751  else
1752    {
1753      /* For non-combined chains, set up the variables that hold its value,
1754	 and replace the uses of the original references by these
1755	 variables.  */
1756      initialize_root (loop, chain, tmp_vars);
1757      for (i = 1; chain->refs.iterate (i, &a); i++)
1758	{
1759	  var = chain->vars[chain->length - a->distance];
1760	  replace_ref_with (a->stmt, var, false, false);
1761	}
1762    }
1763}
1764
1765/* Determines the unroll factor necessary to remove as many temporary variable
1766   copies as possible.  CHAINS is the list of chains that will be
1767   optimized.  */
1768
1769static unsigned
1770determine_unroll_factor (vec<chain_p> chains)
1771{
1772  chain_p chain;
1773  unsigned factor = 1, af, nfactor, i;
1774  unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1775
1776  FOR_EACH_VEC_ELT (chains, i, chain)
1777    {
1778      if (chain->type == CT_INVARIANT)
1779	continue;
1780
1781      if (chain->combined)
1782	{
1783	  /* For combined chains, we can't handle unrolling if we replace
1784	     looparound PHIs.  */
1785	  dref a;
1786	  unsigned j;
1787	  for (j = 1; chain->refs.iterate (j, &a); j++)
1788	    if (gimple_code (a->stmt) == GIMPLE_PHI)
1789	      return 1;
1790	  continue;
1791	}
1792
1793      /* The best unroll factor for this chain is equal to the number of
1794	 temporary variables that we create for it.  */
1795      af = chain->length;
1796      if (chain->has_max_use_after)
1797	af++;
1798
1799      nfactor = factor * af / gcd (factor, af);
1800      if (nfactor <= max)
1801	factor = nfactor;
1802    }
1803
1804  return factor;
1805}
1806
1807/* Perform the predictive commoning optimization for CHAINS.
1808   Uids of the newly created temporary variables are marked in TMP_VARS.  */
1809
1810static void
1811execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
1812			bitmap tmp_vars)
1813{
1814  chain_p chain;
1815  unsigned i;
1816
1817  FOR_EACH_VEC_ELT (chains, i, chain)
1818    {
1819      if (chain->type == CT_INVARIANT)
1820	execute_load_motion (loop, chain, tmp_vars);
1821      else
1822	execute_pred_commoning_chain (loop, chain, tmp_vars);
1823    }
1824
1825  FOR_EACH_VEC_ELT (chains, i, chain)
1826    {
1827      if (chain->type == CT_INVARIANT)
1828	;
1829      else if (chain->combined)
1830	{
1831	  /* For combined chains, just remove the statements that are used to
1832	     compute the values of the expression (except for the root one).  */
1833	  dref a;
1834	  unsigned j;
1835	  for (j = 1; chain->refs.iterate (j, &a); j++)
1836	    remove_stmt (a->stmt);
1837	}
1838    }
1839
1840  update_ssa (TODO_update_ssa_only_virtuals);
1841}
1842
1843/* For each reference in CHAINS, if its defining statement is
1844   phi node, record the ssa name that is defined by it.  */
1845
1846static void
1847replace_phis_by_defined_names (vec<chain_p> chains)
1848{
1849  chain_p chain;
1850  dref a;
1851  unsigned i, j;
1852
1853  FOR_EACH_VEC_ELT (chains, i, chain)
1854    FOR_EACH_VEC_ELT (chain->refs, j, a)
1855      {
1856	if (gimple_code (a->stmt) == GIMPLE_PHI)
1857	  {
1858	    a->name_defined_by_phi = PHI_RESULT (a->stmt);
1859	    a->stmt = NULL;
1860	  }
1861      }
1862}
1863
1864/* For each reference in CHAINS, if name_defined_by_phi is not
1865   NULL, use it to set the stmt field.  */
1866
1867static void
1868replace_names_by_phis (vec<chain_p> chains)
1869{
1870  chain_p chain;
1871  dref a;
1872  unsigned i, j;
1873
1874  FOR_EACH_VEC_ELT (chains, i, chain)
1875    FOR_EACH_VEC_ELT (chain->refs, j, a)
1876      if (a->stmt == NULL)
1877	{
1878	  a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1879	  gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1880	  a->name_defined_by_phi = NULL_TREE;
1881	}
1882}
1883
/* Wrapper over execute_pred_commoning, to pass it as a callback
   to tree_transform_and_unroll_loop.  */

struct epcc_data
{
  vec<chain_p> chains;	/* The chains to be transformed.  */
  bitmap tmp_vars;	/* Uids of the created temporary variables.  */
};
1892
1893static void
1894execute_pred_commoning_cbck (struct loop *loop, void *data)
1895{
1896  struct epcc_data *const dta = (struct epcc_data *) data;
1897
1898  /* Restore phi nodes that were replaced by ssa names before
1899     tree_transform_and_unroll_loop (see detailed description in
1900     tree_predictive_commoning_loop).  */
1901  replace_names_by_phis (dta->chains);
1902  execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
1903}
1904
/* Base NAME and all the names in the chain of phi nodes that use it
   on variable VAR.  The phi nodes are recognized by being in the copies of
   the header of the LOOP.  */

static void
base_names_in_chain_on (struct loop *loop, tree name, tree var)
{
  gimple stmt, phi;
  imm_use_iterator iter;

  replace_ssa_name_symbol (name, var);

  while (1)
    {
      /* Look for a phi node inside LOOP that uses NAME.  */
      phi = NULL;
      FOR_EACH_IMM_USE_STMT (stmt, iter, name)
	{
	  if (gimple_code (stmt) == GIMPLE_PHI
	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	    {
	      phi = stmt;
	      BREAK_FROM_IMM_USE_STMT (iter);
	    }
	}
      if (!phi)
	return;

      /* Rebase its result as well, and continue from it.  */
      name = PHI_RESULT (phi);
      replace_ssa_name_symbol (name, var);
    }
}
1936
/* Given an unrolled LOOP after predictive commoning, remove the
   register copies arising from phi nodes by changing the base
   variables of SSA names.  TMP_VARS is the set of the temporary variables
   for those we want to perform this.  */

static void
eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
{
  edge e;
  gphi *phi;
  gimple stmt;
  tree name, use, var;
  gphi_iterator psi;

  e = loop_latch_edge (loop);
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      name = PHI_RESULT (phi);
      var = SSA_NAME_VAR (name);
      /* Only process phis for the temporaries created by this pass.  */
      if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
	continue;
      use = PHI_ARG_DEF_FROM_EDGE (phi, e);
      gcc_assert (TREE_CODE (use) == SSA_NAME);

      /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
      stmt = SSA_NAME_DEF_STMT (use);
      while (gimple_code (stmt) == GIMPLE_PHI
	     /* In case we could not unroll the loop enough to eliminate
		all copies, we may reach the loop header before the defining
		statement (in that case, some register copies will be present
		in loop latch in the final code, corresponding to the newly
		created looparound phi nodes).  */
	     && gimple_bb (stmt) != loop->header)
	{
	  gcc_assert (single_pred_p (gimple_bb (stmt)));
	  use = PHI_ARG_DEF (stmt, 0);
	  stmt = SSA_NAME_DEF_STMT (use);
	}

      base_names_in_chain_on (loop, use, var);
    }
}
1980
1981/* Returns true if CHAIN is suitable to be combined.  */
1982
1983static bool
1984chain_can_be_combined_p (chain_p chain)
1985{
1986  return (!chain->combined
1987	  && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
1988}
1989
1990/* Returns the modify statement that uses NAME.  Skips over assignment
1991   statements, NAME is replaced with the actual name used in the returned
1992   statement.  */
1993
1994static gimple
1995find_use_stmt (tree *name)
1996{
1997  gimple stmt;
1998  tree rhs, lhs;
1999
2000  /* Skip over assignments.  */
2001  while (1)
2002    {
2003      stmt = single_nonlooparound_use (*name);
2004      if (!stmt)
2005	return NULL;
2006
2007      if (gimple_code (stmt) != GIMPLE_ASSIGN)
2008	return NULL;
2009
2010      lhs = gimple_assign_lhs (stmt);
2011      if (TREE_CODE (lhs) != SSA_NAME)
2012	return NULL;
2013
2014      if (gimple_assign_copy_p (stmt))
2015	{
2016	  rhs = gimple_assign_rhs1 (stmt);
2017	  if (rhs != *name)
2018	    return NULL;
2019
2020	  *name = lhs;
2021	}
2022      else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2023	       == GIMPLE_BINARY_RHS)
2024	return stmt;
2025      else
2026	return NULL;
2027    }
2028}
2029
2030/* Returns true if we may perform reassociation for operation CODE in TYPE.  */
2031
2032static bool
2033may_reassociate_p (tree type, enum tree_code code)
2034{
2035  if (FLOAT_TYPE_P (type)
2036      && !flag_unsafe_math_optimizations)
2037    return false;
2038
2039  return (commutative_tree_code (code)
2040	  && associative_tree_code (code));
2041}
2042
2043/* If the operation used in STMT is associative and commutative, go through the
2044   tree of the same operations and returns its root.  Distance to the root
2045   is stored in DISTANCE.  */
2046
2047static gimple
2048find_associative_operation_root (gimple stmt, unsigned *distance)
2049{
2050  tree lhs;
2051  gimple next;
2052  enum tree_code code = gimple_assign_rhs_code (stmt);
2053  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2054  unsigned dist = 0;
2055
2056  if (!may_reassociate_p (type, code))
2057    return NULL;
2058
2059  while (1)
2060    {
2061      lhs = gimple_assign_lhs (stmt);
2062      gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2063
2064      next = find_use_stmt (&lhs);
2065      if (!next
2066	  || gimple_assign_rhs_code (next) != code)
2067	break;
2068
2069      stmt = next;
2070      dist++;
2071    }
2072
2073  if (distance)
2074    *distance = dist;
2075  return stmt;
2076}
2077
2078/* Returns the common statement in that NAME1 and NAME2 have a use.  If there
2079   is no such statement, returns NULL_TREE.  In case the operation used on
2080   NAME1 and NAME2 is associative and commutative, returns the root of the
2081   tree formed by this operation instead of the statement that uses NAME1 or
2082   NAME2.  */
2083
2084static gimple
2085find_common_use_stmt (tree *name1, tree *name2)
2086{
2087  gimple stmt1, stmt2;
2088
2089  stmt1 = find_use_stmt (name1);
2090  if (!stmt1)
2091    return NULL;
2092
2093  stmt2 = find_use_stmt (name2);
2094  if (!stmt2)
2095    return NULL;
2096
2097  if (stmt1 == stmt2)
2098    return stmt1;
2099
2100  stmt1 = find_associative_operation_root (stmt1, NULL);
2101  if (!stmt1)
2102    return NULL;
2103  stmt2 = find_associative_operation_root (stmt2, NULL);
2104  if (!stmt2)
2105    return NULL;
2106
2107  return (stmt1 == stmt2 ? stmt1 : NULL);
2108}
2109
/* Checks whether R1 and R2 are combined together using CODE, with the result
   in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
   if it is true.  If CODE is ERROR_MARK, set these values instead.  */

static bool
combinable_refs_p (dref r1, dref r2,
		   enum tree_code *code, bool *swap, tree *rslt_type)
{
  enum tree_code acode;
  bool aswap;
  tree atype;
  tree name1, name2;
  gimple stmt;

  name1 = name_for_ref (r1);
  name2 = name_for_ref (r2);
  gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);

  /* Find the statement in that the values of both references are
     combined.  */
  stmt = find_common_use_stmt (&name1, &name2);

  if (!stmt
      /* A simple post-dominance check - make sure the combination
         is executed under the same condition as the references.  */
      || (gimple_bb (stmt) != gimple_bb (r1->stmt)
	  && gimple_bb (stmt) != gimple_bb (r2->stmt)))
    return false;

  acode = gimple_assign_rhs_code (stmt);
  /* For noncommutative operations, record whether the operands appear in
     swapped order.  */
  aswap = (!commutative_tree_code (acode)
	   && gimple_assign_rhs1 (stmt) != name1);
  atype = TREE_TYPE (gimple_assign_lhs (stmt));

  /* ERROR_MARK means this is the first pair considered; record the
     operation for the following pairs to match.  */
  if (*code == ERROR_MARK)
    {
      *code = acode;
      *swap = aswap;
      *rslt_type = atype;
      return true;
    }

  /* Otherwise, the combination must match the recorded one.  */
  return (*code == acode
	  && *swap == aswap
	  && *rslt_type == atype);
}
2154
2155/* Remove OP from the operation on rhs of STMT, and replace STMT with
2156   an assignment of the remaining operand.  */
2157
2158static void
2159remove_name_from_operation (gimple stmt, tree op)
2160{
2161  tree other_op;
2162  gimple_stmt_iterator si;
2163
2164  gcc_assert (is_gimple_assign (stmt));
2165
2166  if (gimple_assign_rhs1 (stmt) == op)
2167    other_op = gimple_assign_rhs2 (stmt);
2168  else
2169    other_op = gimple_assign_rhs1 (stmt);
2170
2171  si = gsi_for_stmt (stmt);
2172  gimple_assign_set_rhs_from_tree (&si, other_op);
2173
2174  /* We should not have reallocated STMT.  */
2175  gcc_assert (gsi_stmt (si) == stmt);
2176
2177  update_stmt (stmt);
2178}
2179
/* Reassociates the expression in that NAME1 and NAME2 are used so that they
   are combined in a single statement, and returns this statement.  */

static gimple
reassociate_to_the_same_stmt (tree name1, tree name2)
{
  gimple stmt1, stmt2, root1, root2, s1, s2;
  gassign *new_stmt, *tmp_stmt;
  tree new_name, tmp_name, var, r1, r2;
  unsigned dist1, dist2;
  enum tree_code code;
  tree type = TREE_TYPE (name1);
  gimple_stmt_iterator bsi;

  /* Find the uses of NAME1 and NAME2 and the root of the associative
     operation tree containing them; DIST1 and DIST2 are the distances of
     the uses from that root.  The asserted invariants (common root, same
     operation code) are guaranteed by find_common_use_stmt, which the
     caller used to establish combinability.  */
  stmt1 = find_use_stmt (&name1);
  stmt2 = find_use_stmt (&name2);
  root1 = find_associative_operation_root (stmt1, &dist1);
  root2 = find_associative_operation_root (stmt2, &dist2);
  code = gimple_assign_rhs_code (stmt1);

  gcc_assert (root1 && root2 && root1 == root2
	      && code == gimple_assign_rhs_code (stmt2));

  /* Find the root of the nearest expression in that both NAME1 and NAME2
     are used.  */
  r1 = name1;
  s1 = stmt1;
  r2 = name2;
  s2 = stmt2;

  /* First bring both walks to the same distance from the root...  */
  while (dist1 > dist2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      dist1--;
    }
  while (dist2 > dist1)
    {
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
      dist2--;
    }

  /* ...then advance both in lockstep until they meet at the nearest
     common statement S1 (== S2).  */
  while (s1 != s2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
    }

  /* Remove NAME1 and NAME2 from the statements in that they are used
     currently.  */
  remove_name_from_operation (stmt1, name1);
  remove_name_from_operation (stmt2, name2);

  /* Insert the new statement combining NAME1 and NAME2 before S1, and
     combine it with the rhs of S1.  */
  var = create_tmp_reg (type, "predreastmp");
  new_name = make_ssa_name (var);
  new_stmt = gimple_build_assign (new_name, code, name1, name2);

  var = create_tmp_reg (type, "predreastmp");
  tmp_name = make_ssa_name (var);

  /* Rhs of S1 may now be either a binary expression with operation
     CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
     so that name1 or name2 was removed from it).  */
  tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
				  gimple_assign_rhs1 (s1),
				  gimple_assign_rhs2 (s1));

  /* Replace the rhs of S1 by NEW_NAME CODE TMP_NAME, where NEW_NAME holds
     the combination of NAME1 and NAME2 and TMP_NAME holds S1's old rhs.  */
  bsi = gsi_for_stmt (s1);
  gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
  s1 = gsi_stmt (bsi);
  update_stmt (s1);

  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
  gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);

  return new_stmt;
}
2262
2263/* Returns the statement that combines references R1 and R2.  In case R1
2264   and R2 are not used in the same statement, but they are used with an
2265   associative and commutative operation in the same expression, reassociate
2266   the expression so that they are used in the same statement.  */
2267
2268static gimple
2269stmt_combining_refs (dref r1, dref r2)
2270{
2271  gimple stmt1, stmt2;
2272  tree name1 = name_for_ref (r1);
2273  tree name2 = name_for_ref (r2);
2274
2275  stmt1 = find_use_stmt (&name1);
2276  stmt2 = find_use_stmt (&name2);
2277  if (stmt1 == stmt2)
2278    return stmt1;
2279
2280  return reassociate_to_the_same_stmt (name1, name2);
2281}
2282
/* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
   description of the new chain is returned, otherwise we return NULL.  */

static chain_p
combine_chains (chain_p ch1, chain_p ch2)
{
  dref r1, r2, nw;
  enum tree_code op = ERROR_MARK;
  bool swap = false;
  chain_p new_chain;
  unsigned i;
  gimple root_stmt;
  tree rslt_type = NULL_TREE;

  /* A chain cannot be combined with itself, and candidate chains must
     agree in length and in the number of references.  */
  if (ch1 == ch2)
    return NULL;
  if (ch1->length != ch2->length)
    return NULL;

  if (ch1->refs.length () != ch2->refs.length ())
    return NULL;

  /* Corresponding references must be at the same distance, and every pair
     must be combined in the same way: OP, SWAP and RSLT_TYPE are set from
     the first pair (OP starts as ERROR_MARK) and checked against all the
     following pairs by combinable_refs_p.  */
  for (i = 0; (ch1->refs.iterate (i, &r1)
	       && ch2->refs.iterate (i, &r2)); i++)
    {
      if (r1->distance != r2->distance)
	return NULL;

      if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
	return NULL;
    }

  /* If the references are combined in the order R2 OP R1, exchange the
     chains so that CH1 always supplies the first operand.  */
  if (swap)
    {
      chain_p tmp = ch1;
      ch1 = ch2;
      ch2 = tmp;
    }

  new_chain = XCNEW (struct chain);
  new_chain->type = CT_COMBINATION;
  new_chain->op = op;
  new_chain->ch1 = ch1;
  new_chain->ch2 = ch2;
  new_chain->rslt_type = rslt_type;
  new_chain->length = ch1->length;

  /* Build the references of the combined chain from the statements that
     combine each pair of corresponding references.  */
  for (i = 0; (ch1->refs.iterate (i, &r1)
	       && ch2->refs.iterate (i, &r2)); i++)
    {
      nw = XCNEW (struct dref_d);
      nw->stmt = stmt_combining_refs (r1, r2);
      nw->distance = r1->distance;

      new_chain->refs.safe_push (nw);
    }

  /* Recompute HAS_MAX_USE_AFTER for the new chain: set it if some
     non-root reference at the maximum distance does not dominate the
     root statement.  */
  new_chain->has_max_use_after = false;
  root_stmt = get_chain_root (new_chain)->stmt;
  for (i = 1; new_chain->refs.iterate (i, &nw); i++)
    {
      if (nw->distance == new_chain->length
	  && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
	{
	  new_chain->has_max_use_after = true;
	  break;
	}
    }

  ch1->combined = true;
  ch2->combined = true;
  return new_chain;
}
2356
2357/* Try to combine the CHAINS.  */
2358
2359static void
2360try_combine_chains (vec<chain_p> *chains)
2361{
2362  unsigned i, j;
2363  chain_p ch1, ch2, cch;
2364  auto_vec<chain_p> worklist;
2365
2366  FOR_EACH_VEC_ELT (*chains, i, ch1)
2367    if (chain_can_be_combined_p (ch1))
2368      worklist.safe_push (ch1);
2369
2370  while (!worklist.is_empty ())
2371    {
2372      ch1 = worklist.pop ();
2373      if (!chain_can_be_combined_p (ch1))
2374	continue;
2375
2376      FOR_EACH_VEC_ELT (*chains, j, ch2)
2377	{
2378	  if (!chain_can_be_combined_p (ch2))
2379	    continue;
2380
2381	  cch = combine_chains (ch1, ch2);
2382	  if (cch)
2383	    {
2384	      worklist.safe_push (cch);
2385	      chains->safe_push (cch);
2386	      break;
2387	    }
2388	}
2389    }
2390}
2391
/* Prepare initializers for CHAIN in LOOP.  Returns false if this is
   impossible because one of these initializers may trap, true otherwise.  */

static bool
prepare_initializers_chain (struct loop *loop, chain_p chain)
{
  /* An invariant chain needs just one initializer; otherwise one per
     distance step of the chain.  */
  unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree init;
  dref laref;
  edge entry = loop_preheader_edge (loop);

  /* Find the initializers for the variables, and check that they cannot
     trap.  */
  chain->inits.create (n);
  for (i = 0; i < n; i++)
    chain->inits.quick_push (NULL_TREE);

  /* If we have replaced some looparound phi nodes, use their initializers
     instead of creating our own.  */
  FOR_EACH_VEC_ELT (chain->refs, i, laref)
    {
      if (gimple_code (laref->stmt) != GIMPLE_PHI)
	continue;

      gcc_assert (laref->distance > 0);
      chain->inits[n - laref->distance]
	= PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
    }

  /* Compute the remaining initializers as the values of the root
     reference at iterations -N, ..., -1, emitting any needed statements
     on the preheader edge.  */
  for (i = 0; i < n; i++)
    {
      gimple_seq stmts = NULL;

      if (chain->inits[i] != NULL_TREE)
	continue;

      init = ref_at_iteration (dr, (int) i - n, &stmts);
      /* Unless the reference is accessed in each iteration, evaluating
	 it before the loop may trap; in that case we must give up and
	 discard the statements generated so far for this initializer.  */
      if (!chain->all_always_accessed && tree_could_trap_p (init))
	{
	  gimple_seq_discard (stmts);
	  return false;
	}

      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      chain->inits[i] = init;
    }

  return true;
}
2444
2445/* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2446   be used because the initializers might trap.  */
2447
2448static void
2449prepare_initializers (struct loop *loop, vec<chain_p> chains)
2450{
2451  chain_p chain;
2452  unsigned i;
2453
2454  for (i = 0; i < chains.length (); )
2455    {
2456      chain = chains[i];
2457      if (prepare_initializers_chain (loop, chain))
2458	i++;
2459      else
2460	{
2461	  release_chain (chain);
2462	  chains.unordered_remove (i);
2463	}
2464    }
2465}
2466
/* Performs predictive commoning for LOOP.  Returns true if LOOP was
   unrolled.  */

static bool
tree_predictive_commoning_loop (struct loop *loop)
{
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependences;
  struct component *components;
  vec<chain_p> chains = vNULL;
  unsigned unroll_factor;
  struct tree_niter_desc desc;
  bool unroll = false;
  edge exit;
  bitmap tmp_vars;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing loop %d\n",  loop->num);

  /* Find the data references and split them into components according to their
     dependence relations.  */
  auto_vec<loop_p, 3> loop_nest;
  dependences.create (10);
  datarefs.create (10);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependences))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Cannot analyze data dependencies\n");
      free_data_refs (datarefs);
      free_dependence_relations (dependences);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependences);

  components = split_data_refs_to_components (loop, datarefs, dependences);
  loop_nest.release ();
  free_dependence_relations (dependences);
  if (!components)
    {
      free_data_refs (datarefs);
      free_affine_expand_cache (&name_expansions);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial state:\n\n");
      dump_components (dump_file, components);
    }

  /* Find the suitable components and split them into chains.  */
  components = filter_suitable_components (loop, components);

  /* TMP_VARS collects the temporaries created by the transformation;
     LOOPAROUND_PHIS tracks the phi nodes replaced by reused values.  */
  tmp_vars = BITMAP_ALLOC (NULL);
  looparound_phis = BITMAP_ALLOC (NULL);
  determine_roots (loop, components, &chains);
  release_components (components);

  if (!chains.exists ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Predictive commoning failed: no suitable chains\n");
      goto end;
    }
  prepare_initializers (loop, chains);

  /* Try to combine the chains that are always worked with together.  */
  try_combine_chains (&chains);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Before commoning:\n\n");
      dump_chains (dump_file, chains);
    }

  /* Determine the unroll factor, and if the loop should be unrolled, ensure
     that its number of iterations is divisible by the factor.  */
  unroll_factor = determine_unroll_factor (chains);
  scev_reset ();
  unroll = (unroll_factor > 1
	    && can_unroll_loop_p (loop, unroll_factor, &desc));
  exit = single_dom_exit (loop);

  /* Execute the predictive commoning transformations, and possibly unroll the
     loop.  */
  if (unroll)
    {
      struct epcc_data dta;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);

      dta.chains = chains;
      dta.tmp_vars = tmp_vars;

      update_ssa (TODO_update_ssa_only_virtuals);

      /* Cfg manipulations performed in tree_transform_and_unroll_loop before
	 execute_pred_commoning_cbck is called may cause phi nodes to be
	 reallocated, which is a problem since CHAINS may point to these
	 statements.  To fix this, we store the ssa names defined by the
	 phi nodes here instead of the phi nodes themselves, and restore
	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
      replace_phis_by_defined_names (chains);

      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
				      execute_pred_commoning_cbck, &dta);
      eliminate_temp_copies (loop, tmp_vars);
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Executing predictive commoning without unrolling.\n");
      execute_pred_commoning (loop, chains, tmp_vars);
    }

  /* Common cleanup, reached both on success and via the GOTO above when
     no suitable chains were found.  */
end: ;
  release_chains (chains);
  free_data_refs (datarefs);
  BITMAP_FREE (tmp_vars);
  BITMAP_FREE (looparound_phis);

  free_affine_expand_cache (&name_expansions);

  return unroll;
}
2598
2599/* Runs predictive commoning.  */
2600
2601unsigned
2602tree_predictive_commoning (void)
2603{
2604  bool unrolled = false;
2605  struct loop *loop;
2606  unsigned ret = 0;
2607
2608  initialize_original_copy_tables ();
2609  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2610    if (optimize_loop_for_speed_p (loop))
2611      {
2612	unrolled |= tree_predictive_commoning_loop (loop);
2613      }
2614
2615  if (unrolled)
2616    {
2617      scev_reset ();
2618      ret = TODO_cleanup_cfg;
2619    }
2620  free_original_copy_tables ();
2621
2622  return ret;
2623}
2624
2625/* Predictive commoning Pass.  */
2626
2627static unsigned
2628run_tree_predictive_commoning (struct function *fun)
2629{
2630  if (number_of_loops (fun) <= 1)
2631    return 0;
2632
2633  return tree_predictive_commoning ();
2634}
2635
namespace {

/* Pass descriptor for the predictive commoning pass.  */

const pass_data pass_data_predcom =
{
  GIMPLE_PASS, /* type */
  "pcom", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_PREDCOM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_predcom : public gimple_opt_pass
{
public:
  pass_predcom (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_predcom, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when predictive commoning is enabled
     (-fpredictive-commoning).  */
  virtual bool gate (function *) { return flag_predictive_commoning != 0; }
  virtual unsigned int execute (function *fun)
    {
      return run_tree_predictive_commoning (fun);
    }

}; // class pass_predcom

} // anon namespace
2668
/* Factory function for the predictive commoning pass, called by the
   pass manager.  */

gimple_opt_pass *
make_pass_predcom (gcc::context *ctxt)
{
  return new pass_predcom (ctxt);
}
2674
2675
2676