1/* Loop distribution.
2   Copyright (C) 2006-2015 Free Software Foundation, Inc.
3   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4   and Sebastian Pop <sebastian.pop@amd.com>.
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 3, or (at your option) any
11later version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT
14ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3.  If not see
20<http://www.gnu.org/licenses/>.  */
21
22/* This pass performs loop distribution: for example, the loop
23
24   |DO I = 2, N
25   |    A(I) = B(I) + C
26   |    D(I) = A(I-1)*E
27   |ENDDO
28
29   is transformed to
30
31   |DOALL I = 2, N
32   |   A(I) = B(I) + C
33   |ENDDO
34   |
35   |DOALL I = 2, N
36   |   D(I) = A(I-1)*E
37   |ENDDO
38
39   This pass uses an RDG, Reduced Dependence Graph built on top of the
40   data dependence relations.  The RDG is then topologically sorted to
41   obtain a map of information producers/consumers based on which it
42   generates the new loops.  */
43
44#include "config.h"
45#include "system.h"
46#include "coretypes.h"
47#include "hash-set.h"
48#include "machmode.h"
49#include "vec.h"
50#include "double-int.h"
51#include "input.h"
52#include "alias.h"
53#include "symtab.h"
54#include "options.h"
55#include "wide-int.h"
56#include "inchash.h"
57#include "tree.h"
58#include "fold-const.h"
59#include "predict.h"
60#include "tm.h"
61#include "hard-reg-set.h"
62#include "input.h"
63#include "function.h"
64#include "dominance.h"
65#include "cfg.h"
66#include "cfganal.h"
67#include "basic-block.h"
68#include "tree-ssa-alias.h"
69#include "internal-fn.h"
70#include "gimple-expr.h"
71#include "is-a.h"
72#include "gimple.h"
73#include "gimple-iterator.h"
74#include "gimplify-me.h"
75#include "stor-layout.h"
76#include "gimple-ssa.h"
77#include "tree-cfg.h"
78#include "tree-phinodes.h"
79#include "ssa-iterators.h"
80#include "stringpool.h"
81#include "tree-ssanames.h"
82#include "tree-ssa-loop-manip.h"
83#include "tree-ssa-loop.h"
84#include "tree-into-ssa.h"
85#include "tree-ssa.h"
86#include "cfgloop.h"
87#include "tree-chrec.h"
88#include "tree-data-ref.h"
89#include "tree-scalar-evolution.h"
90#include "tree-pass.h"
91#include "gimple-pretty-print.h"
92#include "tree-vectorizer.h"
93
94
95/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
96typedef struct rdg_vertex
97{
98  /* The statement represented by this vertex.  */
99  gimple stmt;
100
101  /* Vector of data-references in this statement.  */
102  vec<data_reference_p> datarefs;
103
104  /* True when the statement contains a write to memory.  */
105  bool has_mem_write;
106
107  /* True when the statement contains a read from memory.  */
108  bool has_mem_reads;
109} *rdg_vertex_p;
110
111#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
112#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
113#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
114#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
115#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
116#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
117#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
118#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
119
120/* Data dependence type.  */
121
122enum rdg_dep_type
123{
124  /* Read After Write (RAW).  */
125  flow_dd = 'f',
126
127  /* Control dependence (execute conditional on).  */
128  control_dd = 'c'
129};
130
131/* Dependence information attached to an edge of the RDG.  */
132
133typedef struct rdg_edge
134{
135  /* Type of the dependence.  */
136  enum rdg_dep_type type;
137} *rdg_edge_p;
138
139#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
140
141/* Dump vertex I in RDG to FILE.  */
142
143static void
144dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
145{
146  struct vertex *v = &(rdg->vertices[i]);
147  struct graph_edge *e;
148
149  fprintf (file, "(vertex %d: (%s%s) (in:", i,
150	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
151	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
152
153  if (v->pred)
154    for (e = v->pred; e; e = e->pred_next)
155      fprintf (file, " %d", e->src);
156
157  fprintf (file, ") (out:");
158
159  if (v->succ)
160    for (e = v->succ; e; e = e->succ_next)
161      fprintf (file, " %d", e->dest);
162
163  fprintf (file, ")\n");
164  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
165  fprintf (file, ")\n");
166}
167
168/* Call dump_rdg_vertex on stderr.  */
169
170DEBUG_FUNCTION void
171debug_rdg_vertex (struct graph *rdg, int i)
172{
173  dump_rdg_vertex (stderr, rdg, i);
174}
175
176/* Dump the reduced dependence graph RDG to FILE.  */
177
178static void
179dump_rdg (FILE *file, struct graph *rdg)
180{
181  fprintf (file, "(rdg\n");
182  for (int i = 0; i < rdg->n_vertices; i++)
183    dump_rdg_vertex (file, rdg, i);
184  fprintf (file, ")\n");
185}
186
187/* Call dump_rdg on stderr.  */
188
189DEBUG_FUNCTION void
190debug_rdg (struct graph *rdg)
191{
192  dump_rdg (stderr, rdg);
193}
194
195static void
196dot_rdg_1 (FILE *file, struct graph *rdg)
197{
198  int i;
199  pretty_printer buffer;
200  pp_needs_newline (&buffer) = false;
201  buffer.buffer->stream = file;
202
203  fprintf (file, "digraph RDG {\n");
204
205  for (i = 0; i < rdg->n_vertices; i++)
206    {
207      struct vertex *v = &(rdg->vertices[i]);
208      struct graph_edge *e;
209
210      fprintf (file, "%d [label=\"[%d] ", i, i);
211      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
212      pp_flush (&buffer);
213      fprintf (file, "\"]\n");
214
215      /* Highlight reads from memory.  */
216      if (RDG_MEM_READS_STMT (rdg, i))
217       fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
218
219      /* Highlight stores to memory.  */
220      if (RDG_MEM_WRITE_STMT (rdg, i))
221       fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
222
223      if (v->succ)
224       for (e = v->succ; e; e = e->succ_next)
225         switch (RDGE_TYPE (e))
226           {
227           case flow_dd:
228             /* These are the most common dependences: don't print these. */
229             fprintf (file, "%d -> %d \n", i, e->dest);
230             break;
231
232	   case control_dd:
233             fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
234             break;
235
236           default:
237             gcc_unreachable ();
238           }
239    }
240
241  fprintf (file, "}\n\n");
242}
243
244/* Display the Reduced Dependence Graph using dotty.  */
245
246DEBUG_FUNCTION void
247dot_rdg (struct graph *rdg)
248{
249  /* When debugging, you may want to enable the following code.  */
250#ifdef HAVE_POPEN
251  FILE *file = popen ("dot -Tx11", "w");
252  if (!file)
253    return;
254  dot_rdg_1 (file, rdg);
255  fflush (file);
256  close (fileno (file));
257  pclose (file);
258#else
259  dot_rdg_1 (stderr, rdg);
260#endif
261}
262
263/* Returns the index of STMT in RDG.  */
264
265static int
266rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
267{
268  int index = gimple_uid (stmt);
269  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
270  return index;
271}
272
273/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
274   the index of DEF in RDG.  */
275
276static void
277create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
278{
279  use_operand_p imm_use_p;
280  imm_use_iterator iterator;
281
282  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
283    {
284      struct graph_edge *e;
285      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
286
287      if (use < 0)
288	continue;
289
290      e = add_edge (rdg, idef, use);
291      e->data = XNEW (struct rdg_edge);
292      RDGE_TYPE (e) = flow_dd;
293    }
294}
295
296/* Creates an edge for the control dependences of BB to the vertex V.  */
297
298static void
299create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
300				    int v, control_dependences *cd)
301{
302  bitmap_iterator bi;
303  unsigned edge_n;
304  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
305			    0, edge_n, bi)
306    {
307      basic_block cond_bb = cd->get_edge (edge_n)->src;
308      gimple stmt = last_stmt (cond_bb);
309      if (stmt && is_ctrl_stmt (stmt))
310	{
311	  struct graph_edge *e;
312	  int c = rdg_vertex_for_stmt (rdg, stmt);
313	  if (c < 0)
314	    continue;
315
316	  e = add_edge (rdg, c, v);
317	  e->data = XNEW (struct rdg_edge);
318	  RDGE_TYPE (e) = control_dd;
319	}
320    }
321}
322
323/* Creates the edges of the reduced dependence graph RDG.  */
324
325static void
326create_rdg_flow_edges (struct graph *rdg)
327{
328  int i;
329  def_operand_p def_p;
330  ssa_op_iter iter;
331
332  for (i = 0; i < rdg->n_vertices; i++)
333    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
334			      iter, SSA_OP_DEF)
335      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
336}
337
338/* Creates the edges of the reduced dependence graph RDG.  */
339
340static void
341create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
342{
343  int i;
344
345  for (i = 0; i < rdg->n_vertices; i++)
346    {
347      gimple stmt = RDG_STMT (rdg, i);
348      if (gimple_code (stmt) == GIMPLE_PHI)
349	{
350	  edge_iterator ei;
351	  edge e;
352	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
353	      create_edge_for_control_dependence (rdg, e->src, i, cd);
354	}
355      else
356	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
357    }
358}
359
360/* Build the vertices of the reduced dependence graph RDG.  Return false
361   if that failed.  */
362
363static bool
364create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
365		     vec<data_reference_p> *datarefs)
366{
367  int i;
368  gimple stmt;
369
370  FOR_EACH_VEC_ELT (stmts, i, stmt)
371    {
372      struct vertex *v = &(rdg->vertices[i]);
373
374      /* Record statement to vertex mapping.  */
375      gimple_set_uid (stmt, i);
376
377      v->data = XNEW (struct rdg_vertex);
378      RDGV_STMT (v) = stmt;
379      RDGV_DATAREFS (v).create (0);
380      RDGV_HAS_MEM_WRITE (v) = false;
381      RDGV_HAS_MEM_READS (v) = false;
382      if (gimple_code (stmt) == GIMPLE_PHI)
383	continue;
384
385      unsigned drp = datarefs->length ();
386      if (!find_data_references_in_stmt (loop, stmt, datarefs))
387	return false;
388      for (unsigned j = drp; j < datarefs->length (); ++j)
389	{
390	  data_reference_p dr = (*datarefs)[j];
391	  if (DR_IS_READ (dr))
392	    RDGV_HAS_MEM_READS (v) = true;
393	  else
394	    RDGV_HAS_MEM_WRITE (v) = true;
395	  RDGV_DATAREFS (v).safe_push (dr);
396	}
397    }
398  return true;
399}
400
401/* Initialize STMTS with all the statements of LOOP.  The order in
402   which we discover statements is important as
403   generate_loops_for_partition is using the same traversal for
404   identifying statements in loop copies.  */
405
406static void
407stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
408{
409  unsigned int i;
410  basic_block *bbs = get_loop_body_in_dom_order (loop);
411
412  for (i = 0; i < loop->num_nodes; i++)
413    {
414      basic_block bb = bbs[i];
415
416      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
417	   gsi_next (&bsi))
418	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
419	  stmts->safe_push (bsi.phi ());
420
421      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
422	   gsi_next (&bsi))
423	{
424	  gimple stmt = gsi_stmt (bsi);
425	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
426	    stmts->safe_push (stmt);
427	}
428    }
429
430  free (bbs);
431}
432
433/* Free the reduced dependence graph RDG.  */
434
435static void
436free_rdg (struct graph *rdg)
437{
438  int i;
439
440  for (i = 0; i < rdg->n_vertices; i++)
441    {
442      struct vertex *v = &(rdg->vertices[i]);
443      struct graph_edge *e;
444
445      for (e = v->succ; e; e = e->succ_next)
446	free (e->data);
447
448      if (v->data)
449	{
450	  gimple_set_uid (RDGV_STMT (v), -1);
451	  free_data_refs (RDGV_DATAREFS (v));
452	  free (v->data);
453	}
454    }
455
456  free_graph (rdg);
457}
458
459/* Build the Reduced Dependence Graph (RDG) with one vertex per
460   statement of the loop nest LOOP_NEST, and one edge per data dependence or
461   scalar dependence.  */
462
463static struct graph *
464build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
465{
466  struct graph *rdg;
467  vec<data_reference_p> datarefs;
468
469  /* Create the RDG vertices from the stmts of the loop nest.  */
470  auto_vec<gimple, 10> stmts;
471  stmts_from_loop (loop_nest[0], &stmts);
472  rdg = new_graph (stmts.length ());
473  datarefs.create (10);
474  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
475    {
476      datarefs.release ();
477      free_rdg (rdg);
478      return NULL;
479    }
480  stmts.release ();
481
482  create_rdg_flow_edges (rdg);
483  if (cd)
484    create_rdg_cd_edges (rdg, cd);
485
486  datarefs.release ();
487
488  return rdg;
489}
490
491
492
493enum partition_kind {
494    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
495};
496
497typedef struct partition_s
498{
499  bitmap stmts;
500  bitmap loops;
501  bool reduction_p;
502  enum partition_kind kind;
503  /* data-references a kind != PKIND_NORMAL partition is about.  */
504  data_reference_p main_dr;
505  data_reference_p secondary_dr;
506  tree niter;
507  bool plus_one;
508} *partition_t;
509
510
511/* Allocate and initialize a partition from BITMAP.  */
512
513static partition_t
514partition_alloc (bitmap stmts, bitmap loops)
515{
516  partition_t partition = XCNEW (struct partition_s);
517  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
518  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
519  partition->reduction_p = false;
520  partition->kind = PKIND_NORMAL;
521  return partition;
522}
523
524/* Free PARTITION.  */
525
526static void
527partition_free (partition_t partition)
528{
529  BITMAP_FREE (partition->stmts);
530  BITMAP_FREE (partition->loops);
531  free (partition);
532}
533
534/* Returns true if the partition can be generated as a builtin.  */
535
536static bool
537partition_builtin_p (partition_t partition)
538{
539  return partition->kind != PKIND_NORMAL;
540}
541
542/* Returns true if the partition contains a reduction.  */
543
544static bool
545partition_reduction_p (partition_t partition)
546{
547  return partition->reduction_p;
548}
549
550/* Merge PARTITION into the partition DEST.  */
551
552static void
553partition_merge_into (partition_t dest, partition_t partition)
554{
555  dest->kind = PKIND_NORMAL;
556  bitmap_ior_into (dest->stmts, partition->stmts);
557  if (partition_reduction_p (partition))
558    dest->reduction_p = true;
559}
560
561
562/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
563   the LOOP.  */
564
565static bool
566ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
567{
568  imm_use_iterator imm_iter;
569  use_operand_p use_p;
570
571  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
572    {
573      gimple use_stmt = USE_STMT (use_p);
574      if (!is_gimple_debug (use_stmt)
575	  && loop != loop_containing_stmt (use_stmt))
576	return true;
577    }
578
579  return false;
580}
581
582/* Returns true when STMT defines a scalar variable used after the
583   loop LOOP.  */
584
585static bool
586stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
587{
588  def_operand_p def_p;
589  ssa_op_iter op_iter;
590
591  if (gimple_code (stmt) == GIMPLE_PHI)
592    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
593
594  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
595    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
596      return true;
597
598  return false;
599}
600
601/* Return a copy of LOOP placed before LOOP.  */
602
603static struct loop *
604copy_loop_before (struct loop *loop)
605{
606  struct loop *res;
607  edge preheader = loop_preheader_edge (loop);
608
609  initialize_original_copy_tables ();
610  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
611  gcc_assert (res != NULL);
612  free_original_copy_tables ();
613  delete_update_ssa ();
614
615  return res;
616}
617
618/* Creates an empty basic block after LOOP.  */
619
620static void
621create_bb_after_loop (struct loop *loop)
622{
623  edge exit = single_exit (loop);
624
625  if (!exit)
626    return;
627
628  split_edge (exit);
629}
630
631/* Generate code for PARTITION from the code in LOOP.  The loop is
632   copied when COPY_P is true.  All the statements not flagged in the
633   PARTITION bitmap are removed from the loop or from its copy.  The
634   statements are indexed in sequence inside a basic block, and the
635   basic blocks of a loop are taken in dom order.  */
636
637static void
638generate_loops_for_partition (struct loop *loop, partition_t partition,
639			      bool copy_p)
640{
641  unsigned i;
642  basic_block *bbs;
643
644  if (copy_p)
645    {
646      loop = copy_loop_before (loop);
647      gcc_assert (loop != NULL);
648      create_preheader (loop, CP_SIMPLE_PREHEADERS);
649      create_bb_after_loop (loop);
650    }
651
652  /* Remove stmts not in the PARTITION bitmap.  */
653  bbs = get_loop_body_in_dom_order (loop);
654
655  if (MAY_HAVE_DEBUG_STMTS)
656    for (i = 0; i < loop->num_nodes; i++)
657      {
658	basic_block bb = bbs[i];
659
660	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
661	     gsi_next (&bsi))
662	  {
663	    gphi *phi = bsi.phi ();
664	    if (!virtual_operand_p (gimple_phi_result (phi))
665		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
666	      reset_debug_uses (phi);
667	  }
668
669	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
670	  {
671	    gimple stmt = gsi_stmt (bsi);
672	    if (gimple_code (stmt) != GIMPLE_LABEL
673		&& !is_gimple_debug (stmt)
674		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
675	      reset_debug_uses (stmt);
676	  }
677      }
678
679  for (i = 0; i < loop->num_nodes; i++)
680    {
681      basic_block bb = bbs[i];
682
683      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
684	{
685	  gphi *phi = bsi.phi ();
686	  if (!virtual_operand_p (gimple_phi_result (phi))
687	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
688	    remove_phi_node (&bsi, true);
689	  else
690	    gsi_next (&bsi);
691	}
692
693      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
694	{
695	  gimple stmt = gsi_stmt (bsi);
696	  if (gimple_code (stmt) != GIMPLE_LABEL
697	      && !is_gimple_debug (stmt)
698	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
699	    {
700	      /* Choose an arbitrary path through the empty CFG part
701		 that this unnecessary control stmt controls.  */
702	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
703		{
704		  gimple_cond_make_false (cond_stmt);
705		  update_stmt (stmt);
706		}
707	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
708		{
709		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
710		  gimple_switch_set_index
711		      (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
712		  update_stmt (stmt);
713		}
714	      else
715		{
716		  unlink_stmt_vdef (stmt);
717		  gsi_remove (&bsi, true);
718		  release_defs (stmt);
719		  continue;
720		}
721	    }
722	  gsi_next (&bsi);
723	}
724    }
725
726  free (bbs);
727}
728
729/* Build the size argument for a memory operation call.  */
730
731static tree
732build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
733		    bool plus_one)
734{
735  tree size = fold_convert_loc (loc, sizetype, nb_iter);
736  if (plus_one)
737    size = size_binop (PLUS_EXPR, size, size_one_node);
738  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
739			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
740  size = fold_convert_loc (loc, size_type_node, size);
741  return size;
742}
743
744/* Build an address argument for a memory operation call.  */
745
746static tree
747build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
748{
749  tree addr_base;
750
751  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
752  addr_base = fold_convert_loc (loc, sizetype, addr_base);
753
754  /* Test for a negative stride, iterating over every element.  */
755  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
756    {
757      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
758				  fold_convert_loc (loc, sizetype, nb_bytes));
759      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
760				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
761    }
762
763  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
764}
765
766/* If VAL memory representation contains the same value in all bytes,
767   return that value, otherwise return -1.
768   E.g. for 0x24242424 return 0x24, for IEEE double
769   747708026454360457216.0 return 0x44, etc.  */
770
771static int
772const_with_all_bytes_same (tree val)
773{
774  unsigned char buf[64];
775  int i, len;
776
777  if (integer_zerop (val)
778      || real_zerop (val)
779      || (TREE_CODE (val) == CONSTRUCTOR
780          && !TREE_CLOBBER_P (val)
781          && CONSTRUCTOR_NELTS (val) == 0))
782    return 0;
783
784  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
785    return -1;
786
787  len = native_encode_expr (val, buf, sizeof (buf));
788  if (len == 0)
789    return -1;
790  for (i = 1; i < len; i++)
791    if (buf[i] != buf[0])
792      return -1;
793  return buf[0];
794}
795
796/* Generate a call to memset for PARTITION in LOOP.  */
797
798static void
799generate_memset_builtin (struct loop *loop, partition_t partition)
800{
801  gimple_stmt_iterator gsi;
802  gimple stmt, fn_call;
803  tree mem, fn, nb_bytes;
804  location_t loc;
805  tree val;
806
807  stmt = DR_STMT (partition->main_dr);
808  loc = gimple_location (stmt);
809
810  /* The new statements will be placed before LOOP.  */
811  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
812
813  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
814				 partition->plus_one);
815  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
816				       false, GSI_CONTINUE_LINKING);
817  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
818  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
819				  false, GSI_CONTINUE_LINKING);
820
821  /* This exactly matches the pattern recognition in classify_partition.  */
822  val = gimple_assign_rhs1 (stmt);
823  /* Handle constants like 0x15151515 and similarly
824     floating point constants etc. where all bytes are the same.  */
825  int bytev = const_with_all_bytes_same (val);
826  if (bytev != -1)
827    val = build_int_cst (integer_type_node, bytev);
828  else if (TREE_CODE (val) == INTEGER_CST)
829    val = fold_convert (integer_type_node, val);
830  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
831    {
832      tree tem = make_ssa_name (integer_type_node);
833      gimple cstmt = gimple_build_assign (tem, NOP_EXPR, val);
834      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
835      val = tem;
836    }
837
838  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
839  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
840  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
841
842  if (dump_file && (dump_flags & TDF_DETAILS))
843    {
844      fprintf (dump_file, "generated memset");
845      if (bytev == 0)
846	fprintf (dump_file, " zero\n");
847      else
848	fprintf (dump_file, "\n");
849    }
850}
851
852/* Generate a call to memcpy for PARTITION in LOOP.  */
853
854static void
855generate_memcpy_builtin (struct loop *loop, partition_t partition)
856{
857  gimple_stmt_iterator gsi;
858  gimple stmt, fn_call;
859  tree dest, src, fn, nb_bytes;
860  location_t loc;
861  enum built_in_function kind;
862
863  stmt = DR_STMT (partition->main_dr);
864  loc = gimple_location (stmt);
865
866  /* The new statements will be placed before LOOP.  */
867  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
868
869  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
870				 partition->plus_one);
871  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
872				       false, GSI_CONTINUE_LINKING);
873  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
874  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
875  if (ptr_derefs_may_alias_p (dest, src))
876    kind = BUILT_IN_MEMMOVE;
877  else
878    kind = BUILT_IN_MEMCPY;
879
880  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
881				   false, GSI_CONTINUE_LINKING);
882  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
883				  false, GSI_CONTINUE_LINKING);
884  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
885  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
886  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
887
888  if (dump_file && (dump_flags & TDF_DETAILS))
889    {
890      if (kind == BUILT_IN_MEMCPY)
891	fprintf (dump_file, "generated memcpy\n");
892      else
893	fprintf (dump_file, "generated memmove\n");
894    }
895}
896
897/* Remove and destroy the loop LOOP.  */
898
899static void
900destroy_loop (struct loop *loop)
901{
902  unsigned nbbs = loop->num_nodes;
903  edge exit = single_exit (loop);
904  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
905  basic_block *bbs;
906  unsigned i;
907
908  bbs = get_loop_body_in_dom_order (loop);
909
910  redirect_edge_pred (exit, src);
911  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
912  exit->flags |= EDGE_FALLTHRU;
913  cancel_loop_tree (loop);
914  rescan_loop_exit (exit, false, true);
915
916  for (i = 0; i < nbbs; i++)
917    {
918      /* We have made sure to not leave any dangling uses of SSA
919         names defined in the loop.  With the exception of virtuals.
920	 Make sure we replace all uses of virtual defs that will remain
921	 outside of the loop with the bare symbol as delete_basic_block
922	 will release them.  */
923      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
924	   gsi_next (&gsi))
925	{
926	  gphi *phi = gsi.phi ();
927	  if (virtual_operand_p (gimple_phi_result (phi)))
928	    mark_virtual_phi_result_for_renaming (phi);
929	}
930      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
931	   gsi_next (&gsi))
932	{
933	  gimple stmt = gsi_stmt (gsi);
934	  tree vdef = gimple_vdef (stmt);
935	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
936	    mark_virtual_operand_for_renaming (vdef);
937	}
938      delete_basic_block (bbs[i]);
939    }
940  free (bbs);
941
942  set_immediate_dominator (CDI_DOMINATORS, dest,
943			   recompute_dominator (CDI_DOMINATORS, dest));
944}
945
946/* Generates code for PARTITION.  */
947
948static void
949generate_code_for_partition (struct loop *loop,
950			     partition_t partition, bool copy_p)
951{
952  switch (partition->kind)
953    {
954    case PKIND_NORMAL:
955      /* Reductions all have to be in the last partition.  */
956      gcc_assert (!partition_reduction_p (partition)
957		  || !copy_p);
958      generate_loops_for_partition (loop, partition, copy_p);
959      return;
960
961    case PKIND_MEMSET:
962      generate_memset_builtin (loop, partition);
963      break;
964
965    case PKIND_MEMCPY:
966      generate_memcpy_builtin (loop, partition);
967      break;
968
969    default:
970      gcc_unreachable ();
971    }
972
973  /* Common tail for partitions we turn into a call.  If this was the last
974     partition for which we generate code, we have to destroy the loop.  */
975  if (!copy_p)
976    destroy_loop (loop);
977}
978
979
980/* Returns a partition with all the statements needed for computing
981   the vertex V of the RDG, also including the loop exit conditions.  */
982
983static partition_t
984build_rdg_partition_for_vertex (struct graph *rdg, int v)
985{
986  partition_t partition = partition_alloc (NULL, NULL);
987  auto_vec<int, 3> nodes;
988  unsigned i;
989  int x;
990
991  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
992
993  FOR_EACH_VEC_ELT (nodes, i, x)
994    {
995      bitmap_set_bit (partition->stmts, x);
996      bitmap_set_bit (partition->loops,
997		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
998    }
999
1000  return partition;
1001}
1002
1003/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
1004   For the moment we detect only the memset zero pattern.  */
1005
1006static void
1007classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
1008{
1009  bitmap_iterator bi;
1010  unsigned i;
1011  tree nb_iter;
1012  data_reference_p single_load, single_store;
1013  bool volatiles_p = false;
1014  bool plus_one = false;
1015
1016  partition->kind = PKIND_NORMAL;
1017  partition->main_dr = NULL;
1018  partition->secondary_dr = NULL;
1019  partition->niter = NULL_TREE;
1020  partition->plus_one = false;
1021
1022  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1023    {
1024      gimple stmt = RDG_STMT (rdg, i);
1025
1026      if (gimple_has_volatile_ops (stmt))
1027	volatiles_p = true;
1028
1029      /* If the stmt has uses outside of the loop mark it as reduction.  */
1030      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1031	{
1032	  partition->reduction_p = true;
1033	  return;
1034	}
1035    }
1036
1037  /* Perform general partition disqualification for builtins.  */
1038  if (volatiles_p
1039      || !flag_tree_loop_distribute_patterns)
1040    return;
1041
1042  /* Detect memset and memcpy.  */
1043  single_load = NULL;
1044  single_store = NULL;
1045  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
1046    {
1047      gimple stmt = RDG_STMT (rdg, i);
1048      data_reference_p dr;
1049      unsigned j;
1050
1051      if (gimple_code (stmt) == GIMPLE_PHI)
1052	continue;
1053
1054      /* Any scalar stmts are ok.  */
1055      if (!gimple_vuse (stmt))
1056	continue;
1057
1058      /* Otherwise just regular loads/stores.  */
1059      if (!gimple_assign_single_p (stmt))
1060	return;
1061
1062      /* But exactly one store and/or load.  */
1063      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
1064	{
1065	  if (DR_IS_READ (dr))
1066	    {
1067	      if (single_load != NULL)
1068		return;
1069	      single_load = dr;
1070	    }
1071	  else
1072	    {
1073	      if (single_store != NULL)
1074		return;
1075	      single_store = dr;
1076	    }
1077	}
1078    }
1079
1080  if (!single_store)
1081    return;
1082
1083  nb_iter = number_of_latch_executions (loop);
1084  if (!nb_iter || nb_iter == chrec_dont_know)
1085    return;
1086  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
1087		      gimple_bb (DR_STMT (single_store))))
1088    plus_one = true;
1089
1090  if (single_store && !single_load)
1091    {
1092      gimple stmt = DR_STMT (single_store);
1093      tree rhs = gimple_assign_rhs1 (stmt);
1094      if (const_with_all_bytes_same (rhs) == -1
1095	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
1096	      || (TYPE_MODE (TREE_TYPE (rhs))
1097		  != TYPE_MODE (unsigned_char_type_node))))
1098	return;
1099      if (TREE_CODE (rhs) == SSA_NAME
1100	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
1101	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
1102	return;
1103      if (!adjacent_dr_p (single_store)
1104	  || !dominated_by_p (CDI_DOMINATORS,
1105			      loop->latch, gimple_bb (stmt)))
1106	return;
1107      partition->kind = PKIND_MEMSET;
1108      partition->main_dr = single_store;
1109      partition->niter = nb_iter;
1110      partition->plus_one = plus_one;
1111    }
1112  else if (single_store && single_load)
1113    {
1114      gimple store = DR_STMT (single_store);
1115      gimple load = DR_STMT (single_load);
1116      /* Direct aggregate copy or via an SSA name temporary.  */
1117      if (load != store
1118	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
1119	return;
1120      if (!adjacent_dr_p (single_store)
1121	  || !adjacent_dr_p (single_load)
1122	  || !operand_equal_p (DR_STEP (single_store),
1123			       DR_STEP (single_load), 0)
1124	  || !dominated_by_p (CDI_DOMINATORS,
1125			      loop->latch, gimple_bb (store)))
1126	return;
1127      /* Now check that if there is a dependence this dependence is
1128         of a suitable form for memmove.  */
1129      vec<loop_p> loops = vNULL;
1130      ddr_p ddr;
1131      loops.safe_push (loop);
1132      ddr = initialize_data_dependence_relation (single_load, single_store,
1133						 loops);
1134      compute_affine_dependence (ddr, loop);
1135      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1136	{
1137	  free_dependence_relation (ddr);
1138	  loops.release ();
1139	  return;
1140	}
1141      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1142	{
1143	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
1144	    {
1145	      free_dependence_relation (ddr);
1146	      loops.release ();
1147	      return;
1148	    }
1149	  lambda_vector dist_v;
1150	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
1151	    {
1152	      int dist = dist_v[index_in_loop_nest (loop->num,
1153						    DDR_LOOP_NEST (ddr))];
1154	      if (dist > 0 && !DDR_REVERSED_P (ddr))
1155		{
1156		  free_dependence_relation (ddr);
1157		  loops.release ();
1158		  return;
1159		}
1160	    }
1161	}
1162      free_dependence_relation (ddr);
1163      loops.release ();
1164      partition->kind = PKIND_MEMCPY;
1165      partition->main_dr = single_store;
1166      partition->secondary_dr = single_load;
1167      partition->niter = nb_iter;
1168      partition->plus_one = plus_one;
1169    }
1170}
1171
1172/* For a data reference REF, return the declaration of its base
1173   address or NULL_TREE if the base is not determined.  */
1174
1175static tree
1176ref_base_address (data_reference_p dr)
1177{
1178  tree base_address = DR_BASE_ADDRESS (dr);
1179  if (base_address
1180      && TREE_CODE (base_address) == ADDR_EXPR)
1181    return TREE_OPERAND (base_address, 0);
1182
1183  return base_address;
1184}
1185
1186/* Returns true when PARTITION1 and PARTITION2 have similar memory
1187   accesses in RDG.  */
1188
1189static bool
1190similar_memory_accesses (struct graph *rdg, partition_t partition1,
1191			 partition_t partition2)
1192{
1193  unsigned i, j, k, l;
1194  bitmap_iterator bi, bj;
1195  data_reference_p ref1, ref2;
1196
1197  /* First check whether in the intersection of the two partitions are
1198     any loads or stores.  Common loads are the situation that happens
1199     most often.  */
1200  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1201    if (RDG_MEM_WRITE_STMT (rdg, i)
1202	|| RDG_MEM_READS_STMT (rdg, i))
1203      return true;
1204
1205  /* Then check all data-references against each other.  */
1206  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1207    if (RDG_MEM_WRITE_STMT (rdg, i)
1208	|| RDG_MEM_READS_STMT (rdg, i))
1209      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1210	if (RDG_MEM_WRITE_STMT (rdg, j)
1211	    || RDG_MEM_READS_STMT (rdg, j))
1212	  {
1213	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1214	      {
1215		tree base1 = ref_base_address (ref1);
1216		if (base1)
1217		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1218		    if (base1 == ref_base_address (ref2))
1219		      return true;
1220	      }
1221	  }
1222
1223  return false;
1224}
1225
1226/* Aggregate several components into a useful partition that is
1227   registered in the PARTITIONS vector.  Partitions will be
1228   distributed in different loops.  */
1229
1230static void
1231rdg_build_partitions (struct graph *rdg,
1232		      vec<gimple> starting_stmts,
1233		      vec<partition_t> *partitions)
1234{
1235  bitmap processed = BITMAP_ALLOC (NULL);
1236  int i;
1237  gimple stmt;
1238
1239  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1240    {
1241      int v = rdg_vertex_for_stmt (rdg, stmt);
1242
1243      if (dump_file && (dump_flags & TDF_DETAILS))
1244	fprintf (dump_file,
1245		 "ldist asked to generate code for vertex %d\n", v);
1246
1247      /* If the vertex is already contained in another partition so
1248         is the partition rooted at it.  */
1249      if (bitmap_bit_p (processed, v))
1250	continue;
1251
1252      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1253      bitmap_ior_into (processed, partition->stmts);
1254
1255      if (dump_file && (dump_flags & TDF_DETAILS))
1256	{
1257	  fprintf (dump_file, "ldist useful partition:\n");
1258	  dump_bitmap (dump_file, partition->stmts);
1259	}
1260
1261      partitions->safe_push (partition);
1262    }
1263
1264  /* All vertices should have been assigned to at least one partition now,
1265     other than vertices belonging to dead code.  */
1266
1267  BITMAP_FREE (processed);
1268}
1269
1270/* Dump to FILE the PARTITIONS.  */
1271
1272static void
1273dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1274{
1275  int i;
1276  partition_t partition;
1277
1278  FOR_EACH_VEC_ELT (partitions, i, partition)
1279    debug_bitmap_file (file, partition->stmts);
1280}
1281
1282/* Debug PARTITIONS.  */
1283extern void debug_rdg_partitions (vec<partition_t> );
1284
1285DEBUG_FUNCTION void
1286debug_rdg_partitions (vec<partition_t> partitions)
1287{
1288  dump_rdg_partitions (stderr, partitions);
1289}
1290
1291/* Returns the number of read and write operations in the RDG.  */
1292
1293static int
1294number_of_rw_in_rdg (struct graph *rdg)
1295{
1296  int i, res = 0;
1297
1298  for (i = 0; i < rdg->n_vertices; i++)
1299    {
1300      if (RDG_MEM_WRITE_STMT (rdg, i))
1301	++res;
1302
1303      if (RDG_MEM_READS_STMT (rdg, i))
1304	++res;
1305    }
1306
1307  return res;
1308}
1309
1310/* Returns the number of read and write operations in a PARTITION of
1311   the RDG.  */
1312
1313static int
1314number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1315{
1316  int res = 0;
1317  unsigned i;
1318  bitmap_iterator ii;
1319
1320  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1321    {
1322      if (RDG_MEM_WRITE_STMT (rdg, i))
1323	++res;
1324
1325      if (RDG_MEM_READS_STMT (rdg, i))
1326	++res;
1327    }
1328
1329  return res;
1330}
1331
1332/* Returns true when one of the PARTITIONS contains all the read or
1333   write operations of RDG.  */
1334
1335static bool
1336partition_contains_all_rw (struct graph *rdg,
1337			   vec<partition_t> partitions)
1338{
1339  int i;
1340  partition_t partition;
1341  int nrw = number_of_rw_in_rdg (rdg);
1342
1343  FOR_EACH_VEC_ELT (partitions, i, partition)
1344    if (nrw == number_of_rw_in_partition (rdg, partition))
1345      return true;
1346
1347  return false;
1348}
1349
1350/* Compute partition dependence created by the data references in DRS1
1351   and DRS2 and modify and return DIR according to that.  */
1352
1353static int
1354pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
1355			 vec<data_reference_p> drs1,
1356			 vec<data_reference_p> drs2)
1357{
1358  data_reference_p dr1, dr2;
1359
1360  /* dependence direction - 0 is no dependence, -1 is back,
1361     1 is forth, 2 is both (we can stop then, merging will occur).  */
1362  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
1363    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
1364      {
1365	data_reference_p saved_dr1 = dr1;
1366	int this_dir = 1;
1367	ddr_p ddr;
1368	/* Re-shuffle data-refs to be in dominator order.  */
1369	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1370	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
1371	  {
1372	    data_reference_p tem = dr1;
1373	    dr1 = dr2;
1374	    dr2 = tem;
1375	    this_dir = -this_dir;
1376	  }
1377	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
1378	compute_affine_dependence (ddr, loops[0]);
1379	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1380	  this_dir = 2;
1381	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1382	  {
1383	    if (DDR_REVERSED_P (ddr))
1384	      {
1385		data_reference_p tem = dr1;
1386		dr1 = dr2;
1387		dr2 = tem;
1388		this_dir = -this_dir;
1389	      }
1390	    /* Known dependences can still be unordered througout the
1391	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
1392	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
1393	      this_dir = 2;
1394	    /* If the overlap is exact preserve stmt order.  */
1395	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
1396	      ;
1397	    else
1398	      {
1399		/* Else as the distance vector is lexicographic positive
1400		   swap the dependence direction.  */
1401		this_dir = -this_dir;
1402	      }
1403	  }
1404	else
1405	  this_dir = 0;
1406	free_dependence_relation (ddr);
1407	if (dir == 0)
1408	  dir = this_dir;
1409	else if (dir != this_dir)
1410	  return 2;
1411	/* Shuffle "back" dr1.  */
1412	dr1 = saved_dr1;
1413      }
1414  return dir;
1415}
1416
1417/* Compare postorder number of the partition graph vertices V1 and V2.  */
1418
1419static int
1420pgcmp (const void *v1_, const void *v2_)
1421{
1422  const vertex *v1 = (const vertex *)v1_;
1423  const vertex *v2 = (const vertex *)v2_;
1424  return v2->post - v1->post;
1425}
1426
1427/* Distributes the code from LOOP in such a way that producer
1428   statements are placed before consumer statements.  Tries to separate
1429   only the statements from STMTS into separate loops.
1430   Returns the number of distributed loops.  */
1431
1432static int
1433distribute_loop (struct loop *loop, vec<gimple> stmts,
1434		 control_dependences *cd, int *nb_calls)
1435{
1436  struct graph *rdg;
1437  partition_t partition;
1438  bool any_builtin;
1439  int i, nbp;
1440  graph *pg = NULL;
1441  int num_sccs = 1;
1442
1443  *nb_calls = 0;
1444  auto_vec<loop_p, 3> loop_nest;
1445  if (!find_loop_nest (loop, &loop_nest))
1446    return 0;
1447
1448  rdg = build_rdg (loop_nest, cd);
1449  if (!rdg)
1450    {
1451      if (dump_file && (dump_flags & TDF_DETAILS))
1452	fprintf (dump_file,
1453		 "Loop %d not distributed: failed to build the RDG.\n",
1454		 loop->num);
1455
1456      return 0;
1457    }
1458
1459  if (dump_file && (dump_flags & TDF_DETAILS))
1460    dump_rdg (dump_file, rdg);
1461
1462  auto_vec<partition_t, 3> partitions;
1463  rdg_build_partitions (rdg, stmts, &partitions);
1464
1465  any_builtin = false;
1466  FOR_EACH_VEC_ELT (partitions, i, partition)
1467    {
1468      classify_partition (loop, rdg, partition);
1469      any_builtin |= partition_builtin_p (partition);
1470    }
1471
1472  /* If we are only distributing patterns but did not detect any,
1473     simply bail out.  */
1474  if (!flag_tree_loop_distribution
1475      && !any_builtin)
1476    {
1477      nbp = 0;
1478      goto ldist_done;
1479    }
1480
1481  /* If we are only distributing patterns fuse all partitions that
1482     were not classified as builtins.  This also avoids chopping
1483     a loop into pieces, separated by builtin calls.  That is, we
1484     only want no or a single loop body remaining.  */
1485  partition_t into;
1486  if (!flag_tree_loop_distribution)
1487    {
1488      for (i = 0; partitions.iterate (i, &into); ++i)
1489	if (!partition_builtin_p (into))
1490	  break;
1491      for (++i; partitions.iterate (i, &partition); ++i)
1492	if (!partition_builtin_p (partition))
1493	  {
1494	    if (dump_file && (dump_flags & TDF_DETAILS))
1495	      {
1496		fprintf (dump_file, "fusing non-builtin partitions\n");
1497		dump_bitmap (dump_file, into->stmts);
1498		dump_bitmap (dump_file, partition->stmts);
1499	      }
1500	    partition_merge_into (into, partition);
1501	    partitions.unordered_remove (i);
1502	    partition_free (partition);
1503	    i--;
1504	  }
1505    }
1506
1507  /* Due to limitations in the transform phase we have to fuse all
1508     reduction partitions into the last partition so the existing
1509     loop will contain all loop-closed PHI nodes.  */
1510  for (i = 0; partitions.iterate (i, &into); ++i)
1511    if (partition_reduction_p (into))
1512      break;
1513  for (i = i + 1; partitions.iterate (i, &partition); ++i)
1514    if (partition_reduction_p (partition))
1515      {
1516	if (dump_file && (dump_flags & TDF_DETAILS))
1517	  {
1518	    fprintf (dump_file, "fusing partitions\n");
1519	    dump_bitmap (dump_file, into->stmts);
1520	    dump_bitmap (dump_file, partition->stmts);
1521	    fprintf (dump_file, "because they have reductions\n");
1522	  }
1523	partition_merge_into (into, partition);
1524	partitions.unordered_remove (i);
1525	partition_free (partition);
1526	i--;
1527      }
1528
1529  /* Apply our simple cost model - fuse partitions with similar
1530     memory accesses.  */
1531  for (i = 0; partitions.iterate (i, &into); ++i)
1532    {
1533      if (partition_builtin_p (into))
1534	continue;
1535      for (int j = i + 1;
1536	   partitions.iterate (j, &partition); ++j)
1537	{
1538	  if (!partition_builtin_p (partition)
1539	      && similar_memory_accesses (rdg, into, partition))
1540	    {
1541	      if (dump_file && (dump_flags & TDF_DETAILS))
1542		{
1543		  fprintf (dump_file, "fusing partitions\n");
1544		  dump_bitmap (dump_file, into->stmts);
1545		  dump_bitmap (dump_file, partition->stmts);
1546		  fprintf (dump_file, "because they have similar "
1547			   "memory accesses\n");
1548		}
1549	      partition_merge_into (into, partition);
1550	      partitions.unordered_remove (j);
1551	      partition_free (partition);
1552	      j--;
1553	    }
1554	}
1555    }
1556
1557  /* Build the partition dependency graph.  */
1558  if (partitions.length () > 1)
1559    {
1560      pg = new_graph (partitions.length ());
1561      struct pgdata {
1562	  partition_t partition;
1563	  vec<data_reference_p> writes;
1564	  vec<data_reference_p> reads;
1565      };
1566#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
1567      for (i = 0; partitions.iterate (i, &partition); ++i)
1568	{
1569	  vertex *v = &pg->vertices[i];
1570	  pgdata *data = new pgdata;
1571	  data_reference_p dr;
1572	  /* FIXME - leaks.  */
1573	  v->data = data;
1574	  bitmap_iterator bi;
1575	  unsigned j;
1576	  data->partition = partition;
1577	  data->reads = vNULL;
1578	  data->writes = vNULL;
1579	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
1580	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
1581	      if (DR_IS_READ (dr))
1582		data->reads.safe_push (dr);
1583	      else
1584		data->writes.safe_push (dr);
1585	}
1586      partition_t partition1, partition2;
1587      for (i = 0; partitions.iterate (i, &partition1); ++i)
1588	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
1589	  {
1590	    /* dependence direction - 0 is no dependence, -1 is back,
1591	       1 is forth, 2 is both (we can stop then, merging will occur).  */
1592	    int dir = 0;
1593	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1594					   PGDATA(i)->writes,
1595					   PGDATA(j)->reads);
1596	    if (dir != 2)
1597	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1598					     PGDATA(i)->reads,
1599					     PGDATA(j)->writes);
1600	    if (dir != 2)
1601	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
1602					     PGDATA(i)->writes,
1603					     PGDATA(j)->writes);
1604	    if (dir == 1 || dir == 2)
1605	      add_edge (pg, i, j);
1606	    if (dir == -1 || dir == 2)
1607	      add_edge (pg, j, i);
1608	  }
1609
1610      /* Add edges to the reduction partition (if any) to force it last.  */
1611      unsigned j;
1612      for (j = 0; partitions.iterate (j, &partition); ++j)
1613	if (partition_reduction_p (partition))
1614	  break;
1615      if (j < partitions.length ())
1616	{
1617	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
1618	    if (i != j)
1619	      add_edge (pg, i, j);
1620	}
1621
1622      /* Compute partitions we cannot separate and fuse them.  */
1623      num_sccs = graphds_scc (pg, NULL);
1624      for (i = 0; i < num_sccs; ++i)
1625	{
1626	  partition_t first;
1627	  int j;
1628	  for (j = 0; partitions.iterate (j, &first); ++j)
1629	    if (pg->vertices[j].component == i)
1630	      break;
1631	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
1632	    if (pg->vertices[j].component == i)
1633	      {
1634		if (dump_file && (dump_flags & TDF_DETAILS))
1635		  {
1636		    fprintf (dump_file, "fusing partitions\n");
1637		    dump_bitmap (dump_file, first->stmts);
1638		    dump_bitmap (dump_file, partition->stmts);
1639		    fprintf (dump_file, "because they are in the same "
1640			     "dependence SCC\n");
1641		  }
1642		partition_merge_into (first, partition);
1643		partitions[j] = NULL;
1644		partition_free (partition);
1645		PGDATA (j)->partition = NULL;
1646	      }
1647	}
1648
1649      /* Now order the remaining nodes in postorder.  */
1650      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
1651      partitions.truncate (0);
1652      for (i = 0; i < pg->n_vertices; ++i)
1653	{
1654	  pgdata *data = PGDATA (i);
1655	  if (data->partition)
1656	    partitions.safe_push (data->partition);
1657	  data->reads.release ();
1658	  data->writes.release ();
1659	  delete data;
1660	}
1661      gcc_assert (partitions.length () == (unsigned)num_sccs);
1662      free_graph (pg);
1663    }
1664
1665  nbp = partitions.length ();
1666  if (nbp == 0
1667      || (nbp == 1 && !partition_builtin_p (partitions[0]))
1668      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
1669    {
1670      nbp = 0;
1671      goto ldist_done;
1672    }
1673
1674  if (dump_file && (dump_flags & TDF_DETAILS))
1675    dump_rdg_partitions (dump_file, partitions);
1676
1677  FOR_EACH_VEC_ELT (partitions, i, partition)
1678    {
1679      if (partition_builtin_p (partition))
1680	(*nb_calls)++;
1681      generate_code_for_partition (loop, partition, i < nbp - 1);
1682    }
1683
1684 ldist_done:
1685
1686  FOR_EACH_VEC_ELT (partitions, i, partition)
1687    partition_free (partition);
1688
1689  free_rdg (rdg);
1690  return nbp - *nb_calls;
1691}
1692
1693/* Distribute all loops in the current function.  */
1694
1695namespace {
1696
1697const pass_data pass_data_loop_distribution =
1698{
1699  GIMPLE_PASS, /* type */
1700  "ldist", /* name */
1701  OPTGROUP_LOOP, /* optinfo_flags */
1702  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1703  ( PROP_cfg | PROP_ssa ), /* properties_required */
1704  0, /* properties_provided */
1705  0, /* properties_destroyed */
1706  0, /* todo_flags_start */
1707  0, /* todo_flags_finish */
1708};
1709
1710class pass_loop_distribution : public gimple_opt_pass
1711{
1712public:
1713  pass_loop_distribution (gcc::context *ctxt)
1714    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
1715  {}
1716
1717  /* opt_pass methods: */
1718  virtual bool gate (function *)
1719    {
1720      return flag_tree_loop_distribution
1721	|| flag_tree_loop_distribute_patterns;
1722    }
1723
1724  virtual unsigned int execute (function *);
1725
1726}; // class pass_loop_distribution
1727
1728unsigned int
1729pass_loop_distribution::execute (function *fun)
1730{
1731  struct loop *loop;
1732  bool changed = false;
1733  basic_block bb;
1734  control_dependences *cd = NULL;
1735
1736  FOR_ALL_BB_FN (bb, fun)
1737    {
1738      gimple_stmt_iterator gsi;
1739      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1740	gimple_set_uid (gsi_stmt (gsi), -1);
1741      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1742	gimple_set_uid (gsi_stmt (gsi), -1);
1743    }
1744
1745  /* We can at the moment only distribute non-nested loops, thus restrict
1746     walking to innermost loops.  */
1747  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
1748    {
1749      auto_vec<gimple> work_list;
1750      basic_block *bbs;
1751      int num = loop->num;
1752      unsigned int i;
1753
1754      /* If the loop doesn't have a single exit we will fail anyway,
1755	 so do that early.  */
1756      if (!single_exit (loop))
1757	continue;
1758
1759      /* Only optimize hot loops.  */
1760      if (!optimize_loop_for_speed_p (loop))
1761	continue;
1762
1763      /* Initialize the worklist with stmts we seed the partitions with.  */
1764      bbs = get_loop_body_in_dom_order (loop);
1765      for (i = 0; i < loop->num_nodes; ++i)
1766	{
1767	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
1768	       !gsi_end_p (gsi);
1769	       gsi_next (&gsi))
1770	    {
1771	      gphi *phi = gsi.phi ();
1772	      if (virtual_operand_p (gimple_phi_result (phi)))
1773		continue;
1774	      /* Distribute stmts which have defs that are used outside of
1775		 the loop.  */
1776	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
1777		continue;
1778	      work_list.safe_push (phi);
1779	    }
1780	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1781	       !gsi_end_p (gsi);
1782	       gsi_next (&gsi))
1783	    {
1784	      gimple stmt = gsi_stmt (gsi);
1785
1786	      /* If there is a stmt with side-effects bail out - we
1787		 cannot and should not distribute this loop.  */
1788	      if (gimple_has_side_effects (stmt))
1789		{
1790		  work_list.truncate (0);
1791		  goto out;
1792		}
1793
1794	      /* Distribute stmts which have defs that are used outside of
1795		 the loop.  */
1796	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
1797		;
1798	      /* Otherwise only distribute stores for now.  */
1799	      else if (!gimple_vdef (stmt))
1800		continue;
1801
1802	      work_list.safe_push (stmt);
1803	    }
1804	}
1805out:
1806      free (bbs);
1807
1808      int nb_generated_loops = 0;
1809      int nb_generated_calls = 0;
1810      location_t loc = find_loop_location (loop);
1811      if (work_list.length () > 0)
1812	{
1813	  if (!cd)
1814	    {
1815	      calculate_dominance_info (CDI_DOMINATORS);
1816	      calculate_dominance_info (CDI_POST_DOMINATORS);
1817	      cd = new control_dependences (create_edge_list ());
1818	      free_dominance_info (CDI_POST_DOMINATORS);
1819	    }
1820	  nb_generated_loops = distribute_loop (loop, work_list, cd,
1821						&nb_generated_calls);
1822	}
1823
1824      if (nb_generated_loops + nb_generated_calls > 0)
1825	{
1826	  changed = true;
1827	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
1828			   loc, "Loop %d distributed: split to %d loops "
1829			   "and %d library calls.\n",
1830			   num, nb_generated_loops, nb_generated_calls);
1831	}
1832      else if (dump_file && (dump_flags & TDF_DETAILS))
1833	fprintf (dump_file, "Loop %d is the same.\n", num);
1834    }
1835
1836  if (cd)
1837    delete cd;
1838
1839  if (changed)
1840    {
1841      /* Cached scalar evolutions now may refer to wrong or non-existing
1842	 loops.  */
1843      scev_reset_htab ();
1844      mark_virtual_operands_for_renaming (fun);
1845      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1846    }
1847
1848#ifdef ENABLE_CHECKING
1849  verify_loop_structure ();
1850#endif
1851
1852  return 0;
1853}
1854
1855} // anon namespace
1856
1857gimple_opt_pass *
1858make_pass_loop_distribution (gcc::context *ctxt)
1859{
1860  return new pass_loop_distribution (ctxt);
1861}
1862