1/* Tail merging for gimple.
2   Copyright (C) 2011-2015 Free Software Foundation, Inc.
3   Contributed by Tom de Vries (tom@codesourcery.com)
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21/* Pass overview.
22
23
24   MOTIVATIONAL EXAMPLE
25
26   gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
27
28   hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
29   {
30     struct FILED.1638 * fpD.2605;
31     charD.1 fileNameD.2604[1000];
32     intD.0 D.3915;
33     const charD.1 * restrict outputFileName.0D.3914;
34
35     # BLOCK 2 freq:10000
36     # PRED: ENTRY [100.0%]  (fallthru,exec)
37     # PT = nonlocal { D.3926 } (restr)
38     outputFileName.0D.3914_3
39       = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40     # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43     sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44     # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47     D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48     if (D.3915_4 == 0)
49       goto <bb 3>;
50     else
51       goto <bb 4>;
52     # SUCC: 3 [10.0%]  (true,exec) 4 [90.0%]  (false,exec)
53
54     # BLOCK 3 freq:1000
55     # PRED: 2 [10.0%]  (true,exec)
56     # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59     freeD.898 (ctxD.2601_5(D));
60     goto <bb 7>;
61     # SUCC: 7 [100.0%]  (fallthru,exec)
62
63     # BLOCK 4 freq:9000
64     # PRED: 2 [90.0%]  (false,exec)
65     # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66     # PT = nonlocal escaped
67     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69     fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70     if (fpD.2605_8 == 0B)
71       goto <bb 5>;
72     else
73       goto <bb 6>;
74     # SUCC: 5 [1.9%]  (true,exec) 6 [98.1%]  (false,exec)
75
76     # BLOCK 5 freq:173
77     # PRED: 4 [1.9%]  (true,exec)
78     # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81     freeD.898 (ctxD.2601_5(D));
82     goto <bb 7>;
83     # SUCC: 7 [100.0%]  (fallthru,exec)
84
85     # BLOCK 6 freq:8827
86     # PRED: 4 [98.1%]  (false,exec)
87     # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90     fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91     # SUCC: 7 [100.0%]  (fallthru,exec)
92
93     # BLOCK 7 freq:10000
94     # PRED: 3 [100.0%]  (fallthru,exec) 5 [100.0%]  (fallthru,exec)
95             6 [100.0%]  (fallthru,exec)
96     # PT = nonlocal null
97
98     # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99     # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100                            .MEMD.3923_18(6)>
101     # VUSE <.MEMD.3923_11>
102     return ctxD.2601_1;
103     # SUCC: EXIT [100.0%]
104   }
105
106   bb 3 and bb 5 can be merged.  The blocks have different predecessors, but the
107   same successors, and the same operations.
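
   Schematically (a sketch of the intended result, not a literal dump), the
   pass replaces one of the two blocks by the other, say bb 5 by bb 3: the
   edge from bb 4 to bb 5 is redirected to bb 3, bb 5 is deleted, and the
   virtual operands are marked for renaming, so that only one copy of the
   call to free remains:

     # BLOCK 3
     # PRED: 2 (true,exec) 4 (true,exec)
     freeD.898 (ctxD.2601_5(D));
     goto <bb 7>;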
108
109
110   CONTEXT
111
112   A technique called tail merging (or cross jumping) can fix the example
113   above.  For a block, we look for common code at the end (the tail) of the
114   predecessor blocks, and insert jumps from one block to the other.
   The example is a special case for tail merging, in that 2 whole blocks
   can be merged, rather than just their end parts.
117   We currently only focus on whole block merging, so in that sense
118   calling this pass tail merge is a bit of a misnomer.
119
120   We distinguish 2 kinds of situations in which blocks can be merged:
121   - same operations, same predecessors.  The successor edges coming from one
122     block are redirected to come from the other block.
123   - same operations, same successors.  The predecessor edges entering one block
     are redirected to enter the other block.  Note that this operation might
     involve introducing phi operations.  This second situation is illustrated
     in the source-level sketch below.
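
   For instance (a hypothetical source-level snippet, not taken from the
   testsuite):

     extern void foo (char *);
     extern void bar (char *);

     void
     f (int c, int d, char *p)
     {
       if (c)
         foo (p);
       else if (d)
         foo (p);
       else
         bar (p);
     }

   The two blocks containing the calls to foo perform the same operations and
   both fall through to the same exit block, so the predecessor edges of one
   can be redirected into the other.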
126
   For efficient implementation, we would like to value number the blocks, and
128   have a comparison operator that tells us whether the blocks are equal.
129   Besides being runtime efficient, block value numbering should also abstract
130   from irrelevant differences in order of operations, much like normal value
131   numbering abstracts from irrelevant order of operations.
132
   For the first situation (same operations, same predecessors), normal value
134   numbering fits well.  We can calculate a block value number based on the
135   value numbers of the defs and vdefs.
136
137   For the second situation (same operations, same successors), this approach
138   doesn't work so well.  We can illustrate this using the example.  The calls
   to free use different vuses: .MEMD.3923_14 and .MEMD.3923_16, and these will
140   remain different in value numbering, since they represent different memory
141   states.  So the resulting vdefs of the frees will be different in value
142   numbering, so the block value numbers will be different.
143
   We call the blocks equal not because they define the same values, but
   because uses in the blocks use (possibly different) defs in the same way.
   To be able to detect this efficiently, we need to do some kind of reverse
   value numbering, meaning number the uses rather than the defs, and
   calculate a block value number based on the value numbers of the uses.
149   Ideally, a block comparison operator will also indicate which phis are needed
150   to merge the blocks.
151
152   For the moment, we don't do block value numbering, but we do insn-by-insn
   matching, using SCC value numbers (from SCCVN) to match operations with
   results, and
154   structural comparison otherwise, while ignoring vop mismatches.
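
   In the example above, insn-by-insn matching pairs up the two calls

     freeD.898 (ctxD.2601_5(D));   in bb 3, with vuse .MEMD.3923_14
     freeD.898 (ctxD.2601_5(D));   in bb 5, with vuse .MEMD.3923_16

   The calls compare equal (same callee, same argument), and the differing
   virtual operands are deliberately ignored.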
155
156
157   IMPLEMENTATION
158
159   1. The pass first determines all groups of blocks with the same successor
160      blocks.
161   2. Within each group, it tries to determine clusters of equal basic blocks.
162   3. The clusters are applied.
163   4. The same successor groups are updated.
   5. This process is repeated from 2 onwards, until no more changes occur
      (see the simplified driver sketch below, which maps these steps onto the
      functions in this file).
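
   In terms of the functions below, the driver loop in tail_merge_optimize
   roughly does (a simplified sketch; the real code, including the dominance
   info management and the iteration limit, is at the end of this file):

     init_worklist ();                            (step 1)
     while (!worklist.is_empty ())
       {
         find_clusters ();                        (step 2)
         if (all_clusters.is_empty ()
             || apply_clusters () == 0)           (step 3)
           break;
         update_worklist ();                      (step 4)
       }                                          (step 5: repeat)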
165
166
167   LIMITATIONS/TODO
168
   - whole-block merging only; no merging of partial tails of blocks.
170   - handles only 'same operations, same successors'.
171     It handles same predecessors as a special subcase though.
172   - does not implement the reverse value numbering and block value numbering.
173   - improve memory allocation: use garbage collected memory, obstacks,
174     allocpools where appropriate.
   - no insertion of gimple_reg phis; we only introduce vop phis.
176   - handle blocks with gimple_reg phi_nodes.
177
178
179   PASS PLACEMENT
180   This 'pass' is not a stand-alone gimple pass, but runs as part of
181   pass_pre, in order to share the value numbering.
182
183
184   SWITCHES
185
186   - ftree-tail-merge.  On at -O2.  We may have to enable it only at -Os.  */
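
/* A quick way to observe the pass (an illustration, not part of the pass
   itself): since tail merge runs as part of pass_pre and shares its dump
   file, compiling a testcase such as gcc.dg/pr43864.c with
     gcc -O2 -ftree-tail-merge -fdump-tree-pre-details -S pr43864.c
   should show "find_duplicates: <bb N> duplicate of <bb M>" messages in the
   resulting .pre dump (see find_duplicate below).  */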
187
188#include "config.h"
189#include "system.h"
190#include "coretypes.h"
191#include "tm.h"
192#include "hash-set.h"
193#include "machmode.h"
194#include "vec.h"
195#include "double-int.h"
196#include "input.h"
197#include "alias.h"
198#include "symtab.h"
199#include "wide-int.h"
200#include "inchash.h"
201#include "real.h"
202#include "tree.h"
203#include "fold-const.h"
204#include "stor-layout.h"
205#include "trans-mem.h"
206#include "inchash.h"
207#include "tm_p.h"
208#include "predict.h"
209#include "hard-reg-set.h"
210#include "input.h"
211#include "function.h"
212#include "dominance.h"
213#include "cfg.h"
214#include "cfganal.h"
215#include "cfgcleanup.h"
216#include "basic-block.h"
217#include "flags.h"
218#include "hash-table.h"
219#include "tree-ssa-alias.h"
220#include "internal-fn.h"
221#include "tree-eh.h"
222#include "gimple-expr.h"
223#include "is-a.h"
224#include "gimple.h"
225#include "gimple-iterator.h"
226#include "gimple-ssa.h"
227#include "tree-cfg.h"
228#include "tree-phinodes.h"
229#include "ssa-iterators.h"
230#include "tree-into-ssa.h"
231#include "params.h"
232#include "gimple-pretty-print.h"
233#include "tree-ssa-sccvn.h"
234#include "tree-dump.h"
235#include "cfgloop.h"
236#include "tree-pass.h"
237#include "trans-mem.h"
238#include "stringpool.h"
239#include "tree-ssanames.h"
240
241/* Describes a group of bbs with the same successors.  The successor bbs are
242   cached in succs, and the successor edge flags are cached in succ_flags.
243   If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
244   it's marked in inverse.
245   Additionally, the hash value for the struct is cached in hashval, and
   in_worklist indicates whether it's currently part of the worklist.  */
247
248struct same_succ_def
249{
250  /* The bbs that have the same successor bbs.  */
251  bitmap bbs;
252  /* The successor bbs.  */
253  bitmap succs;
254  /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
255     bb.  */
256  bitmap inverse;
257  /* The edge flags for each of the successor bbs.  */
258  vec<int> succ_flags;
259  /* Indicates whether the struct is currently in the worklist.  */
260  bool in_worklist;
261  /* The hash value of the struct.  */
262  hashval_t hashval;
263
264  /* hash_table support.  */
265  typedef same_succ_def value_type;
266  typedef same_succ_def compare_type;
267  static inline hashval_t hash (const value_type *);
268  static int equal (const value_type *, const compare_type *);
269  static void remove (value_type *);
270};
271typedef struct same_succ_def *same_succ;
272typedef const struct same_succ_def *const_same_succ;
273
274/* hash routine for hash_table support, returns hashval of E.  */
275
276inline hashval_t
277same_succ_def::hash (const value_type *e)
278{
279  return e->hashval;
280}
281
282/* A group of bbs where 1 bb from bbs can replace the other bbs.  */
283
284struct bb_cluster_def
285{
286  /* The bbs in the cluster.  */
287  bitmap bbs;
288  /* The preds of the bbs in the cluster.  */
289  bitmap preds;
290  /* Index in all_clusters vector.  */
291  int index;
292  /* The bb to replace the cluster with.  */
293  basic_block rep_bb;
294};
295typedef struct bb_cluster_def *bb_cluster;
296typedef const struct bb_cluster_def *const_bb_cluster;
297
/* Per-bb info.  */
299
300struct aux_bb_info
301{
302  /* The number of non-debug statements in the bb.  */
303  int size;
304  /* The same_succ that this bb is a member of.  */
305  same_succ bb_same_succ;
306  /* The cluster that this bb is a member of.  */
307  bb_cluster cluster;
  /* The vop state at the exit of a bb.  This is short-lived data, used while
     updating the vuses when a bb is replaced.  */
310  tree vop_at_exit;
311  /* The bb that either contains or is dominated by the dependencies of the
312     bb.  */
313  basic_block dep_bb;
314};
315
316/* Macros to access the fields of struct aux_bb_info.  */
317
318#define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
319#define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
320#define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
321#define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
322#define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
323
/* Returns true if the only effect statement STMT has is to define locally
   used SSA_NAMEs.  */
326
327static bool
328stmt_local_def (gimple stmt)
329{
330  basic_block bb, def_bb;
331  imm_use_iterator iter;
332  use_operand_p use_p;
333  tree val;
334  def_operand_p def_p;
335
336  if (gimple_vdef (stmt) != NULL_TREE
337      || gimple_has_side_effects (stmt)
338      || gimple_could_trap_p_1 (stmt, false, false)
339      || gimple_vuse (stmt) != NULL_TREE)
340    return false;
341
342  def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
343  if (def_p == NULL)
344    return false;
345
346  val = DEF_FROM_PTR (def_p);
347  if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
348    return false;
349
350  def_bb = gimple_bb (stmt);
351
352  FOR_EACH_IMM_USE_FAST (use_p, iter, val)
353    {
354      if (is_gimple_debug (USE_STMT (use_p)))
355	continue;
356      bb = gimple_bb (USE_STMT (use_p));
357      if (bb == def_bb)
358	continue;
359
360      if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
361	  && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
362	continue;
363
364      return false;
365    }
366
367  return true;
368}
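
/* For instance (a hypothetical snippet, not taken from a testcase), in a bb
   containing

     _1 = x_2 & 255;
     if (_1 != 0)
       goto <bb A>;
     else
       goto <bb B>;

   the assignment to _1 is a local def: it has no vdef or vuse, cannot trap,
   and its only use is in the same bb, so the statement comparison below can
   skip over it.  */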
369
370/* Let GSI skip forwards over local defs.  */
371
372static void
373gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
374{
375  gimple stmt;
376
377  while (true)
378    {
379      if (gsi_end_p (*gsi))
380	return;
381      stmt = gsi_stmt (*gsi);
382      if (!stmt_local_def (stmt))
383	return;
      gsi_next_nondebug (gsi);
385    }
386}
387
388/* VAL1 and VAL2 are either:
389   - uses in BB1 and BB2, or
390   - phi alternatives for BB1 and BB2.
391   Return true if the uses have the same gvn value.  */
392
393static bool
394gvn_uses_equal (tree val1, tree val2)
395{
396  gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
397
398  if (val1 == val2)
399    return true;
400
401  if (vn_valueize (val1) != vn_valueize (val2))
402    return false;
403
404  return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
405	  && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
406}
407
408/* Prints E to FILE.  */
409
410static void
411same_succ_print (FILE *file, const same_succ e)
412{
413  unsigned int i;
414  bitmap_print (file, e->bbs, "bbs:", "\n");
415  bitmap_print (file, e->succs, "succs:", "\n");
416  bitmap_print (file, e->inverse, "inverse:", "\n");
417  fprintf (file, "flags:");
418  for (i = 0; i < e->succ_flags.length (); ++i)
419    fprintf (file, " %x", e->succ_flags[i]);
420  fprintf (file, "\n");
421}
422
/* Prints same_succ *PE to FILE.  */
424
425inline int
426ssa_same_succ_print_traverse (same_succ *pe, FILE *file)
427{
428  const same_succ e = *pe;
429  same_succ_print (file, e);
430  return 1;
431}
432
433/* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB.  */
434
435static void
436update_dep_bb (basic_block use_bb, tree val)
437{
438  basic_block dep_bb;
439
440  /* Not a dep.  */
441  if (TREE_CODE (val) != SSA_NAME)
442    return;
443
444  /* Skip use of global def.  */
445  if (SSA_NAME_IS_DEFAULT_DEF (val))
446    return;
447
448  /* Skip use of local def.  */
449  dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
450  if (dep_bb == use_bb)
451    return;
452
453  if (BB_DEP_BB (use_bb) == NULL
454      || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
455    BB_DEP_BB (use_bb) = dep_bb;
456}
457
458/* Update BB_DEP_BB, given the dependencies in STMT.  */
459
460static void
461stmt_update_dep_bb (gimple stmt)
462{
463  ssa_op_iter iter;
464  use_operand_p use;
465
466  FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
467    update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
468}
469
/* Calculates hash value for same_succ E.  */
471
472static hashval_t
473same_succ_hash (const_same_succ e)
474{
475  inchash::hash hstate (bitmap_hash (e->succs));
476  int flags;
477  unsigned int i;
478  unsigned int first = bitmap_first_set_bit (e->bbs);
479  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first);
480  int size = 0;
481  gimple stmt;
482  tree arg;
483  unsigned int s;
484  bitmap_iterator bs;
485
486  for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
487       !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
488    {
489      stmt = gsi_stmt (gsi);
490      stmt_update_dep_bb (stmt);
491      if (stmt_local_def (stmt))
492	continue;
493      size++;
494
495      hstate.add_int (gimple_code (stmt));
496      if (is_gimple_assign (stmt))
497	hstate.add_int (gimple_assign_rhs_code (stmt));
498      if (!is_gimple_call (stmt))
499	continue;
500      if (gimple_call_internal_p (stmt))
501        hstate.add_int (gimple_call_internal_fn (stmt));
502      else
503	{
504	  inchash::add_expr (gimple_call_fn (stmt), hstate);
505	  if (gimple_call_chain (stmt))
506	    inchash::add_expr (gimple_call_chain (stmt), hstate);
507	}
508      for (i = 0; i < gimple_call_num_args (stmt); i++)
509	{
510	  arg = gimple_call_arg (stmt, i);
511	  arg = vn_valueize (arg);
512	  inchash::add_expr (arg, hstate);
513	}
514    }
515
516  hstate.add_int (size);
517  BB_SIZE (bb) = size;
518
519  for (i = 0; i < e->succ_flags.length (); ++i)
520    {
521      flags = e->succ_flags[i];
522      flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
523      hstate.add_int (flags);
524    }
525
526  EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
527    {
528      int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx;
529      for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s));
530	   !gsi_end_p (gsi);
531	   gsi_next (&gsi))
532	{
533	  gphi *phi = gsi.phi ();
534	  tree lhs = gimple_phi_result (phi);
535	  tree val = gimple_phi_arg_def (phi, n);
536
537	  if (virtual_operand_p (lhs))
538	    continue;
539	  update_dep_bb (bb, val);
540	}
541    }
542
543  return hstate.end ();
544}
545
546/* Returns true if E1 and E2 have 2 successors, and if the successor flags
547   are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
548   the other edge flags.  */
549
550static bool
551inverse_flags (const_same_succ e1, const_same_succ e2)
552{
553  int f1a, f1b, f2a, f2b;
554  int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
555
556  if (e1->succ_flags.length () != 2)
557    return false;
558
559  f1a = e1->succ_flags[0];
560  f1b = e1->succ_flags[1];
561  f2a = e2->succ_flags[0];
562  f2b = e2->succ_flags[1];
563
564  if (f1a == f2a && f1b == f2b)
565    return false;
566
567  return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
568}
569
570/* Compares SAME_SUCCs E1 and E2.  */
571
572int
573same_succ_def::equal (const value_type *e1, const compare_type *e2)
574{
575  unsigned int i, first1, first2;
576  gimple_stmt_iterator gsi1, gsi2;
577  gimple s1, s2;
578  basic_block bb1, bb2;
579
580  if (e1->hashval != e2->hashval)
581    return 0;
582
583  if (e1->succ_flags.length () != e2->succ_flags.length ())
584    return 0;
585
586  if (!bitmap_equal_p (e1->succs, e2->succs))
587    return 0;
588
589  if (!inverse_flags (e1, e2))
590    {
591      for (i = 0; i < e1->succ_flags.length (); ++i)
592	if (e1->succ_flags[i] != e2->succ_flags[i])
593	  return 0;
594    }
595
596  first1 = bitmap_first_set_bit (e1->bbs);
597  first2 = bitmap_first_set_bit (e2->bbs);
598
599  bb1 = BASIC_BLOCK_FOR_FN (cfun, first1);
600  bb2 = BASIC_BLOCK_FOR_FN (cfun, first2);
601
602  if (BB_SIZE (bb1) != BB_SIZE (bb2))
603    return 0;
604
605  gsi1 = gsi_start_nondebug_bb (bb1);
606  gsi2 = gsi_start_nondebug_bb (bb2);
607  gsi_advance_fw_nondebug_nonlocal (&gsi1);
608  gsi_advance_fw_nondebug_nonlocal (&gsi2);
609  while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
610    {
611      s1 = gsi_stmt (gsi1);
612      s2 = gsi_stmt (gsi2);
613      if (gimple_code (s1) != gimple_code (s2))
614	return 0;
615      if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
616	return 0;
617      gsi_next_nondebug (&gsi1);
618      gsi_next_nondebug (&gsi2);
619      gsi_advance_fw_nondebug_nonlocal (&gsi1);
620      gsi_advance_fw_nondebug_nonlocal (&gsi2);
621    }
622
623  return 1;
624}
625
626/* Alloc and init a new SAME_SUCC.  */
627
628static same_succ
629same_succ_alloc (void)
630{
631  same_succ same = XNEW (struct same_succ_def);
632
633  same->bbs = BITMAP_ALLOC (NULL);
634  same->succs = BITMAP_ALLOC (NULL);
635  same->inverse = BITMAP_ALLOC (NULL);
636  same->succ_flags.create (10);
637  same->in_worklist = false;
638
639  return same;
640}
641
642/* Delete same_succ E.  */
643
644void
645same_succ_def::remove (same_succ e)
646{
647  BITMAP_FREE (e->bbs);
648  BITMAP_FREE (e->succs);
649  BITMAP_FREE (e->inverse);
650  e->succ_flags.release ();
651
652  XDELETE (e);
653}
654
655/* Reset same_succ SAME.  */
656
657static void
658same_succ_reset (same_succ same)
659{
660  bitmap_clear (same->bbs);
661  bitmap_clear (same->succs);
662  bitmap_clear (same->inverse);
663  same->succ_flags.truncate (0);
664}
665
666static hash_table<same_succ_def> *same_succ_htab;
667
668/* Array that is used to store the edge flags for a successor.  */
669
670static int *same_succ_edge_flags;
671
672/* Bitmap that is used to mark bbs that are recently deleted.  */
673
674static bitmap deleted_bbs;
675
676/* Bitmap that is used to mark predecessors of bbs that are
677   deleted.  */
678
679static bitmap deleted_bb_preds;
680
681/* Prints same_succ_htab to stderr.  */
682
683extern void debug_same_succ (void);
684DEBUG_FUNCTION void
debug_same_succ (void)
686{
687  same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
688}
689
690
/* Vector of same_succs to process.  */
692
693static vec<same_succ> worklist;
694
695/* Prints worklist to FILE.  */
696
697static void
698print_worklist (FILE *file)
699{
700  unsigned int i;
701  for (i = 0; i < worklist.length (); ++i)
702    same_succ_print (file, worklist[i]);
703}
704
705/* Adds SAME to worklist.  */
706
707static void
708add_to_worklist (same_succ same)
709{
710  if (same->in_worklist)
711    return;
712
713  if (bitmap_count_bits (same->bbs) < 2)
714    return;
715
716  same->in_worklist = true;
717  worklist.safe_push (same);
718}
719
720/* Add BB to same_succ_htab.  */
721
722static void
723find_same_succ_bb (basic_block bb, same_succ *same_p)
724{
725  unsigned int j;
726  bitmap_iterator bj;
727  same_succ same = *same_p;
728  same_succ *slot;
729  edge_iterator ei;
730  edge e;
731
732  if (bb == NULL
733      /* Be conservative with loop structure.  It's not evident that this test
734	 is sufficient.  Before tail-merge, we've just called
735	 loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now
736	 set, so there's no guarantee that the loop->latch value is still valid.
737	 But we assume that, since we've forced LOOPS_HAVE_SIMPLE_LATCHES at the
738	 start of pre, we've kept that property intact throughout pre, and are
739	 keeping it throughout tail-merge using this test.  */
740      || bb->loop_father->latch == bb)
741    return;
742  bitmap_set_bit (same->bbs, bb->index);
743  FOR_EACH_EDGE (e, ei, bb->succs)
744    {
745      int index = e->dest->index;
746      bitmap_set_bit (same->succs, index);
747      same_succ_edge_flags[index] = e->flags;
748    }
749  EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
750    same->succ_flags.safe_push (same_succ_edge_flags[j]);
751
752  same->hashval = same_succ_hash (same);
753
754  slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT);
755  if (*slot == NULL)
756    {
757      *slot = same;
758      BB_SAME_SUCC (bb) = same;
759      add_to_worklist (same);
760      *same_p = NULL;
761    }
762  else
763    {
764      bitmap_set_bit ((*slot)->bbs, bb->index);
765      BB_SAME_SUCC (bb) = *slot;
766      add_to_worklist (*slot);
767      if (inverse_flags (same, *slot))
768	bitmap_set_bit ((*slot)->inverse, bb->index);
769      same_succ_reset (same);
770    }
771}
772
773/* Find bbs with same successors.  */
774
775static void
776find_same_succ (void)
777{
778  same_succ same = same_succ_alloc ();
779  basic_block bb;
780
781  FOR_EACH_BB_FN (bb, cfun)
782    {
783      find_same_succ_bb (bb, &same);
784      if (same == NULL)
785	same = same_succ_alloc ();
786    }
787
788  same_succ_def::remove (same);
789}
790
791/* Initializes worklist administration.  */
792
793static void
794init_worklist (void)
795{
796  alloc_aux_for_blocks (sizeof (struct aux_bb_info));
797  same_succ_htab = new hash_table<same_succ_def> (n_basic_blocks_for_fn (cfun));
798  same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun));
799  deleted_bbs = BITMAP_ALLOC (NULL);
800  deleted_bb_preds = BITMAP_ALLOC (NULL);
801  worklist.create (n_basic_blocks_for_fn (cfun));
802  find_same_succ ();
803
804  if (dump_file && (dump_flags & TDF_DETAILS))
805    {
806      fprintf (dump_file, "initial worklist:\n");
807      print_worklist (dump_file);
808    }
809}
810
811/* Deletes worklist administration.  */
812
813static void
814delete_worklist (void)
815{
816  free_aux_for_blocks ();
817  delete same_succ_htab;
818  same_succ_htab = NULL;
819  XDELETEVEC (same_succ_edge_flags);
820  same_succ_edge_flags = NULL;
821  BITMAP_FREE (deleted_bbs);
822  BITMAP_FREE (deleted_bb_preds);
823  worklist.release ();
824}
825
826/* Mark BB as deleted, and mark its predecessors.  */
827
828static void
829mark_basic_block_deleted (basic_block bb)
830{
831  edge e;
832  edge_iterator ei;
833
834  bitmap_set_bit (deleted_bbs, bb->index);
835
836  FOR_EACH_EDGE (e, ei, bb->preds)
837    bitmap_set_bit (deleted_bb_preds, e->src->index);
838}
839
840/* Removes BB from its corresponding same_succ.  */
841
842static void
843same_succ_flush_bb (basic_block bb)
844{
845  same_succ same = BB_SAME_SUCC (bb);
846  BB_SAME_SUCC (bb) = NULL;
847  if (bitmap_single_bit_set_p (same->bbs))
848    same_succ_htab->remove_elt_with_hash (same, same->hashval);
849  else
850    bitmap_clear_bit (same->bbs, bb->index);
851}
852
853/* Removes all bbs in BBS from their corresponding same_succ.  */
854
855static void
856same_succ_flush_bbs (bitmap bbs)
857{
858  unsigned int i;
859  bitmap_iterator bi;
860
861  EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
862    same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i));
863}
864
865/* Release the last vdef in BB, either normal or phi result.  */
866
867static void
868release_last_vdef (basic_block bb)
869{
870  for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i);
871       gsi_prev_nondebug (&i))
872    {
873      gimple stmt = gsi_stmt (i);
874      if (gimple_vdef (stmt) == NULL_TREE)
875	continue;
876
877      mark_virtual_operand_for_renaming (gimple_vdef (stmt));
878      return;
879    }
880
881  for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i);
882       gsi_next (&i))
883    {
884      gphi *phi = i.phi ();
885      tree res = gimple_phi_result (phi);
886
887      if (!virtual_operand_p (res))
888	continue;
889
890      mark_virtual_phi_result_for_renaming (phi);
891      return;
892    }
}
895
896/* For deleted_bb_preds, find bbs with same successors.  */
897
898static void
899update_worklist (void)
900{
901  unsigned int i;
902  bitmap_iterator bi;
903  basic_block bb;
904  same_succ same;
905
906  bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
907  bitmap_clear (deleted_bbs);
908
909  bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
910  same_succ_flush_bbs (deleted_bb_preds);
911
912  same = same_succ_alloc ();
913  EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
914    {
915      bb = BASIC_BLOCK_FOR_FN (cfun, i);
916      gcc_assert (bb != NULL);
917      find_same_succ_bb (bb, &same);
918      if (same == NULL)
919	same = same_succ_alloc ();
920    }
921  same_succ_def::remove (same);
922  bitmap_clear (deleted_bb_preds);
923}
924
925/* Prints cluster C to FILE.  */
926
927static void
928print_cluster (FILE *file, bb_cluster c)
929{
930  if (c == NULL)
931    return;
932  bitmap_print (file, c->bbs, "bbs:", "\n");
933  bitmap_print (file, c->preds, "preds:", "\n");
934}
935
936/* Prints cluster C to stderr.  */
937
938extern void debug_cluster (bb_cluster);
939DEBUG_FUNCTION void
940debug_cluster (bb_cluster c)
941{
942  print_cluster (stderr, c);
943}
944
945/* Update C->rep_bb, given that BB is added to the cluster.  */
946
947static void
948update_rep_bb (bb_cluster c, basic_block bb)
949{
950  /* Initial.  */
951  if (c->rep_bb == NULL)
952    {
953      c->rep_bb = bb;
954      return;
955    }
956
957  /* Current needs no deps, keep it.  */
958  if (BB_DEP_BB (c->rep_bb) == NULL)
959    return;
960
961  /* Bb needs no deps, change rep_bb.  */
962  if (BB_DEP_BB (bb) == NULL)
963    {
964      c->rep_bb = bb;
965      return;
966    }
967
  /* Bb needs last deps earlier than current, change rep_bb.  A potential
     problem with this is that the first deps might also be earlier, which
     would mean we prefer longer lifetimes for the deps.  To be able to check
     for this, we would have to trace BB_FIRST_DEP_BB as well, besides
     BB_DEP_BB, which is really BB_LAST_DEP_BB.
     The benefit of choosing the bb whose last deps are earlier is that it can
     potentially be used as a replacement for more bbs.  */
975  if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
976    c->rep_bb = bb;
977}
978
979/* Add BB to cluster C.  Sets BB in C->bbs, and preds of BB in C->preds.  */
980
981static void
982add_bb_to_cluster (bb_cluster c, basic_block bb)
983{
984  edge e;
985  edge_iterator ei;
986
987  bitmap_set_bit (c->bbs, bb->index);
988
989  FOR_EACH_EDGE (e, ei, bb->preds)
990    bitmap_set_bit (c->preds, e->src->index);
991
992  update_rep_bb (c, bb);
993}
994
995/* Allocate and init new cluster.  */
996
997static bb_cluster
998new_cluster (void)
999{
1000  bb_cluster c;
1001  c = XCNEW (struct bb_cluster_def);
1002  c->bbs = BITMAP_ALLOC (NULL);
1003  c->preds = BITMAP_ALLOC (NULL);
1004  c->rep_bb = NULL;
1005  return c;
1006}
1007
/* Delete cluster C.  */
1009
1010static void
1011delete_cluster (bb_cluster c)
1012{
1013  if (c == NULL)
1014    return;
1015  BITMAP_FREE (c->bbs);
1016  BITMAP_FREE (c->preds);
1017  XDELETE (c);
1018}
1019
1020
1021/* Array that contains all clusters.  */
1022
1023static vec<bb_cluster> all_clusters;
1024
1025/* Allocate all cluster vectors.  */
1026
1027static void
1028alloc_cluster_vectors (void)
1029{
1030  all_clusters.create (n_basic_blocks_for_fn (cfun));
1031}
1032
1033/* Reset all cluster vectors.  */
1034
1035static void
1036reset_cluster_vectors (void)
1037{
1038  unsigned int i;
1039  basic_block bb;
1040  for (i = 0; i < all_clusters.length (); ++i)
1041    delete_cluster (all_clusters[i]);
1042  all_clusters.truncate (0);
1043  FOR_EACH_BB_FN (bb, cfun)
1044    BB_CLUSTER (bb) = NULL;
1045}
1046
1047/* Delete all cluster vectors.  */
1048
1049static void
1050delete_cluster_vectors (void)
1051{
1052  unsigned int i;
1053  for (i = 0; i < all_clusters.length (); ++i)
1054    delete_cluster (all_clusters[i]);
1055  all_clusters.release ();
1056}
1057
1058/* Merge cluster C2 into C1.  */
1059
1060static void
1061merge_clusters (bb_cluster c1, bb_cluster c2)
1062{
1063  bitmap_ior_into (c1->bbs, c2->bbs);
1064  bitmap_ior_into (c1->preds, c2->preds);
1065}
1066
/* Register equivalence of BB1 and BB2: put them in the same cluster, either
   by creating a new cluster and storing it in all_clusters, by adding one bb
   to the other's cluster, or by merging their existing clusters.  */
1069
1070static void
1071set_cluster (basic_block bb1, basic_block bb2)
1072{
1073  basic_block merge_bb, other_bb;
1074  bb_cluster merge, old, c;
1075
1076  if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1077    {
1078      c = new_cluster ();
1079      add_bb_to_cluster (c, bb1);
1080      add_bb_to_cluster (c, bb2);
1081      BB_CLUSTER (bb1) = c;
1082      BB_CLUSTER (bb2) = c;
1083      c->index = all_clusters.length ();
1084      all_clusters.safe_push (c);
1085    }
1086  else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1087    {
1088      merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1089      other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1090      merge = BB_CLUSTER (merge_bb);
1091      add_bb_to_cluster (merge, other_bb);
1092      BB_CLUSTER (other_bb) = merge;
1093    }
1094  else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1095    {
1096      unsigned int i;
1097      bitmap_iterator bi;
1098
1099      old = BB_CLUSTER (bb2);
1100      merge = BB_CLUSTER (bb1);
1101      merge_clusters (merge, old);
1102      EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1103	BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge;
1104      all_clusters[old->index] = NULL;
1105      update_rep_bb (merge, old->rep_bb);
1106      delete_cluster (old);
1107    }
1108  else
1109    gcc_unreachable ();
1110}
1111
1112/* Return true if gimple operands T1 and T2 have the same value.  */
1113
1114static bool
1115gimple_operand_equal_value_p (tree t1, tree t2)
1116{
1117  if (t1 == t2)
1118    return true;
1119
1120  if (t1 == NULL_TREE
1121      || t2 == NULL_TREE)
1122    return false;
1123
1124  if (operand_equal_p (t1, t2, 0))
1125    return true;
1126
1127  return gvn_uses_equal (t1, t2);
1128}
1129
1130/* Return true if gimple statements S1 and S2 are equal.  Gimple_bb (s1) and
1131   gimple_bb (s2) are members of SAME_SUCC.  */
1132
1133static bool
1134gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
1135{
1136  unsigned int i;
1137  tree lhs1, lhs2;
1138  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1139  tree t1, t2;
1140  bool inv_cond;
1141  enum tree_code code1, code2;
1142
1143  if (gimple_code (s1) != gimple_code (s2))
1144    return false;
1145
1146  switch (gimple_code (s1))
1147    {
1148    case GIMPLE_CALL:
1149      if (!gimple_call_same_target_p (s1, s2))
1150        return false;
1151
1152      t1 = gimple_call_chain (s1);
1153      t2 = gimple_call_chain (s2);
1154      if (!gimple_operand_equal_value_p (t1, t2))
1155	return false;
1156
1157      if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1158	return false;
1159
1160      for (i = 0; i < gimple_call_num_args (s1); ++i)
1161	{
1162	  t1 = gimple_call_arg (s1, i);
1163	  t2 = gimple_call_arg (s2, i);
1164	  if (!gimple_operand_equal_value_p (t1, t2))
1165	    return false;
1166	}
1167
1168      lhs1 = gimple_get_lhs (s1);
1169      lhs2 = gimple_get_lhs (s2);
1170      if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1171	return true;
1172      if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1173	return false;
1174      if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1175	return vn_valueize (lhs1) == vn_valueize (lhs2);
1176      return operand_equal_p (lhs1, lhs2, 0);
1177
1178    case GIMPLE_ASSIGN:
1179      lhs1 = gimple_get_lhs (s1);
1180      lhs2 = gimple_get_lhs (s2);
1181      if (TREE_CODE (lhs1) != SSA_NAME
1182	  && TREE_CODE (lhs2) != SSA_NAME)
1183	return (operand_equal_p (lhs1, lhs2, 0)
1184		&& gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
1185						 gimple_assign_rhs1 (s2)));
1186      else if (TREE_CODE (lhs1) == SSA_NAME
1187	       && TREE_CODE (lhs2) == SSA_NAME)
1188	return operand_equal_p (gimple_assign_rhs1 (s1),
1189				gimple_assign_rhs1 (s2), 0);
1190      return false;
1191
1192    case GIMPLE_COND:
1193      t1 = gimple_cond_lhs (s1);
1194      t2 = gimple_cond_lhs (s2);
1195      if (!gimple_operand_equal_value_p (t1, t2))
1196	return false;
1197
1198      t1 = gimple_cond_rhs (s1);
1199      t2 = gimple_cond_rhs (s2);
1200      if (!gimple_operand_equal_value_p (t1, t2))
1201	return false;
1202
1203      code1 = gimple_expr_code (s1);
1204      code2 = gimple_expr_code (s2);
1205      inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1206		  != bitmap_bit_p (same_succ->inverse, bb2->index));
1207      if (inv_cond)
1208	{
1209	  bool honor_nans = HONOR_NANS (t1);
1210	  code2 = invert_tree_comparison (code2, honor_nans);
1211	}
1212      return code1 == code2;
1213
1214    default:
1215      return false;
1216    }
1217}
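
/* For example (a hypothetical gimple sketch, not from a testcase), with

     <bb A>:
     x_2 = p_3->f;
     foo (x_2);

     <bb B>:
     y_4 = p_3->f;
     foo (y_4);

   the two calls to foo compare equal here when SCCVN has given x_2 and y_4
   the same value number, and two GIMPLE_CONDs ending blocks whose successor
   edge flags are inverted compare equal once invert_tree_comparison has been
   applied to one of the comparison codes.  */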
1218
/* Let GSI skip backwards over local defs.  Return the earliest vuse in *VUSE.
   Set *VUSE_ESCAPED to true if the vuse influenced an SSA_OP_DEF of one of the
   processed statements.  */
1222
1223static void
1224gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1225				  bool *vuse_escaped)
1226{
1227  gimple stmt;
1228  tree lvuse;
1229
1230  while (true)
1231    {
1232      if (gsi_end_p (*gsi))
1233	return;
1234      stmt = gsi_stmt (*gsi);
1235
1236      lvuse = gimple_vuse (stmt);
1237      if (lvuse != NULL_TREE)
1238	{
1239	  *vuse = lvuse;
1240	  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1241	    *vuse_escaped = true;
1242	}
1243
1244      if (!stmt_local_def (stmt))
1245	return;
1246      gsi_prev_nondebug (gsi);
1247    }
1248}
1249
1250/* Determines whether BB1 and BB2 (members of same_succ) are duplicates.  If so,
1251   clusters them.  */
1252
1253static void
1254find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
1255{
1256  gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1257  gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
1258  tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1259  bool vuse_escaped = false;
1260
1261  gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1262  gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1263
1264  while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1265    {
1266      gimple stmt1 = gsi_stmt (gsi1);
1267      gimple stmt2 = gsi_stmt (gsi2);
1268
      /* Better than doing this check here would be to blacklist the bb
	 containing the stmt when encountering the stmt, e.g. in
	 same_succ_hash.  */
1272      if (is_tm_ending (stmt1)
1273	  || is_tm_ending (stmt2))
1274	return;
1275
1276      if (!gimple_equal_p (same_succ, stmt1, stmt2))
1277	return;
1278
1279      gsi_prev_nondebug (&gsi1);
1280      gsi_prev_nondebug (&gsi2);
1281      gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1282      gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1283    }
1284
1285  if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1286    return;
1287
1288  /* If the incoming vuses are not the same, and the vuse escaped into an
1289     SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1290     which potentially means the semantics of one of the blocks will be changed.
1291     TODO: make this check more precise.  */
1292  if (vuse_escaped && vuse1 != vuse2)
1293    return;
1294
1295  if (dump_file)
1296    fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1297	     bb1->index, bb2->index);
1298
1299  set_cluster (bb1, bb2);
1300}
1301
1302/* Returns whether for all phis in DEST the phi alternatives for E1 and
1303   E2 are equal.  */
1304
1305static bool
1306same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1307{
1308  int n1 = e1->dest_idx, n2 = e2->dest_idx;
1309  gphi_iterator gsi;
1310
1311  for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1312    {
1313      gphi *phi = gsi.phi ();
1314      tree lhs = gimple_phi_result (phi);
1315      tree val1 = gimple_phi_arg_def (phi, n1);
1316      tree val2 = gimple_phi_arg_def (phi, n2);
1317
1318      if (virtual_operand_p (lhs))
1319	continue;
1320
1321      if (operand_equal_for_phi_arg_p (val1, val2))
1322        continue;
1323      if (gvn_uses_equal (val1, val2))
1324	continue;
1325
1326      return false;
1327    }
1328
1329  return true;
1330}
1331
1332/* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1333   phi alternatives for BB1 and BB2 are equal.  */
1334
1335static bool
1336same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
1337{
1338  unsigned int s;
1339  bitmap_iterator bs;
1340  edge e1, e2;
1341  basic_block succ;
1342
1343  EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1344    {
1345      succ = BASIC_BLOCK_FOR_FN (cfun, s);
1346      e1 = find_edge (bb1, succ);
1347      e2 = find_edge (bb2, succ);
1348      if (e1->flags & EDGE_COMPLEX
1349	  || e2->flags & EDGE_COMPLEX)
1350	return false;
1351
      /* For all phis in succ, the phi alternatives for e1 and e2 need to have
	 the same value.  */
1354      if (!same_phi_alternatives_1 (succ, e1, e2))
1355	return false;
1356    }
1357
1358  return true;
1359}
1360
1361/* Return true if BB has non-vop phis.  */
1362
1363static bool
1364bb_has_non_vop_phi (basic_block bb)
1365{
1366  gimple_seq phis = phi_nodes (bb);
1367  gimple phi;
1368
1369  if (phis == NULL)
1370    return false;
1371
1372  if (!gimple_seq_singleton_p (phis))
1373    return true;
1374
1375  phi = gimple_seq_first_stmt (phis);
1376  return !virtual_operand_p (gimple_phi_result (phi));
1377}
1378
/* Returns true if redirecting the incoming edges of FROM to TO maintains the
   invariant that uses in FROM are dominated by their defs.  */
1381
1382static bool
1383deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1384{
  basic_block cd, dep_bb = BB_DEP_BB (to);
  edge_iterator ei;
  edge e;
  bitmap from_preds;

  if (dep_bb == NULL)
    return true;

  /* Allocate only after the early return above, to avoid leaking the
     bitmap.  */
  from_preds = BITMAP_ALLOC (NULL);
1392
1393  FOR_EACH_EDGE (e, ei, from->preds)
1394    bitmap_set_bit (from_preds, e->src->index);
1395  cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1396  BITMAP_FREE (from_preds);
1397
1398  return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1399}
1400
/* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
   replacement bb) and vice versa maintains the invariant that uses in the
   replacement are dominated by their defs.  */
1404
1405static bool
1406deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1407{
1408  if (BB_CLUSTER (bb1) != NULL)
1409    bb1 = BB_CLUSTER (bb1)->rep_bb;
1410
1411  if (BB_CLUSTER (bb2) != NULL)
1412    bb2 = BB_CLUSTER (bb2)->rep_bb;
1413
1414  return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1415	  && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1416}
1417
1418/* Within SAME_SUCC->bbs, find clusters of bbs which can be merged.  */
1419
1420static void
1421find_clusters_1 (same_succ same_succ)
1422{
1423  basic_block bb1, bb2;
1424  unsigned int i, j;
1425  bitmap_iterator bi, bj;
1426  int nr_comparisons;
1427  int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);
1428
1429  EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
1430    {
1431      bb1 = BASIC_BLOCK_FOR_FN (cfun, i);
1432
1433      /* TODO: handle blocks with phi-nodes.  We'll have to find corresponding
1434	 phi-nodes in bb1 and bb2, with the same alternatives for the same
1435	 preds.  */
1436      if (bb_has_non_vop_phi (bb1))
1437	continue;
1438
1439      nr_comparisons = 0;
1440      EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
1441	{
1442	  bb2 = BASIC_BLOCK_FOR_FN (cfun, j);
1443
1444	  if (bb_has_non_vop_phi (bb2))
1445	    continue;
1446
1447	  if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
1448	    continue;
1449
1450	  /* Limit quadratic behaviour.  */
1451	  nr_comparisons++;
1452	  if (nr_comparisons > max_comparisons)
1453	    break;
1454
	  /* This is a conservative dependency check.  We could test more
	     precisely for the allowed replacement direction.  */
1457	  if (!deps_ok_for_redirect (bb1, bb2))
1458	    continue;
1459
1460	  if (!(same_phi_alternatives (same_succ, bb1, bb2)))
1461	    continue;
1462
1463	  find_duplicate (same_succ, bb1, bb2);
1464        }
1465    }
1466}
1467
1468/* Find clusters of bbs which can be merged.  */
1469
1470static void
1471find_clusters (void)
1472{
1473  same_succ same;
1474
1475  while (!worklist.is_empty ())
1476    {
1477      same = worklist.pop ();
1478      same->in_worklist = false;
1479      if (dump_file && (dump_flags & TDF_DETAILS))
1480	{
1481	  fprintf (dump_file, "processing worklist entry\n");
1482	  same_succ_print (dump_file, same);
1483	}
1484      find_clusters_1 (same);
1485    }
1486}
1487
1488/* Returns the vop phi of BB, if any.  */
1489
1490static gphi *
1491vop_phi (basic_block bb)
1492{
1493  gphi *stmt;
1494  gphi_iterator gsi;
1495  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1496    {
1497      stmt = gsi.phi ();
1498      if (! virtual_operand_p (gimple_phi_result (stmt)))
1499	continue;
1500      return stmt;
1501    }
1502  return NULL;
1503}
1504
/* Redirect all incoming edges of BB1 to BB2, remove BB1, and mark it as
   deleted.  */
1506
1507static void
1508replace_block_by (basic_block bb1, basic_block bb2)
1509{
1510  edge pred_edge;
1511  edge e1, e2;
1512  edge_iterator ei;
1513  unsigned int i;
1514  gphi *bb2_phi;
1515
1516  bb2_phi = vop_phi (bb2);
1517
1518  /* Mark the basic block as deleted.  */
1519  mark_basic_block_deleted (bb1);
1520
1521  /* Redirect the incoming edges of bb1 to bb2.  */
1522  for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
1523    {
1524      pred_edge = EDGE_PRED (bb1, i - 1);
1525      pred_edge = redirect_edge_and_branch (pred_edge, bb2);
1526      gcc_assert (pred_edge != NULL);
1527
1528      if (bb2_phi == NULL)
1529	continue;
1530
1531      /* The phi might have run out of capacity when the redirect added an
1532	 argument, which means it could have been replaced.  Refresh it.  */
1533      bb2_phi = vop_phi (bb2);
1534
1535      add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
1536		   pred_edge, UNKNOWN_LOCATION);
1537    }
1538
1539  bb2->frequency += bb1->frequency;
1540  if (bb2->frequency > BB_FREQ_MAX)
1541    bb2->frequency = BB_FREQ_MAX;
1542
1543  bb2->count += bb1->count;
1544
1545  /* Merge the outgoing edge counts from bb1 onto bb2.  */
1546  gcov_type out_sum = 0;
1547  FOR_EACH_EDGE (e1, ei, bb1->succs)
1548    {
1549      e2 = find_edge (bb2, e1->dest);
1550      gcc_assert (e2);
1551      e2->count += e1->count;
1552      out_sum += e2->count;
1553    }
1554  /* Recompute the edge probabilities from the new merged edge count.
1555     Use the sum of the new merged edge counts computed above instead
1556     of bb2's merged count, in case there are profile count insanities
1557     making the bb count inconsistent with the edge weights.  */
1558  FOR_EACH_EDGE (e2, ei, bb2->succs)
1559    {
1560      e2->probability = GCOV_COMPUTE_SCALE (e2->count, out_sum);
1561    }
1562
1563  /* Clear range info from all stmts in BB2 -- this transformation
1564     could make them out of date.  */
1565  reset_flow_sensitive_info_in_bb (bb2);
1566
1567  /* Do updates that use bb1, before deleting bb1.  */
1568  release_last_vdef (bb1);
1569  same_succ_flush_bb (bb1);
1570
1571  delete_basic_block (bb1);
1572}
1573
/* Bbs for which update_debug_stmt needs to be called.  */
1575
1576static bitmap update_bbs;
1577
1578/* For each cluster in all_clusters, merge all cluster->bbs.  Returns
1579   number of bbs removed.  */
1580
1581static int
1582apply_clusters (void)
1583{
1584  basic_block bb1, bb2;
1585  bb_cluster c;
1586  unsigned int i, j;
1587  bitmap_iterator bj;
1588  int nr_bbs_removed = 0;
1589
1590  for (i = 0; i < all_clusters.length (); ++i)
1591    {
1592      c = all_clusters[i];
1593      if (c == NULL)
1594	continue;
1595
1596      bb2 = c->rep_bb;
1597      bitmap_set_bit (update_bbs, bb2->index);
1598
1599      bitmap_clear_bit (c->bbs, bb2->index);
1600      EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1601	{
1602	  bb1 = BASIC_BLOCK_FOR_FN (cfun, j);
1603	  bitmap_clear_bit (update_bbs, bb1->index);
1604
1605	  replace_block_by (bb1, bb2);
1606	  nr_bbs_removed++;
1607	}
1608    }
1609
1610  return nr_bbs_removed;
1611}
1612
1613/* Resets debug statement STMT if it has uses that are not dominated by their
1614   defs.  */
1615
1616static void
1617update_debug_stmt (gimple stmt)
1618{
1619  use_operand_p use_p;
1620  ssa_op_iter oi;
1621  basic_block bbuse;
1622
1623  if (!gimple_debug_bind_p (stmt))
1624    return;
1625
1626  bbuse = gimple_bb (stmt);
1627  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1628    {
1629      tree name = USE_FROM_PTR (use_p);
1630      gimple def_stmt = SSA_NAME_DEF_STMT (name);
1631      basic_block bbdef = gimple_bb (def_stmt);
1632      if (bbdef == NULL || bbuse == bbdef
1633	  || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1634	continue;
1635
1636      gimple_debug_bind_reset_value (stmt);
1637      update_stmt (stmt);
1638      break;
1639    }
1640}
1641
1642/* Resets all debug statements that have uses that are not
1643   dominated by their defs.  */
1644
1645static void
1646update_debug_stmts (void)
1647{
1648  basic_block bb;
1649  bitmap_iterator bi;
1650  unsigned int i;
1651
1652  EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1653    {
1654      gimple stmt;
1655      gimple_stmt_iterator gsi;
1656
1657      bb = BASIC_BLOCK_FOR_FN (cfun, i);
1658      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1659	{
1660	  stmt = gsi_stmt (gsi);
1661	  if (!is_gimple_debug (stmt))
1662	    continue;
1663	  update_debug_stmt (stmt);
1664	}
1665    }
1666}
1667
1668/* Runs tail merge optimization.  */
1669
1670unsigned int
1671tail_merge_optimize (unsigned int todo)
1672{
1673  int nr_bbs_removed_total = 0;
1674  int nr_bbs_removed;
1675  bool loop_entered = false;
1676  int iteration_nr = 0;
1677  int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);
1678
1679  if (!flag_tree_tail_merge
1680      || max_iterations == 0)
1681    return 0;
1682
1683  timevar_push (TV_TREE_TAIL_MERGE);
1684
1685  if (!dom_info_available_p (CDI_DOMINATORS))
1686    {
1687      /* PRE can leave us with unreachable blocks, remove them now.  */
1688      delete_unreachable_blocks ();
1689      calculate_dominance_info (CDI_DOMINATORS);
1690    }
1691  init_worklist ();
1692
1693  while (!worklist.is_empty ())
1694    {
1695      if (!loop_entered)
1696	{
1697	  loop_entered = true;
1698	  alloc_cluster_vectors ();
1699	  update_bbs = BITMAP_ALLOC (NULL);
1700	}
1701      else
1702	reset_cluster_vectors ();
1703
1704      iteration_nr++;
1705      if (dump_file && (dump_flags & TDF_DETAILS))
1706	fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);
1707
1708      find_clusters ();
1709      gcc_assert (worklist.is_empty ());
1710      if (all_clusters.is_empty ())
1711	break;
1712
1713      nr_bbs_removed = apply_clusters ();
1714      nr_bbs_removed_total += nr_bbs_removed;
1715      if (nr_bbs_removed == 0)
1716	break;
1717
1718      free_dominance_info (CDI_DOMINATORS);
1719
1720      if (iteration_nr == max_iterations)
1721	break;
1722
1723      calculate_dominance_info (CDI_DOMINATORS);
1724      update_worklist ();
1725    }
1726
1727  if (dump_file && (dump_flags & TDF_DETAILS))
1728    fprintf (dump_file, "htab collision / search: %f\n",
1729	     same_succ_htab->collisions ());
1730
1731  if (nr_bbs_removed_total > 0)
1732    {
1733      if (MAY_HAVE_DEBUG_STMTS)
1734	{
1735	  calculate_dominance_info (CDI_DOMINATORS);
1736	  update_debug_stmts ();
1737	}
1738
1739      if (dump_file && (dump_flags & TDF_DETAILS))
1740	{
1741	  fprintf (dump_file, "Before TODOs.\n");
1742	  dump_function_to_file (current_function_decl, dump_file, dump_flags);
1743	}
1744
1745      mark_virtual_operands_for_renaming (cfun);
1746    }
1747
1748  delete_worklist ();
1749  if (loop_entered)
1750    {
1751      delete_cluster_vectors ();
1752      BITMAP_FREE (update_bbs);
1753    }
1754
1755  timevar_pop (TV_TREE_TAIL_MERGE);
1756
1757  return todo;
1758}
1759