1/* Thread edges through blocks and update the control flow and SSA graphs.
2   Copyright (C) 2004-2015 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 3, or (at your option)
9any later version.
10
11GCC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "hash-set.h"
24#include "machmode.h"
25#include "vec.h"
26#include "double-int.h"
27#include "input.h"
28#include "alias.h"
29#include "symtab.h"
30#include "options.h"
31#include "wide-int.h"
32#include "inchash.h"
33#include "tree.h"
34#include "fold-const.h"
35#include "flags.h"
36#include "predict.h"
37#include "tm.h"
38#include "hard-reg-set.h"
39#include "input.h"
40#include "function.h"
41#include "dominance.h"
42#include "cfg.h"
43#include "cfganal.h"
44#include "basic-block.h"
45#include "hash-table.h"
46#include "tree-ssa-alias.h"
47#include "internal-fn.h"
48#include "gimple-expr.h"
49#include "is-a.h"
50#include "gimple.h"
51#include "gimple-iterator.h"
52#include "gimple-ssa.h"
53#include "tree-phinodes.h"
54#include "tree-ssa.h"
55#include "tree-ssa-threadupdate.h"
56#include "ssa-iterators.h"
57#include "dumpfile.h"
58#include "cfgloop.h"
59#include "dbgcnt.h"
60#include "tree-cfg.h"
61#include "tree-pass.h"
62
63/* Given a block B, update the CFG and SSA graph to reflect redirecting
64   one or more in-edges to B to instead reach the destination of an
65   out-edge from B while preserving any side effects in B.
66
67   i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
68   side effects of executing B.
69
70     1. Make a copy of B (including its outgoing edges and statements).  Call
71	the copy B'.  Note B' has no incoming edges or PHIs at this time.
72
73     2. Remove the control statement at the end of B' and all outgoing edges
74	except B'->C.
75
76     3. Add a new argument to each PHI in C with the same value as the existing
77	argument associated with edge B->C.  Associate the new PHI arguments
78	with the edge B'->C.
79
80     4. For each PHI in B, find or create a PHI in B' with an identical
81	PHI_RESULT.  Add an argument to the PHI in B' which has the same
82	value as the PHI in B associated with the edge A->B.  Associate
83	the new argument in the PHI in B' with the edge A->B.
84
85     5. Change the edge A->B to A->B'.
86
87	5a. This automatically deletes any PHI arguments associated with the
88	    edge A->B in B.
89
90	5b. This automatically associates each new argument added in step 4
91	    with the edge A->B'.
92
93     6. Repeat for other incoming edges into B.
94
95     7. Put the duplicated resources in B and all the B' blocks into SSA form.
96
97   Note that block duplication can be minimized by first collecting the
98   set of unique destination blocks that the incoming edges should
99   be threaded to.
100
101   We reduce the number of edges and statements we create by not copying all
102   the outgoing edges and the control statement in step #1.  We instead create
103   a template block without the outgoing edges and duplicate the template.
104
105   Another case this code handles is threading through a "joiner" block.  In
106   this case, we do not know the destination of the joiner block, but one
107   of the outgoing edges from the joiner block leads to a threadable path.  This
108   case largely works as outlined above, except the duplicate of the joiner
109   block still contains a full set of outgoing edges and its control statement.
110   We just redirect one of its outgoing edges to our jump threading path.  */
111
112
113/* Steps #5 and #6 of the above algorithm are best implemented by walking
114   all the incoming edges which thread to the same destination edge at
115   the same time.  That avoids lots of table lookups to get information
116   for the destination edge.
117
118   To realize that implementation we create a list of incoming edges
119   which thread to the same outgoing edge.  Thus to implement steps
120   #5 and #6 we traverse our hash table of outgoing edge information.
121   For each entry we walk the list of incoming edges which thread to
122   the current outgoing edge.  */
123
124struct el
125{
126  edge e;
127  struct el *next;
128};
129
130/* Main data structure recording information regarding B's duplicate
131   blocks.  */
132
133/* We need to efficiently record the unique thread destinations of this
134   block and specific information associated with those destinations.  We
135   may have many incoming edges threaded to the same outgoing edge.  This
136   can be naturally implemented with a hash table.  */
137
138struct redirection_data : typed_free_remove<redirection_data>
139{
140  /* We support wiring up two block duplicates in a jump threading path.
141
142     One is a normal block copy where we remove the control statement
143     and wire up its single remaining outgoing edge to the thread path.
144
145     The other is a joiner block where we leave the control statement
146     in place, but wire one of the outgoing edges to a thread path.
147
148     In theory we could have multiple block duplicates in a jump
149     threading path, but I haven't tried that.
150
151     The duplicate blocks appear in this array in the same order in
152     which they appear in the jump thread path.  */
153  basic_block dup_blocks[2];
154
155  /* The jump threading path.  */
156  vec<jump_thread_edge *> *path;
157
158  /* A list of incoming edges which we want to thread to the
159     same path.  */
160  struct el *incoming_edges;
161
162  /* hash_table support.  */
163  typedef redirection_data value_type;
164  typedef redirection_data compare_type;
165  static inline hashval_t hash (const value_type *);
166  static inline int equal (const value_type *, const compare_type *);
167};
168
169/* Dump a jump threading path, including annotations about each
170   edge in the path.  */
171
172static void
173dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
174		       bool registering)
175{
176  fprintf (dump_file,
177	   "  %s%s jump thread: (%d, %d) incoming edge; ",
178	   (registering ? "Registering" : "Cancelling"),
179	   (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
180	   path[0]->e->src->index, path[0]->e->dest->index);
181
182  for (unsigned int i = 1; i < path.length (); i++)
183    {
184      /* We can get paths with a NULL edge when the final destination
185	 of a jump thread turns out to be a constant address.  We dump
186	 those paths when debugging, so we have to be prepared for that
187	 possibility here.  */
188      if (path[i]->e == NULL)
189	continue;
190
191      if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
192	fprintf (dump_file, " (%d, %d) joiner; ",
193		 path[i]->e->src->index, path[i]->e->dest->index);
194      if (path[i]->type == EDGE_COPY_SRC_BLOCK)
195       fprintf (dump_file, " (%d, %d) normal;",
196		 path[i]->e->src->index, path[i]->e->dest->index);
197      if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
198       fprintf (dump_file, " (%d, %d) nocopy;",
199		 path[i]->e->src->index, path[i]->e->dest->index);
200      if (path[0]->type == EDGE_FSM_THREAD)
201	fprintf (dump_file, " (%d, %d) ",
202		 path[i]->e->src->index, path[i]->e->dest->index);
203    }
204  fputc ('\n', dump_file);
205}
206
207/* Simple hashing function.  For any given incoming edge E, we're going
208   to be most concerned with the final destination of its jump thread
209   path.  So hash on the block index of the final edge in the path.  */
210
211inline hashval_t
212redirection_data::hash (const value_type *p)
213{
214  vec<jump_thread_edge *> *path = p->path;
215  return path->last ()->e->dest->index;
216}
217
218/* Given two hash table entries, return true if they have the same
219   jump threading path.  */
220inline int
221redirection_data::equal (const value_type *p1, const compare_type *p2)
222{
223  vec<jump_thread_edge *> *path1 = p1->path;
224  vec<jump_thread_edge *> *path2 = p2->path;
225
226  if (path1->length () != path2->length ())
227    return false;
228
229  for (unsigned int i = 1; i < path1->length (); i++)
230    {
231      if ((*path1)[i]->type != (*path2)[i]->type
232	  || (*path1)[i]->e != (*path2)[i]->e)
233	return false;
234    }
235
236  return true;
237}
238
239/* Data structure of information to pass to hash table traversal routines.  */
240struct ssa_local_info_t
241{
242  /* The current block we are working on.  */
243  basic_block bb;
244
245  /* We only create a template block for the first duplicated block in a
246     jump threading path as we may need many duplicates of that block.
247
248     The second duplicate block in a path is specific to that path.  Creating
249     and sharing a template for that block is considerably more difficult.  */
250  basic_block template_block;
251
252  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
253  bool jumps_threaded;
254
255  /* Blocks duplicated for the thread.  */
256  bitmap duplicate_blocks;
257
258  /* When we have multiple paths through a joiner which reach different
259     final destinations, then we may need to correct for potential
260     profile insanities.  */
261  bool need_profile_correction;
262};
263
264/* Passes which use the jump threading code register jump threading
265   opportunities as they are discovered.  We keep the registered
266   jump threading opportunities in this vector as edge pairs
267   (original_edge, target_edge).  */
268static vec<vec<jump_thread_edge *> *> paths;
269
270/* When we start updating the CFG for threading, data necessary for jump
271   threading is attached to the AUX field for the incoming edge.  Use these
272   macros to access the underlying structure attached to the AUX field.  */
273#define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
274
275/* Jump threading statistics.  */
276
277struct thread_stats_d
278{
279  unsigned long num_threaded_edges;
280};
281
282struct thread_stats_d thread_stats;
283
284
285/* Remove the last statement in block BB if it is a control statement
286   Also remove all outgoing edges except the edge which reaches DEST_BB.
287   If DEST_BB is NULL, then remove all outgoing edges.  */
288
289static void
290remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
291{
292  gimple_stmt_iterator gsi;
293  edge e;
294  edge_iterator ei;
295
296  gsi = gsi_last_bb (bb);
297
298  /* If the duplicate ends with a control statement, then remove it.
299
300     Note that if we are duplicating the template block rather than the
301     original basic block, then the duplicate might not have any real
302     statements in it.  */
303  if (!gsi_end_p (gsi)
304      && gsi_stmt (gsi)
305      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
306	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
307	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
308    gsi_remove (&gsi, true);
309
310  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
311    {
312      if (e->dest != dest_bb)
313	remove_edge (e);
314      else
315	ei_next (&ei);
316    }
317}
318
319/* Create a duplicate of BB.  Record the duplicate block in an array
320   indexed by COUNT stored in RD.  */
321
322static void
323create_block_for_threading (basic_block bb,
324			    struct redirection_data *rd,
325			    unsigned int count,
326			    bitmap *duplicate_blocks)
327{
328  edge_iterator ei;
329  edge e;
330
331  /* We can use the generic block duplication code and simply remove
332     the stuff we do not need.  */
333  rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
334
335  FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
336    e->aux = NULL;
337
338  /* Zero out the profile, since the block is unreachable for now.  */
339  rd->dup_blocks[count]->frequency = 0;
340  rd->dup_blocks[count]->count = 0;
341  if (duplicate_blocks)
342    bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
343}
344
345/* Main data structure to hold information for duplicates of BB.  */
346
347static hash_table<redirection_data> *redirection_data;
348
349/* Given an outgoing edge E lookup and return its entry in our hash table.
350
351   If INSERT is true, then we insert the entry into the hash table if
352   it is not already present.  INCOMING_EDGE is added to the list of incoming
353   edges associated with E in the hash table.  */
354
355static struct redirection_data *
356lookup_redirection_data (edge e, enum insert_option insert)
357{
358  struct redirection_data **slot;
359  struct redirection_data *elt;
360  vec<jump_thread_edge *> *path = THREAD_PATH (e);
361
362 /* Build a hash table element so we can see if E is already
363     in the table.  */
364  elt = XNEW (struct redirection_data);
365  elt->path = path;
366  elt->dup_blocks[0] = NULL;
367  elt->dup_blocks[1] = NULL;
368  elt->incoming_edges = NULL;
369
370  slot = redirection_data->find_slot (elt, insert);
371
372  /* This will only happen if INSERT is false and the entry is not
373     in the hash table.  */
374  if (slot == NULL)
375    {
376      free (elt);
377      return NULL;
378    }
379
380  /* This will only happen if E was not in the hash table and
381     INSERT is true.  */
382  if (*slot == NULL)
383    {
384      *slot = elt;
385      elt->incoming_edges = XNEW (struct el);
386      elt->incoming_edges->e = e;
387      elt->incoming_edges->next = NULL;
388      return elt;
389    }
390  /* E was in the hash table.  */
391  else
392    {
393      /* Free ELT as we do not need it anymore, we will extract the
394	 relevant entry from the hash table itself.  */
395      free (elt);
396
397      /* Get the entry stored in the hash table.  */
398      elt = *slot;
399
400      /* If insertion was requested, then we need to add INCOMING_EDGE
401	 to the list of incoming edges associated with E.  */
402      if (insert)
403	{
404	  struct el *el = XNEW (struct el);
405	  el->next = elt->incoming_edges;
406	  el->e = e;
407	  elt->incoming_edges = el;
408	}
409
410      return elt;
411    }
412}
413
414/* Similar to copy_phi_args, except that the PHI arg exists, it just
415   does not have a value associated with it.  */
416
417static void
418copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
419{
420  int src_idx = src_e->dest_idx;
421  int tgt_idx = tgt_e->dest_idx;
422
423  /* Iterate over each PHI in e->dest.  */
424  for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
425			   gsi2 = gsi_start_phis (tgt_e->dest);
426       !gsi_end_p (gsi);
427       gsi_next (&gsi), gsi_next (&gsi2))
428    {
429      gphi *src_phi = gsi.phi ();
430      gphi *dest_phi = gsi2.phi ();
431      tree val = gimple_phi_arg_def (src_phi, src_idx);
432      source_location locus = gimple_phi_arg_location (src_phi, src_idx);
433
434      SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
435      gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
436    }
437}
438
439/* Given ssa_name DEF, backtrack jump threading PATH from node IDX
440   to see if it has constant value in a flow sensitive manner.  Set
441   LOCUS to location of the constant phi arg and return the value.
442   Return DEF directly if either PATH or idx is ZERO.  */
443
444static tree
445get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
446			 basic_block bb, int idx, source_location *locus)
447{
448  tree arg;
449  gphi *def_phi;
450  basic_block def_bb;
451
452  if (path == NULL || idx == 0)
453    return def;
454
455  def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
456  if (!def_phi)
457    return def;
458
459  def_bb = gimple_bb (def_phi);
460  /* Don't propagate loop invariants into deeper loops.  */
461  if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
462    return def;
463
464  /* Backtrack jump threading path from IDX to see if def has constant
465     value.  */
466  for (int j = idx - 1; j >= 0; j--)
467    {
468      edge e = (*path)[j]->e;
469      if (e->dest == def_bb)
470	{
471	  arg = gimple_phi_arg_def (def_phi, e->dest_idx);
472	  if (is_gimple_min_invariant (arg))
473	    {
474	      *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
475	      return arg;
476	    }
477	  break;
478	}
479    }
480
481  return def;
482}
483
484/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
485   Try to backtrack jump threading PATH from node IDX to see if the arg
486   has constant value, copy constant value instead of argument itself
487   if yes.  */
488
489static void
490copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
491	       vec<jump_thread_edge *> *path, int idx)
492{
493  gphi_iterator gsi;
494  int src_indx = src_e->dest_idx;
495
496  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
497    {
498      gphi *phi = gsi.phi ();
499      tree def = gimple_phi_arg_def (phi, src_indx);
500      source_location locus = gimple_phi_arg_location (phi, src_indx);
501
502      if (TREE_CODE (def) == SSA_NAME
503	  && !virtual_operand_p (gimple_phi_result (phi)))
504	def = get_value_locus_in_path (def, path, bb, idx, &locus);
505
506      add_phi_arg (phi, def, tgt_e, locus);
507    }
508}
509
510/* We have recently made a copy of ORIG_BB, including its outgoing
511   edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
512   ORIG_BB has a new argument associated with edge from NEW_BB to the
513   successor.  Initialize the PHI argument so that it is equal to the PHI
514   argument associated with the edge from ORIG_BB to the successor.
515   PATH and IDX are used to check if the new PHI argument has constant
516   value in a flow sensitive manner.  */
517
518static void
519update_destination_phis (basic_block orig_bb, basic_block new_bb,
520			 vec<jump_thread_edge *> *path, int idx)
521{
522  edge_iterator ei;
523  edge e;
524
525  FOR_EACH_EDGE (e, ei, orig_bb->succs)
526    {
527      edge e2 = find_edge (new_bb, e->dest);
528      copy_phi_args (e->dest, e, e2, path, idx);
529    }
530}
531
532/* Given a duplicate block and its single destination (both stored
533   in RD).  Create an edge between the duplicate and its single
534   destination.
535
536   Add an additional argument to any PHI nodes at the single
537   destination.  IDX is the start node in jump threading path
538   we start to check to see if the new PHI argument has constant
539   value along the jump threading path.  */
540
541static void
542create_edge_and_update_destination_phis (struct redirection_data *rd,
543					 basic_block bb, int idx)
544{
545  edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
546
547  rescan_loop_exit (e, true, false);
548  e->probability = REG_BR_PROB_BASE;
549  e->count = bb->count;
550
551  /* We used to copy the thread path here.  That was added in 2007
552     and dutifully updated through the representation changes in 2013.
553
554     In 2013 we added code to thread from an interior node through
555     the backedge to another interior node.  That runs after the code
556     to thread through loop headers from outside the loop.
557
558     The latter may delete edges in the CFG, including those
559     which appeared in the jump threading path we copied here.  Thus
560     we'd end up using a dangling pointer.
561
562     After reviewing the 2007/2011 code, I can't see how anything
563     depended on copying the AUX field and clearly copying the jump
564     threading path is problematical due to embedded edge pointers.
565     It has been removed.  */
566  e->aux = NULL;
567
568  /* If there are any PHI nodes at the destination of the outgoing edge
569     from the duplicate block, then we will need to add a new argument
570     to them.  The argument should have the same value as the argument
571     associated with the outgoing edge stored in RD.  */
572  copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
573}
574
575/* Look through PATH beginning at START and return TRUE if there are
576   any additional blocks that need to be duplicated.  Otherwise,
577   return FALSE.  */
578static bool
579any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
580				 unsigned int start)
581{
582  for (unsigned int i = start + 1; i < path->length (); i++)
583    {
584      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
585	  || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
586	return true;
587    }
588  return false;
589}
590
591
592/* Compute the amount of profile count/frequency coming into the jump threading
593   path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
594   PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
595   duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
596   identify blocks duplicated for jump threading, which have duplicated
597   edges that need to be ignored in the analysis.  Return true if path contains
598   a joiner, false otherwise.
599
600   In the non-joiner case, this is straightforward - all the counts/frequency
601   flowing into the jump threading path should flow through the duplicated
602   block and out of the duplicated path.
603
604   In the joiner case, it is very tricky.  Some of the counts flowing into
605   the original path go offpath at the joiner.  The problem is that while
606   we know how much total count goes off-path in the original control flow,
607   we don't know how many of the counts corresponding to just the jump
608   threading path go offpath at the joiner.
609
610   For example, assume we have the following control flow and identified
611   jump threading paths:
612
613                A     B     C
614                 \    |    /
615               Ea \   |Eb / Ec
616                   \  |  /
617                    v v v
618                      J       <-- Joiner
619                     / \
620                Eoff/   \Eon
621                   /     \
622                  v       v
623                Soff     Son  <--- Normal
624                         /\
625                      Ed/  \ Ee
626                       /    \
627                      v     v
628                      D      E
629
630            Jump threading paths: A -> J -> Son -> D (path 1)
631                                  C -> J -> Son -> E (path 2)
632
633   Note that the control flow could be more complicated:
634   - Each jump threading path may have more than one incoming edge.  I.e. A and
635   Ea could represent multiple incoming blocks/edges that are included in
636   path 1.
637   - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
638   before or after the "normal" copy block).  These are not duplicated onto
639   the jump threading path, as they are single-successor.
640   - Any of the blocks along the path may have other incoming edges that
641   are not part of any jump threading path, but add profile counts along
642   the path.
643
644   In the aboe example, after all jump threading is complete, we will
645   end up with the following control flow:
646
647                A          B            C
648                |          |            |
649              Ea|          |Eb          |Ec
650                |          |            |
651                v          v            v
652               Ja          J           Jc
653               / \        / \Eon'     / \
654          Eona/   \   ---/---\--------   \Eonc
655             /     \ /  /     \           \
656            v       v  v       v          v
657           Sona     Soff      Son        Sonc
658             \                 /\         /
659              \___________    /  \  _____/
660                          \  /    \/
661                           vv      v
662                            D      E
663
664   The main issue to notice here is that when we are processing path 1
665   (A->J->Son->D) we need to figure out the outgoing edge weights to
666   the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
667   sum of the incoming weights to D remain Ed.  The problem with simply
668   assuming that Ja (and Jc when processing path 2) has the same outgoing
669   probabilities to its successors as the original block J, is that after
670   all paths are processed and other edges/counts removed (e.g. none
671   of Ec will reach D after processing path 2), we may end up with not
672   enough count flowing along duplicated edge Sona->D.
673
674   Therefore, in the case of a joiner, we keep track of all counts
675   coming in along the current path, as well as from predecessors not
676   on any jump threading path (Eb in the above example).  While we
677   first assume that the duplicated Eona for Ja->Sona has the same
678   probability as the original, we later compensate for other jump
679   threading paths that may eliminate edges.  We do that by keep track
680   of all counts coming into the original path that are not in a jump
681   thread (Eb in the above example, but as noted earlier, there could
682   be other predecessors incoming to the path at various points, such
683   as at Son).  Call this cumulative non-path count coming into the path
684   before D as Enonpath.  We then ensure that the count from Sona->D is as at
685   least as big as (Ed - Enonpath), but no bigger than the minimum
686   weight along the jump threading path.  The probabilities of both the
687   original and duplicated joiner block J and Ja will be adjusted
688   accordingly after the updates.  */
689
690static bool
691compute_path_counts (struct redirection_data *rd,
692                     ssa_local_info_t *local_info,
693                     gcov_type *path_in_count_ptr,
694                     gcov_type *path_out_count_ptr,
695                     int *path_in_freq_ptr)
696{
697  edge e = rd->incoming_edges->e;
698  vec<jump_thread_edge *> *path = THREAD_PATH (e);
699  edge elast = path->last ()->e;
700  gcov_type nonpath_count = 0;
701  bool has_joiner = false;
702  gcov_type path_in_count = 0;
703  int path_in_freq = 0;
704
705  /* Start by accumulating incoming edge counts to the path's first bb
706     into a couple buckets:
707        path_in_count: total count of incoming edges that flow into the
708                  current path.
709        nonpath_count: total count of incoming edges that are not
710                  flowing along *any* path.  These are the counts
711                  that will still flow along the original path after
712                  all path duplication is done by potentially multiple
713                  calls to this routine.
714     (any other incoming edge counts are for a different jump threading
715     path that will be handled by a later call to this routine.)
716     To make this easier, start by recording all incoming edges that flow into
717     the current path in a bitmap.  We could add up the path's incoming edge
718     counts here, but we still need to walk all the first bb's incoming edges
719     below to add up the counts of the other edges not included in this jump
720     threading path.  */
721  struct el *next, *el;
722  bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
723  for (el = rd->incoming_edges; el; el = next)
724    {
725      next = el->next;
726      bitmap_set_bit (in_edge_srcs, el->e->src->index);
727    }
728  edge ein;
729  edge_iterator ei;
730  FOR_EACH_EDGE (ein, ei, e->dest->preds)
731    {
732      vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
733      /* Simply check the incoming edge src against the set captured above.  */
734      if (ein_path
735          && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
736        {
737          /* It is necessary but not sufficient that the last path edges
738             are identical.  There may be different paths that share the
739             same last path edge in the case where the last edge has a nocopy
740             source block.  */
741          gcc_assert (ein_path->last ()->e == elast);
742          path_in_count += ein->count;
743          path_in_freq += EDGE_FREQUENCY (ein);
744        }
745      else if (!ein_path)
746        {
747          /* Keep track of the incoming edges that are not on any jump-threading
748             path.  These counts will still flow out of original path after all
749             jump threading is complete.  */
750            nonpath_count += ein->count;
751        }
752    }
753
754  /* This is needed due to insane incoming frequencies.  */
755  if (path_in_freq > BB_FREQ_MAX)
756    path_in_freq = BB_FREQ_MAX;
757
758  BITMAP_FREE (in_edge_srcs);
759
760  /* Now compute the fraction of the total count coming into the first
761     path bb that is from the current threading path.  */
762  gcov_type total_count = e->dest->count;
763  /* Handle incoming profile insanities.  */
764  if (total_count < path_in_count)
765    path_in_count = total_count;
766  int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
767
768  /* Walk the entire path to do some more computation in order to estimate
769     how much of the path_in_count will flow out of the duplicated threading
770     path.  In the non-joiner case this is straightforward (it should be
771     the same as path_in_count, although we will handle incoming profile
772     insanities by setting it equal to the minimum count along the path).
773
774     In the joiner case, we need to estimate how much of the path_in_count
775     will stay on the threading path after the joiner's conditional branch.
776     We don't really know for sure how much of the counts
777     associated with this path go to each successor of the joiner, but we'll
778     estimate based on the fraction of the total count coming into the path
779     bb was from the threading paths (computed above in onpath_scale).
780     Afterwards, we will need to do some fixup to account for other threading
781     paths and possible profile insanities.
782
783     In order to estimate the joiner case's counts we also need to update
784     nonpath_count with any additional counts coming into the path.  Other
785     blocks along the path may have additional predecessors from outside
786     the path.  */
787  gcov_type path_out_count = path_in_count;
788  gcov_type min_path_count = path_in_count;
789  for (unsigned int i = 1; i < path->length (); i++)
790    {
791      edge epath = (*path)[i]->e;
792      gcov_type cur_count = epath->count;
793      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
794        {
795          has_joiner = true;
796          cur_count = apply_probability (cur_count, onpath_scale);
797        }
798      /* In the joiner case we need to update nonpath_count for any edges
799         coming into the path that will contribute to the count flowing
800         into the path successor.  */
801      if (has_joiner && epath != elast)
802      {
803        /* Look for other incoming edges after joiner.  */
804        FOR_EACH_EDGE (ein, ei, epath->dest->preds)
805          {
806            if (ein != epath
807                /* Ignore in edges from blocks we have duplicated for a
808                   threading path, which have duplicated edge counts until
809                   they are redirected by an invocation of this routine.  */
810                && !bitmap_bit_p (local_info->duplicate_blocks,
811                                  ein->src->index))
812              nonpath_count += ein->count;
813          }
814      }
815      if (cur_count < path_out_count)
816        path_out_count = cur_count;
817      if (epath->count < min_path_count)
818        min_path_count = epath->count;
819    }
820
821  /* We computed path_out_count above assuming that this path targeted
822     the joiner's on-path successor with the same likelihood as it
823     reached the joiner.  However, other thread paths through the joiner
824     may take a different path through the normal copy source block
825     (i.e. they have a different elast), meaning that they do not
826     contribute any counts to this path's elast.  As a result, it may
827     turn out that this path must have more count flowing to the on-path
828     successor of the joiner.  Essentially, all of this path's elast
829     count must be contributed by this path and any nonpath counts
830     (since any path through the joiner with a different elast will not
831     include a copy of this elast in its duplicated path).
832     So ensure that this path's path_out_count is at least the
833     difference between elast->count and nonpath_count.  Otherwise the edge
834     counts after threading will not be sane.  */
835  if (local_info->need_profile_correction
836      && has_joiner && path_out_count < elast->count - nonpath_count)
837  {
838    path_out_count = elast->count - nonpath_count;
839    /* But neither can we go above the minimum count along the path
840       we are duplicating.  This can be an issue due to profile
841       insanities coming in to this pass.  */
842    if (path_out_count > min_path_count)
843      path_out_count = min_path_count;
844  }
845
846  *path_in_count_ptr = path_in_count;
847  *path_out_count_ptr = path_out_count;
848  *path_in_freq_ptr = path_in_freq;
849  return has_joiner;
850}
851
852
853/* Update the counts and frequencies for both an original path
854   edge EPATH and its duplicate EDUP.  The duplicate source block
855   will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
856   and the duplicate edge EDUP will have a count of PATH_OUT_COUNT.  */
857static void
858update_profile (edge epath, edge edup, gcov_type path_in_count,
859                gcov_type path_out_count, int path_in_freq)
860{
861
862  /* First update the duplicated block's count / frequency.  */
863  if (edup)
864    {
865      basic_block dup_block = edup->src;
866      gcc_assert (dup_block->count == 0);
867      gcc_assert (dup_block->frequency == 0);
868      dup_block->count = path_in_count;
869      dup_block->frequency = path_in_freq;
870    }
871
872  /* Now update the original block's count and frequency in the
873     opposite manner - remove the counts/freq that will flow
874     into the duplicated block.  Handle underflow due to precision/
875     rounding issues.  */
876  epath->src->count -= path_in_count;
877  if (epath->src->count < 0)
878    epath->src->count = 0;
879  epath->src->frequency -= path_in_freq;
880  if (epath->src->frequency < 0)
881    epath->src->frequency = 0;
882
883  /* Next update this path edge's original and duplicated counts.  We know
884     that the duplicated path will have path_out_count flowing
885     out of it (in the joiner case this is the count along the duplicated path
886     out of the duplicated joiner).  This count can then be removed from the
887     original path edge.  */
888  if (edup)
889    edup->count = path_out_count;
890  epath->count -= path_out_count;
891  gcc_assert (epath->count >= 0);
892}
893
894
895/* The duplicate and original joiner blocks may end up with different
896   probabilities (different from both the original and from each other).
897   Recompute the probabilities here once we have updated the edge
898   counts and frequencies.  */
899
900static void
901recompute_probabilities (basic_block bb)
902{
903  edge esucc;
904  edge_iterator ei;
905  FOR_EACH_EDGE (esucc, ei, bb->succs)
906    {
907      if (!bb->count)
908        continue;
909
910      /* Prevent overflow computation due to insane profiles.  */
911      if (esucc->count < bb->count)
912        esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
913                                                 bb->count);
914      else
915        /* Can happen with missing/guessed probabilities, since we
916           may determine that more is flowing along duplicated
917           path than joiner succ probabilities allowed.
918           Counts and freqs will be insane after jump threading,
919           at least make sure probability is sane or we will
920           get a flow verification error.
921           Not much we can do to make counts/freqs sane without
922           redoing the profile estimation.  */
923        esucc->probability = REG_BR_PROB_BASE;
924    }
925}
926
927
928/* Update the counts of the original and duplicated edges from a joiner
929   that go off path, given that we have already determined that the
930   duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
931   outgoing count along the path PATH_OUT_COUNT.  The original (on-)path
932   edge from joiner is EPATH.  */
933
934static void
935update_joiner_offpath_counts (edge epath, basic_block dup_bb,
936                              gcov_type path_in_count,
937                              gcov_type path_out_count)
938{
939  /* Compute the count that currently flows off path from the joiner.
940     In other words, the total count of joiner's out edges other than
941     epath.  Compute this by walking the successors instead of
942     subtracting epath's count from the joiner bb count, since there
943     are sometimes slight insanities where the total out edge count is
944     larger than the bb count (possibly due to rounding/truncation
945     errors).  */
946  gcov_type total_orig_off_path_count = 0;
947  edge enonpath;
948  edge_iterator ei;
949  FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
950    {
951      if (enonpath == epath)
952        continue;
953      total_orig_off_path_count += enonpath->count;
954    }
955
956  /* For the path that we are duplicating, the amount that will flow
957     off path from the duplicated joiner is the delta between the
958     path's cumulative in count and the portion of that count we
959     estimated above as flowing from the joiner along the duplicated
960     path.  */
961  gcov_type total_dup_off_path_count = path_in_count - path_out_count;
962
963  /* Now do the actual updates of the off-path edges.  */
964  FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
965    {
966      /* Look for edges going off of the threading path.  */
967      if (enonpath == epath)
968        continue;
969
970      /* Find the corresponding edge out of the duplicated joiner.  */
971      edge enonpathdup = find_edge (dup_bb, enonpath->dest);
972      gcc_assert (enonpathdup);
973
974      /* We can't use the original probability of the joiner's out
975         edges, since the probabilities of the original branch
976         and the duplicated branches may vary after all threading is
977         complete.  But apportion the duplicated joiner's off-path
978         total edge count computed earlier (total_dup_off_path_count)
979         among the duplicated off-path edges based on their original
980         ratio to the full off-path count (total_orig_off_path_count).
981         */
982      int scale = GCOV_COMPUTE_SCALE (enonpath->count,
983                                      total_orig_off_path_count);
984      /* Give the duplicated offpath edge a portion of the duplicated
985         total.  */
986      enonpathdup->count = apply_scale (scale,
987                                        total_dup_off_path_count);
988      /* Now update the original offpath edge count, handling underflow
989         due to rounding errors.  */
990      enonpath->count -= enonpathdup->count;
991      if (enonpath->count < 0)
992        enonpath->count = 0;
993    }
994}
995
996
997/* Check if the paths through RD all have estimated frequencies but zero
998   profile counts.  This is more accurate than checking the entry block
999   for a zero profile count, since profile insanities sometimes creep in.  */
1000
1001static bool
1002estimated_freqs_path (struct redirection_data *rd)
1003{
1004  edge e = rd->incoming_edges->e;
1005  vec<jump_thread_edge *> *path = THREAD_PATH (e);
1006  edge ein;
1007  edge_iterator ei;
1008  bool non_zero_freq = false;
1009  FOR_EACH_EDGE (ein, ei, e->dest->preds)
1010    {
1011      if (ein->count)
1012        return false;
1013      non_zero_freq |= ein->src->frequency != 0;
1014    }
1015
1016  for (unsigned int i = 1; i < path->length (); i++)
1017    {
1018      edge epath = (*path)[i]->e;
1019      if (epath->src->count)
1020        return false;
1021      non_zero_freq |= epath->src->frequency != 0;
1022      edge esucc;
1023      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1024        {
1025          if (esucc->count)
1026            return false;
1027          non_zero_freq |= esucc->src->frequency != 0;
1028        }
1029    }
1030  return non_zero_freq;
1031}
1032
1033
1034/* Invoked for routines that have guessed frequencies and no profile
1035   counts to record the block and edge frequencies for paths through RD
1036   in the profile count fields of those blocks and edges.  This is because
1037   ssa_fix_duplicate_block_edges incrementally updates the block and
1038   edge counts as edges are redirected, and it is difficult to do that
1039   for edge frequencies which are computed on the fly from the source
1040   block frequency and probability.  When a block frequency is updated
1041   its outgoing edge frequencies are affected and become difficult to
1042   adjust.  */
1043
1044static void
1045freqs_to_counts_path (struct redirection_data *rd)
1046{
1047  edge e = rd->incoming_edges->e;
1048  vec<jump_thread_edge *> *path = THREAD_PATH (e);
1049  edge ein;
1050  edge_iterator ei;
1051  FOR_EACH_EDGE (ein, ei, e->dest->preds)
1052    {
1053      /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1054         errors applying the probability when the frequencies are very
1055         small.  */
1056      ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
1057                                      ein->probability);
1058    }
1059
1060  for (unsigned int i = 1; i < path->length (); i++)
1061    {
1062      edge epath = (*path)[i]->e;
1063      edge esucc;
1064      /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1065         errors applying the edge probability when the frequencies are very
1066         small.  */
1067      epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
1068      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1069        esucc->count = apply_probability (esucc->src->count,
1070                                          esucc->probability);
1071    }
1072}
1073
1074
1075/* For routines that have guessed frequencies and no profile counts, where we
1076   used freqs_to_counts_path to record block and edge frequencies for paths
1077   through RD, we clear the counts after completing all updates for RD.
1078   The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1079   but the block frequencies and edge probabilities were updated as well,
1080   so we can simply clear the count fields.  */
1081
1082static void
1083clear_counts_path (struct redirection_data *rd)
1084{
1085  edge e = rd->incoming_edges->e;
1086  vec<jump_thread_edge *> *path = THREAD_PATH (e);
1087  edge ein, esucc;
1088  edge_iterator ei;
1089  FOR_EACH_EDGE (ein, ei, e->dest->preds)
1090    ein->count = 0;
1091
1092  /* First clear counts along original path.  */
1093  for (unsigned int i = 1; i < path->length (); i++)
1094    {
1095      edge epath = (*path)[i]->e;
1096      FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1097        esucc->count = 0;
1098      epath->src->count = 0;
1099    }
1100  /* Also need to clear the counts along duplicated path.  */
1101  for (unsigned int i = 0; i < 2; i++)
1102    {
1103      basic_block dup = rd->dup_blocks[i];
1104      if (!dup)
1105        continue;
1106      FOR_EACH_EDGE (esucc, ei, dup->succs)
1107        esucc->count = 0;
1108      dup->count = 0;
1109    }
1110}
1111
1112/* Wire up the outgoing edges from the duplicate blocks and
1113   update any PHIs as needed.  Also update the profile counts
1114   on the original and duplicate blocks and edges.  */
1115void
1116ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1117			       ssa_local_info_t *local_info)
1118{
1119  bool multi_incomings = (rd->incoming_edges->next != NULL);
1120  edge e = rd->incoming_edges->e;
1121  vec<jump_thread_edge *> *path = THREAD_PATH (e);
1122  edge elast = path->last ()->e;
1123  gcov_type path_in_count = 0;
1124  gcov_type path_out_count = 0;
1125  int path_in_freq = 0;
1126
1127  /* This routine updates profile counts, frequencies, and probabilities
1128     incrementally. Since it is difficult to do the incremental updates
1129     using frequencies/probabilities alone, for routines without profile
1130     data we first take a snapshot of the existing block and edge frequencies
1131     by copying them into the empty profile count fields.  These counts are
1132     then used to do the incremental updates, and cleared at the end of this
1133     routine.  If the function is marked as having a profile, we still check
1134     to see if the paths through RD are using estimated frequencies because
1135     the routine had zero profile counts.  */
1136  bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1137                             || estimated_freqs_path (rd));
1138  if (do_freqs_to_counts)
1139    freqs_to_counts_path (rd);
1140
1141  /* First determine how much profile count to move from original
1142     path to the duplicate path.  This is tricky in the presence of
1143     a joiner (see comments for compute_path_counts), where some portion
1144     of the path's counts will flow off-path from the joiner.  In the
1145     non-joiner case the path_in_count and path_out_count should be the
1146     same.  */
1147  bool has_joiner = compute_path_counts (rd, local_info,
1148                                         &path_in_count, &path_out_count,
1149                                         &path_in_freq);
1150
1151  int cur_path_freq = path_in_freq;
1152  for (unsigned int count = 0, i = 1; i < path->length (); i++)
1153    {
1154      edge epath = (*path)[i]->e;
1155
1156      /* If we were threading through an joiner block, then we want
1157	 to keep its control statement and redirect an outgoing edge.
1158	 Else we want to remove the control statement & edges, then create
1159	 a new outgoing edge.  In both cases we may need to update PHIs.  */
1160      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1161	{
1162	  edge victim;
1163	  edge e2;
1164
1165          gcc_assert (has_joiner);
1166
1167	  /* This updates the PHIs at the destination of the duplicate
1168	     block.  Pass 0 instead of i if we are threading a path which
1169	     has multiple incoming edges.  */
1170	  update_destination_phis (local_info->bb, rd->dup_blocks[count],
1171				   path, multi_incomings ? 0 : i);
1172
1173	  /* Find the edge from the duplicate block to the block we're
1174	     threading through.  That's the edge we want to redirect.  */
1175	  victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1176
1177	  /* If there are no remaining blocks on the path to duplicate,
1178	     then redirect VICTIM to the final destination of the jump
1179	     threading path.  */
1180	  if (!any_remaining_duplicated_blocks (path, i))
1181	    {
1182	      e2 = redirect_edge_and_branch (victim, elast->dest);
1183	      /* If we redirected the edge, then we need to copy PHI arguments
1184		 at the target.  If the edge already existed (e2 != victim
1185		 case), then the PHIs in the target already have the correct
1186		 arguments.  */
1187	      if (e2 == victim)
1188		copy_phi_args (e2->dest, elast, e2,
1189			       path, multi_incomings ? 0 : i);
1190	    }
1191	  else
1192	    {
1193	      /* Redirect VICTIM to the next duplicated block in the path.  */
1194	      e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1195
1196	      /* We need to update the PHIs in the next duplicated block.  We
1197		 want the new PHI args to have the same value as they had
1198		 in the source of the next duplicate block.
1199
1200		 Thus, we need to know which edge we traversed into the
1201		 source of the duplicate.  Furthermore, we may have
1202		 traversed many edges to reach the source of the duplicate.
1203
1204		 Walk through the path starting at element I until we
1205		 hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
1206		 the edge from the prior element.  */
1207	      for (unsigned int j = i + 1; j < path->length (); j++)
1208		{
1209		  if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1210		    {
1211		      copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1212		      break;
1213		    }
1214		}
1215	    }
1216
1217	  /* Update the counts and frequency of both the original block
1218	     and path edge, and the duplicates.  The path duplicate's
1219	     incoming count and frequency are the totals for all edges
1220	     incoming to this jump threading path computed earlier.
1221	     And we know that the duplicated path will have path_out_count
1222	     flowing out of it (i.e. along the duplicated path out of the
1223	     duplicated joiner).  */
1224	  update_profile (epath, e2, path_in_count, path_out_count,
1225			  path_in_freq);
1226
1227	  /* Next we need to update the counts of the original and duplicated
1228	     edges from the joiner that go off path.  */
1229	  update_joiner_offpath_counts (epath, e2->src, path_in_count,
1230                                        path_out_count);
1231
1232	  /* Finally, we need to set the probabilities on the duplicated
1233	     edges out of the duplicated joiner (e2->src).  The probabilities
1234	     along the original path will all be updated below after we finish
1235	     processing the whole path.  */
1236	  recompute_probabilities (e2->src);
1237
1238	  /* Record the frequency flowing to the downstream duplicated
1239	     path blocks.  */
1240	  cur_path_freq = EDGE_FREQUENCY (e2);
1241	}
1242      else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1243	{
1244	  remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1245	  create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1246						   multi_incomings ? 0 : i);
1247	  if (count == 1)
1248	    single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1249
1250	  /* Update the counts and frequency of both the original block
1251	     and path edge, and the duplicates.  Since we are now after
1252	     any joiner that may have existed on the path, the count
1253	     flowing along the duplicated threaded path is path_out_count.
1254	     If we didn't have a joiner, then cur_path_freq was the sum
1255	     of the total frequencies along all incoming edges to the
1256	     thread path (path_in_freq).  If we had a joiner, it would have
1257	     been updated at the end of that handling to the edge frequency
1258	     along the duplicated joiner path edge.  */
1259	  update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1260			  path_out_count, path_out_count,
1261			  cur_path_freq);
1262	}
1263      else
1264        {
1265	  /* No copy case.  In this case we don't have an equivalent block
1266	     on the duplicated thread path to update, but we do need
1267	     to remove the portion of the counts/freqs that were moved
1268	     to the duplicated path from the counts/freqs flowing through
1269	     this block on the original path.  Since all the no-copy edges
1270	     are after any joiner, the removed count is the same as
1271	     path_out_count.
1272
1273	     If we didn't have a joiner, then cur_path_freq was the sum
1274	     of the total frequencies along all incoming edges to the
1275	     thread path (path_in_freq).  If we had a joiner, it would have
1276	     been updated at the end of that handling to the edge frequency
1277	     along the duplicated joiner path edge.  */
1278	     update_profile (epath, NULL, path_out_count, path_out_count,
1279			     cur_path_freq);
1280	}
1281
1282      /* Increment the index into the duplicated path when we processed
1283         a duplicated block.  */
1284      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1285          || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1286      {
1287	  count++;
1288      }
1289    }
1290
1291  /* Now walk orig blocks and update their probabilities, since the
1292     counts and freqs should be updated properly by above loop.  */
1293  for (unsigned int i = 1; i < path->length (); i++)
1294    {
1295      edge epath = (*path)[i]->e;
1296      recompute_probabilities (epath->src);
1297    }
1298
1299  /* Done with all profile and frequency updates, clear counts if they
1300     were copied.  */
1301  if (do_freqs_to_counts)
1302    clear_counts_path (rd);
1303}
1304
1305/* Hash table traversal callback routine to create duplicate blocks.  */
1306
1307int
1308ssa_create_duplicates (struct redirection_data **slot,
1309		       ssa_local_info_t *local_info)
1310{
1311  struct redirection_data *rd = *slot;
1312
1313  /* The second duplicated block in a jump threading path is specific
1314     to the path.  So it gets stored in RD rather than in LOCAL_DATA.
1315
1316     Each time we're called, we have to look through the path and see
1317     if a second block needs to be duplicated.
1318
1319     Note the search starts with the third edge on the path.  The first
1320     edge is the incoming edge, the second edge always has its source
1321     duplicated.  Thus we start our search with the third edge.  */
1322  vec<jump_thread_edge *> *path = rd->path;
1323  for (unsigned int i = 2; i < path->length (); i++)
1324    {
1325      if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1326	  || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1327	{
1328	  create_block_for_threading ((*path)[i]->e->src, rd, 1,
1329                                      &local_info->duplicate_blocks);
1330	  break;
1331	}
1332    }
1333
1334  /* Create a template block if we have not done so already.  Otherwise
1335     use the template to create a new block.  */
1336  if (local_info->template_block == NULL)
1337    {
1338      create_block_for_threading ((*path)[1]->e->src, rd, 0,
1339                                  &local_info->duplicate_blocks);
1340      local_info->template_block = rd->dup_blocks[0];
1341
1342      /* We do not create any outgoing edges for the template.  We will
1343	 take care of that in a later traversal.  That way we do not
1344	 create edges that are going to just be deleted.  */
1345    }
1346  else
1347    {
1348      create_block_for_threading (local_info->template_block, rd, 0,
1349                                  &local_info->duplicate_blocks);
1350
1351      /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1352	 block.   */
1353      ssa_fix_duplicate_block_edges (rd, local_info);
1354    }
1355
1356  /* Keep walking the hash table.  */
1357  return 1;
1358}
1359
1360/* We did not create any outgoing edges for the template block during
1361   block creation.  This hash table traversal callback creates the
1362   outgoing edge for the template block.  */
1363
1364inline int
1365ssa_fixup_template_block (struct redirection_data **slot,
1366			  ssa_local_info_t *local_info)
1367{
1368  struct redirection_data *rd = *slot;
1369
1370  /* If this is the template block halt the traversal after updating
1371     it appropriately.
1372
1373     If we were threading through an joiner block, then we want
1374     to keep its control statement and redirect an outgoing edge.
1375     Else we want to remove the control statement & edges, then create
1376     a new outgoing edge.  In both cases we may need to update PHIs.  */
1377  if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1378    {
1379      ssa_fix_duplicate_block_edges (rd, local_info);
1380      return 0;
1381    }
1382
1383  return 1;
1384}
1385
1386/* Hash table traversal callback to redirect each incoming edge
1387   associated with this hash table element to its new destination.  */
1388
1389int
1390ssa_redirect_edges (struct redirection_data **slot,
1391		    ssa_local_info_t *local_info)
1392{
1393  struct redirection_data *rd = *slot;
1394  struct el *next, *el;
1395
1396  /* Walk over all the incoming edges associated associated with this
1397     hash table entry.  */
1398  for (el = rd->incoming_edges; el; el = next)
1399    {
1400      edge e = el->e;
1401      vec<jump_thread_edge *> *path = THREAD_PATH (e);
1402
1403      /* Go ahead and free this element from the list.  Doing this now
1404	 avoids the need for another list walk when we destroy the hash
1405	 table.  */
1406      next = el->next;
1407      free (el);
1408
1409      thread_stats.num_threaded_edges++;
1410
1411      if (rd->dup_blocks[0])
1412	{
1413	  edge e2;
1414
1415	  if (dump_file && (dump_flags & TDF_DETAILS))
1416	    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
1417		     e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1418
1419	  /* If we redirect a loop latch edge cancel its loop.  */
1420	  if (e->src == e->src->loop_father->latch)
1421	    mark_loop_for_removal (e->src->loop_father);
1422
1423	  /* Redirect the incoming edge (possibly to the joiner block) to the
1424	     appropriate duplicate block.  */
1425	  e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1426	  gcc_assert (e == e2);
1427	  flush_pending_stmts (e2);
1428	}
1429
1430      /* Go ahead and clear E->aux.  It's not needed anymore and failure
1431	 to clear it will cause all kinds of unpleasant problems later.  */
1432      delete_jump_thread_path (path);
1433      e->aux = NULL;
1434
1435    }
1436
1437  /* Indicate that we actually threaded one or more jumps.  */
1438  if (rd->incoming_edges)
1439    local_info->jumps_threaded = true;
1440
1441  return 1;
1442}
1443
1444/* Return true if this block has no executable statements other than
1445   a simple ctrl flow instruction.  When the number of outgoing edges
1446   is one, this is equivalent to a "forwarder" block.  */
1447
1448static bool
1449redirection_block_p (basic_block bb)
1450{
1451  gimple_stmt_iterator gsi;
1452
1453  /* Advance to the first executable statement.  */
1454  gsi = gsi_start_bb (bb);
1455  while (!gsi_end_p (gsi)
1456	 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1457	     || is_gimple_debug (gsi_stmt (gsi))
1458	     || gimple_nop_p (gsi_stmt (gsi))))
1459    gsi_next (&gsi);
1460
1461  /* Check if this is an empty block.  */
1462  if (gsi_end_p (gsi))
1463    return true;
1464
1465  /* Test that we've reached the terminating control statement.  */
1466  return gsi_stmt (gsi)
1467	 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1468	     || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1469	     || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1470}
1471
1472/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1473   is reached via one or more specific incoming edges, we know which
1474   outgoing edge from BB will be traversed.
1475
1476   We want to redirect those incoming edges to the target of the
1477   appropriate outgoing edge.  Doing so avoids a conditional branch
1478   and may expose new optimization opportunities.  Note that we have
1479   to update dominator tree and SSA graph after such changes.
1480
1481   The key to keeping the SSA graph update manageable is to duplicate
1482   the side effects occurring in BB so that those side effects still
1483   occur on the paths which bypass BB after redirecting edges.
1484
1485   We accomplish this by creating duplicates of BB and arranging for
1486   the duplicates to unconditionally pass control to one specific
1487   successor of BB.  We then revector the incoming edges into BB to
1488   the appropriate duplicate of BB.
1489
1490   If NOLOOP_ONLY is true, we only perform the threading as long as it
1491   does not affect the structure of the loops in a nontrivial way.
1492
1493   If JOINERS is true, then thread through joiner blocks as well.  */
1494
1495static bool
1496thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1497{
1498  /* E is an incoming edge into BB that we may or may not want to
1499     redirect to a duplicate of BB.  */
1500  edge e, e2;
1501  edge_iterator ei;
1502  ssa_local_info_t local_info;
1503
1504  local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1505  local_info.need_profile_correction = false;
1506
1507  /* To avoid scanning a linear array for the element we need we instead
1508     use a hash table.  For normal code there should be no noticeable
1509     difference.  However, if we have a block with a large number of
1510     incoming and outgoing edges such linear searches can get expensive.  */
1511  redirection_data
1512    = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1513
1514  /* Record each unique threaded destination into a hash table for
1515     efficient lookups.  */
1516  edge last = NULL;
1517  FOR_EACH_EDGE (e, ei, bb->preds)
1518    {
1519      if (e->aux == NULL)
1520	continue;
1521
1522      vec<jump_thread_edge *> *path = THREAD_PATH (e);
1523
1524      if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1525	  || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1526	continue;
1527
1528      e2 = path->last ()->e;
1529      if (!e2 || noloop_only)
1530	{
1531	  /* If NOLOOP_ONLY is true, we only allow threading through the
1532	     header of a loop to exit edges.  */
1533
1534	  /* One case occurs when there was loop header buried in a jump
1535	     threading path that crosses loop boundaries.  We do not try
1536	     and thread this elsewhere, so just cancel the jump threading
1537	     request by clearing the AUX field now.  */
1538	  if ((bb->loop_father != e2->src->loop_father
1539	       && !loop_exit_edge_p (e2->src->loop_father, e2))
1540	      || (e2->src->loop_father != e2->dest->loop_father
1541		  && !loop_exit_edge_p (e2->src->loop_father, e2)))
1542	    {
1543	      /* Since this case is not handled by our special code
1544		 to thread through a loop header, we must explicitly
1545		 cancel the threading request here.  */
1546	      delete_jump_thread_path (path);
1547	      e->aux = NULL;
1548	      continue;
1549	    }
1550
1551	  /* Another case occurs when trying to thread through our
1552	     own loop header, possibly from inside the loop.  We will
1553	     thread these later.  */
1554	  unsigned int i;
1555	  for (i = 1; i < path->length (); i++)
1556	    {
1557	      if ((*path)[i]->e->src == bb->loop_father->header
1558		  && (!loop_exit_edge_p (bb->loop_father, e2)
1559		      || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1560		break;
1561	    }
1562
1563	  if (i != path->length ())
1564	    continue;
1565	}
1566
1567      /* Insert the outgoing edge into the hash table if it is not
1568	 already in the hash table.  */
1569      lookup_redirection_data (e, INSERT);
1570
1571      /* When we have thread paths through a common joiner with different
1572	 final destinations, then we may need corrections to deal with
1573	 profile insanities.  See the big comment before compute_path_counts.  */
1574      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1575	{
1576	  if (!last)
1577	    last = e2;
1578	  else if (e2 != last)
1579	    local_info.need_profile_correction = true;
1580	}
1581    }
1582
1583  /* We do not update dominance info.  */
1584  free_dominance_info (CDI_DOMINATORS);
1585
1586  /* We know we only thread through the loop header to loop exits.
1587     Let the basic block duplication hook know we are not creating
1588     a multiple entry loop.  */
1589  if (noloop_only
1590      && bb == bb->loop_father->header)
1591    set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1592
1593  /* Now create duplicates of BB.
1594
1595     Note that for a block with a high outgoing degree we can waste
1596     a lot of time and memory creating and destroying useless edges.
1597
1598     So we first duplicate BB and remove the control structure at the
1599     tail of the duplicate as well as all outgoing edges from the
1600     duplicate.  We then use that duplicate block as a template for
1601     the rest of the duplicates.  */
1602  local_info.template_block = NULL;
1603  local_info.bb = bb;
1604  local_info.jumps_threaded = false;
1605  redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1606			    (&local_info);
1607
1608  /* The template does not have an outgoing edge.  Create that outgoing
1609     edge and update PHI nodes as the edge's target as necessary.
1610
1611     We do this after creating all the duplicates to avoid creating
1612     unnecessary edges.  */
1613  redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1614			    (&local_info);
1615
1616  /* The hash table traversals above created the duplicate blocks (and the
1617     statements within the duplicate blocks).  This loop creates PHI nodes for
1618     the duplicated blocks and redirects the incoming edges into BB to reach
1619     the duplicates of BB.  */
1620  redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1621			    (&local_info);
1622
1623  /* Done with this block.  Clear REDIRECTION_DATA.  */
1624  delete redirection_data;
1625  redirection_data = NULL;
1626
1627  if (noloop_only
1628      && bb == bb->loop_father->header)
1629    set_loop_copy (bb->loop_father, NULL);
1630
1631  BITMAP_FREE (local_info.duplicate_blocks);
1632  local_info.duplicate_blocks = NULL;
1633
1634  /* Indicate to our caller whether or not any jumps were threaded.  */
1635  return local_info.jumps_threaded;
1636}
1637
1638/* Wrapper for thread_block_1 so that we can first handle jump
1639   thread paths which do not involve copying joiner blocks, then
1640   handle jump thread paths which have joiner blocks.
1641
1642   By doing things this way we can be as aggressive as possible and
1643   not worry that copying a joiner block will create a jump threading
1644   opportunity.  */
1645
1646static bool
1647thread_block (basic_block bb, bool noloop_only)
1648{
1649  bool retval;
1650  retval = thread_block_1 (bb, noloop_only, false);
1651  retval |= thread_block_1 (bb, noloop_only, true);
1652  return retval;
1653}
1654
1655
1656/* Threads edge E through E->dest to the edge THREAD_TARGET (E).  Returns the
1657   copy of E->dest created during threading, or E->dest if it was not necessary
1658   to copy it (E is its single predecessor).  */
1659
1660static basic_block
1661thread_single_edge (edge e)
1662{
1663  basic_block bb = e->dest;
1664  struct redirection_data rd;
1665  vec<jump_thread_edge *> *path = THREAD_PATH (e);
1666  edge eto = (*path)[1]->e;
1667
1668  for (unsigned int i = 0; i < path->length (); i++)
1669    delete (*path)[i];
1670  delete path;
1671  e->aux = NULL;
1672
1673  thread_stats.num_threaded_edges++;
1674
1675  if (single_pred_p (bb))
1676    {
1677      /* If BB has just a single predecessor, we should only remove the
1678	 control statements at its end, and successors except for ETO.  */
1679      remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1680
1681      /* And fixup the flags on the single remaining edge.  */
1682      eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1683      eto->flags |= EDGE_FALLTHRU;
1684
1685      return bb;
1686    }
1687
1688  /* Otherwise, we need to create a copy.  */
1689  if (e->dest == eto->src)
1690    update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
1691
1692  vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1693  jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1694  npath->safe_push (x);
1695
1696  x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1697  npath->safe_push (x);
1698  rd.path = npath;
1699
1700  create_block_for_threading (bb, &rd, 0, NULL);
1701  remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1702  create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1703
1704  if (dump_file && (dump_flags & TDF_DETAILS))
1705    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
1706	     e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1707
1708  rd.dup_blocks[0]->count = e->count;
1709  rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1710  single_succ_edge (rd.dup_blocks[0])->count = e->count;
1711  redirect_edge_and_branch (e, rd.dup_blocks[0]);
1712  flush_pending_stmts (e);
1713
1714  return rd.dup_blocks[0];
1715}
1716
1717/* Callback for dfs_enumerate_from.  Returns true if BB is different
1718   from STOP and DBDS_CE_STOP.  */
1719
1720static basic_block dbds_ce_stop;
1721static bool
1722dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1723{
1724  return (bb != (const_basic_block) stop
1725	  && bb != dbds_ce_stop);
1726}
1727
1728/* Evaluates the dominance relationship of latch of the LOOP and BB, and
1729   returns the state.  */
1730
1731enum bb_dom_status
1732{
1733  /* BB does not dominate latch of the LOOP.  */
1734  DOMST_NONDOMINATING,
1735  /* The LOOP is broken (there is no path from the header to its latch.  */
1736  DOMST_LOOP_BROKEN,
1737  /* BB dominates the latch of the LOOP.  */
1738  DOMST_DOMINATING
1739};
1740
1741static enum bb_dom_status
1742determine_bb_domination_status (struct loop *loop, basic_block bb)
1743{
1744  basic_block *bblocks;
1745  unsigned nblocks, i;
1746  bool bb_reachable = false;
1747  edge_iterator ei;
1748  edge e;
1749
1750  /* This function assumes BB is a successor of LOOP->header.
1751     If that is not the case return DOMST_NONDOMINATING which
1752     is always safe.  */
1753    {
1754      bool ok = false;
1755
1756      FOR_EACH_EDGE (e, ei, bb->preds)
1757	{
1758     	  if (e->src == loop->header)
1759	    {
1760	      ok = true;
1761	      break;
1762	    }
1763	}
1764
1765      if (!ok)
1766	return DOMST_NONDOMINATING;
1767    }
1768
1769  if (bb == loop->latch)
1770    return DOMST_DOMINATING;
1771
1772  /* Check that BB dominates LOOP->latch, and that it is back-reachable
1773     from it.  */
1774
1775  bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1776  dbds_ce_stop = loop->header;
1777  nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1778				bblocks, loop->num_nodes, bb);
1779  for (i = 0; i < nblocks; i++)
1780    FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1781      {
1782	if (e->src == loop->header)
1783	  {
1784	    free (bblocks);
1785	    return DOMST_NONDOMINATING;
1786	  }
1787	if (e->src == bb)
1788	  bb_reachable = true;
1789      }
1790
1791  free (bblocks);
1792  return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1793}
1794
1795/* Return true if BB is part of the new pre-header that is created
1796   when threading the latch to DATA.  */
1797
1798static bool
1799def_split_header_continue_p (const_basic_block bb, const void *data)
1800{
1801  const_basic_block new_header = (const_basic_block) data;
1802  const struct loop *l;
1803
1804  if (bb == new_header
1805      || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1806    return false;
1807  for (l = bb->loop_father; l; l = loop_outer (l))
1808    if (l == new_header->loop_father)
1809      return true;
1810  return false;
1811}
1812
1813/* Thread jumps through the header of LOOP.  Returns true if cfg changes.
1814   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1815   to the inside of the loop.  */
1816
1817static bool
1818thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1819{
1820  basic_block header = loop->header;
1821  edge e, tgt_edge, latch = loop_latch_edge (loop);
1822  edge_iterator ei;
1823  basic_block tgt_bb, atgt_bb;
1824  enum bb_dom_status domst;
1825
1826  /* We have already threaded through headers to exits, so all the threading
1827     requests now are to the inside of the loop.  We need to avoid creating
1828     irreducible regions (i.e., loops with more than one entry block), and
1829     also loop with several latch edges, or new subloops of the loop (although
1830     there are cases where it might be appropriate, it is difficult to decide,
1831     and doing it wrongly may confuse other optimizers).
1832
1833     We could handle more general cases here.  However, the intention is to
1834     preserve some information about the loop, which is impossible if its
1835     structure changes significantly, in a way that is not well understood.
1836     Thus we only handle few important special cases, in which also updating
1837     of the loop-carried information should be feasible:
1838
1839     1) Propagation of latch edge to a block that dominates the latch block
1840	of a loop.  This aims to handle the following idiom:
1841
1842	first = 1;
1843	while (1)
1844	  {
1845	    if (first)
1846	      initialize;
1847	    first = 0;
1848	    body;
1849	  }
1850
1851	After threading the latch edge, this becomes
1852
1853	first = 1;
1854	if (first)
1855	  initialize;
1856	while (1)
1857	  {
1858	    first = 0;
1859	    body;
1860	  }
1861
1862	The original header of the loop is moved out of it, and we may thread
1863	the remaining edges through it without further constraints.
1864
1865     2) All entry edges are propagated to a single basic block that dominates
1866	the latch block of the loop.  This aims to handle the following idiom
1867	(normally created for "for" loops):
1868
1869	i = 0;
1870	while (1)
1871	  {
1872	    if (i >= 100)
1873	      break;
1874	    body;
1875	    i++;
1876	  }
1877
1878	This becomes
1879
1880	i = 0;
1881	while (1)
1882	  {
1883	    body;
1884	    i++;
1885	    if (i >= 100)
1886	      break;
1887	  }
1888     */
1889
1890  /* Threading through the header won't improve the code if the header has just
1891     one successor.  */
1892  if (single_succ_p (header))
1893    goto fail;
1894
1895  /* If we threaded the latch using a joiner block, we cancel the
1896     threading opportunity out of an abundance of caution.  However,
1897     still allow threading from outside to inside the loop.  */
1898  if (latch->aux)
1899    {
1900      vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1901      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1902	{
1903	  delete_jump_thread_path (path);
1904	  latch->aux = NULL;
1905	}
1906    }
1907
1908  if (latch->aux)
1909    {
1910      vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1911      tgt_edge = (*path)[1]->e;
1912      tgt_bb = tgt_edge->dest;
1913    }
1914  else if (!may_peel_loop_headers
1915	   && !redirection_block_p (loop->header))
1916    goto fail;
1917  else
1918    {
1919      tgt_bb = NULL;
1920      tgt_edge = NULL;
1921      FOR_EACH_EDGE (e, ei, header->preds)
1922	{
1923	  if (!e->aux)
1924	    {
1925	      if (e == latch)
1926		continue;
1927
1928	      /* If latch is not threaded, and there is a header
1929		 edge that is not threaded, we would create loop
1930		 with multiple entries.  */
1931	      goto fail;
1932	    }
1933
1934	  vec<jump_thread_edge *> *path = THREAD_PATH (e);
1935
1936	  if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1937	    goto fail;
1938	  tgt_edge = (*path)[1]->e;
1939	  atgt_bb = tgt_edge->dest;
1940	  if (!tgt_bb)
1941	    tgt_bb = atgt_bb;
1942	  /* Two targets of threading would make us create loop
1943	     with multiple entries.  */
1944	  else if (tgt_bb != atgt_bb)
1945	    goto fail;
1946	}
1947
1948      if (!tgt_bb)
1949	{
1950	  /* There are no threading requests.  */
1951	  return false;
1952	}
1953
1954      /* Redirecting to empty loop latch is useless.  */
1955      if (tgt_bb == loop->latch
1956	  && empty_block_p (loop->latch))
1957	goto fail;
1958    }
1959
1960  /* The target block must dominate the loop latch, otherwise we would be
1961     creating a subloop.  */
1962  domst = determine_bb_domination_status (loop, tgt_bb);
1963  if (domst == DOMST_NONDOMINATING)
1964    goto fail;
1965  if (domst == DOMST_LOOP_BROKEN)
1966    {
1967      /* If the loop ceased to exist, mark it as such, and thread through its
1968	 original header.  */
1969      mark_loop_for_removal (loop);
1970      return thread_block (header, false);
1971    }
1972
1973  if (tgt_bb->loop_father->header == tgt_bb)
1974    {
1975      /* If the target of the threading is a header of a subloop, we need
1976	 to create a preheader for it, so that the headers of the two loops
1977	 do not merge.  */
1978      if (EDGE_COUNT (tgt_bb->preds) > 2)
1979	{
1980	  tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1981	  gcc_assert (tgt_bb != NULL);
1982	}
1983      else
1984	tgt_bb = split_edge (tgt_edge);
1985    }
1986
1987  if (latch->aux)
1988    {
1989      basic_block *bblocks;
1990      unsigned nblocks, i;
1991
1992      /* First handle the case latch edge is redirected.  We are copying
1993	 the loop header but not creating a multiple entry loop.  Make the
1994	 cfg manipulation code aware of that fact.  */
1995      set_loop_copy (loop, loop);
1996      loop->latch = thread_single_edge (latch);
1997      set_loop_copy (loop, NULL);
1998      gcc_assert (single_succ (loop->latch) == tgt_bb);
1999      loop->header = tgt_bb;
2000
2001      /* Remove the new pre-header blocks from our loop.  */
2002      bblocks = XCNEWVEC (basic_block, loop->num_nodes);
2003      nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
2004				    bblocks, loop->num_nodes, tgt_bb);
2005      for (i = 0; i < nblocks; i++)
2006	if (bblocks[i]->loop_father == loop)
2007	  {
2008	    remove_bb_from_loops (bblocks[i]);
2009	    add_bb_to_loop (bblocks[i], loop_outer (loop));
2010	  }
2011      free (bblocks);
2012
2013      /* If the new header has multiple latches mark it so.  */
2014      FOR_EACH_EDGE (e, ei, loop->header->preds)
2015	if (e->src->loop_father == loop
2016	    && e->src != loop->latch)
2017	  {
2018	    loop->latch = NULL;
2019	    loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
2020	  }
2021
2022      /* Cancel remaining threading requests that would make the
2023	 loop a multiple entry loop.  */
2024      FOR_EACH_EDGE (e, ei, header->preds)
2025	{
2026	  edge e2;
2027
2028	  if (e->aux == NULL)
2029	    continue;
2030
2031	  vec<jump_thread_edge *> *path = THREAD_PATH (e);
2032	  e2 = path->last ()->e;
2033
2034	  if (e->src->loop_father != e2->dest->loop_father
2035	      && e2->dest != loop->header)
2036	    {
2037	      delete_jump_thread_path (path);
2038	      e->aux = NULL;
2039	    }
2040	}
2041
2042      /* Thread the remaining edges through the former header.  */
2043      thread_block (header, false);
2044    }
2045  else
2046    {
2047      basic_block new_preheader;
2048
2049      /* Now consider the case entry edges are redirected to the new entry
2050	 block.  Remember one entry edge, so that we can find the new
2051	 preheader (its destination after threading).  */
2052      FOR_EACH_EDGE (e, ei, header->preds)
2053	{
2054	  if (e->aux)
2055	    break;
2056	}
2057
2058      /* The duplicate of the header is the new preheader of the loop.  Ensure
2059	 that it is placed correctly in the loop hierarchy.  */
2060      set_loop_copy (loop, loop_outer (loop));
2061
2062      thread_block (header, false);
2063      set_loop_copy (loop, NULL);
2064      new_preheader = e->dest;
2065
2066      /* Create the new latch block.  This is always necessary, as the latch
2067	 must have only a single successor, but the original header had at
2068	 least two successors.  */
2069      loop->latch = NULL;
2070      mfb_kj_edge = single_succ_edge (new_preheader);
2071      loop->header = mfb_kj_edge->dest;
2072      latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
2073      loop->header = latch->dest;
2074      loop->latch = latch->src;
2075    }
2076
2077  return true;
2078
2079fail:
2080  /* We failed to thread anything.  Cancel the requests.  */
2081  FOR_EACH_EDGE (e, ei, header->preds)
2082    {
2083      vec<jump_thread_edge *> *path = THREAD_PATH (e);
2084
2085      if (path)
2086	{
2087	  delete_jump_thread_path (path);
2088	  e->aux = NULL;
2089	}
2090    }
2091  return false;
2092}
2093
2094/* E1 and E2 are edges into the same basic block.  Return TRUE if the
2095   PHI arguments associated with those edges are equal or there are no
2096   PHI arguments, otherwise return FALSE.  */
2097
2098static bool
2099phi_args_equal_on_edges (edge e1, edge e2)
2100{
2101  gphi_iterator gsi;
2102  int indx1 = e1->dest_idx;
2103  int indx2 = e2->dest_idx;
2104
2105  for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2106    {
2107      gphi *phi = gsi.phi ();
2108
2109      if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2110			    gimple_phi_arg_def (phi, indx2), 0))
2111	return false;
2112    }
2113  return true;
2114}
2115
2116/* Walk through the registered jump threads and convert them into a
2117   form convenient for this pass.
2118
2119   Any block which has incoming edges threaded to outgoing edges
2120   will have its entry in THREADED_BLOCK set.
2121
2122   Any threaded edge will have its new outgoing edge stored in the
2123   original edge's AUX field.
2124
2125   This form avoids the need to walk all the edges in the CFG to
2126   discover blocks which need processing and avoids unnecessary
2127   hash table lookups to map from threaded edge to new target.  */
2128
2129static void
2130mark_threaded_blocks (bitmap threaded_blocks)
2131{
2132  unsigned int i;
2133  bitmap_iterator bi;
2134  bitmap tmp = BITMAP_ALLOC (NULL);
2135  basic_block bb;
2136  edge e;
2137  edge_iterator ei;
2138
2139  /* It is possible to have jump threads in which one is a subpath
2140     of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
2141     block and (B, C), (C, D) where no joiner block exists.
2142
2143     When this occurs ignore the jump thread request with the joiner
2144     block.  It's totally subsumed by the simpler jump thread request.
2145
2146     This results in less block copying, simpler CFGs.  More importantly,
2147     when we duplicate the joiner block, B, in this case we will create
2148     a new threading opportunity that we wouldn't be able to optimize
2149     until the next jump threading iteration.
2150
2151     So first convert the jump thread requests which do not require a
2152     joiner block.  */
2153  for (i = 0; i < paths.length (); i++)
2154    {
2155      vec<jump_thread_edge *> *path = paths[i];
2156
2157      if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2158	{
2159	  edge e = (*path)[0]->e;
2160	  e->aux = (void *)path;
2161	  bitmap_set_bit (tmp, e->dest->index);
2162	}
2163    }
2164
2165  /* Now iterate again, converting cases where we want to thread
2166     through a joiner block, but only if no other edge on the path
2167     already has a jump thread attached to it.  We do this in two passes,
2168     to avoid situations where the order in the paths vec can hide overlapping
2169     threads (the path is recorded on the incoming edge, so we would miss
2170     cases where the second path starts at a downstream edge on the same
2171     path).  First record all joiner paths, deleting any in the unexpected
2172     case where there is already a path for that incoming edge.  */
2173  for (i = 0; i < paths.length (); i++)
2174    {
2175      vec<jump_thread_edge *> *path = paths[i];
2176
2177      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2178        {
2179	  /* Attach the path to the starting edge if none is yet recorded.  */
2180          if ((*path)[0]->e->aux == NULL)
2181            (*path)[0]->e->aux = path;
2182	  else if (dump_file && (dump_flags & TDF_DETAILS))
2183	    dump_jump_thread_path (dump_file, *path, false);
2184        }
2185    }
2186  /* Second, look for paths that have any other jump thread attached to
2187     them, and either finish converting them or cancel them.  */
2188  for (i = 0; i < paths.length (); i++)
2189    {
2190      vec<jump_thread_edge *> *path = paths[i];
2191      edge e = (*path)[0]->e;
2192
2193      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2194	{
2195	  unsigned int j;
2196	  for (j = 1; j < path->length (); j++)
2197	    if ((*path)[j]->e->aux != NULL)
2198	      break;
2199
2200	  /* If we iterated through the entire path without exiting the loop,
2201	     then we are good to go, record it.  */
2202	  if (j == path->length ())
2203	    bitmap_set_bit (tmp, e->dest->index);
2204	  else
2205	    {
2206	      e->aux = NULL;
2207	      if (dump_file && (dump_flags & TDF_DETAILS))
2208	        dump_jump_thread_path (dump_file, *path, false);
2209	    }
2210	}
2211    }
2212
2213  /* If optimizing for size, only thread through block if we don't have
2214     to duplicate it or it's an otherwise empty redirection block.  */
2215  if (optimize_function_for_size_p (cfun))
2216    {
2217      EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2218	{
2219	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
2220	  if (EDGE_COUNT (bb->preds) > 1
2221	      && !redirection_block_p (bb))
2222	    {
2223	      FOR_EACH_EDGE (e, ei, bb->preds)
2224		{
2225		  if (e->aux)
2226		    {
2227		      vec<jump_thread_edge *> *path = THREAD_PATH (e);
2228		      delete_jump_thread_path (path);
2229		      e->aux = NULL;
2230		    }
2231		}
2232	    }
2233	  else
2234	    bitmap_set_bit (threaded_blocks, i);
2235	}
2236    }
2237  else
2238    bitmap_copy (threaded_blocks, tmp);
2239
2240  /* Look for jump threading paths which cross multiple loop headers.
2241
2242     The code to thread through loop headers will change the CFG in ways
2243     that break assumptions made by the loop optimization code.
2244
2245     We don't want to blindly cancel the requests.  We can instead do better
2246     by trimming off the end of the jump thread path.  */
2247  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2248    {
2249      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2250      FOR_EACH_EDGE (e, ei, bb->preds)
2251	{
2252	  if (e->aux)
2253	    {
2254	      vec<jump_thread_edge *> *path = THREAD_PATH (e);
2255
2256	      for (unsigned int i = 0, crossed_headers = 0;
2257		   i < path->length ();
2258		   i++)
2259		{
2260		  basic_block dest = (*path)[i]->e->dest;
2261		  crossed_headers += (dest == dest->loop_father->header);
2262		  if (crossed_headers > 1)
2263		    {
2264		      /* Trim from entry I onwards.  */
2265		      for (unsigned int j = i; j < path->length (); j++)
2266			delete (*path)[j];
2267		      path->truncate (i);
2268
2269		      /* Now that we've truncated the path, make sure
2270			 what's left is still valid.   We need at least
2271			 two edges on the path and the last edge can not
2272			 be a joiner.  This should never happen, but let's
2273			 be safe.  */
2274		      if (path->length () < 2
2275			  || (path->last ()->type
2276			      == EDGE_COPY_SRC_JOINER_BLOCK))
2277			{
2278			  delete_jump_thread_path (path);
2279			  e->aux = NULL;
2280			}
2281		      break;
2282		    }
2283		}
2284	    }
2285	}
2286    }
2287
2288  /* If we have a joiner block (J) which has two successors S1 and S2 and
2289     we are threading though S1 and the final destination of the thread
2290     is S2, then we must verify that any PHI nodes in S2 have the same
2291     PHI arguments for the edge J->S2 and J->S1->...->S2.
2292
2293     We used to detect this prior to registering the jump thread, but
2294     that prohibits propagation of edge equivalences into non-dominated
2295     PHI nodes as the equivalency test might occur before propagation.
2296
2297     This must also occur after we truncate any jump threading paths
2298     as this scenario may only show up after truncation.
2299
2300     This works for now, but will need improvement as part of the FSA
2301     optimization.
2302
2303     Note since we've moved the thread request data to the edges,
2304     we have to iterate on those rather than the threaded_edges vector.  */
2305  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2306    {
2307      bb = BASIC_BLOCK_FOR_FN (cfun, i);
2308      FOR_EACH_EDGE (e, ei, bb->preds)
2309	{
2310	  if (e->aux)
2311	    {
2312	      vec<jump_thread_edge *> *path = THREAD_PATH (e);
2313	      bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2314
2315	      if (have_joiner)
2316		{
2317		  basic_block joiner = e->dest;
2318		  edge final_edge = path->last ()->e;
2319		  basic_block final_dest = final_edge->dest;
2320		  edge e2 = find_edge (joiner, final_dest);
2321
2322		  if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2323		    {
2324		      delete_jump_thread_path (path);
2325		      e->aux = NULL;
2326		    }
2327		}
2328	    }
2329	}
2330    }
2331
2332  BITMAP_FREE (tmp);
2333}
2334
2335
2336/* Return TRUE if BB ends with a switch statement or a computed goto.
2337   Otherwise return false.  */
2338static bool
2339bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED)
2340{
2341  gimple stmt = last_stmt (bb);
2342  if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2343    return true;
2344  if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2345      && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2346    return true;
2347  return false;
2348}
2349
2350/* Verify that the REGION is a valid jump thread.  A jump thread is a special
2351   case of SEME Single Entry Multiple Exits region in which all nodes in the
2352   REGION have exactly one incoming edge.  The only exception is the first block
2353   that may not have been connected to the rest of the cfg yet.  */
2354
2355DEBUG_FUNCTION void
2356verify_jump_thread (basic_block *region, unsigned n_region)
2357{
2358  for (unsigned i = 0; i < n_region; i++)
2359    gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2360}
2361
2362/* Return true when BB is one of the first N items in BBS.  */
2363
2364static inline bool
2365bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2366{
2367  for (int i = 0; i < n; i++)
2368    if (bb == bbs[i])
2369      return true;
2370
2371  return false;
2372}
2373
2374/* Duplicates a jump-thread path of N_REGION basic blocks.
2375   The ENTRY edge is redirected to the duplicate of the region.
2376
2377   Remove the last conditional statement in the last basic block in the REGION,
2378   and create a single fallthru edge pointing to the same destination as the
2379   EXIT edge.
2380
2381   The new basic blocks are stored to REGION_COPY in the same order as they had
2382   in REGION, provided that REGION_COPY is not NULL.
2383
2384   Returns false if it is unable to copy the region, true otherwise.  */
2385
2386static bool
2387duplicate_thread_path (edge entry, edge exit,
2388		       basic_block *region, unsigned n_region,
2389		       basic_block *region_copy)
2390{
2391  unsigned i;
2392  bool free_region_copy = false;
2393  struct loop *loop = entry->dest->loop_father;
2394  edge exit_copy;
2395  edge redirected;
2396  int total_freq = 0, entry_freq = 0;
2397  gcov_type total_count = 0, entry_count = 0;
2398
2399  if (!can_copy_bbs_p (region, n_region))
2400    return false;
2401
2402  /* Some sanity checking.  Note that we do not check for all possible
2403     missuses of the functions.  I.e. if you ask to copy something weird,
2404     it will work, but the state of structures probably will not be
2405     correct.  */
2406  for (i = 0; i < n_region; i++)
2407    {
2408      /* We do not handle subloops, i.e. all the blocks must belong to the
2409	 same loop.  */
2410      if (region[i]->loop_father != loop)
2411	return false;
2412    }
2413
2414  initialize_original_copy_tables ();
2415
2416  set_loop_copy (loop, loop);
2417
2418  if (!region_copy)
2419    {
2420      region_copy = XNEWVEC (basic_block, n_region);
2421      free_region_copy = true;
2422    }
2423
2424  if (entry->dest->count)
2425    {
2426      total_count = entry->dest->count;
2427      entry_count = entry->count;
2428      /* Fix up corner cases, to avoid division by zero or creation of negative
2429	 frequencies.  */
2430      if (entry_count > total_count)
2431	entry_count = total_count;
2432    }
2433  else
2434    {
2435      total_freq = entry->dest->frequency;
2436      entry_freq = EDGE_FREQUENCY (entry);
2437      /* Fix up corner cases, to avoid division by zero or creation of negative
2438	 frequencies.  */
2439      if (total_freq == 0)
2440	total_freq = 1;
2441      else if (entry_freq > total_freq)
2442	entry_freq = total_freq;
2443    }
2444
2445  copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2446	    split_edge_bb_loc (entry), false);
2447
2448  /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
2449     following code ensures that all the edges exiting the jump-thread path are
2450     redirected back to the original code: these edges are exceptions
2451     invalidating the property that is propagated by executing all the blocks of
2452     the jump-thread path in order.  */
2453
2454  for (i = 0; i < n_region; i++)
2455    {
2456      edge e;
2457      edge_iterator ei;
2458      basic_block bb = region_copy[i];
2459
2460      if (single_succ_p (bb))
2461	{
2462	  /* Make sure the successor is the next node in the path.  */
2463	  gcc_assert (i + 1 == n_region
2464		      || region_copy[i + 1] == single_succ_edge (bb)->dest);
2465	  continue;
2466	}
2467
2468      /* Special case the last block on the path: make sure that it does not
2469	 jump back on the copied path.  */
2470      if (i + 1 == n_region)
2471	{
2472	  FOR_EACH_EDGE (e, ei, bb->succs)
2473	    if (bb_in_bbs (e->dest, region_copy, n_region - 1))
2474	      {
2475		basic_block orig = get_bb_original (e->dest);
2476		if (orig)
2477		  redirect_edge_and_branch_force (e, orig);
2478	      }
2479	  continue;
2480	}
2481
2482      /* Redirect all other edges jumping to non-adjacent blocks back to the
2483	 original code.  */
2484      FOR_EACH_EDGE (e, ei, bb->succs)
2485	if (region_copy[i + 1] != e->dest)
2486	  {
2487	    basic_block orig = get_bb_original (e->dest);
2488	    if (orig)
2489	      redirect_edge_and_branch_force (e, orig);
2490	  }
2491    }
2492
2493  if (total_count)
2494    {
2495      scale_bbs_frequencies_gcov_type (region, n_region,
2496				       total_count - entry_count,
2497				       total_count);
2498      scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
2499				       total_count);
2500    }
2501  else
2502    {
2503      scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
2504				 total_freq);
2505      scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
2506    }
2507
2508#ifdef ENABLE_CHECKING
2509  verify_jump_thread (region_copy, n_region);
2510#endif
2511
2512  /* Remove the last branch in the jump thread path.  */
2513  remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2514  edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2515
2516  if (e) {
2517    rescan_loop_exit (e, true, false);
2518    e->probability = REG_BR_PROB_BASE;
2519    e->count = region_copy[n_region - 1]->count;
2520  }
2521
2522  /* Redirect the entry and add the phi node arguments.  */
2523  if (entry->dest == loop->header)
2524    mark_loop_for_removal (loop);
2525  redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2526  gcc_assert (redirected != NULL);
2527  flush_pending_stmts (entry);
2528
2529  /* Add the other PHI node arguments.  */
2530  add_phi_args_after_copy (region_copy, n_region, NULL);
2531
2532  if (free_region_copy)
2533    free (region_copy);
2534
2535  free_original_copy_tables ();
2536  return true;
2537}
2538
2539/* Return true when PATH is a valid jump-thread path.  */
2540
2541static bool
2542valid_jump_thread_path (vec<jump_thread_edge *> *path)
2543{
2544  unsigned len = path->length ();
2545
2546  /* Check that the path is connected.  */
2547  for (unsigned int j = 0; j < len - 1; j++)
2548    if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
2549      return false;
2550
2551  return true;
2552}
2553
2554/* Walk through all blocks and thread incoming edges to the appropriate
2555   outgoing edge for each edge pair recorded in THREADED_EDGES.
2556
2557   It is the caller's responsibility to fix the dominance information
2558   and rewrite duplicated SSA_NAMEs back into SSA form.
2559
2560   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2561   loop headers if it does not simplify the loop.
2562
2563   Returns true if one or more edges were threaded, false otherwise.  */
2564
2565bool
2566thread_through_all_blocks (bool may_peel_loop_headers)
2567{
2568  bool retval = false;
2569  unsigned int i;
2570  bitmap_iterator bi;
2571  bitmap threaded_blocks;
2572  struct loop *loop;
2573
2574  if (!paths.exists ())
2575    return false;
2576
2577  threaded_blocks = BITMAP_ALLOC (NULL);
2578  memset (&thread_stats, 0, sizeof (thread_stats));
2579
2580  /* Jump-thread all FSM threads before other jump-threads.  */
2581  for (i = 0; i < paths.length ();)
2582    {
2583      vec<jump_thread_edge *> *path = paths[i];
2584      edge entry = (*path)[0]->e;
2585
2586      /* Only code-generate FSM jump-threads in this loop.  */
2587      if ((*path)[0]->type != EDGE_FSM_THREAD)
2588	{
2589	  i++;
2590	  continue;
2591	}
2592
2593      /* Do not jump-thread twice from the same block.  */
2594      if (bitmap_bit_p (threaded_blocks, entry->src->index)
2595	  /* Verify that the jump thread path is still valid: a
2596	     previous jump-thread may have changed the CFG, and
2597	     invalidated the current path.  */
2598	  || !valid_jump_thread_path (path))
2599	{
2600	  /* Remove invalid FSM jump-thread paths.  */
2601	  delete_jump_thread_path (path);
2602	  paths.unordered_remove (i);
2603	  continue;
2604	}
2605
2606      unsigned len = path->length ();
2607      edge exit = (*path)[len - 1]->e;
2608      basic_block *region = XNEWVEC (basic_block, len - 1);
2609
2610      for (unsigned int j = 0; j < len - 1; j++)
2611	region[j] = (*path)[j]->e->dest;
2612
2613      if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
2614	{
2615	  /* We do not update dominance info.  */
2616	  free_dominance_info (CDI_DOMINATORS);
2617	  bitmap_set_bit (threaded_blocks, entry->src->index);
2618	  retval = true;
2619	}
2620
2621      delete_jump_thread_path (path);
2622      paths.unordered_remove (i);
2623    }
2624
2625  /* Remove from PATHS all the jump-threads starting with an edge already
2626     jump-threaded.  */
2627  for (i = 0; i < paths.length ();)
2628    {
2629      vec<jump_thread_edge *> *path = paths[i];
2630      edge entry = (*path)[0]->e;
2631
2632      /* Do not jump-thread twice from the same block.  */
2633      if (bitmap_bit_p (threaded_blocks, entry->src->index))
2634	{
2635	  delete_jump_thread_path (path);
2636	  paths.unordered_remove (i);
2637	}
2638      else
2639	i++;
2640    }
2641
2642  bitmap_clear (threaded_blocks);
2643
2644  mark_threaded_blocks (threaded_blocks);
2645
2646  initialize_original_copy_tables ();
2647
2648  /* First perform the threading requests that do not affect
2649     loop structure.  */
2650  EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2651    {
2652      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2653
2654      if (EDGE_COUNT (bb->preds) > 0)
2655	retval |= thread_block (bb, true);
2656    }
2657
2658  /* Then perform the threading through loop headers.  We start with the
2659     innermost loop, so that the changes in cfg we perform won't affect
2660     further threading.  */
2661  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2662    {
2663      if (!loop->header
2664	  || !bitmap_bit_p (threaded_blocks, loop->header->index))
2665	continue;
2666
2667      retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2668    }
2669
2670  /* Any jump threading paths that are still attached to edges at this
2671     point must be one of two cases.
2672
2673     First, we could have a jump threading path which went from outside
2674     a loop to inside a loop that was ignored because a prior jump thread
2675     across a backedge was realized (which indirectly causes the loop
2676     above to ignore the latter thread).  We can detect these because the
2677     loop structures will be different and we do not currently try to
2678     optimize this case.
2679
2680     Second, we could be threading across a backedge to a point within the
2681     same loop.  This occurrs for the FSA/FSM optimization and we would
2682     like to optimize it.  However, we have to be very careful as this
2683     may completely scramble the loop structures, with the result being
2684     irreducible loops causing us to throw away our loop structure.
2685
2686     As a compromise for the latter case, if the thread path ends in
2687     a block where the last statement is a multiway branch, then go
2688     ahead and thread it, else ignore it.  */
2689  basic_block bb;
2690  edge e;
2691  FOR_EACH_BB_FN (bb, cfun)
2692    {
2693      /* If we do end up threading here, we can remove elements from
2694	 BB->preds.  Thus we can not use the FOR_EACH_EDGE iterator.  */
2695      for (edge_iterator ei = ei_start (bb->preds);
2696	   (e = ei_safe_edge (ei));)
2697	if (e->aux)
2698	  {
2699	    vec<jump_thread_edge *> *path = THREAD_PATH (e);
2700
2701	    /* Case 1, threading from outside to inside the loop
2702	       after we'd already threaded through the header.  */
2703	    if ((*path)[0]->e->dest->loop_father
2704		!= path->last ()->e->src->loop_father)
2705	      {
2706		delete_jump_thread_path (path);
2707		e->aux = NULL;
2708		ei_next (&ei);
2709	      }
2710	   else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2711	      {
2712		/* The code to thread through loop headers may have
2713		   split a block with jump threads attached to it.
2714
2715		   We can identify this with a disjoint jump threading
2716		   path.  If found, just remove it.  */
2717		for (unsigned int i = 0; i < path->length () - 1; i++)
2718		  if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2719		    {
2720		      delete_jump_thread_path (path);
2721		      e->aux = NULL;
2722		      ei_next (&ei);
2723		      break;
2724		    }
2725
2726		/* Our path is still valid, thread it.  */
2727	        if (e->aux)
2728		  {
2729		    if (thread_block ((*path)[0]->e->dest, false))
2730		      e->aux = NULL;
2731		    else
2732		      {
2733		        delete_jump_thread_path (path);
2734			e->aux = NULL;
2735			ei_next (&ei);
2736		      }
2737		  }
2738	      }
2739	   else
2740	      {
2741		delete_jump_thread_path (path);
2742		e->aux = NULL;
2743		ei_next (&ei);
2744	      }
2745 	  }
2746	else
2747	  ei_next (&ei);
2748    }
2749
2750  statistics_counter_event (cfun, "Jumps threaded",
2751			    thread_stats.num_threaded_edges);
2752
2753  free_original_copy_tables ();
2754
2755  BITMAP_FREE (threaded_blocks);
2756  threaded_blocks = NULL;
2757  paths.release ();
2758
2759  if (retval)
2760    loops_state_set (LOOPS_NEED_FIXUP);
2761
2762  return retval;
2763}
2764
2765/* Delete the jump threading path PATH.  We have to explcitly delete
2766   each entry in the vector, then the container.  */
2767
2768void
2769delete_jump_thread_path (vec<jump_thread_edge *> *path)
2770{
2771  for (unsigned int i = 0; i < path->length (); i++)
2772    delete (*path)[i];
2773  path->release();
2774  delete path;
2775}
2776
2777/* Register a jump threading opportunity.  We queue up all the jump
2778   threading opportunities discovered by a pass and update the CFG
2779   and SSA form all at once.
2780
2781   E is the edge we can thread, E2 is the new target edge, i.e., we
2782   are effectively recording that E->dest can be changed to E2->dest
2783   after fixing the SSA graph.  */
2784
2785void
2786register_jump_thread (vec<jump_thread_edge *> *path)
2787{
2788  if (!dbg_cnt (registered_jump_thread))
2789    {
2790      delete_jump_thread_path (path);
2791      return;
2792    }
2793
2794  /* First make sure there are no NULL outgoing edges on the jump threading
2795     path.  That can happen for jumping to a constant address.  */
2796  for (unsigned int i = 0; i < path->length (); i++)
2797    if ((*path)[i]->e == NULL)
2798      {
2799	if (dump_file && (dump_flags & TDF_DETAILS))
2800	  {
2801	    fprintf (dump_file,
2802		     "Found NULL edge in jump threading path.  Cancelling jump thread:\n");
2803	    dump_jump_thread_path (dump_file, *path, false);
2804	  }
2805
2806	delete_jump_thread_path (path);
2807	return;
2808      }
2809
2810  if (dump_file && (dump_flags & TDF_DETAILS))
2811    dump_jump_thread_path (dump_file, *path, true);
2812
2813  if (!paths.exists ())
2814    paths.create (5);
2815
2816  paths.safe_push (path);
2817}
2818