1/* Basic block reordering routines for the GNU compiler.
2   Copyright (C) 2000-2015 Free Software Foundation, Inc.
3
4   This file is part of GCC.
5
6   GCC is free software; you can redistribute it and/or modify it
7   under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3, or (at your option)
9   any later version.
10
11   GCC is distributed in the hope that it will be useful, but WITHOUT
12   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14   License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with GCC; see the file COPYING3.  If not see
18   <http://www.gnu.org/licenses/>.  */
19
20/* This (greedy) algorithm constructs traces in several rounds.
21   The construction starts from "seeds".  The seed for the first round
22   is the entry point of the function.  When there are more than one seed,
23   the one with the lowest key in the heap is selected first (see bb_to_key).
24   Then the algorithm repeatedly adds the most probable successor to the end
25   of a trace.  Finally it connects the traces.
26
27   There are two parameters: Branch Threshold and Exec Threshold.
28   If the probability of an edge to a successor of the current basic block is
29   lower than Branch Threshold or its frequency is lower than Exec Threshold,
30   then the successor will be the seed in one of the next rounds.
31   Each round has these parameters lower than the previous one.
32   The last round has to have these parameters set to zero so that the
33   remaining blocks are picked up.
34
35   The algorithm selects the most probable successor from all unvisited
36   successors and successors that have been added to this trace.
37   The other successors (that has not been "sent" to the next round) will be
38   other seeds for this round and the secondary traces will start from them.
39   If the successor has not been visited in this trace, it is added to the
40   trace (however, there is some heuristic for simple branches).
41   If the successor has been visited in this trace, a loop has been found.
42   If the loop has many iterations, the loop is rotated so that the source
43   block of the most probable edge going out of the loop is the last block
44   of the trace.
45   If the loop has few iterations and there is no edge from the last block of
46   the loop going out of the loop, the loop header is duplicated.
47
48   When connecting traces, the algorithm first checks whether there is an edge
49   from the last block of a trace to the first block of another trace.
50   When there are still some unconnected traces it checks whether there exists
51   a basic block BB such that BB is a successor of the last block of a trace
52   and BB is a predecessor of the first block of another trace.  In this case,
53   BB is duplicated, added at the end of the first trace and the traces are
54   connected through it.
55   The rest of traces are simply connected so there will be a jump to the
56   beginning of the rest of traces.
57
58   The above description is for the full algorithm, which is used when the
59   function is optimized for speed.  When the function is optimized for size,
60   in order to reduce long jumps and connect more fallthru edges, the
61   algorithm is modified as follows:
62   (1) Break long traces to short ones.  A trace is broken at a block that has
63   multiple predecessors/ successors during trace discovery.  When connecting
64   traces, only connect Trace n with Trace n + 1.  This change reduces most
65   long jumps compared with the above algorithm.
66   (2) Ignore the edge probability and frequency for fallthru edges.
67   (3) Keep the original order of blocks when there is no chance to fall
68   through.  We rely on the results of cfg_cleanup.
69
70   To implement the change for code size optimization, block's index is
71   selected as the key and all traces are found in one round.
72
73   References:
74
75   "Software Trace Cache"
76   A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
77   http://citeseer.nj.nec.com/15361.html
78
79*/
80
81#include "config.h"
82#include "system.h"
83#include "coretypes.h"
84#include "tm.h"
85#include "hash-set.h"
86#include "machmode.h"
87#include "vec.h"
88#include "double-int.h"
89#include "input.h"
90#include "alias.h"
91#include "symtab.h"
92#include "wide-int.h"
93#include "inchash.h"
94#include "tree.h"
95#include "rtl.h"
96#include "regs.h"
97#include "flags.h"
98#include "output.h"
99#include "target.h"
100#include "hashtab.h"
101#include "hard-reg-set.h"
102#include "function.h"
103#include "tm_p.h"
104#include "obstack.h"
105#include "statistics.h"
106#include "real.h"
107#include "fixed-value.h"
108#include "insn-config.h"
109#include "expmed.h"
110#include "dojump.h"
111#include "explow.h"
112#include "calls.h"
113#include "emit-rtl.h"
114#include "varasm.h"
115#include "stmt.h"
116#include "expr.h"
117#include "optabs.h"
118#include "params.h"
119#include "diagnostic-core.h"
120#include "toplev.h" /* user_defined_section_attribute */
121#include "tree-pass.h"
122#include "dominance.h"
123#include "cfg.h"
124#include "cfgrtl.h"
125#include "cfganal.h"
126#include "cfgbuild.h"
127#include "cfgcleanup.h"
128#include "predict.h"
129#include "basic-block.h"
130#include "df.h"
131#include "bb-reorder.h"
132#include "hash-map.h"
133#include "is-a.h"
134#include "plugin-api.h"
135#include "ipa-ref.h"
136#include "cgraph.h"
137#include "except.h"
138#include "fibonacci_heap.h"
139
140/* The number of rounds.  In most cases there will only be 4 rounds, but
141   when partitioning hot and cold basic blocks into separate sections of
142   the object file there will be an extra round.  */
143#define N_ROUNDS 5
144
145/* Stubs in case we don't have a return insn.
146   We have to check at run time too, not only compile time.  */
147
148#ifndef HAVE_return
149#define HAVE_return 0
150#define gen_return() NULL_RTX
151#endif
152
153
154struct target_bb_reorder default_target_bb_reorder;
155#if SWITCHABLE_TARGET
156struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
157#endif
158
159#define uncond_jump_length \
160  (this_target_bb_reorder->x_uncond_jump_length)
161
162/* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
163static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
164
165/* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
166static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
167
168/* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
169   block the edge destination is not duplicated while connecting traces.  */
170#define DUPLICATION_THRESHOLD 100
171
172typedef fibonacci_heap <long, basic_block_def> bb_heap_t;
173typedef fibonacci_node <long, basic_block_def> bb_heap_node_t;
174
175/* Structure to hold needed information for each basic block.  */
176typedef struct bbro_basic_block_data_def
177{
178  /* Which trace is the bb start of (-1 means it is not a start of any).  */
179  int start_of_trace;
180
181  /* Which trace is the bb end of (-1 means it is not an end of any).  */
182  int end_of_trace;
183
184  /* Which trace is the bb in?  */
185  int in_trace;
186
187  /* Which trace was this bb visited in?  */
188  int visited;
189
190  /* Cached maximum frequency of interesting incoming edges.
191     Minus one means not yet computed.  */
192  int priority;
193
194  /* Which heap is BB in (if any)?  */
195  bb_heap_t *heap;
196
197  /* Which heap node is BB in (if any)?  */
198  bb_heap_node_t *node;
199} bbro_basic_block_data;
200
201/* The current size of the following dynamic array.  */
202static int array_size;
203
204/* The array which holds needed information for basic blocks.  */
205static bbro_basic_block_data *bbd;
206
207/* To avoid frequent reallocation the size of arrays is greater than needed,
208   the number of elements is (not less than) 1.25 * size_wanted.  */
209#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
210
211/* Free the memory and set the pointer to NULL.  */
212#define FREE(P) (gcc_assert (P), free (P), P = 0)
213
214/* Structure for holding information about a trace.  */
215struct trace
216{
217  /* First and last basic block of the trace.  */
218  basic_block first, last;
219
220  /* The round of the STC creation which this trace was found in.  */
221  int round;
222
223  /* The length (i.e. the number of basic blocks) of the trace.  */
224  int length;
225};
226
227/* Maximum frequency and count of one of the entry blocks.  */
228static int max_entry_frequency;
229static gcov_type max_entry_count;
230
231/* Local function prototypes.  */
232static void find_traces (int *, struct trace *);
233static basic_block rotate_loop (edge, struct trace *, int);
234static void mark_bb_visited (basic_block, int);
235static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
236				 int, bb_heap_t **, int);
237static basic_block copy_bb (basic_block, edge, basic_block, int);
238static long bb_to_key (basic_block);
239static bool better_edge_p (const_basic_block, const_edge, int, int, int, int,
240			   const_edge);
241static bool connect_better_edge_p (const_edge, bool, int, const_edge,
242				   struct trace *);
243static void connect_traces (int, struct trace *);
244static bool copy_bb_p (const_basic_block, int);
245static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
246
247/* Return the trace number in which BB was visited.  */
248
249static int
250bb_visited_trace (const_basic_block bb)
251{
252  gcc_assert (bb->index < array_size);
253  return bbd[bb->index].visited;
254}
255
256/* This function marks BB that it was visited in trace number TRACE.  */
257
258static void
259mark_bb_visited (basic_block bb, int trace)
260{
261  bbd[bb->index].visited = trace;
262  if (bbd[bb->index].heap)
263    {
264      bbd[bb->index].heap->delete_node (bbd[bb->index].node);
265      bbd[bb->index].heap = NULL;
266      bbd[bb->index].node = NULL;
267    }
268}
269
270/* Check to see if bb should be pushed into the next round of trace
271   collections or not.  Reasons for pushing the block forward are 1).
272   If the block is cold, we are doing partitioning, and there will be
273   another round (cold partition blocks are not supposed to be
274   collected into traces until the very last round); or 2). There will
275   be another round, and the basic block is not "hot enough" for the
276   current round of trace collection.  */
277
278static bool
279push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
280		      int exec_th, gcov_type count_th)
281{
282  bool there_exists_another_round;
283  bool block_not_hot_enough;
284
285  there_exists_another_round = round < number_of_rounds - 1;
286
287  block_not_hot_enough = (bb->frequency < exec_th
288			  || bb->count < count_th
289			  || probably_never_executed_bb_p (cfun, bb));
290
291  if (there_exists_another_round
292      && block_not_hot_enough)
293    return true;
294  else
295    return false;
296}
297
298/* Find the traces for Software Trace Cache.  Chain each trace through
299   RBI()->next.  Store the number of traces to N_TRACES and description of
300   traces to TRACES.  */
301
302static void
303find_traces (int *n_traces, struct trace *traces)
304{
305  int i;
306  int number_of_rounds;
307  edge e;
308  edge_iterator ei;
309  bb_heap_t *heap = new bb_heap_t (LONG_MIN);
310
311  /* Add one extra round of trace collection when partitioning hot/cold
312     basic blocks into separate sections.  The last round is for all the
313     cold blocks (and ONLY the cold blocks).  */
314
315  number_of_rounds = N_ROUNDS - 1;
316
317  /* Insert entry points of function into heap.  */
318  max_entry_frequency = 0;
319  max_entry_count = 0;
320  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
321    {
322      bbd[e->dest->index].heap = heap;
323      bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest);
324      if (e->dest->frequency > max_entry_frequency)
325	max_entry_frequency = e->dest->frequency;
326      if (e->dest->count > max_entry_count)
327	max_entry_count = e->dest->count;
328    }
329
330  /* Find the traces.  */
331  for (i = 0; i < number_of_rounds; i++)
332    {
333      gcov_type count_threshold;
334
335      if (dump_file)
336	fprintf (dump_file, "STC - round %d\n", i + 1);
337
338      if (max_entry_count < INT_MAX / 1000)
339	count_threshold = max_entry_count * exec_threshold[i] / 1000;
340      else
341	count_threshold = max_entry_count / 1000 * exec_threshold[i];
342
343      find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
344			   max_entry_frequency * exec_threshold[i] / 1000,
345			   count_threshold, traces, n_traces, i, &heap,
346			   number_of_rounds);
347    }
348  delete heap;
349
350  if (dump_file)
351    {
352      for (i = 0; i < *n_traces; i++)
353	{
354	  basic_block bb;
355	  fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
356		   traces[i].round + 1);
357	  for (bb = traces[i].first;
358	       bb != traces[i].last;
359	       bb = (basic_block) bb->aux)
360	    fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
361	  fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
362	}
363      fflush (dump_file);
364    }
365}
366
367/* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
368   (with sequential number TRACE_N).  */
369
370static basic_block
371rotate_loop (edge back_edge, struct trace *trace, int trace_n)
372{
373  basic_block bb;
374
375  /* Information about the best end (end after rotation) of the loop.  */
376  basic_block best_bb = NULL;
377  edge best_edge = NULL;
378  int best_freq = -1;
379  gcov_type best_count = -1;
380  /* The best edge is preferred when its destination is not visited yet
381     or is a start block of some trace.  */
382  bool is_preferred = false;
383
384  /* Find the most frequent edge that goes out from current trace.  */
385  bb = back_edge->dest;
386  do
387    {
388      edge e;
389      edge_iterator ei;
390
391      FOR_EACH_EDGE (e, ei, bb->succs)
392	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
393	    && bb_visited_trace (e->dest) != trace_n
394	    && (e->flags & EDGE_CAN_FALLTHRU)
395	    && !(e->flags & EDGE_COMPLEX))
396	{
397	  if (is_preferred)
398	    {
399	      /* The best edge is preferred.  */
400	      if (!bb_visited_trace (e->dest)
401		  || bbd[e->dest->index].start_of_trace >= 0)
402		{
403		  /* The current edge E is also preferred.  */
404		  int freq = EDGE_FREQUENCY (e);
405		  if (freq > best_freq || e->count > best_count)
406		    {
407		      best_freq = freq;
408		      best_count = e->count;
409		      best_edge = e;
410		      best_bb = bb;
411		    }
412		}
413	    }
414	  else
415	    {
416	      if (!bb_visited_trace (e->dest)
417		  || bbd[e->dest->index].start_of_trace >= 0)
418		{
419		  /* The current edge E is preferred.  */
420		  is_preferred = true;
421		  best_freq = EDGE_FREQUENCY (e);
422		  best_count = e->count;
423		  best_edge = e;
424		  best_bb = bb;
425		}
426	      else
427		{
428		  int freq = EDGE_FREQUENCY (e);
429		  if (!best_edge || freq > best_freq || e->count > best_count)
430		    {
431		      best_freq = freq;
432		      best_count = e->count;
433		      best_edge = e;
434		      best_bb = bb;
435		    }
436		}
437	    }
438	}
439      bb = (basic_block) bb->aux;
440    }
441  while (bb != back_edge->dest);
442
443  if (best_bb)
444    {
445      /* Rotate the loop so that the BEST_EDGE goes out from the last block of
446	 the trace.  */
447      if (back_edge->dest == trace->first)
448	{
449	  trace->first = (basic_block) best_bb->aux;
450	}
451      else
452	{
453	  basic_block prev_bb;
454
455	  for (prev_bb = trace->first;
456	       prev_bb->aux != back_edge->dest;
457	       prev_bb = (basic_block) prev_bb->aux)
458	    ;
459	  prev_bb->aux = best_bb->aux;
460
461	  /* Try to get rid of uncond jump to cond jump.  */
462	  if (single_succ_p (prev_bb))
463	    {
464	      basic_block header = single_succ (prev_bb);
465
466	      /* Duplicate HEADER if it is a small block containing cond jump
467		 in the end.  */
468	      if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
469		  && !CROSSING_JUMP_P (BB_END (header)))
470		copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
471	    }
472	}
473    }
474  else
475    {
476      /* We have not found suitable loop tail so do no rotation.  */
477      best_bb = back_edge->src;
478    }
479  best_bb->aux = NULL;
480  return best_bb;
481}
482
483/* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH i.e. do
484   not include basic blocks whose probability is lower than BRANCH_TH or whose
485   frequency is lower than EXEC_TH into traces (or whose count is lower than
486   COUNT_TH).  Store the new traces into TRACES and modify the number of
487   traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
488   The function expects starting basic blocks to be in *HEAP and will delete
489   *HEAP and store starting points for the next round into new *HEAP.  */
490
491static void
492find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
493		     struct trace *traces, int *n_traces, int round,
494		     bb_heap_t **heap, int number_of_rounds)
495{
496  /* Heap for discarded basic blocks which are possible starting points for
497     the next round.  */
498  bb_heap_t *new_heap = new bb_heap_t (LONG_MIN);
499  bool for_size = optimize_function_for_size_p (cfun);
500
501  while (!(*heap)->empty ())
502    {
503      basic_block bb;
504      struct trace *trace;
505      edge best_edge, e;
506      long key;
507      edge_iterator ei;
508
509      bb = (*heap)->extract_min ();
510      bbd[bb->index].heap = NULL;
511      bbd[bb->index].node = NULL;
512
513      if (dump_file)
514	fprintf (dump_file, "Getting bb %d\n", bb->index);
515
516      /* If the BB's frequency is too low, send BB to the next round.  When
517	 partitioning hot/cold blocks into separate sections, make sure all
518	 the cold blocks (and ONLY the cold blocks) go into the (extra) final
519	 round.  When optimizing for size, do not push to next round.  */
520
521      if (!for_size
522	  && push_to_next_round_p (bb, round, number_of_rounds, exec_th,
523				   count_th))
524	{
525	  int key = bb_to_key (bb);
526	  bbd[bb->index].heap = new_heap;
527	  bbd[bb->index].node = new_heap->insert (key, bb);
528
529	  if (dump_file)
530	    fprintf (dump_file,
531		     "  Possible start point of next round: %d (key: %d)\n",
532		     bb->index, key);
533	  continue;
534	}
535
536      trace = traces + *n_traces;
537      trace->first = bb;
538      trace->round = round;
539      trace->length = 0;
540      bbd[bb->index].in_trace = *n_traces;
541      (*n_traces)++;
542
543      do
544	{
545	  int prob, freq;
546	  bool ends_in_call;
547
548	  /* The probability and frequency of the best edge.  */
549	  int best_prob = INT_MIN / 2;
550	  int best_freq = INT_MIN / 2;
551
552	  best_edge = NULL;
553	  mark_bb_visited (bb, *n_traces);
554	  trace->length++;
555
556	  if (dump_file)
557	    fprintf (dump_file, "Basic block %d was visited in trace %d\n",
558		     bb->index, *n_traces - 1);
559
560	  ends_in_call = block_ends_with_call_p (bb);
561
562	  /* Select the successor that will be placed after BB.  */
563	  FOR_EACH_EDGE (e, ei, bb->succs)
564	    {
565	      gcc_assert (!(e->flags & EDGE_FAKE));
566
567	      if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
568		continue;
569
570	      if (bb_visited_trace (e->dest)
571		  && bb_visited_trace (e->dest) != *n_traces)
572		continue;
573
574	      if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
575		continue;
576
577	      prob = e->probability;
578	      freq = e->dest->frequency;
579
580	      /* The only sensible preference for a call instruction is the
581		 fallthru edge.  Don't bother selecting anything else.  */
582	      if (ends_in_call)
583		{
584		  if (e->flags & EDGE_CAN_FALLTHRU)
585		    {
586		      best_edge = e;
587		      best_prob = prob;
588		      best_freq = freq;
589		    }
590		  continue;
591		}
592
593	      /* Edge that cannot be fallthru or improbable or infrequent
594		 successor (i.e. it is unsuitable successor).  When optimizing
595		 for size, ignore the probability and frequency.  */
596	      if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
597		  || ((prob < branch_th || EDGE_FREQUENCY (e) < exec_th
598		      || e->count < count_th) && (!for_size)))
599		continue;
600
601	      /* If partitioning hot/cold basic blocks, don't consider edges
602		 that cross section boundaries.  */
603
604	      if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
605				 best_edge))
606		{
607		  best_edge = e;
608		  best_prob = prob;
609		  best_freq = freq;
610		}
611	    }
612
613	  /* If the best destination has multiple predecessors, and can be
614	     duplicated cheaper than a jump, don't allow it to be added
615	     to a trace.  We'll duplicate it when connecting traces.  */
616	  if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
617	      && copy_bb_p (best_edge->dest, 0))
618	    best_edge = NULL;
619
620	  /* If the best destination has multiple successors or predecessors,
621	     don't allow it to be added when optimizing for size.  This makes
622	     sure predecessors with smaller index are handled before the best
623	     destinarion.  It breaks long trace and reduces long jumps.
624
625	     Take if-then-else as an example.
626		A
627	       / \
628	      B   C
629	       \ /
630		D
631	     If we do not remove the best edge B->D/C->D, the final order might
632	     be A B D ... C.  C is at the end of the program.  If D's successors
633	     and D are complicated, might need long jumps for A->C and C->D.
634	     Similar issue for order: A C D ... B.
635
636	     After removing the best edge, the final result will be ABCD/ ACBD.
637	     It does not add jump compared with the previous order.  But it
638	     reduces the possibility of long jumps.  */
639	  if (best_edge && for_size
640	      && (EDGE_COUNT (best_edge->dest->succs) > 1
641		 || EDGE_COUNT (best_edge->dest->preds) > 1))
642	    best_edge = NULL;
643
644	  /* Add all non-selected successors to the heaps.  */
645	  FOR_EACH_EDGE (e, ei, bb->succs)
646	    {
647	      if (e == best_edge
648		  || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
649		  || bb_visited_trace (e->dest))
650		continue;
651
652	      key = bb_to_key (e->dest);
653
654	      if (bbd[e->dest->index].heap)
655		{
656		  /* E->DEST is already in some heap.  */
657		  if (key != bbd[e->dest->index].node->get_key ())
658		    {
659		      if (dump_file)
660			{
661			  fprintf (dump_file,
662				   "Changing key for bb %d from %ld to %ld.\n",
663				   e->dest->index,
664				   (long) bbd[e->dest->index].node->get_key (),
665				   key);
666			}
667		      bbd[e->dest->index].heap->replace_key
668		        (bbd[e->dest->index].node, key);
669		    }
670		}
671	      else
672		{
673		  bb_heap_t *which_heap = *heap;
674
675		  prob = e->probability;
676		  freq = EDGE_FREQUENCY (e);
677
678		  if (!(e->flags & EDGE_CAN_FALLTHRU)
679		      || (e->flags & EDGE_COMPLEX)
680		      || prob < branch_th || freq < exec_th
681		      || e->count < count_th)
682		    {
683		      /* When partitioning hot/cold basic blocks, make sure
684			 the cold blocks (and only the cold blocks) all get
685			 pushed to the last round of trace collection.  When
686			 optimizing for size, do not push to next round.  */
687
688		      if (!for_size && push_to_next_round_p (e->dest, round,
689							     number_of_rounds,
690							     exec_th, count_th))
691			which_heap = new_heap;
692		    }
693
694		  bbd[e->dest->index].heap = which_heap;
695		  bbd[e->dest->index].node = which_heap->insert (key, e->dest);
696
697		  if (dump_file)
698		    {
699		      fprintf (dump_file,
700			       "  Possible start of %s round: %d (key: %ld)\n",
701			       (which_heap == new_heap) ? "next" : "this",
702			       e->dest->index, (long) key);
703		    }
704
705		}
706	    }
707
708	  if (best_edge) /* Suitable successor was found.  */
709	    {
710	      if (bb_visited_trace (best_edge->dest) == *n_traces)
711		{
712		  /* We do nothing with one basic block loops.  */
713		  if (best_edge->dest != bb)
714		    {
715		      if (EDGE_FREQUENCY (best_edge)
716			  > 4 * best_edge->dest->frequency / 5)
717			{
718			  /* The loop has at least 4 iterations.  If the loop
719			     header is not the first block of the function
720			     we can rotate the loop.  */
721
722			  if (best_edge->dest
723			      != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
724			    {
725			      if (dump_file)
726				{
727				  fprintf (dump_file,
728					   "Rotating loop %d - %d\n",
729					   best_edge->dest->index, bb->index);
730				}
731			      bb->aux = best_edge->dest;
732			      bbd[best_edge->dest->index].in_trace =
733							     (*n_traces) - 1;
734			      bb = rotate_loop (best_edge, trace, *n_traces);
735			    }
736			}
737		      else
738			{
739			  /* The loop has less than 4 iterations.  */
740
741			  if (single_succ_p (bb)
742			      && copy_bb_p (best_edge->dest,
743			      		    optimize_edge_for_speed_p
744			      		    (best_edge)))
745			    {
746			      bb = copy_bb (best_edge->dest, best_edge, bb,
747					    *n_traces);
748			      trace->length++;
749			    }
750			}
751		    }
752
753		  /* Terminate the trace.  */
754		  break;
755		}
756	      else
757		{
758		  /* Check for a situation
759
760		    A
761		   /|
762		  B |
763		   \|
764		    C
765
766		  where
767		  EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
768		    >= EDGE_FREQUENCY (AC).
769		  (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
770		  Best ordering is then A B C.
771
772		  When optimizing for size, A B C is always the best order.
773
774		  This situation is created for example by:
775
776		  if (A) B;
777		  C;
778
779		  */
780
781		  FOR_EACH_EDGE (e, ei, bb->succs)
782		    if (e != best_edge
783			&& (e->flags & EDGE_CAN_FALLTHRU)
784			&& !(e->flags & EDGE_COMPLEX)
785			&& !bb_visited_trace (e->dest)
786			&& single_pred_p (e->dest)
787			&& !(e->flags & EDGE_CROSSING)
788			&& single_succ_p (e->dest)
789			&& (single_succ_edge (e->dest)->flags
790			    & EDGE_CAN_FALLTHRU)
791			&& !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
792			&& single_succ (e->dest) == best_edge->dest
793			&& (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)
794			    || for_size))
795		      {
796			best_edge = e;
797			if (dump_file)
798			  fprintf (dump_file, "Selecting BB %d\n",
799				   best_edge->dest->index);
800			break;
801		      }
802
803		  bb->aux = best_edge->dest;
804		  bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
805		  bb = best_edge->dest;
806		}
807	    }
808	}
809      while (best_edge);
810      trace->last = bb;
811      bbd[trace->first->index].start_of_trace = *n_traces - 1;
812      if (bbd[trace->last->index].end_of_trace != *n_traces - 1)
813	{
814	  bbd[trace->last->index].end_of_trace = *n_traces - 1;
815	  /* Update the cached maximum frequency for interesting predecessor
816	     edges for successors of the new trace end.  */
817	  FOR_EACH_EDGE (e, ei, trace->last->succs)
818	    if (EDGE_FREQUENCY (e) > bbd[e->dest->index].priority)
819	      bbd[e->dest->index].priority = EDGE_FREQUENCY (e);
820	}
821
822      /* The trace is terminated so we have to recount the keys in heap
823	 (some block can have a lower key because now one of its predecessors
824	 is an end of the trace).  */
825      FOR_EACH_EDGE (e, ei, bb->succs)
826	{
827	  if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
828	      || bb_visited_trace (e->dest))
829	    continue;
830
831	  if (bbd[e->dest->index].heap)
832	    {
833	      key = bb_to_key (e->dest);
834	      if (key != bbd[e->dest->index].node->get_key ())
835		{
836		  if (dump_file)
837		    {
838		      fprintf (dump_file,
839			       "Changing key for bb %d from %ld to %ld.\n",
840			       e->dest->index,
841			       (long) bbd[e->dest->index].node->get_key (), key);
842		    }
843		  bbd[e->dest->index].heap->replace_key
844		    (bbd[e->dest->index].node, key);
845		}
846	    }
847	}
848    }
849
850  delete (*heap);
851
852  /* "Return" the new heap.  */
853  *heap = new_heap;
854}
855
856/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
857   it to trace after BB, mark OLD_BB visited and update pass' data structures
858   (TRACE is a number of trace which OLD_BB is duplicated to).  */
859
860static basic_block
861copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
862{
863  basic_block new_bb;
864
865  new_bb = duplicate_block (old_bb, e, bb);
866  BB_COPY_PARTITION (new_bb, old_bb);
867
868  gcc_assert (e->dest == new_bb);
869
870  if (dump_file)
871    fprintf (dump_file,
872	     "Duplicated bb %d (created bb %d)\n",
873	     old_bb->index, new_bb->index);
874
875  if (new_bb->index >= array_size
876      || last_basic_block_for_fn (cfun) > array_size)
877    {
878      int i;
879      int new_size;
880
881      new_size = MAX (last_basic_block_for_fn (cfun), new_bb->index + 1);
882      new_size = GET_ARRAY_SIZE (new_size);
883      bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
884      for (i = array_size; i < new_size; i++)
885	{
886	  bbd[i].start_of_trace = -1;
887	  bbd[i].end_of_trace = -1;
888	  bbd[i].in_trace = -1;
889	  bbd[i].visited = 0;
890	  bbd[i].priority = -1;
891	  bbd[i].heap = NULL;
892	  bbd[i].node = NULL;
893	}
894      array_size = new_size;
895
896      if (dump_file)
897	{
898	  fprintf (dump_file,
899		   "Growing the dynamic array to %d elements.\n",
900		   array_size);
901	}
902    }
903
904  gcc_assert (!bb_visited_trace (e->dest));
905  mark_bb_visited (new_bb, trace);
906  new_bb->aux = bb->aux;
907  bb->aux = new_bb;
908
909  bbd[new_bb->index].in_trace = trace;
910
911  return new_bb;
912}
913
914/* Compute and return the key (for the heap) of the basic block BB.  */
915
916static long
917bb_to_key (basic_block bb)
918{
919  edge e;
920  edge_iterator ei;
921
922  /* Use index as key to align with its original order.  */
923  if (optimize_function_for_size_p (cfun))
924    return bb->index;
925
926  /* Do not start in probably never executed blocks.  */
927
928  if (BB_PARTITION (bb) == BB_COLD_PARTITION
929      || probably_never_executed_bb_p (cfun, bb))
930    return BB_FREQ_MAX;
931
932  /* Prefer blocks whose predecessor is an end of some trace
933     or whose predecessor edge is EDGE_DFS_BACK.  */
934  int priority = bbd[bb->index].priority;
935  if (priority == -1)
936    {
937      priority = 0;
938      FOR_EACH_EDGE (e, ei, bb->preds)
939	{
940	  if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
941	       && bbd[e->src->index].end_of_trace >= 0)
942	      || (e->flags & EDGE_DFS_BACK))
943	    {
944	      int edge_freq = EDGE_FREQUENCY (e);
945
946	      if (edge_freq > priority)
947		priority = edge_freq;
948	    }
949	}
950      bbd[bb->index].priority = priority;
951    }
952
953  if (priority)
954    /* The block with priority should have significantly lower key.  */
955    return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
956
957  return -bb->frequency;
958}
959
960/* Return true when the edge E from basic block BB is better than the temporary
961   best edge (details are in function).  The probability of edge E is PROB. The
962   frequency of the successor is FREQ.  The current best probability is
963   BEST_PROB, the best frequency is BEST_FREQ.
964   The edge is considered to be equivalent when PROB does not differ much from
965   BEST_PROB; similarly for frequency.  */
966
967static bool
968better_edge_p (const_basic_block bb, const_edge e, int prob, int freq,
969	       int best_prob, int best_freq, const_edge cur_best_edge)
970{
971  bool is_better_edge;
972
973  /* The BEST_* values do not have to be best, but can be a bit smaller than
974     maximum values.  */
975  int diff_prob = best_prob / 10;
976  int diff_freq = best_freq / 10;
977
978  /* The smaller one is better to keep the original order.  */
979  if (optimize_function_for_size_p (cfun))
980    return !cur_best_edge
981	   || cur_best_edge->dest->index > e->dest->index;
982
983  if (prob > best_prob + diff_prob)
984    /* The edge has higher probability than the temporary best edge.  */
985    is_better_edge = true;
986  else if (prob < best_prob - diff_prob)
987    /* The edge has lower probability than the temporary best edge.  */
988    is_better_edge = false;
989  else if (freq < best_freq - diff_freq)
990    /* The edge and the temporary best edge  have almost equivalent
991       probabilities.  The higher frequency of a successor now means
992       that there is another edge going into that successor.
993       This successor has lower frequency so it is better.  */
994    is_better_edge = true;
995  else if (freq > best_freq + diff_freq)
996    /* This successor has higher frequency so it is worse.  */
997    is_better_edge = false;
998  else if (e->dest->prev_bb == bb)
999    /* The edges have equivalent probabilities and the successors
1000       have equivalent frequencies.  Select the previous successor.  */
1001    is_better_edge = true;
1002  else
1003    is_better_edge = false;
1004
1005  /* If we are doing hot/cold partitioning, make sure that we always favor
1006     non-crossing edges over crossing edges.  */
1007
1008  if (!is_better_edge
1009      && flag_reorder_blocks_and_partition
1010      && cur_best_edge
1011      && (cur_best_edge->flags & EDGE_CROSSING)
1012      && !(e->flags & EDGE_CROSSING))
1013    is_better_edge = true;
1014
1015  return is_better_edge;
1016}
1017
1018/* Return true when the edge E is better than the temporary best edge
1019   CUR_BEST_EDGE.  If SRC_INDEX_P is true, the function compares the src bb of
1020   E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
1021   BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
1022   TRACES record the information about traces.
1023   When optimizing for size, the edge with smaller index is better.
1024   When optimizing for speed, the edge with bigger probability or longer trace
1025   is better.  */
1026
1027static bool
1028connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
1029		       const_edge cur_best_edge, struct trace *traces)
1030{
1031  int e_index;
1032  int b_index;
1033  bool is_better_edge;
1034
1035  if (!cur_best_edge)
1036    return true;
1037
1038  if (optimize_function_for_size_p (cfun))
1039    {
1040      e_index = src_index_p ? e->src->index : e->dest->index;
1041      b_index = src_index_p ? cur_best_edge->src->index
1042			      : cur_best_edge->dest->index;
1043      /* The smaller one is better to keep the original order.  */
1044      return b_index > e_index;
1045    }
1046
1047  if (src_index_p)
1048    {
1049      e_index = e->src->index;
1050
1051      if (e->probability > cur_best_edge->probability)
1052	/* The edge has higher probability than the temporary best edge.  */
1053	is_better_edge = true;
1054      else if (e->probability < cur_best_edge->probability)
1055	/* The edge has lower probability than the temporary best edge.  */
1056	is_better_edge = false;
1057      else if (traces[bbd[e_index].end_of_trace].length > best_len)
1058	/* The edge and the temporary best edge have equivalent probabilities.
1059	   The edge with longer trace is better.  */
1060	is_better_edge = true;
1061      else
1062	is_better_edge = false;
1063    }
1064  else
1065    {
1066      e_index = e->dest->index;
1067
1068      if (e->probability > cur_best_edge->probability)
1069	/* The edge has higher probability than the temporary best edge.  */
1070	is_better_edge = true;
1071      else if (e->probability < cur_best_edge->probability)
1072	/* The edge has lower probability than the temporary best edge.  */
1073	is_better_edge = false;
1074      else if (traces[bbd[e_index].start_of_trace].length > best_len)
1075	/* The edge and the temporary best edge have equivalent probabilities.
1076	   The edge with longer trace is better.  */
1077	is_better_edge = true;
1078      else
1079	is_better_edge = false;
1080    }
1081
1082  return is_better_edge;
1083}
1084
1085/* Connect traces in array TRACES, N_TRACES is the count of traces.  */
1086
1087static void
1088connect_traces (int n_traces, struct trace *traces)
1089{
1090  int i;
1091  bool *connected;
1092  bool two_passes;
1093  int last_trace;
1094  int current_pass;
1095  int current_partition;
1096  int freq_threshold;
1097  gcov_type count_threshold;
1098  bool for_size = optimize_function_for_size_p (cfun);
1099
1100  freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
1101  if (max_entry_count < INT_MAX / 1000)
1102    count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
1103  else
1104    count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
1105
1106  connected = XCNEWVEC (bool, n_traces);
1107  last_trace = -1;
1108  current_pass = 1;
1109  current_partition = BB_PARTITION (traces[0].first);
1110  two_passes = false;
1111
1112  if (crtl->has_bb_partition)
1113    for (i = 0; i < n_traces && !two_passes; i++)
1114      if (BB_PARTITION (traces[0].first)
1115	  != BB_PARTITION (traces[i].first))
1116	two_passes = true;
1117
1118  for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1119    {
1120      int t = i;
1121      int t2;
1122      edge e, best;
1123      int best_len;
1124
1125      if (i >= n_traces)
1126	{
1127	  gcc_assert (two_passes && current_pass == 1);
1128	  i = 0;
1129	  t = i;
1130	  current_pass = 2;
1131	  if (current_partition == BB_HOT_PARTITION)
1132	    current_partition = BB_COLD_PARTITION;
1133	  else
1134	    current_partition = BB_HOT_PARTITION;
1135	}
1136
1137      if (connected[t])
1138	continue;
1139
1140      if (two_passes
1141	  && BB_PARTITION (traces[t].first) != current_partition)
1142	continue;
1143
1144      connected[t] = true;
1145
1146      /* Find the predecessor traces.  */
1147      for (t2 = t; t2 > 0;)
1148	{
1149	  edge_iterator ei;
1150	  best = NULL;
1151	  best_len = 0;
1152	  FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1153	    {
1154	      int si = e->src->index;
1155
1156	      if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1157		  && (e->flags & EDGE_CAN_FALLTHRU)
1158		  && !(e->flags & EDGE_COMPLEX)
1159		  && bbd[si].end_of_trace >= 0
1160		  && !connected[bbd[si].end_of_trace]
1161		  && (BB_PARTITION (e->src) == current_partition)
1162		  && connect_better_edge_p (e, true, best_len, best, traces))
1163		{
1164		  best = e;
1165		  best_len = traces[bbd[si].end_of_trace].length;
1166		}
1167	    }
1168	  if (best)
1169	    {
1170	      best->src->aux = best->dest;
1171	      t2 = bbd[best->src->index].end_of_trace;
1172	      connected[t2] = true;
1173
1174	      if (dump_file)
1175		{
1176		  fprintf (dump_file, "Connection: %d %d\n",
1177			   best->src->index, best->dest->index);
1178		}
1179	    }
1180	  else
1181	    break;
1182	}
1183
1184      if (last_trace >= 0)
1185	traces[last_trace].last->aux = traces[t2].first;
1186      last_trace = t;
1187
1188      /* Find the successor traces.  */
1189      while (1)
1190	{
1191	  /* Find the continuation of the chain.  */
1192	  edge_iterator ei;
1193	  best = NULL;
1194	  best_len = 0;
1195	  FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1196	    {
1197	      int di = e->dest->index;
1198
1199	      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1200		  && (e->flags & EDGE_CAN_FALLTHRU)
1201		  && !(e->flags & EDGE_COMPLEX)
1202		  && bbd[di].start_of_trace >= 0
1203		  && !connected[bbd[di].start_of_trace]
1204		  && (BB_PARTITION (e->dest) == current_partition)
1205		  && connect_better_edge_p (e, false, best_len, best, traces))
1206		{
1207		  best = e;
1208		  best_len = traces[bbd[di].start_of_trace].length;
1209		}
1210	    }
1211
1212	  if (for_size)
1213	    {
1214	      if (!best)
1215		/* Stop finding the successor traces.  */
1216		break;
1217
1218	      /* It is OK to connect block n with block n + 1 or a block
1219		 before n.  For others, only connect to the loop header.  */
1220	      if (best->dest->index > (traces[t].last->index + 1))
1221		{
1222		  int count = EDGE_COUNT (best->dest->preds);
1223
1224		  FOR_EACH_EDGE (e, ei, best->dest->preds)
1225		    if (e->flags & EDGE_DFS_BACK)
1226		      count--;
1227
1228		  /* If dest has multiple predecessors, skip it.  We expect
1229		     that one predecessor with smaller index connects with it
1230		     later.  */
1231		  if (count != 1)
1232		    break;
1233		}
1234
1235	      /* Only connect Trace n with Trace n + 1.  It is conservative
1236		 to keep the order as close as possible to the original order.
1237		 It also helps to reduce long jumps.  */
1238	      if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1239		break;
1240
1241	      if (dump_file)
1242		fprintf (dump_file, "Connection: %d %d\n",
1243			 best->src->index, best->dest->index);
1244
1245	      t = bbd[best->dest->index].start_of_trace;
1246	      traces[last_trace].last->aux = traces[t].first;
1247	      connected[t] = true;
1248	      last_trace = t;
1249	    }
1250	  else if (best)
1251	    {
1252	      if (dump_file)
1253		{
1254		  fprintf (dump_file, "Connection: %d %d\n",
1255			   best->src->index, best->dest->index);
1256		}
1257	      t = bbd[best->dest->index].start_of_trace;
1258	      traces[last_trace].last->aux = traces[t].first;
1259	      connected[t] = true;
1260	      last_trace = t;
1261	    }
1262	  else
1263	    {
1264	      /* Try to connect the traces by duplication of 1 block.  */
1265	      edge e2;
1266	      basic_block next_bb = NULL;
1267	      bool try_copy = false;
1268
1269	      FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1270		if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1271		    && (e->flags & EDGE_CAN_FALLTHRU)
1272		    && !(e->flags & EDGE_COMPLEX)
1273		    && (!best || e->probability > best->probability))
1274		  {
1275		    edge_iterator ei;
1276		    edge best2 = NULL;
1277		    int best2_len = 0;
1278
1279		    /* If the destination is a start of a trace which is only
1280		       one block long, then no need to search the successor
1281		       blocks of the trace.  Accept it.  */
1282		    if (bbd[e->dest->index].start_of_trace >= 0
1283			&& traces[bbd[e->dest->index].start_of_trace].length
1284			   == 1)
1285		      {
1286			best = e;
1287			try_copy = true;
1288			continue;
1289		      }
1290
1291		    FOR_EACH_EDGE (e2, ei, e->dest->succs)
1292		      {
1293			int di = e2->dest->index;
1294
1295			if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1296			    || ((e2->flags & EDGE_CAN_FALLTHRU)
1297				&& !(e2->flags & EDGE_COMPLEX)
1298				&& bbd[di].start_of_trace >= 0
1299				&& !connected[bbd[di].start_of_trace]
1300				&& BB_PARTITION (e2->dest) == current_partition
1301				&& EDGE_FREQUENCY (e2) >= freq_threshold
1302				&& e2->count >= count_threshold
1303				&& (!best2
1304				    || e2->probability > best2->probability
1305				    || (e2->probability == best2->probability
1306					&& traces[bbd[di].start_of_trace].length
1307					   > best2_len))))
1308			  {
1309			    best = e;
1310			    best2 = e2;
1311			    if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1312			      best2_len = traces[bbd[di].start_of_trace].length;
1313			    else
1314			      best2_len = INT_MAX;
1315			    next_bb = e2->dest;
1316			    try_copy = true;
1317			  }
1318		      }
1319		  }
1320
1321	      if (crtl->has_bb_partition)
1322		try_copy = false;
1323
1324	      /* Copy tiny blocks always; copy larger blocks only when the
1325		 edge is traversed frequently enough.  */
1326	      if (try_copy
1327		  && copy_bb_p (best->dest,
1328				optimize_edge_for_speed_p (best)
1329				&& EDGE_FREQUENCY (best) >= freq_threshold
1330				&& best->count >= count_threshold))
1331		{
1332		  basic_block new_bb;
1333
1334		  if (dump_file)
1335		    {
1336		      fprintf (dump_file, "Connection: %d %d ",
1337			       traces[t].last->index, best->dest->index);
1338		      if (!next_bb)
1339			fputc ('\n', dump_file);
1340		      else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1341			fprintf (dump_file, "exit\n");
1342		      else
1343			fprintf (dump_file, "%d\n", next_bb->index);
1344		    }
1345
1346		  new_bb = copy_bb (best->dest, best, traces[t].last, t);
1347		  traces[t].last = new_bb;
1348		  if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1349		    {
1350		      t = bbd[next_bb->index].start_of_trace;
1351		      traces[last_trace].last->aux = traces[t].first;
1352		      connected[t] = true;
1353		      last_trace = t;
1354		    }
1355		  else
1356		    break;	/* Stop finding the successor traces.  */
1357		}
1358	      else
1359		break;	/* Stop finding the successor traces.  */
1360	    }
1361	}
1362    }
1363
1364  if (dump_file)
1365    {
1366      basic_block bb;
1367
1368      fprintf (dump_file, "Final order:\n");
1369      for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1370	fprintf (dump_file, "%d ", bb->index);
1371      fprintf (dump_file, "\n");
1372      fflush (dump_file);
1373    }
1374
1375  FREE (connected);
1376}
1377
1378/* Return true when BB can and should be copied. CODE_MAY_GROW is true
1379   when code size is allowed to grow by duplication.  */
1380
1381static bool
1382copy_bb_p (const_basic_block bb, int code_may_grow)
1383{
1384  int size = 0;
1385  int max_size = uncond_jump_length;
1386  rtx_insn *insn;
1387
1388  if (!bb->frequency)
1389    return false;
1390  if (EDGE_COUNT (bb->preds) < 2)
1391    return false;
1392  if (!can_duplicate_block_p (bb))
1393    return false;
1394
1395  /* Avoid duplicating blocks which have many successors (PR/13430).  */
1396  if (EDGE_COUNT (bb->succs) > 8)
1397    return false;
1398
1399  if (code_may_grow && optimize_bb_for_speed_p (bb))
1400    max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1401
1402  FOR_BB_INSNS (bb, insn)
1403    {
1404      if (INSN_P (insn))
1405	size += get_attr_min_length (insn);
1406    }
1407
1408  if (size <= max_size)
1409    return true;
1410
1411  if (dump_file)
1412    {
1413      fprintf (dump_file,
1414	       "Block %d can't be copied because its size = %d.\n",
1415	       bb->index, size);
1416    }
1417
1418  return false;
1419}
1420
1421/* Return the length of unconditional jump instruction.  */
1422
1423int
1424get_uncond_jump_length (void)
1425{
1426  rtx_insn *label, *jump;
1427  int length;
1428
1429  start_sequence ();
1430  label = emit_label (gen_label_rtx ());
1431  jump = emit_jump_insn (gen_jump (label));
1432  length = get_attr_min_length (jump);
1433  end_sequence ();
1434
1435  return length;
1436}
1437
1438/* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1439   Duplicate the landing pad and split the edges so that no EH edge
1440   crosses partitions.  */
1441
1442static void
1443fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1444{
1445  eh_landing_pad new_lp;
1446  basic_block new_bb, last_bb, post_bb;
1447  rtx_insn *new_label, *jump;
1448  rtx post_label;
1449  unsigned new_partition;
1450  edge_iterator ei;
1451  edge e;
1452
1453  /* Generate the new landing-pad structure.  */
1454  new_lp = gen_eh_landing_pad (old_lp->region);
1455  new_lp->post_landing_pad = old_lp->post_landing_pad;
1456  new_lp->landing_pad = gen_label_rtx ();
1457  LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1458
1459  /* Put appropriate instructions in new bb.  */
1460  new_label = emit_label (new_lp->landing_pad);
1461
1462  expand_dw2_landing_pad_for_region (old_lp->region);
1463
1464  post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1465  post_bb = single_succ (post_bb);
1466  post_label = block_label (post_bb);
1467  jump = emit_jump_insn (gen_jump (post_label));
1468  JUMP_LABEL (jump) = post_label;
1469
1470  /* Create new basic block to be dest for lp.  */
1471  last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1472  new_bb = create_basic_block (new_label, jump, last_bb);
1473  new_bb->aux = last_bb->aux;
1474  last_bb->aux = new_bb;
1475
1476  emit_barrier_after_bb (new_bb);
1477
1478  make_edge (new_bb, post_bb, 0);
1479
1480  /* Make sure new bb is in the other partition.  */
1481  new_partition = BB_PARTITION (old_bb);
1482  new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1483  BB_SET_PARTITION (new_bb, new_partition);
1484
1485  /* Fix up the edges.  */
1486  for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1487    if (BB_PARTITION (e->src) == new_partition)
1488      {
1489	rtx_insn *insn = BB_END (e->src);
1490	rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1491
1492	gcc_assert (note != NULL);
1493	gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1494	XEXP (note, 0) = GEN_INT (new_lp->index);
1495
1496	/* Adjust the edge to the new destination.  */
1497	redirect_edge_succ (e, new_bb);
1498      }
1499    else
1500      ei_next (&ei);
1501}
1502
1503
1504/* Ensure that all hot bbs are included in a hot path through the
1505   procedure. This is done by calling this function twice, once
1506   with WALK_UP true (to look for paths from the entry to hot bbs) and
1507   once with WALK_UP false (to look for paths from hot bbs to the exit).
1508   Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1509   to BBS_IN_HOT_PARTITION.  */
1510
1511static unsigned int
1512sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1513                    vec<basic_block> *bbs_in_hot_partition)
1514{
1515  /* Callers check this.  */
1516  gcc_checking_assert (cold_bb_count);
1517
1518  /* Keep examining hot bbs while we still have some left to check
1519     and there are remaining cold bbs.  */
1520  vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1521  while (! hot_bbs_to_check.is_empty ()
1522         && cold_bb_count)
1523    {
1524      basic_block bb = hot_bbs_to_check.pop ();
1525      vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1526      edge e;
1527      edge_iterator ei;
1528      int highest_probability = 0;
1529      int highest_freq = 0;
1530      gcov_type highest_count = 0;
1531      bool found = false;
1532
1533      /* Walk the preds/succs and check if there is at least one already
1534         marked hot. Keep track of the most frequent pred/succ so that we
1535         can mark it hot if we don't find one.  */
1536      FOR_EACH_EDGE (e, ei, edges)
1537        {
1538          basic_block reach_bb = walk_up ? e->src : e->dest;
1539
1540          if (e->flags & EDGE_DFS_BACK)
1541            continue;
1542
1543          if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1544          {
1545            found = true;
1546            break;
1547          }
1548          /* The following loop will look for the hottest edge via
1549             the edge count, if it is non-zero, then fallback to the edge
1550             frequency and finally the edge probability.  */
1551          if (e->count > highest_count)
1552            highest_count = e->count;
1553          int edge_freq = EDGE_FREQUENCY (e);
1554          if (edge_freq > highest_freq)
1555            highest_freq = edge_freq;
1556          if (e->probability > highest_probability)
1557            highest_probability = e->probability;
1558        }
1559
1560      /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1561         block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1562         then the most frequent pred (or succ) needs to be adjusted.  In the
1563         case where multiple preds/succs have the same frequency (e.g. a
1564         50-50 branch), then both will be adjusted.  */
1565      if (found)
1566        continue;
1567
1568      FOR_EACH_EDGE (e, ei, edges)
1569        {
1570          if (e->flags & EDGE_DFS_BACK)
1571            continue;
1572          /* Select the hottest edge using the edge count, if it is non-zero,
1573             then fallback to the edge frequency and finally the edge
1574             probability.  */
1575          if (highest_count)
1576            {
1577              if (e->count < highest_count)
1578                continue;
1579            }
1580          else if (highest_freq)
1581            {
1582              if (EDGE_FREQUENCY (e) < highest_freq)
1583                continue;
1584            }
1585          else if (e->probability < highest_probability)
1586            continue;
1587
1588          basic_block reach_bb = walk_up ? e->src : e->dest;
1589
1590          /* We have a hot bb with an immediate dominator that is cold.
1591             The dominator needs to be re-marked hot.  */
1592          BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1593          cold_bb_count--;
1594
1595          /* Now we need to examine newly-hot reach_bb to see if it is also
1596             dominated by a cold bb.  */
1597          bbs_in_hot_partition->safe_push (reach_bb);
1598          hot_bbs_to_check.safe_push (reach_bb);
1599        }
1600    }
1601
1602  return cold_bb_count;
1603}
1604
1605
1606/* Find the basic blocks that are rarely executed and need to be moved to
1607   a separate section of the .o file (to cut down on paging and improve
1608   cache locality).  Return a vector of all edges that cross.  */
1609
1610static vec<edge>
1611find_rarely_executed_basic_blocks_and_crossing_edges (void)
1612{
1613  vec<edge> crossing_edges = vNULL;
1614  basic_block bb;
1615  edge e;
1616  edge_iterator ei;
1617  unsigned int cold_bb_count = 0;
1618  auto_vec<basic_block> bbs_in_hot_partition;
1619
1620  /* Mark which partition (hot/cold) each basic block belongs in.  */
1621  FOR_EACH_BB_FN (bb, cfun)
1622    {
1623      bool cold_bb = false;
1624
1625      if (probably_never_executed_bb_p (cfun, bb))
1626        {
1627          /* Handle profile insanities created by upstream optimizations
1628             by also checking the incoming edge weights. If there is a non-cold
1629             incoming edge, conservatively prevent this block from being split
1630             into the cold section.  */
1631          cold_bb = true;
1632          FOR_EACH_EDGE (e, ei, bb->preds)
1633            if (!probably_never_executed_edge_p (cfun, e))
1634              {
1635                cold_bb = false;
1636                break;
1637              }
1638        }
1639      if (cold_bb)
1640        {
1641          BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1642          cold_bb_count++;
1643        }
1644      else
1645        {
1646          BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1647          bbs_in_hot_partition.safe_push (bb);
1648        }
1649    }
1650
1651  /* Ensure that hot bbs are included along a hot path from the entry to exit.
1652     Several different possibilities may include cold bbs along all paths
1653     to/from a hot bb. One is that there are edge weight insanities
1654     due to optimization phases that do not properly update basic block profile
1655     counts. The second is that the entry of the function may not be hot, because
1656     it is entered fewer times than the number of profile training runs, but there
1657     is a loop inside the function that causes blocks within the function to be
1658     above the threshold for hotness. This is fixed by walking up from hot bbs
1659     to the entry block, and then down from hot bbs to the exit, performing
1660     partitioning fixups as necessary.  */
1661  if (cold_bb_count)
1662    {
1663      mark_dfs_back_edges ();
1664      cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1665                                          &bbs_in_hot_partition);
1666      if (cold_bb_count)
1667        sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1668    }
1669
1670  /* The format of .gcc_except_table does not allow landing pads to
1671     be in a different partition as the throw.  Fix this by either
1672     moving or duplicating the landing pads.  */
1673  if (cfun->eh->lp_array)
1674    {
1675      unsigned i;
1676      eh_landing_pad lp;
1677
1678      FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1679	{
1680	  bool all_same, all_diff;
1681
1682	  if (lp == NULL
1683	      || lp->landing_pad == NULL_RTX
1684	      || !LABEL_P (lp->landing_pad))
1685	    continue;
1686
1687	  all_same = all_diff = true;
1688	  bb = BLOCK_FOR_INSN (lp->landing_pad);
1689	  FOR_EACH_EDGE (e, ei, bb->preds)
1690	    {
1691	      gcc_assert (e->flags & EDGE_EH);
1692	      if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1693		all_diff = false;
1694	      else
1695		all_same = false;
1696	    }
1697
1698	  if (all_same)
1699	    ;
1700	  else if (all_diff)
1701	    {
1702	      int which = BB_PARTITION (bb);
1703	      which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1704	      BB_SET_PARTITION (bb, which);
1705	    }
1706	  else
1707	    fix_up_crossing_landing_pad (lp, bb);
1708	}
1709    }
1710
1711  /* Mark every edge that crosses between sections.  */
1712
1713  FOR_EACH_BB_FN (bb, cfun)
1714    FOR_EACH_EDGE (e, ei, bb->succs)
1715      {
1716	unsigned int flags = e->flags;
1717
1718        /* We should never have EDGE_CROSSING set yet.  */
1719	gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1720
1721	if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1722	    && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1723	    && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1724	  {
1725	    crossing_edges.safe_push (e);
1726	    flags |= EDGE_CROSSING;
1727	  }
1728
1729	/* Now that we've split eh edges as appropriate, allow landing pads
1730	   to be merged with the post-landing pads.  */
1731	flags &= ~EDGE_PRESERVE;
1732
1733	e->flags = flags;
1734      }
1735
1736  return crossing_edges;
1737}
1738
1739/* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.  */
1740
1741static void
1742set_edge_can_fallthru_flag (void)
1743{
1744  basic_block bb;
1745
1746  FOR_EACH_BB_FN (bb, cfun)
1747    {
1748      edge e;
1749      edge_iterator ei;
1750
1751      FOR_EACH_EDGE (e, ei, bb->succs)
1752	{
1753	  e->flags &= ~EDGE_CAN_FALLTHRU;
1754
1755	  /* The FALLTHRU edge is also CAN_FALLTHRU edge.  */
1756	  if (e->flags & EDGE_FALLTHRU)
1757	    e->flags |= EDGE_CAN_FALLTHRU;
1758	}
1759
1760      /* If the BB ends with an invertible condjump all (2) edges are
1761	 CAN_FALLTHRU edges.  */
1762      if (EDGE_COUNT (bb->succs) != 2)
1763	continue;
1764      if (!any_condjump_p (BB_END (bb)))
1765	continue;
1766      if (!invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0))
1767	continue;
1768      invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0);
1769      EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1770      EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1771    }
1772}
1773
1774/* If any destination of a crossing edge does not have a label, add label;
1775   Convert any easy fall-through crossing edges to unconditional jumps.  */
1776
1777static void
1778add_labels_and_missing_jumps (vec<edge> crossing_edges)
1779{
1780  size_t i;
1781  edge e;
1782
1783  FOR_EACH_VEC_ELT (crossing_edges, i, e)
1784    {
1785      basic_block src = e->src;
1786      basic_block dest = e->dest;
1787      rtx label;
1788      rtx_insn *new_jump;
1789
1790      if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1791	continue;
1792
1793      /* Make sure dest has a label.  */
1794      label = block_label (dest);
1795
1796      /* Nothing to do for non-fallthru edges.  */
1797      if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1798	continue;
1799      if ((e->flags & EDGE_FALLTHRU) == 0)
1800	continue;
1801
1802      /* If the block does not end with a control flow insn, then we
1803	 can trivially add a jump to the end to fixup the crossing.
1804	 Otherwise the jump will have to go in a new bb, which will
1805	 be handled by fix_up_fall_thru_edges function.  */
1806      if (control_flow_insn_p (BB_END (src)))
1807	continue;
1808
1809      /* Make sure there's only one successor.  */
1810      gcc_assert (single_succ_p (src));
1811
1812      new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
1813      BB_END (src) = new_jump;
1814      JUMP_LABEL (new_jump) = label;
1815      LABEL_NUSES (label) += 1;
1816
1817      emit_barrier_after_bb (src);
1818
1819      /* Mark edge as non-fallthru.  */
1820      e->flags &= ~EDGE_FALLTHRU;
1821    }
1822}
1823
1824/* Find any bb's where the fall-through edge is a crossing edge (note that
1825   these bb's must also contain a conditional jump or end with a call
1826   instruction; we've already dealt with fall-through edges for blocks
1827   that didn't have a conditional jump or didn't end with call instruction
1828   in the call to add_labels_and_missing_jumps).  Convert the fall-through
1829   edge to non-crossing edge by inserting a new bb to fall-through into.
1830   The new bb will contain an unconditional jump (crossing edge) to the
1831   original fall through destination.  */
1832
1833static void
1834fix_up_fall_thru_edges (void)
1835{
1836  basic_block cur_bb;
1837  basic_block new_bb;
1838  edge succ1;
1839  edge succ2;
1840  edge fall_thru;
1841  edge cond_jump = NULL;
1842  edge e;
1843  bool cond_jump_crosses;
1844  int invert_worked;
1845  rtx_insn *old_jump;
1846  rtx fall_thru_label;
1847
1848  FOR_EACH_BB_FN (cur_bb, cfun)
1849    {
1850      fall_thru = NULL;
1851      if (EDGE_COUNT (cur_bb->succs) > 0)
1852	succ1 = EDGE_SUCC (cur_bb, 0);
1853      else
1854	succ1 = NULL;
1855
1856      if (EDGE_COUNT (cur_bb->succs) > 1)
1857	succ2 = EDGE_SUCC (cur_bb, 1);
1858      else
1859	succ2 = NULL;
1860
1861      /* Find the fall-through edge.  */
1862
1863      if (succ1
1864	  && (succ1->flags & EDGE_FALLTHRU))
1865	{
1866	  fall_thru = succ1;
1867	  cond_jump = succ2;
1868	}
1869      else if (succ2
1870	       && (succ2->flags & EDGE_FALLTHRU))
1871	{
1872	  fall_thru = succ2;
1873	  cond_jump = succ1;
1874	}
1875      else if (succ1
1876	       && (block_ends_with_call_p (cur_bb)
1877		   || can_throw_internal (BB_END (cur_bb))))
1878	{
1879	  edge e;
1880	  edge_iterator ei;
1881
1882	  FOR_EACH_EDGE (e, ei, cur_bb->succs)
1883	    if (e->flags & EDGE_FALLTHRU)
1884	      {
1885		fall_thru = e;
1886		break;
1887	      }
1888	}
1889
1890      if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1891	{
1892	  /* Check to see if the fall-thru edge is a crossing edge.  */
1893
1894	  if (fall_thru->flags & EDGE_CROSSING)
1895	    {
1896	      /* The fall_thru edge crosses; now check the cond jump edge, if
1897		 it exists.  */
1898
1899	      cond_jump_crosses = true;
1900	      invert_worked  = 0;
1901	      old_jump = BB_END (cur_bb);
1902
1903	      /* Find the jump instruction, if there is one.  */
1904
1905	      if (cond_jump)
1906		{
1907		  if (!(cond_jump->flags & EDGE_CROSSING))
1908		    cond_jump_crosses = false;
1909
1910		  /* We know the fall-thru edge crosses; if the cond
1911		     jump edge does NOT cross, and its destination is the
1912		     next block in the bb order, invert the jump
1913		     (i.e. fix it so the fall through does not cross and
1914		     the cond jump does).  */
1915
1916		  if (!cond_jump_crosses)
1917		    {
1918		      /* Find label in fall_thru block. We've already added
1919			 any missing labels, so there must be one.  */
1920
1921		      fall_thru_label = block_label (fall_thru->dest);
1922
1923		      if (old_jump && JUMP_P (old_jump) && fall_thru_label)
1924			invert_worked = invert_jump (old_jump,
1925						     fall_thru_label,0);
1926		      if (invert_worked)
1927			{
1928			  fall_thru->flags &= ~EDGE_FALLTHRU;
1929			  cond_jump->flags |= EDGE_FALLTHRU;
1930			  update_br_prob_note (cur_bb);
1931			  e = fall_thru;
1932			  fall_thru = cond_jump;
1933			  cond_jump = e;
1934			  cond_jump->flags |= EDGE_CROSSING;
1935			  fall_thru->flags &= ~EDGE_CROSSING;
1936			}
1937		    }
1938		}
1939
1940	      if (cond_jump_crosses || !invert_worked)
1941		{
1942		  /* This is the case where both edges out of the basic
1943		     block are crossing edges. Here we will fix up the
1944		     fall through edge. The jump edge will be taken care
1945		     of later.  The EDGE_CROSSING flag of fall_thru edge
1946		     is unset before the call to force_nonfallthru
1947		     function because if a new basic-block is created
1948		     this edge remains in the current section boundary
1949		     while the edge between new_bb and the fall_thru->dest
1950		     becomes EDGE_CROSSING.  */
1951
1952		  fall_thru->flags &= ~EDGE_CROSSING;
1953		  new_bb = force_nonfallthru (fall_thru);
1954
1955		  if (new_bb)
1956		    {
1957		      new_bb->aux = cur_bb->aux;
1958		      cur_bb->aux = new_bb;
1959
1960                      /* This is done by force_nonfallthru_and_redirect.  */
1961		      gcc_assert (BB_PARTITION (new_bb)
1962                                  == BB_PARTITION (cur_bb));
1963
1964		      single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1965		    }
1966		  else
1967		    {
1968		      /* If a new basic-block was not created; restore
1969			 the EDGE_CROSSING flag.  */
1970		      fall_thru->flags |= EDGE_CROSSING;
1971		    }
1972
1973		  /* Add barrier after new jump */
1974		  emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1975		}
1976	    }
1977	}
1978    }
1979}
1980
1981/* This function checks the destination block of a "crossing jump" to
1982   see if it has any crossing predecessors that begin with a code label
1983   and end with an unconditional jump.  If so, it returns that predecessor
1984   block.  (This is to avoid creating lots of new basic blocks that all
1985   contain unconditional jumps to the same destination).  */
1986
1987static basic_block
1988find_jump_block (basic_block jump_dest)
1989{
1990  basic_block source_bb = NULL;
1991  edge e;
1992  rtx_insn *insn;
1993  edge_iterator ei;
1994
1995  FOR_EACH_EDGE (e, ei, jump_dest->preds)
1996    if (e->flags & EDGE_CROSSING)
1997      {
1998	basic_block src = e->src;
1999
2000	/* Check each predecessor to see if it has a label, and contains
2001	   only one executable instruction, which is an unconditional jump.
2002	   If so, we can use it.  */
2003
2004	if (LABEL_P (BB_HEAD (src)))
2005	  for (insn = BB_HEAD (src);
2006	       !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
2007	       insn = NEXT_INSN (insn))
2008	    {
2009	      if (INSN_P (insn)
2010		  && insn == BB_END (src)
2011		  && JUMP_P (insn)
2012		  && !any_condjump_p (insn))
2013		{
2014		  source_bb = src;
2015		  break;
2016		}
2017	    }
2018
2019	if (source_bb)
2020	  break;
2021      }
2022
2023  return source_bb;
2024}
2025
2026/* Find all BB's with conditional jumps that are crossing edges;
2027   insert a new bb and make the conditional jump branch to the new
2028   bb instead (make the new bb same color so conditional branch won't
2029   be a 'crossing' edge).  Insert an unconditional jump from the
2030   new bb to the original destination of the conditional jump.  */
2031
2032static void
2033fix_crossing_conditional_branches (void)
2034{
2035  basic_block cur_bb;
2036  basic_block new_bb;
2037  basic_block dest;
2038  edge succ1;
2039  edge succ2;
2040  edge crossing_edge;
2041  edge new_edge;
2042  rtx_insn *old_jump;
2043  rtx set_src;
2044  rtx old_label = NULL_RTX;
2045  rtx new_label;
2046
2047  FOR_EACH_BB_FN (cur_bb, cfun)
2048    {
2049      crossing_edge = NULL;
2050      if (EDGE_COUNT (cur_bb->succs) > 0)
2051	succ1 = EDGE_SUCC (cur_bb, 0);
2052      else
2053	succ1 = NULL;
2054
2055      if (EDGE_COUNT (cur_bb->succs) > 1)
2056	succ2 = EDGE_SUCC (cur_bb, 1);
2057      else
2058	succ2 = NULL;
2059
2060      /* We already took care of fall-through edges, so only one successor
2061	 can be a crossing edge.  */
2062
2063      if (succ1 && (succ1->flags & EDGE_CROSSING))
2064	crossing_edge = succ1;
2065      else if (succ2 && (succ2->flags & EDGE_CROSSING))
2066	crossing_edge = succ2;
2067
2068      if (crossing_edge)
2069	{
2070	  old_jump = BB_END (cur_bb);
2071
2072	  /* Check to make sure the jump instruction is a
2073	     conditional jump.  */
2074
2075	  set_src = NULL_RTX;
2076
2077	  if (any_condjump_p (old_jump))
2078	    {
2079	      if (GET_CODE (PATTERN (old_jump)) == SET)
2080		set_src = SET_SRC (PATTERN (old_jump));
2081	      else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2082		{
2083		  set_src = XVECEXP (PATTERN (old_jump), 0,0);
2084		  if (GET_CODE (set_src) == SET)
2085		    set_src = SET_SRC (set_src);
2086		  else
2087		    set_src = NULL_RTX;
2088		}
2089	    }
2090
2091	  if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
2092	    {
2093	      if (GET_CODE (XEXP (set_src, 1)) == PC)
2094		old_label = XEXP (set_src, 2);
2095	      else if (GET_CODE (XEXP (set_src, 2)) == PC)
2096		old_label = XEXP (set_src, 1);
2097
2098	      /* Check to see if new bb for jumping to that dest has
2099		 already been created; if so, use it; if not, create
2100		 a new one.  */
2101
2102	      new_bb = find_jump_block (crossing_edge->dest);
2103
2104	      if (new_bb)
2105		new_label = block_label (new_bb);
2106	      else
2107		{
2108		  basic_block last_bb;
2109		  rtx_insn *new_jump;
2110
2111		  /* Create new basic block to be dest for
2112		     conditional jump.  */
2113
2114		  /* Put appropriate instructions in new bb.  */
2115
2116		  new_label = gen_label_rtx ();
2117		  emit_label (new_label);
2118
2119		  gcc_assert (GET_CODE (old_label) == LABEL_REF);
2120		  old_label = JUMP_LABEL (old_jump);
2121		  new_jump = emit_jump_insn (gen_jump (old_label));
2122		  JUMP_LABEL (new_jump) = old_label;
2123
2124		  last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2125		  new_bb = create_basic_block (new_label, new_jump, last_bb);
2126		  new_bb->aux = last_bb->aux;
2127		  last_bb->aux = new_bb;
2128
2129		  emit_barrier_after_bb (new_bb);
2130
2131		  /* Make sure new bb is in same partition as source
2132		     of conditional branch.  */
2133		  BB_COPY_PARTITION (new_bb, cur_bb);
2134		}
2135
2136	      /* Make old jump branch to new bb.  */
2137
2138	      redirect_jump (old_jump, new_label, 0);
2139
2140	      /* Remove crossing_edge as predecessor of 'dest'.  */
2141
2142	      dest = crossing_edge->dest;
2143
2144	      redirect_edge_succ (crossing_edge, new_bb);
2145
2146	      /* Make a new edge from new_bb to old dest; new edge
2147		 will be a successor for new_bb and a predecessor
2148		 for 'dest'.  */
2149
2150	      if (EDGE_COUNT (new_bb->succs) == 0)
2151		new_edge = make_edge (new_bb, dest, 0);
2152	      else
2153		new_edge = EDGE_SUCC (new_bb, 0);
2154
2155	      crossing_edge->flags &= ~EDGE_CROSSING;
2156	      new_edge->flags |= EDGE_CROSSING;
2157	    }
2158	}
2159    }
2160}
2161
2162/* Find any unconditional branches that cross between hot and cold
2163   sections.  Convert them into indirect jumps instead.  */
2164
2165static void
2166fix_crossing_unconditional_branches (void)
2167{
2168  basic_block cur_bb;
2169  rtx_insn *last_insn;
2170  rtx label;
2171  rtx label_addr;
2172  rtx_insn *indirect_jump_sequence;
2173  rtx_insn *jump_insn = NULL;
2174  rtx new_reg;
2175  rtx_insn *cur_insn;
2176  edge succ;
2177
2178  FOR_EACH_BB_FN (cur_bb, cfun)
2179    {
2180      last_insn = BB_END (cur_bb);
2181
2182      if (EDGE_COUNT (cur_bb->succs) < 1)
2183	continue;
2184
2185      succ = EDGE_SUCC (cur_bb, 0);
2186
2187      /* Check to see if bb ends in a crossing (unconditional) jump.  At
2188	 this point, no crossing jumps should be conditional.  */
2189
2190      if (JUMP_P (last_insn)
2191	  && (succ->flags & EDGE_CROSSING))
2192	{
2193	  gcc_assert (!any_condjump_p (last_insn));
2194
2195	  /* Make sure the jump is not already an indirect or table jump.  */
2196
2197	  if (!computed_jump_p (last_insn)
2198	      && !tablejump_p (last_insn, NULL, NULL))
2199	    {
2200	      /* We have found a "crossing" unconditional branch.  Now
2201		 we must convert it to an indirect jump.  First create
2202		 reference of label, as target for jump.  */
2203
2204	      label = JUMP_LABEL (last_insn);
2205	      label_addr = gen_rtx_LABEL_REF (Pmode, label);
2206	      LABEL_NUSES (label) += 1;
2207
2208	      /* Get a register to use for the indirect jump.  */
2209
2210	      new_reg = gen_reg_rtx (Pmode);
2211
2212	      /* Generate indirect the jump sequence.  */
2213
2214	      start_sequence ();
2215	      emit_move_insn (new_reg, label_addr);
2216	      emit_indirect_jump (new_reg);
2217	      indirect_jump_sequence = get_insns ();
2218	      end_sequence ();
2219
2220	      /* Make sure every instruction in the new jump sequence has
2221		 its basic block set to be cur_bb.  */
2222
2223	      for (cur_insn = indirect_jump_sequence; cur_insn;
2224		   cur_insn = NEXT_INSN (cur_insn))
2225		{
2226		  if (!BARRIER_P (cur_insn))
2227		    BLOCK_FOR_INSN (cur_insn) = cur_bb;
2228		  if (JUMP_P (cur_insn))
2229		    jump_insn = cur_insn;
2230		}
2231
2232	      /* Insert the new (indirect) jump sequence immediately before
2233		 the unconditional jump, then delete the unconditional jump.  */
2234
2235	      emit_insn_before (indirect_jump_sequence, last_insn);
2236	      delete_insn (last_insn);
2237
2238	      JUMP_LABEL (jump_insn) = label;
2239	      LABEL_NUSES (label)++;
2240
2241	      /* Make BB_END for cur_bb be the jump instruction (NOT the
2242		 barrier instruction at the end of the sequence...).  */
2243
2244	      BB_END (cur_bb) = jump_insn;
2245	    }
2246	}
2247    }
2248}
2249
2250/* Update CROSSING_JUMP_P flags on all jump insns.  */
2251
2252static void
2253update_crossing_jump_flags (void)
2254{
2255  basic_block bb;
2256  edge e;
2257  edge_iterator ei;
2258
2259  FOR_EACH_BB_FN (bb, cfun)
2260    FOR_EACH_EDGE (e, ei, bb->succs)
2261      if (e->flags & EDGE_CROSSING)
2262	{
2263	  if (JUMP_P (BB_END (bb))
2264	      /* Some flags were added during fix_up_fall_thru_edges, via
2265		 force_nonfallthru_and_redirect.  */
2266	      && !CROSSING_JUMP_P (BB_END (bb)))
2267	    CROSSING_JUMP_P (BB_END (bb)) = 1;
2268	  break;
2269	}
2270}
2271
2272/* Reorder basic blocks.  The main entry point to this file.  FLAGS is
2273   the set of flags to pass to cfg_layout_initialize().  */
2274
2275static void
2276reorder_basic_blocks (void)
2277{
2278  int n_traces;
2279  int i;
2280  struct trace *traces;
2281
2282  gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
2283
2284  if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2285    return;
2286
2287  set_edge_can_fallthru_flag ();
2288  mark_dfs_back_edges ();
2289
2290  /* We are estimating the length of uncond jump insn only once since the code
2291     for getting the insn length always returns the minimal length now.  */
2292  if (uncond_jump_length == 0)
2293    uncond_jump_length = get_uncond_jump_length ();
2294
2295  /* We need to know some information for each basic block.  */
2296  array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2297  bbd = XNEWVEC (bbro_basic_block_data, array_size);
2298  for (i = 0; i < array_size; i++)
2299    {
2300      bbd[i].start_of_trace = -1;
2301      bbd[i].end_of_trace = -1;
2302      bbd[i].in_trace = -1;
2303      bbd[i].visited = 0;
2304      bbd[i].priority = -1;
2305      bbd[i].heap = NULL;
2306      bbd[i].node = NULL;
2307    }
2308
2309  traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2310  n_traces = 0;
2311  find_traces (&n_traces, traces);
2312  connect_traces (n_traces, traces);
2313  FREE (traces);
2314  FREE (bbd);
2315
2316  relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2317
2318  if (dump_file)
2319    {
2320      if (dump_flags & TDF_DETAILS)
2321	dump_reg_info (dump_file);
2322      dump_flow_info (dump_file, dump_flags);
2323    }
2324
2325  /* Signal that rtl_verify_flow_info_1 can now verify that there
2326     is at most one switch between hot/cold sections.  */
2327  crtl->bb_reorder_complete = true;
2328}
2329
2330/* Determine which partition the first basic block in the function
2331   belongs to, then find the first basic block in the current function
2332   that belongs to a different section, and insert a
2333   NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2334   instruction stream.  When writing out the assembly code,
2335   encountering this note will make the compiler switch between the
2336   hot and cold text sections.  */
2337
2338void
2339insert_section_boundary_note (void)
2340{
2341  basic_block bb;
2342  bool switched_sections = false;
2343  int current_partition = 0;
2344
2345  if (!crtl->has_bb_partition)
2346    return;
2347
2348  FOR_EACH_BB_FN (bb, cfun)
2349    {
2350      if (!current_partition)
2351	current_partition = BB_PARTITION (bb);
2352      if (BB_PARTITION (bb) != current_partition)
2353	{
2354	  gcc_assert (!switched_sections);
2355          switched_sections = true;
2356          emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2357          current_partition = BB_PARTITION (bb);
2358	}
2359    }
2360}
2361
2362namespace {
2363
2364const pass_data pass_data_reorder_blocks =
2365{
2366  RTL_PASS, /* type */
2367  "bbro", /* name */
2368  OPTGROUP_NONE, /* optinfo_flags */
2369  TV_REORDER_BLOCKS, /* tv_id */
2370  0, /* properties_required */
2371  0, /* properties_provided */
2372  0, /* properties_destroyed */
2373  0, /* todo_flags_start */
2374  0, /* todo_flags_finish */
2375};
2376
2377class pass_reorder_blocks : public rtl_opt_pass
2378{
2379public:
2380  pass_reorder_blocks (gcc::context *ctxt)
2381    : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2382  {}
2383
2384  /* opt_pass methods: */
2385  virtual bool gate (function *)
2386    {
2387      if (targetm.cannot_modify_jumps_p ())
2388	return false;
2389      return (optimize > 0
2390	      && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2391    }
2392
2393  virtual unsigned int execute (function *);
2394
2395}; // class pass_reorder_blocks
2396
2397unsigned int
2398pass_reorder_blocks::execute (function *fun)
2399{
2400  basic_block bb;
2401
2402  /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2403     splitting possibly introduced more crossjumping opportunities.  */
2404  cfg_layout_initialize (CLEANUP_EXPENSIVE);
2405
2406  reorder_basic_blocks ();
2407  cleanup_cfg (CLEANUP_EXPENSIVE);
2408
2409  FOR_EACH_BB_FN (bb, fun)
2410    if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2411      bb->aux = bb->next_bb;
2412  cfg_layout_finalize ();
2413
2414  return 0;
2415}
2416
2417} // anon namespace
2418
2419rtl_opt_pass *
2420make_pass_reorder_blocks (gcc::context *ctxt)
2421{
2422  return new pass_reorder_blocks (ctxt);
2423}
2424
2425/* Duplicate the blocks containing computed gotos.  This basically unfactors
2426   computed gotos that were factored early on in the compilation process to
2427   speed up edge based data flow.  We used to not unfactoring them again,
2428   which can seriously pessimize code with many computed jumps in the source
2429   code, such as interpreters.  See e.g. PR15242.  */
2430
2431namespace {
2432
2433const pass_data pass_data_duplicate_computed_gotos =
2434{
2435  RTL_PASS, /* type */
2436  "compgotos", /* name */
2437  OPTGROUP_NONE, /* optinfo_flags */
2438  TV_REORDER_BLOCKS, /* tv_id */
2439  0, /* properties_required */
2440  0, /* properties_provided */
2441  0, /* properties_destroyed */
2442  0, /* todo_flags_start */
2443  0, /* todo_flags_finish */
2444};
2445
2446class pass_duplicate_computed_gotos : public rtl_opt_pass
2447{
2448public:
2449  pass_duplicate_computed_gotos (gcc::context *ctxt)
2450    : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
2451  {}
2452
2453  /* opt_pass methods: */
2454  virtual bool gate (function *);
2455  virtual unsigned int execute (function *);
2456
2457}; // class pass_duplicate_computed_gotos
2458
2459bool
2460pass_duplicate_computed_gotos::gate (function *fun)
2461{
2462  if (targetm.cannot_modify_jumps_p ())
2463    return false;
2464  return (optimize > 0
2465	  && flag_expensive_optimizations
2466	  && ! optimize_function_for_size_p (fun));
2467}
2468
2469unsigned int
2470pass_duplicate_computed_gotos::execute (function *fun)
2471{
2472  basic_block bb, new_bb;
2473  bitmap candidates;
2474  int max_size;
2475  bool changed = false;
2476
2477  if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2478    return 0;
2479
2480  clear_bb_flags ();
2481  cfg_layout_initialize (0);
2482
2483  /* We are estimating the length of uncond jump insn only once
2484     since the code for getting the insn length always returns
2485     the minimal length now.  */
2486  if (uncond_jump_length == 0)
2487    uncond_jump_length = get_uncond_jump_length ();
2488
2489  max_size
2490    = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2491  candidates = BITMAP_ALLOC (NULL);
2492
2493  /* Look for blocks that end in a computed jump, and see if such blocks
2494     are suitable for unfactoring.  If a block is a candidate for unfactoring,
2495     mark it in the candidates.  */
2496  FOR_EACH_BB_FN (bb, fun)
2497    {
2498      rtx_insn *insn;
2499      edge e;
2500      edge_iterator ei;
2501      int size, all_flags;
2502
2503      /* Build the reorder chain for the original order of blocks.  */
2504      if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2505	bb->aux = bb->next_bb;
2506
2507      /* Obviously the block has to end in a computed jump.  */
2508      if (!computed_jump_p (BB_END (bb)))
2509	continue;
2510
2511      /* Only consider blocks that can be duplicated.  */
2512      if (CROSSING_JUMP_P (BB_END (bb))
2513	  || !can_duplicate_block_p (bb))
2514	continue;
2515
2516      /* Make sure that the block is small enough.  */
2517      size = 0;
2518      FOR_BB_INSNS (bb, insn)
2519	if (INSN_P (insn))
2520	  {
2521	    size += get_attr_min_length (insn);
2522	    if (size > max_size)
2523	       break;
2524	  }
2525      if (size > max_size)
2526	continue;
2527
2528      /* Final check: there must not be any incoming abnormal edges.  */
2529      all_flags = 0;
2530      FOR_EACH_EDGE (e, ei, bb->preds)
2531	all_flags |= e->flags;
2532      if (all_flags & EDGE_COMPLEX)
2533	continue;
2534
2535      bitmap_set_bit (candidates, bb->index);
2536    }
2537
2538  /* Nothing to do if there is no computed jump here.  */
2539  if (bitmap_empty_p (candidates))
2540    goto done;
2541
2542  /* Duplicate computed gotos.  */
2543  FOR_EACH_BB_FN (bb, fun)
2544    {
2545      if (bb->flags & BB_VISITED)
2546	continue;
2547
2548      bb->flags |= BB_VISITED;
2549
2550      /* BB must have one outgoing edge.  That edge must not lead to
2551	 the exit block or the next block.
2552	 The destination must have more than one predecessor.  */
2553      if (!single_succ_p (bb)
2554	  || single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (fun)
2555	  || single_succ (bb) == bb->next_bb
2556	  || single_pred_p (single_succ (bb)))
2557	continue;
2558
2559      /* The successor block has to be a duplication candidate.  */
2560      if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2561	continue;
2562
2563      /* Don't duplicate a partition crossing edge, which requires difficult
2564         fixup.  */
2565      if (JUMP_P (BB_END (bb)) && CROSSING_JUMP_P (BB_END (bb)))
2566	continue;
2567
2568      new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
2569      new_bb->aux = bb->aux;
2570      bb->aux = new_bb;
2571      new_bb->flags |= BB_VISITED;
2572      changed = true;
2573    }
2574
2575 done:
2576  if (changed)
2577    {
2578      /* Duplicating blocks above will redirect edges and may cause hot
2579	 blocks previously reached by both hot and cold blocks to become
2580	 dominated only by cold blocks.  */
2581      fixup_partitions ();
2582
2583      /* Merge the duplicated blocks into predecessors, when possible.  */
2584      cfg_layout_finalize ();
2585      cleanup_cfg (0);
2586    }
2587  else
2588    cfg_layout_finalize ();
2589
2590  BITMAP_FREE (candidates);
2591  return 0;
2592}
2593
2594} // anon namespace
2595
2596rtl_opt_pass *
2597make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2598{
2599  return new pass_duplicate_computed_gotos (ctxt);
2600}
2601
2602/* This function is the main 'entrance' for the optimization that
2603   partitions hot and cold basic blocks into separate sections of the
2604   .o file (to improve performance and cache locality).  Ideally it
2605   would be called after all optimizations that rearrange the CFG have
2606   been called.  However part of this optimization may introduce new
2607   register usage, so it must be called before register allocation has
2608   occurred.  This means that this optimization is actually called
2609   well before the optimization that reorders basic blocks (see
2610   function above).
2611
2612   This optimization checks the feedback information to determine
2613   which basic blocks are hot/cold, updates flags on the basic blocks
2614   to indicate which section they belong in.  This information is
2615   later used for writing out sections in the .o file.  Because hot
2616   and cold sections can be arbitrarily large (within the bounds of
2617   memory), far beyond the size of a single function, it is necessary
2618   to fix up all edges that cross section boundaries, to make sure the
2619   instructions used can actually span the required distance.  The
2620   fixes are described below.
2621
2622   Fall-through edges must be changed into jumps; it is not safe or
2623   legal to fall through across a section boundary.  Whenever a
2624   fall-through edge crossing a section boundary is encountered, a new
2625   basic block is inserted (in the same section as the fall-through
2626   source), and the fall through edge is redirected to the new basic
2627   block.  The new basic block contains an unconditional jump to the
2628   original fall-through target.  (If the unconditional jump is
2629   insufficient to cross section boundaries, that is dealt with a
2630   little later, see below).
2631
2632   In order to deal with architectures that have short conditional
2633   branches (which cannot span all of memory) we take any conditional
2634   jump that attempts to cross a section boundary and add a level of
2635   indirection: it becomes a conditional jump to a new basic block, in
2636   the same section.  The new basic block contains an unconditional
2637   jump to the original target, in the other section.
2638
2639   For those architectures whose unconditional branch is also
2640   incapable of reaching all of memory, those unconditional jumps are
2641   converted into indirect jumps, through a register.
2642
2643   IMPORTANT NOTE: This optimization causes some messy interactions
2644   with the cfg cleanup optimizations; those optimizations want to
2645   merge blocks wherever possible, and to collapse indirect jump
2646   sequences (change "A jumps to B jumps to C" directly into "A jumps
2647   to C").  Those optimizations can undo the jump fixes that
2648   partitioning is required to make (see above), in order to ensure
2649   that jumps attempting to cross section boundaries are really able
2650   to cover whatever distance the jump requires (on many architectures
2651   conditional or unconditional jumps are not able to reach all of
2652   memory).  Therefore tests have to be inserted into each such
2653   optimization to make sure that it does not undo stuff necessary to
2654   cross partition boundaries.  This would be much less of a problem
2655   if we could perform this optimization later in the compilation, but
2656   unfortunately the fact that we may need to create indirect jumps
2657   (through registers) requires that this optimization be performed
2658   before register allocation.
2659
2660   Hot and cold basic blocks are partitioned and put in separate
2661   sections of the .o file, to reduce paging and improve cache
2662   performance (hopefully).  This can result in bits of code from the
2663   same function being widely separated in the .o file.  However this
2664   is not obvious to the current bb structure.  Therefore we must take
2665   care to ensure that: 1). There are no fall_thru edges that cross
2666   between sections; 2). For those architectures which have "short"
2667   conditional branches, all conditional branches that attempt to
2668   cross between sections are converted to unconditional branches;
2669   and, 3). For those architectures which have "short" unconditional
2670   branches, all unconditional branches that attempt to cross between
2671   sections are converted to indirect jumps.
2672
2673   The code for fixing up fall_thru edges that cross between hot and
2674   cold basic blocks does so by creating new basic blocks containing
2675   unconditional branches to the appropriate label in the "other"
2676   section.  The new basic block is then put in the same (hot or cold)
2677   section as the original conditional branch, and the fall_thru edge
2678   is modified to fall into the new basic block instead.  By adding
2679   this level of indirection we end up with only unconditional branches
2680   crossing between hot and cold sections.
2681
2682   Conditional branches are dealt with by adding a level of indirection.
2683   A new basic block is added in the same (hot/cold) section as the
2684   conditional branch, and the conditional branch is retargeted to the
2685   new basic block.  The new basic block contains an unconditional branch
2686   to the original target of the conditional branch (in the other section).
2687
2688   Unconditional branches are dealt with by converting them into
2689   indirect jumps.  */
2690
2691namespace {
2692
2693const pass_data pass_data_partition_blocks =
2694{
2695  RTL_PASS, /* type */
2696  "bbpart", /* name */
2697  OPTGROUP_NONE, /* optinfo_flags */
2698  TV_REORDER_BLOCKS, /* tv_id */
2699  PROP_cfglayout, /* properties_required */
2700  0, /* properties_provided */
2701  0, /* properties_destroyed */
2702  0, /* todo_flags_start */
2703  0, /* todo_flags_finish */
2704};
2705
2706class pass_partition_blocks : public rtl_opt_pass
2707{
2708public:
2709  pass_partition_blocks (gcc::context *ctxt)
2710    : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2711  {}
2712
2713  /* opt_pass methods: */
2714  virtual bool gate (function *);
2715  virtual unsigned int execute (function *);
2716
2717}; // class pass_partition_blocks
2718
2719bool
2720pass_partition_blocks::gate (function *fun)
2721{
2722  /* The optimization to partition hot/cold basic blocks into separate
2723     sections of the .o file does not work well with linkonce or with
2724     user defined section attributes.  Don't call it if either case
2725     arises.  */
2726  return (flag_reorder_blocks_and_partition
2727	  && optimize
2728	  /* See gate_handle_reorder_blocks.  We should not partition if
2729	     we are going to omit the reordering.  */
2730	  && optimize_function_for_speed_p (fun)
2731	  && !DECL_COMDAT_GROUP (current_function_decl)
2732	  && !user_defined_section_attribute);
2733}
2734
2735unsigned
2736pass_partition_blocks::execute (function *fun)
2737{
2738  vec<edge> crossing_edges;
2739
2740  if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2741    return 0;
2742
2743  df_set_flags (DF_DEFER_INSN_RESCAN);
2744
2745  crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2746  if (!crossing_edges.exists ())
2747    return 0;
2748
2749  crtl->has_bb_partition = true;
2750
2751  /* Make sure the source of any crossing edge ends in a jump and the
2752     destination of any crossing edge has a label.  */
2753  add_labels_and_missing_jumps (crossing_edges);
2754
2755  /* Convert all crossing fall_thru edges to non-crossing fall
2756     thrus to unconditional jumps (that jump to the original fall
2757     through dest).  */
2758  fix_up_fall_thru_edges ();
2759
2760  /* If the architecture does not have conditional branches that can
2761     span all of memory, convert crossing conditional branches into
2762     crossing unconditional branches.  */
2763  if (!HAS_LONG_COND_BRANCH)
2764    fix_crossing_conditional_branches ();
2765
2766  /* If the architecture does not have unconditional branches that
2767     can span all of memory, convert crossing unconditional branches
2768     into indirect jumps.  Since adding an indirect jump also adds
2769     a new register usage, update the register usage information as
2770     well.  */
2771  if (!HAS_LONG_UNCOND_BRANCH)
2772    fix_crossing_unconditional_branches ();
2773
2774  update_crossing_jump_flags ();
2775
2776  /* Clear bb->aux fields that the above routines were using.  */
2777  clear_aux_for_blocks ();
2778
2779  crossing_edges.release ();
2780
2781  /* ??? FIXME: DF generates the bb info for a block immediately.
2782     And by immediately, I mean *during* creation of the block.
2783
2784	#0  df_bb_refs_collect
2785	#1  in df_bb_refs_record
2786	#2  in create_basic_block_structure
2787
2788     Which means that the bb_has_eh_pred test in df_bb_refs_collect
2789     will *always* fail, because no edges can have been added to the
2790     block yet.  Which of course means we don't add the right
2791     artificial refs, which means we fail df_verify (much) later.
2792
2793     Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2794     that we also shouldn't grab data from the new blocks those new
2795     insns are in either.  In this way one can create the block, link
2796     it up properly, and have everything Just Work later, when deferred
2797     insns are processed.
2798
2799     In the meantime, we have no other option but to throw away all
2800     of the DF data and recompute it all.  */
2801  if (fun->eh->lp_array)
2802    {
2803      df_finish_pass (true);
2804      df_scan_alloc (NULL);
2805      df_scan_blocks ();
2806      /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2807	 data.  We blindly generated all of them when creating the new
2808	 landing pad.  Delete those assignments we don't use.  */
2809      df_set_flags (DF_LR_RUN_DCE);
2810      df_analyze ();
2811    }
2812
2813  return 0;
2814}
2815
2816} // anon namespace
2817
2818rtl_opt_pass *
2819make_pass_partition_blocks (gcc::context *ctxt)
2820{
2821  return new pass_partition_blocks (ctxt);
2822}
2823