1/* The tracer pass for the GNU compiler.
2   Contributed by Jan Hubicka, SuSE Labs.
3   Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4
5   This file is part of GCC.
6
7   GCC is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2, or (at your option)
10   any later version.
11
12   GCC is distributed in the hope that it will be useful, but WITHOUT
13   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15   License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with GCC; see the file COPYING.  If not, write to the Free
19   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20   02110-1301, USA.  */
21
22/* This pass performs the tail duplication needed for superblock formation.
23   For more information see:
24
25     Design and Analysis of Profile-Based Optimization in Compaq's
26     Compilation Tools for Alpha; Journal of Instruction-Level
27     Parallelism 3 (2000) 1-25
28
29   Unlike Compaq's implementation we don't do the loop peeling as most
30   probably a better job can be done by a special pass and we don't
31   need to worry too much about the code size implications as the tail
32   duplicates are crossjumped again if optimizations are not
33   performed.  */
34
35
36#include "config.h"
37#include "system.h"
38#include "coretypes.h"
39#include "tm.h"
40#include "tree.h"
41#include "rtl.h"
42#include "hard-reg-set.h"
43#include "basic-block.h"
44#include "output.h"
45#include "cfglayout.h"
46#include "fibheap.h"
47#include "flags.h"
48#include "timevar.h"
49#include "params.h"
50#include "coverage.h"
51#include "tree-pass.h"
52
53static int count_insns (basic_block);
54static bool ignore_bb_p (basic_block);
55static bool better_p (edge, edge);
56static edge find_best_successor (basic_block);
57static edge find_best_predecessor (basic_block);
58static int find_trace (basic_block, basic_block *);
59static void tail_duplicate (void);
60static void layout_superblocks (void);
61
62/* Minimal outgoing edge probability considered for superblock formation.  */
63static int probability_cutoff;
64static int branch_ratio_cutoff;
65
66/* Return true if BB has been seen - it is connected to some trace
67   already.  */
68
69#define seen(bb) (bb->il.rtl->visited || bb->aux)
70
71/* Return true if we should ignore the basic block for purposes of tracing.  */
72static bool
73ignore_bb_p (basic_block bb)
74{
75  if (bb->index < NUM_FIXED_BLOCKS)
76    return true;
77  if (!maybe_hot_bb_p (bb))
78    return true;
79  return false;
80}
81
82/* Return number of instructions in the block.  */
83
84static int
85count_insns (basic_block bb)
86{
87  rtx insn;
88  int n = 0;
89
90  for (insn = BB_HEAD (bb);
91       insn != NEXT_INSN (BB_END (bb));
92       insn = NEXT_INSN (insn))
93    if (active_insn_p (insn))
94      n++;
95  return n;
96}
97
98/* Return true if E1 is more frequent than E2.  */
99static bool
100better_p (edge e1, edge e2)
101{
102  if (e1->count != e2->count)
103    return e1->count > e2->count;
104  if (e1->src->frequency * e1->probability !=
105      e2->src->frequency * e2->probability)
106    return (e1->src->frequency * e1->probability
107	    > e2->src->frequency * e2->probability);
108  /* This is needed to avoid changes in the decision after
109     CFG is modified.  */
110  if (e1->src != e2->src)
111    return e1->src->index > e2->src->index;
112  return e1->dest->index > e2->dest->index;
113}
114
115/* Return most frequent successor of basic block BB.  */
116
117static edge
118find_best_successor (basic_block bb)
119{
120  edge e;
121  edge best = NULL;
122  edge_iterator ei;
123
124  FOR_EACH_EDGE (e, ei, bb->succs)
125    if (!best || better_p (e, best))
126      best = e;
127  if (!best || ignore_bb_p (best->dest))
128    return NULL;
129  if (best->probability <= probability_cutoff)
130    return NULL;
131  return best;
132}
133
134/* Return most frequent predecessor of basic block BB.  */
135
136static edge
137find_best_predecessor (basic_block bb)
138{
139  edge e;
140  edge best = NULL;
141  edge_iterator ei;
142
143  FOR_EACH_EDGE (e, ei, bb->preds)
144    if (!best || better_p (e, best))
145      best = e;
146  if (!best || ignore_bb_p (best->src))
147    return NULL;
148  if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE
149      < bb->frequency * branch_ratio_cutoff)
150    return NULL;
151  return best;
152}
153
154/* Find the trace using bb and record it in the TRACE array.
155   Return number of basic blocks recorded.  */
156
157static int
158find_trace (basic_block bb, basic_block *trace)
159{
160  int i = 0;
161  edge e;
162
163  if (dump_file)
164    fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency);
165
166  while ((e = find_best_predecessor (bb)) != NULL)
167    {
168      basic_block bb2 = e->src;
169      if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
170	  || find_best_successor (bb2) != e)
171	break;
172      if (dump_file)
173	fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency);
174      bb = bb2;
175    }
176  if (dump_file)
177    fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency);
178  trace[i++] = bb;
179
180  /* Follow the trace in forward direction.  */
181  while ((e = find_best_successor (bb)) != NULL)
182    {
183      bb = e->dest;
184      if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
185	  || find_best_predecessor (bb) != e)
186	break;
187      if (dump_file)
188	fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency);
189      trace[i++] = bb;
190    }
191  if (dump_file)
192    fprintf (dump_file, "\n");
193  return i;
194}
195
196/* Look for basic blocks in frequency order, construct traces and tail duplicate
197   if profitable.  */
198
199static void
200tail_duplicate (void)
201{
202  fibnode_t *blocks = XCNEWVEC (fibnode_t, last_basic_block);
203  basic_block *trace = XNEWVEC (basic_block, n_basic_blocks);
204  int *counts = XNEWVEC (int, last_basic_block);
205  int ninsns = 0, nduplicated = 0;
206  gcov_type weighted_insns = 0, traced_insns = 0;
207  fibheap_t heap = fibheap_new ();
208  gcov_type cover_insns;
209  int max_dup_insns;
210  basic_block bb;
211
212  if (profile_info && flag_branch_probabilities)
213    probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
214  else
215    probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
216  probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
217
218  branch_ratio_cutoff =
219    (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO));
220
221  FOR_EACH_BB (bb)
222    {
223      int n = count_insns (bb);
224      if (!ignore_bb_p (bb))
225	blocks[bb->index] = fibheap_insert (heap, -bb->frequency,
226					    bb);
227
228      counts [bb->index] = n;
229      ninsns += n;
230      weighted_insns += n * bb->frequency;
231    }
232
233  if (profile_info && flag_branch_probabilities)
234    cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK);
235  else
236    cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE);
237  cover_insns = (weighted_insns * cover_insns + 50) / 100;
238  max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100;
239
240  while (traced_insns < cover_insns && nduplicated < max_dup_insns
241         && !fibheap_empty (heap))
242    {
243      basic_block bb = fibheap_extract_min (heap);
244      int n, pos;
245
246      if (!bb)
247	break;
248
249      blocks[bb->index] = NULL;
250
251      if (ignore_bb_p (bb))
252	continue;
253      gcc_assert (!seen (bb));
254
255      n = find_trace (bb, trace);
256
257      bb = trace[0];
258      traced_insns += bb->frequency * counts [bb->index];
259      if (blocks[bb->index])
260	{
261	  fibheap_delete_node (heap, blocks[bb->index]);
262	  blocks[bb->index] = NULL;
263	}
264
265      for (pos = 1; pos < n; pos++)
266	{
267	  basic_block bb2 = trace[pos];
268
269	  if (blocks[bb2->index])
270	    {
271	      fibheap_delete_node (heap, blocks[bb2->index]);
272	      blocks[bb2->index] = NULL;
273	    }
274	  traced_insns += bb2->frequency * counts [bb2->index];
275	  if (EDGE_COUNT (bb2->preds) > 1
276	      && can_duplicate_block_p (bb2))
277	    {
278	      edge e;
279	      basic_block old = bb2;
280
281	      e = find_edge (bb, bb2);
282
283	      nduplicated += counts [bb2->index];
284	      bb2 = duplicate_block (bb2, e, bb);
285
286	      /* Reconsider the original copy of block we've duplicated.
287	         Removing the most common predecessor may make it to be
288	         head.  */
289	      blocks[old->index] =
290		fibheap_insert (heap, -old->frequency, old);
291
292	      if (dump_file)
293		fprintf (dump_file, "Duplicated %i as %i [%i]\n",
294			 old->index, bb2->index, bb2->frequency);
295	    }
296	  bb->aux = bb2;
297	  bb2->il.rtl->visited = 1;
298	  bb = bb2;
299	  /* In case the trace became infrequent, stop duplicating.  */
300	  if (ignore_bb_p (bb))
301	    break;
302	}
303      if (dump_file)
304	fprintf (dump_file, " covered now %.1f\n\n",
305		 traced_insns * 100.0 / weighted_insns);
306    }
307  if (dump_file)
308    fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
309	     nduplicated * 100 / ninsns);
310
311  free (blocks);
312  free (trace);
313  free (counts);
314  fibheap_delete (heap);
315}
316
317/* Connect the superblocks into linear sequence.  At the moment we attempt to keep
318   the original order as much as possible, but the algorithm may be made smarter
319   later if needed.  BB reordering pass should void most of the benefits of such
320   change though.  */
321
322static void
323layout_superblocks (void)
324{
325  basic_block end = single_succ (ENTRY_BLOCK_PTR);
326  basic_block bb = end->next_bb;
327
328  while (bb != EXIT_BLOCK_PTR)
329    {
330      edge_iterator ei;
331      edge e, best = NULL;
332      while (end->aux)
333	end = end->aux;
334
335      FOR_EACH_EDGE (e, ei, end->succs)
336	if (e->dest != EXIT_BLOCK_PTR
337	    && e->dest != single_succ (ENTRY_BLOCK_PTR)
338	    && !e->dest->il.rtl->visited
339	    && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best)))
340	  best = e;
341
342      if (best)
343	{
344	  end->aux = best->dest;
345	  best->dest->il.rtl->visited = 1;
346	}
347      else
348	for (; bb != EXIT_BLOCK_PTR; bb = bb->next_bb)
349	  {
350	    if (!bb->il.rtl->visited)
351	      {
352		end->aux = bb;
353		bb->il.rtl->visited = 1;
354		break;
355	      }
356	  }
357    }
358}
359
360/* Main entry point to this file.  FLAGS is the set of flags to pass
361   to cfg_layout_initialize().  */
362
363void
364tracer (unsigned int flags)
365{
366  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
367    return;
368
369  cfg_layout_initialize (flags);
370  mark_dfs_back_edges ();
371  if (dump_file)
372    dump_flow_info (dump_file, dump_flags);
373  tail_duplicate ();
374  layout_superblocks ();
375  if (dump_file)
376    dump_flow_info (dump_file, dump_flags);
377  cfg_layout_finalize ();
378
379  /* Merge basic blocks in duplicated traces.  */
380  cleanup_cfg (CLEANUP_EXPENSIVE);
381}
382
383static bool
384gate_handle_tracer (void)
385{
386  return (optimize > 0 && flag_tracer);
387}
388
389/* Run tracer.  */
390static unsigned int
391rest_of_handle_tracer (void)
392{
393  if (dump_file)
394    dump_flow_info (dump_file, dump_flags);
395  tracer (0);
396  cleanup_cfg (CLEANUP_EXPENSIVE);
397  reg_scan (get_insns (), max_reg_num ());
398  return 0;
399}
400
401struct tree_opt_pass pass_tracer =
402{
403  "tracer",                             /* name */
404  gate_handle_tracer,                   /* gate */
405  rest_of_handle_tracer,                /* execute */
406  NULL,                                 /* sub */
407  NULL,                                 /* next */
408  0,                                    /* static_pass_number */
409  TV_TRACER,                            /* tv_id */
410  0,                                    /* properties_required */
411  0,                                    /* properties_provided */
412  0,                                    /* properties_destroyed */
413  0,                                    /* todo_flags_start */
414  TODO_dump_func,                       /* todo_flags_finish */
415  'T'                                   /* letter */
416};
417
418