1/* Branch prediction routines for the GNU compiler.
2   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005
3   Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING.  If not, write to the Free
19Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
2002110-1301, USA.  */
21
22/* References:
23
24   [1] "Branch Prediction for Free"
25       Ball and Larus; PLDI '93.
26   [2] "Static Branch Frequency and Program Profile Analysis"
27       Wu and Larus; MICRO-27.
28   [3] "Corpus-based Static Branch Prediction"
29       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
30
31
32#include "config.h"
33#include "system.h"
34#include "coretypes.h"
35#include "tm.h"
36#include "tree.h"
37#include "rtl.h"
38#include "tm_p.h"
39#include "hard-reg-set.h"
40#include "basic-block.h"
41#include "insn-config.h"
42#include "regs.h"
43#include "flags.h"
44#include "output.h"
45#include "function.h"
46#include "except.h"
47#include "toplev.h"
48#include "recog.h"
49#include "expr.h"
50#include "predict.h"
51#include "coverage.h"
52#include "sreal.h"
53#include "params.h"
54#include "target.h"
55#include "cfgloop.h"
56#include "tree-flow.h"
57#include "ggc.h"
58#include "tree-dump.h"
59#include "tree-pass.h"
60#include "timevar.h"
61#include "tree-scalar-evolution.h"
62#include "cfgloop.h"
63
64/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
65		   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
66static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
67	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;
68
69/* Random guesstimation given names.  */
70#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 100 - 1)
71#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
72#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
73#define PROB_ALWAYS		(REG_BR_PROB_BASE)
74
75static void combine_predictions_for_insn (rtx, basic_block);
76static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
77static void estimate_loops_at_level (struct loop *, bitmap);
78static void propagate_freq (struct loop *, bitmap);
79static void estimate_bb_frequencies (struct loops *);
80static void predict_paths_leading_to (basic_block, int *, enum br_predictor, enum prediction);
81static bool last_basic_block_p (basic_block);
82static void compute_function_frequency (void);
83static void choose_function_section (void);
84static bool can_predict_insn_p (rtx);
85
86/* Information we hold about each branch predictor.
87   Filled using information from predict.def.  */
88
89struct predictor_info
90{
91  const char *const name;	/* Name used in the debugging dumps.  */
92  const int hitrate;		/* Expected hitrate used by
93				   predict_insn_def call.  */
94  const int flags;
95};
96
97/* Use given predictor without Dempster-Shaffer theory if it matches
98   using first_match heuristics.  */
99#define PRED_FLAG_FIRST_MATCH 1
100
101/* Recompute hitrate in percent to our representation.  */
102
103#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
104
105#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
106static const struct predictor_info predictor_info[]= {
107#include "predict.def"
108
109  /* Upper bound on predictors.  */
110  {NULL, 0, 0}
111};
112#undef DEF_PREDICTOR
113
114/* Return true in case BB can be CPU intensive and should be optimized
115   for maximal performance.  */
116
117bool
118maybe_hot_bb_p (basic_block bb)
119{
120  if (profile_info && flag_branch_probabilities
121      && (bb->count
122	  < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
123    return false;
124  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
125    return false;
126  return true;
127}
128
129/* Return true in case BB is cold and should be optimized for size.  */
130
131bool
132probably_cold_bb_p (basic_block bb)
133{
134  if (profile_info && flag_branch_probabilities
135      && (bb->count
136	  < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
137    return true;
138  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
139    return true;
140  return false;
141}
142
143/* Return true in case BB is probably never executed.  */
144bool
145probably_never_executed_bb_p (basic_block bb)
146{
147  if (profile_info && flag_branch_probabilities)
148    return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
149  return false;
150}
151
152/* Return true if the one of outgoing edges is already predicted by
153   PREDICTOR.  */
154
155bool
156rtl_predicted_by_p (basic_block bb, enum br_predictor predictor)
157{
158  rtx note;
159  if (!INSN_P (BB_END (bb)))
160    return false;
161  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
162    if (REG_NOTE_KIND (note) == REG_BR_PRED
163	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
164      return true;
165  return false;
166}
167
168/* Return true if the one of outgoing edges is already predicted by
169   PREDICTOR.  */
170
171bool
172tree_predicted_by_p (basic_block bb, enum br_predictor predictor)
173{
174  struct edge_prediction *i;
175  for (i = bb->predictions; i; i = i->ep_next)
176    if (i->ep_predictor == predictor)
177      return true;
178  return false;
179}
180
181/* Return true when the probability of edge is reliable.
182
183   The profile guessing code is good at predicting branch outcome (ie.
184   taken/not taken), that is predicted right slightly over 75% of time.
185   It is however notoriously poor on predicting the probability itself.
186   In general the profile appear a lot flatter (with probabilities closer
187   to 50%) than the reality so it is bad idea to use it to drive optimization
188   such as those disabling dynamic branch prediction for well predictable
189   branches.
190
191   There are two exceptions - edges leading to noreturn edges and edges
192   predicted by number of iterations heuristics are predicted well.  This macro
193   should be able to distinguish those, but at the moment it simply check for
194   noreturn heuristic that is only one giving probability over 99% or bellow
195   1%.  In future we might want to propagate reliability information across the
196   CFG if we find this information useful on multiple places.   */
197static bool
198probability_reliable_p (int prob)
199{
200  return (profile_status == PROFILE_READ
201	  || (profile_status == PROFILE_GUESSED
202	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
203}
204
205/* Same predicate as above, working on edges.  */
206bool
207edge_probability_reliable_p (edge e)
208{
209  return probability_reliable_p (e->probability);
210}
211
212/* Same predicate as edge_probability_reliable_p, working on notes.  */
213bool
214br_prob_note_reliable_p (rtx note)
215{
216  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
217  return probability_reliable_p (INTVAL (XEXP (note, 0)));
218}
219
220static void
221predict_insn (rtx insn, enum br_predictor predictor, int probability)
222{
223  gcc_assert (any_condjump_p (insn));
224  if (!flag_guess_branch_prob)
225    return;
226
227  REG_NOTES (insn)
228    = gen_rtx_EXPR_LIST (REG_BR_PRED,
229			 gen_rtx_CONCAT (VOIDmode,
230					 GEN_INT ((int) predictor),
231					 GEN_INT ((int) probability)),
232			 REG_NOTES (insn));
233}
234
235/* Predict insn by given predictor.  */
236
237void
238predict_insn_def (rtx insn, enum br_predictor predictor,
239		  enum prediction taken)
240{
241   int probability = predictor_info[(int) predictor].hitrate;
242
243   if (taken != TAKEN)
244     probability = REG_BR_PROB_BASE - probability;
245
246   predict_insn (insn, predictor, probability);
247}
248
249/* Predict edge E with given probability if possible.  */
250
251void
252rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
253{
254  rtx last_insn;
255  last_insn = BB_END (e->src);
256
257  /* We can store the branch prediction information only about
258     conditional jumps.  */
259  if (!any_condjump_p (last_insn))
260    return;
261
262  /* We always store probability of branching.  */
263  if (e->flags & EDGE_FALLTHRU)
264    probability = REG_BR_PROB_BASE - probability;
265
266  predict_insn (last_insn, predictor, probability);
267}
268
269/* Predict edge E with the given PROBABILITY.  */
270void
271tree_predict_edge (edge e, enum br_predictor predictor, int probability)
272{
273  gcc_assert (profile_status != PROFILE_GUESSED);
274  if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
275      && flag_guess_branch_prob && optimize)
276    {
277      struct edge_prediction *i = ggc_alloc (sizeof (struct edge_prediction));
278
279      i->ep_next = e->src->predictions;
280      e->src->predictions = i;
281      i->ep_probability = probability;
282      i->ep_predictor = predictor;
283      i->ep_edge = e;
284    }
285}
286
287/* Remove all predictions on given basic block that are attached
288   to edge E.  */
289void
290remove_predictions_associated_with_edge (edge e)
291{
292  if (e->src->predictions)
293    {
294      struct edge_prediction **prediction = &e->src->predictions;
295      while (*prediction)
296	{
297	  if ((*prediction)->ep_edge == e)
298	    *prediction = (*prediction)->ep_next;
299	  else
300	    prediction = &((*prediction)->ep_next);
301	}
302    }
303}
304
305/* Return true when we can store prediction on insn INSN.
306   At the moment we represent predictions only on conditional
307   jumps, not at computed jump or other complicated cases.  */
308static bool
309can_predict_insn_p (rtx insn)
310{
311  return (JUMP_P (insn)
312	  && any_condjump_p (insn)
313	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
314}
315
316/* Predict edge E by given predictor if possible.  */
317
318void
319predict_edge_def (edge e, enum br_predictor predictor,
320		  enum prediction taken)
321{
322   int probability = predictor_info[(int) predictor].hitrate;
323
324   if (taken != TAKEN)
325     probability = REG_BR_PROB_BASE - probability;
326
327   predict_edge (e, predictor, probability);
328}
329
330/* Invert all branch predictions or probability notes in the INSN.  This needs
331   to be done each time we invert the condition used by the jump.  */
332
333void
334invert_br_probabilities (rtx insn)
335{
336  rtx note;
337
338  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
339    if (REG_NOTE_KIND (note) == REG_BR_PROB)
340      XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
341    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
342      XEXP (XEXP (note, 0), 1)
343	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
344}
345
346/* Dump information about the branch prediction to the output file.  */
347
348static void
349dump_prediction (FILE *file, enum br_predictor predictor, int probability,
350		 basic_block bb, int used)
351{
352  edge e;
353  edge_iterator ei;
354
355  if (!file)
356    return;
357
358  FOR_EACH_EDGE (e, ei, bb->succs)
359    if (! (e->flags & EDGE_FALLTHRU))
360      break;
361
362  fprintf (file, "  %s heuristics%s: %.1f%%",
363	   predictor_info[predictor].name,
364	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
365
366  if (bb->count)
367    {
368      fprintf (file, "  exec ");
369      fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
370      if (e)
371	{
372	  fprintf (file, " hit ");
373	  fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
374	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
375	}
376    }
377
378  fprintf (file, "\n");
379}
380
381/* We can not predict the probabilities of outgoing edges of bb.  Set them
382   evenly and hope for the best.  */
383static void
384set_even_probabilities (basic_block bb)
385{
386  int nedges = 0;
387  edge e;
388  edge_iterator ei;
389
390  FOR_EACH_EDGE (e, ei, bb->succs)
391    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
392      nedges ++;
393  FOR_EACH_EDGE (e, ei, bb->succs)
394    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
395      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
396    else
397      e->probability = 0;
398}
399
400/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
401   note if not already present.  Remove now useless REG_BR_PRED notes.  */
402
403static void
404combine_predictions_for_insn (rtx insn, basic_block bb)
405{
406  rtx prob_note;
407  rtx *pnote;
408  rtx note;
409  int best_probability = PROB_EVEN;
410  int best_predictor = END_PREDICTORS;
411  int combined_probability = REG_BR_PROB_BASE / 2;
412  int d;
413  bool first_match = false;
414  bool found = false;
415
416  if (!can_predict_insn_p (insn))
417    {
418      set_even_probabilities (bb);
419      return;
420    }
421
422  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
423  pnote = &REG_NOTES (insn);
424  if (dump_file)
425    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
426	     bb->index);
427
428  /* We implement "first match" heuristics and use probability guessed
429     by predictor with smallest index.  */
430  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
431    if (REG_NOTE_KIND (note) == REG_BR_PRED)
432      {
433	int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
434	int probability = INTVAL (XEXP (XEXP (note, 0), 1));
435
436	found = true;
437	if (best_predictor > predictor)
438	  best_probability = probability, best_predictor = predictor;
439
440	d = (combined_probability * probability
441	     + (REG_BR_PROB_BASE - combined_probability)
442	     * (REG_BR_PROB_BASE - probability));
443
444	/* Use FP math to avoid overflows of 32bit integers.  */
445	if (d == 0)
446	  /* If one probability is 0% and one 100%, avoid division by zero.  */
447	  combined_probability = REG_BR_PROB_BASE / 2;
448	else
449	  combined_probability = (((double) combined_probability) * probability
450				  * REG_BR_PROB_BASE / d + 0.5);
451      }
452
453  /* Decide which heuristic to use.  In case we didn't match anything,
454     use no_prediction heuristic, in case we did match, use either
455     first match or Dempster-Shaffer theory depending on the flags.  */
456
457  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
458    first_match = true;
459
460  if (!found)
461    dump_prediction (dump_file, PRED_NO_PREDICTION,
462		     combined_probability, bb, true);
463  else
464    {
465      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
466		       bb, !first_match);
467      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
468		       bb, first_match);
469    }
470
471  if (first_match)
472    combined_probability = best_probability;
473  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
474
475  while (*pnote)
476    {
477      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
478	{
479	  int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
480	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
481
482	  dump_prediction (dump_file, predictor, probability, bb,
483			   !first_match || best_predictor == predictor);
484	  *pnote = XEXP (*pnote, 1);
485	}
486      else
487	pnote = &XEXP (*pnote, 1);
488    }
489
490  if (!prob_note)
491    {
492      REG_NOTES (insn)
493	= gen_rtx_EXPR_LIST (REG_BR_PROB,
494			     GEN_INT (combined_probability), REG_NOTES (insn));
495
496      /* Save the prediction into CFG in case we are seeing non-degenerated
497	 conditional jump.  */
498      if (!single_succ_p (bb))
499	{
500	  BRANCH_EDGE (bb)->probability = combined_probability;
501	  FALLTHRU_EDGE (bb)->probability
502	    = REG_BR_PROB_BASE - combined_probability;
503	}
504    }
505  else if (!single_succ_p (bb))
506    {
507      int prob = INTVAL (XEXP (prob_note, 0));
508
509      BRANCH_EDGE (bb)->probability = prob;
510      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
511    }
512  else
513    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
514}
515
516/* Combine predictions into single probability and store them into CFG.
517   Remove now useless prediction entries.  */
518
519static void
520combine_predictions_for_bb (basic_block bb)
521{
522  int best_probability = PROB_EVEN;
523  int best_predictor = END_PREDICTORS;
524  int combined_probability = REG_BR_PROB_BASE / 2;
525  int d;
526  bool first_match = false;
527  bool found = false;
528  struct edge_prediction *pred;
529  int nedges = 0;
530  edge e, first = NULL, second = NULL;
531  edge_iterator ei;
532
533  FOR_EACH_EDGE (e, ei, bb->succs)
534    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
535      {
536	nedges ++;
537	if (first && !second)
538	  second = e;
539	if (!first)
540	  first = e;
541      }
542
543  /* When there is no successor or only one choice, prediction is easy.
544
545     We are lazy for now and predict only basic blocks with two outgoing
546     edges.  It is possible to predict generic case too, but we have to
547     ignore first match heuristics and do more involved combining.  Implement
548     this later.  */
549  if (nedges != 2)
550    {
551      if (!bb->count)
552	set_even_probabilities (bb);
553      bb->predictions = NULL;
554      if (dump_file)
555	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
556		 nedges, bb->index);
557      return;
558    }
559
560  if (dump_file)
561    fprintf (dump_file, "Predictions for bb %i\n", bb->index);
562
563  /* We implement "first match" heuristics and use probability guessed
564     by predictor with smallest index.  */
565  for (pred = bb->predictions; pred; pred = pred->ep_next)
566    {
567      int predictor = pred->ep_predictor;
568      int probability = pred->ep_probability;
569
570      if (pred->ep_edge != first)
571	probability = REG_BR_PROB_BASE - probability;
572
573      found = true;
574      if (best_predictor > predictor)
575	best_probability = probability, best_predictor = predictor;
576
577      d = (combined_probability * probability
578	   + (REG_BR_PROB_BASE - combined_probability)
579	   * (REG_BR_PROB_BASE - probability));
580
581      /* Use FP math to avoid overflows of 32bit integers.  */
582      if (d == 0)
583	/* If one probability is 0% and one 100%, avoid division by zero.  */
584	combined_probability = REG_BR_PROB_BASE / 2;
585      else
586	combined_probability = (((double) combined_probability) * probability
587				* REG_BR_PROB_BASE / d + 0.5);
588    }
589
590  /* Decide which heuristic to use.  In case we didn't match anything,
591     use no_prediction heuristic, in case we did match, use either
592     first match or Dempster-Shaffer theory depending on the flags.  */
593
594  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
595    first_match = true;
596
597  if (!found)
598    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
599  else
600    {
601      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
602		       !first_match);
603      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
604		       first_match);
605    }
606
607  if (first_match)
608    combined_probability = best_probability;
609  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
610
611  for (pred = bb->predictions; pred; pred = pred->ep_next)
612    {
613      int predictor = pred->ep_predictor;
614      int probability = pred->ep_probability;
615
616      if (pred->ep_edge != EDGE_SUCC (bb, 0))
617	probability = REG_BR_PROB_BASE - probability;
618      dump_prediction (dump_file, predictor, probability, bb,
619		       !first_match || best_predictor == predictor);
620    }
621  bb->predictions = NULL;
622
623  if (!bb->count)
624    {
625      first->probability = combined_probability;
626      second->probability = REG_BR_PROB_BASE - combined_probability;
627    }
628}
629
630/* Predict edge probabilities by exploiting loop structure.
631   When RTLSIMPLELOOPS is set, attempt to count number of iterations by analyzing
632   RTL otherwise use tree based approach.  */
633static void
634predict_loops (struct loops *loops_info, bool rtlsimpleloops)
635{
636  unsigned i;
637
638  if (!rtlsimpleloops)
639    scev_initialize (loops_info);
640
641  /* Try to predict out blocks in a loop that are not part of a
642     natural loop.  */
643  for (i = 1; i < loops_info->num; i++)
644    {
645      basic_block bb, *bbs;
646      unsigned j;
647      unsigned n_exits;
648      struct loop *loop = loops_info->parray[i];
649      struct niter_desc desc;
650      unsigned HOST_WIDE_INT niter;
651      edge *exits;
652
653      exits = get_loop_exit_edges (loop, &n_exits);
654
655      if (rtlsimpleloops)
656	{
657	  iv_analysis_loop_init (loop);
658	  find_simple_exit (loop, &desc);
659
660	  if (desc.simple_p && desc.const_iter)
661	    {
662	      int prob;
663	      niter = desc.niter + 1;
664	      if (niter == 0)        /* We might overflow here.  */
665		niter = desc.niter;
666	      if (niter
667		  > (unsigned int) PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS))
668		niter = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
669
670	      prob = (REG_BR_PROB_BASE
671		      - (REG_BR_PROB_BASE + niter /2) / niter);
672	      /* Branch prediction algorithm gives 0 frequency for everything
673		 after the end of loop for loop having 0 probability to finish.  */
674	      if (prob == REG_BR_PROB_BASE)
675		prob = REG_BR_PROB_BASE - 1;
676	      predict_edge (desc.in_edge, PRED_LOOP_ITERATIONS,
677			    prob);
678	    }
679	}
680      else
681	{
682	  struct tree_niter_desc niter_desc;
683
684	  for (j = 0; j < n_exits; j++)
685	    {
686	      tree niter = NULL;
687
688	      if (number_of_iterations_exit (loop, exits[j], &niter_desc, false))
689		niter = niter_desc.niter;
690	      if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
691	        niter = loop_niter_by_eval (loop, exits[j]);
692
693	      if (TREE_CODE (niter) == INTEGER_CST)
694		{
695		  int probability;
696		  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
697		  if (host_integerp (niter, 1)
698		      && tree_int_cst_lt (niter,
699				          build_int_cstu (NULL_TREE, max - 1)))
700		    {
701		      HOST_WIDE_INT nitercst = tree_low_cst (niter, 1) + 1;
702		      probability = ((REG_BR_PROB_BASE + nitercst / 2)
703				     / nitercst);
704		    }
705		  else
706		    probability = ((REG_BR_PROB_BASE + max / 2) / max);
707
708		  predict_edge (exits[j], PRED_LOOP_ITERATIONS, probability);
709		}
710	    }
711
712	}
713      free (exits);
714
715      bbs = get_loop_body (loop);
716
717      for (j = 0; j < loop->num_nodes; j++)
718	{
719	  int header_found = 0;
720	  edge e;
721	  edge_iterator ei;
722
723	  bb = bbs[j];
724
725	  /* Bypass loop heuristics on continue statement.  These
726	     statements construct loops via "non-loop" constructs
727	     in the source language and are better to be handled
728	     separately.  */
729	  if ((rtlsimpleloops && !can_predict_insn_p (BB_END (bb)))
730	      || predicted_by_p (bb, PRED_CONTINUE))
731	    continue;
732
733	  /* Loop branch heuristics - predict an edge back to a
734	     loop's head as taken.  */
735	  if (bb == loop->latch)
736	    {
737	      e = find_edge (loop->latch, loop->header);
738	      if (e)
739		{
740		  header_found = 1;
741		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
742		}
743	    }
744
745	  /* Loop exit heuristics - predict an edge exiting the loop if the
746	     conditional has no loop header successors as not taken.  */
747	  if (!header_found)
748	    {
749	      /* For loop with many exits we don't want to predict all exits
750	         with the pretty large probability, because if all exits are
751		 considered in row, the loop would be predicted to iterate
752		 almost never.  The code to divide probability by number of
753		 exits is very rough.  It should compute the number of exits
754		 taken in each patch through function (not the overall number
755		 of exits that might be a lot higher for loops with wide switch
756		 statements in them) and compute n-th square root.
757
758		 We limit the minimal probability by 2% to avoid
759		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
760		 as this was causing regression in perl benchmark containing such
761		 a wide loop.  */
762
763	      int probability = ((REG_BR_PROB_BASE
764		                  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
765				 / n_exits);
766	      if (probability < HITRATE (2))
767		probability = HITRATE (2);
768	      FOR_EACH_EDGE (e, ei, bb->succs)
769		if (e->dest->index < NUM_FIXED_BLOCKS
770		    || !flow_bb_inside_loop_p (loop, e->dest))
771		  predict_edge (e, PRED_LOOP_EXIT, probability);
772	    }
773	}
774
775      /* Free basic blocks from get_loop_body.  */
776      free (bbs);
777    }
778
779  if (!rtlsimpleloops)
780    {
781      scev_finalize ();
782      current_loops = NULL;
783    }
784}
785
786/* Attempt to predict probabilities of BB outgoing edges using local
787   properties.  */
788static void
789bb_estimate_probability_locally (basic_block bb)
790{
791  rtx last_insn = BB_END (bb);
792  rtx cond;
793
794  if (! can_predict_insn_p (last_insn))
795    return;
796  cond = get_condition (last_insn, NULL, false, false);
797  if (! cond)
798    return;
799
800  /* Try "pointer heuristic."
801     A comparison ptr == 0 is predicted as false.
802     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
803  if (COMPARISON_P (cond)
804      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
805	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
806    {
807      if (GET_CODE (cond) == EQ)
808	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
809      else if (GET_CODE (cond) == NE)
810	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
811    }
812  else
813
814  /* Try "opcode heuristic."
815     EQ tests are usually false and NE tests are usually true. Also,
816     most quantities are positive, so we can make the appropriate guesses
817     about signed comparisons against zero.  */
818    switch (GET_CODE (cond))
819      {
820      case CONST_INT:
821	/* Unconditional branch.  */
822	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
823			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
824	break;
825
826      case EQ:
827      case UNEQ:
828	/* Floating point comparisons appears to behave in a very
829	   unpredictable way because of special role of = tests in
830	   FP code.  */
831	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
832	  ;
833	/* Comparisons with 0 are often used for booleans and there is
834	   nothing useful to predict about them.  */
835	else if (XEXP (cond, 1) == const0_rtx
836		 || XEXP (cond, 0) == const0_rtx)
837	  ;
838	else
839	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
840	break;
841
842      case NE:
843      case LTGT:
844	/* Floating point comparisons appears to behave in a very
845	   unpredictable way because of special role of = tests in
846	   FP code.  */
847	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
848	  ;
849	/* Comparisons with 0 are often used for booleans and there is
850	   nothing useful to predict about them.  */
851	else if (XEXP (cond, 1) == const0_rtx
852		 || XEXP (cond, 0) == const0_rtx)
853	  ;
854	else
855	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
856	break;
857
858      case ORDERED:
859	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
860	break;
861
862      case UNORDERED:
863	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
864	break;
865
866      case LE:
867      case LT:
868	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
869	    || XEXP (cond, 1) == constm1_rtx)
870	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
871	break;
872
873      case GE:
874      case GT:
875	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
876	    || XEXP (cond, 1) == constm1_rtx)
877	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
878	break;
879
880      default:
881	break;
882      }
883}
884
885/* Set edge->probability for each successor edge of BB.  */
886void
887guess_outgoing_edge_probabilities (basic_block bb)
888{
889  bb_estimate_probability_locally (bb);
890  combine_predictions_for_insn (BB_END (bb), bb);
891}
892
893/* Return constant EXPR will likely have at execution time, NULL if unknown.
894   The function is used by builtin_expect branch predictor so the evidence
895   must come from this construct and additional possible constant folding.
896
897   We may want to implement more involved value guess (such as value range
898   propagation based prediction), but such tricks shall go to new
899   implementation.  */
900
901static tree
902expr_expected_value (tree expr, bitmap visited)
903{
904  if (TREE_CONSTANT (expr))
905    return expr;
906  else if (TREE_CODE (expr) == SSA_NAME)
907    {
908      tree def = SSA_NAME_DEF_STMT (expr);
909
910      /* If we were already here, break the infinite cycle.  */
911      if (bitmap_bit_p (visited, SSA_NAME_VERSION (expr)))
912	return NULL;
913      bitmap_set_bit (visited, SSA_NAME_VERSION (expr));
914
915      if (TREE_CODE (def) == PHI_NODE)
916	{
917	  /* All the arguments of the PHI node must have the same constant
918	     length.  */
919	  int i;
920	  tree val = NULL, new_val;
921
922	  for (i = 0; i < PHI_NUM_ARGS (def); i++)
923	    {
924	      tree arg = PHI_ARG_DEF (def, i);
925
926	      /* If this PHI has itself as an argument, we cannot
927		 determine the string length of this argument.  However,
928		 if we can find an expected constant value for the other
929		 PHI args then we can still be sure that this is
930		 likely a constant.  So be optimistic and just
931		 continue with the next argument.  */
932	      if (arg == PHI_RESULT (def))
933		continue;
934
935	      new_val = expr_expected_value (arg, visited);
936	      if (!new_val)
937		return NULL;
938	      if (!val)
939		val = new_val;
940	      else if (!operand_equal_p (val, new_val, false))
941		return NULL;
942	    }
943	  return val;
944	}
945      if (TREE_CODE (def) != MODIFY_EXPR || TREE_OPERAND (def, 0) != expr)
946	return NULL;
947      return expr_expected_value (TREE_OPERAND (def, 1), visited);
948    }
949  else if (TREE_CODE (expr) == CALL_EXPR)
950    {
951      tree decl = get_callee_fndecl (expr);
952      if (!decl)
953	return NULL;
954      if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
955	  && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT)
956	{
957	  tree arglist = TREE_OPERAND (expr, 1);
958	  tree val;
959
960	  if (arglist == NULL_TREE
961	      || TREE_CHAIN (arglist) == NULL_TREE)
962	    return NULL;
963	  val = TREE_VALUE (TREE_CHAIN (TREE_OPERAND (expr, 1)));
964	  if (TREE_CONSTANT (val))
965	    return val;
966	  return TREE_VALUE (TREE_CHAIN (TREE_OPERAND (expr, 1)));
967	}
968    }
969  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
970    {
971      tree op0, op1, res;
972      op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
973      if (!op0)
974	return NULL;
975      op1 = expr_expected_value (TREE_OPERAND (expr, 1), visited);
976      if (!op1)
977	return NULL;
978      res = fold_build2 (TREE_CODE (expr), TREE_TYPE (expr), op0, op1);
979      if (TREE_CONSTANT (res))
980	return res;
981      return NULL;
982    }
983  if (UNARY_CLASS_P (expr))
984    {
985      tree op0, res;
986      op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
987      if (!op0)
988	return NULL;
989      res = fold_build1 (TREE_CODE (expr), TREE_TYPE (expr), op0);
990      if (TREE_CONSTANT (res))
991	return res;
992      return NULL;
993    }
994  return NULL;
995}
996
997/* Get rid of all builtin_expect calls we no longer need.  */
998static void
999strip_builtin_expect (void)
1000{
1001  basic_block bb;
1002  FOR_EACH_BB (bb)
1003    {
1004      block_stmt_iterator bi;
1005      for (bi = bsi_start (bb); !bsi_end_p (bi); bsi_next (&bi))
1006	{
1007	  tree stmt = bsi_stmt (bi);
1008	  tree fndecl;
1009	  tree arglist;
1010
1011	  if (TREE_CODE (stmt) == MODIFY_EXPR
1012	      && TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR
1013	      && (fndecl = get_callee_fndecl (TREE_OPERAND (stmt, 1)))
1014	      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1015	      && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1016	      && (arglist = TREE_OPERAND (TREE_OPERAND (stmt, 1), 1))
1017	      && TREE_CHAIN (arglist))
1018	    {
1019	      TREE_OPERAND (stmt, 1) = TREE_VALUE (arglist);
1020	      update_stmt (stmt);
1021	    }
1022	}
1023    }
1024}
1025
1026/* Predict using opcode of the last statement in basic block.  */
1027static void
1028tree_predict_by_opcode (basic_block bb)
1029{
1030  tree stmt = last_stmt (bb);
1031  edge then_edge;
1032  tree cond;
1033  tree op0;
1034  tree type;
1035  tree val;
1036  bitmap visited;
1037  edge_iterator ei;
1038
1039  if (!stmt || TREE_CODE (stmt) != COND_EXPR)
1040    return;
1041  FOR_EACH_EDGE (then_edge, ei, bb->succs)
1042    if (then_edge->flags & EDGE_TRUE_VALUE)
1043      break;
1044  cond = TREE_OPERAND (stmt, 0);
1045  if (!COMPARISON_CLASS_P (cond))
1046    return;
1047  op0 = TREE_OPERAND (cond, 0);
1048  type = TREE_TYPE (op0);
1049  visited = BITMAP_ALLOC (NULL);
1050  val = expr_expected_value (cond, visited);
1051  BITMAP_FREE (visited);
1052  if (val)
1053    {
1054      if (integer_zerop (val))
1055	predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
1056      else
1057	predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
1058      return;
1059    }
1060  /* Try "pointer heuristic."
1061     A comparison ptr == 0 is predicted as false.
1062     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1063  if (POINTER_TYPE_P (type))
1064    {
1065      if (TREE_CODE (cond) == EQ_EXPR)
1066	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1067      else if (TREE_CODE (cond) == NE_EXPR)
1068	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1069    }
1070  else
1071
1072  /* Try "opcode heuristic."
1073     EQ tests are usually false and NE tests are usually true. Also,
1074     most quantities are positive, so we can make the appropriate guesses
1075     about signed comparisons against zero.  */
1076    switch (TREE_CODE (cond))
1077      {
1078      case EQ_EXPR:
1079      case UNEQ_EXPR:
1080	/* Floating point comparisons appears to behave in a very
1081	   unpredictable way because of special role of = tests in
1082	   FP code.  */
1083	if (FLOAT_TYPE_P (type))
1084	  ;
1085	/* Comparisons with 0 are often used for booleans and there is
1086	   nothing useful to predict about them.  */
1087	else if (integer_zerop (op0)
1088		 || integer_zerop (TREE_OPERAND (cond, 1)))
1089	  ;
1090	else
1091	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
1092	break;
1093
1094      case NE_EXPR:
1095      case LTGT_EXPR:
1096	/* Floating point comparisons appears to behave in a very
1097	   unpredictable way because of special role of = tests in
1098	   FP code.  */
1099	if (FLOAT_TYPE_P (type))
1100	  ;
1101	/* Comparisons with 0 are often used for booleans and there is
1102	   nothing useful to predict about them.  */
1103	else if (integer_zerop (op0)
1104		 || integer_zerop (TREE_OPERAND (cond, 1)))
1105	  ;
1106	else
1107	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
1108	break;
1109
1110      case ORDERED_EXPR:
1111	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
1112	break;
1113
1114      case UNORDERED_EXPR:
1115	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
1116	break;
1117
1118      case LE_EXPR:
1119      case LT_EXPR:
1120	if (integer_zerop (TREE_OPERAND (cond, 1))
1121	    || integer_onep (TREE_OPERAND (cond, 1))
1122	    || integer_all_onesp (TREE_OPERAND (cond, 1))
1123	    || real_zerop (TREE_OPERAND (cond, 1))
1124	    || real_onep (TREE_OPERAND (cond, 1))
1125	    || real_minus_onep (TREE_OPERAND (cond, 1)))
1126	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
1127	break;
1128
1129      case GE_EXPR:
1130      case GT_EXPR:
1131	if (integer_zerop (TREE_OPERAND (cond, 1))
1132	    || integer_onep (TREE_OPERAND (cond, 1))
1133	    || integer_all_onesp (TREE_OPERAND (cond, 1))
1134	    || real_zerop (TREE_OPERAND (cond, 1))
1135	    || real_onep (TREE_OPERAND (cond, 1))
1136	    || real_minus_onep (TREE_OPERAND (cond, 1)))
1137	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
1138	break;
1139
1140      default:
1141	break;
1142      }
1143}
1144
1145/* Try to guess whether the value of return means error code.  */
1146static enum br_predictor
1147return_prediction (tree val, enum prediction *prediction)
1148{
1149  /* VOID.  */
1150  if (!val)
1151    return PRED_NO_PREDICTION;
1152  /* Different heuristics for pointers and scalars.  */
1153  if (POINTER_TYPE_P (TREE_TYPE (val)))
1154    {
1155      /* NULL is usually not returned.  */
1156      if (integer_zerop (val))
1157	{
1158	  *prediction = NOT_TAKEN;
1159	  return PRED_NULL_RETURN;
1160	}
1161    }
1162  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
1163    {
1164      /* Negative return values are often used to indicate
1165         errors.  */
1166      if (TREE_CODE (val) == INTEGER_CST
1167	  && tree_int_cst_sgn (val) < 0)
1168	{
1169	  *prediction = NOT_TAKEN;
1170	  return PRED_NEGATIVE_RETURN;
1171	}
1172      /* Constant return values seems to be commonly taken.
1173         Zero/one often represent booleans so exclude them from the
1174	 heuristics.  */
1175      if (TREE_CONSTANT (val)
1176	  && (!integer_zerop (val) && !integer_onep (val)))
1177	{
1178	  *prediction = TAKEN;
1179	  return PRED_NEGATIVE_RETURN;
1180	}
1181    }
1182  return PRED_NO_PREDICTION;
1183}
1184
1185/* Find the basic block with return expression and look up for possible
1186   return value trying to apply RETURN_PREDICTION heuristics.  */
1187static void
1188apply_return_prediction (int *heads)
1189{
1190  tree return_stmt = NULL;
1191  tree return_val;
1192  edge e;
1193  tree phi;
1194  int phi_num_args, i;
1195  enum br_predictor pred;
1196  enum prediction direction;
1197  edge_iterator ei;
1198
1199  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
1200    {
1201      return_stmt = last_stmt (e->src);
1202      if (TREE_CODE (return_stmt) == RETURN_EXPR)
1203	break;
1204    }
1205  if (!e)
1206    return;
1207  return_val = TREE_OPERAND (return_stmt, 0);
1208  if (!return_val)
1209    return;
1210  if (TREE_CODE (return_val) == MODIFY_EXPR)
1211    return_val = TREE_OPERAND (return_val, 1);
1212  if (TREE_CODE (return_val) != SSA_NAME
1213      || !SSA_NAME_DEF_STMT (return_val)
1214      || TREE_CODE (SSA_NAME_DEF_STMT (return_val)) != PHI_NODE)
1215    return;
1216  for (phi = SSA_NAME_DEF_STMT (return_val); phi; phi = PHI_CHAIN (phi))
1217    if (PHI_RESULT (phi) == return_val)
1218      break;
1219  if (!phi)
1220    return;
1221  phi_num_args = PHI_NUM_ARGS (phi);
1222  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
1223
1224  /* Avoid the degenerate case where all return values form the function
1225     belongs to same category (ie they are all positive constants)
1226     so we can hardly say something about them.  */
1227  for (i = 1; i < phi_num_args; i++)
1228    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
1229      break;
1230  if (i != phi_num_args)
1231    for (i = 0; i < phi_num_args; i++)
1232      {
1233	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
1234	if (pred != PRED_NO_PREDICTION)
1235	  predict_paths_leading_to (PHI_ARG_EDGE (phi, i)->src, heads, pred,
1236				    direction);
1237      }
1238}
1239
1240/* Look for basic block that contains unlikely to happen events
1241   (such as noreturn calls) and mark all paths leading to execution
1242   of this basic blocks as unlikely.  */
1243
1244static void
1245tree_bb_level_predictions (void)
1246{
1247  basic_block bb;
1248  int *heads;
1249
1250  heads = XNEWVEC (int, last_basic_block);
1251  memset (heads, ENTRY_BLOCK, sizeof (int) * last_basic_block);
1252  heads[ENTRY_BLOCK_PTR->next_bb->index] = last_basic_block;
1253
1254  apply_return_prediction (heads);
1255
1256  FOR_EACH_BB (bb)
1257    {
1258      block_stmt_iterator bsi = bsi_last (bb);
1259
1260      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1261	{
1262	  tree stmt = bsi_stmt (bsi);
1263	  switch (TREE_CODE (stmt))
1264	    {
1265	      case MODIFY_EXPR:
1266		if (TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR)
1267		  {
1268		    stmt = TREE_OPERAND (stmt, 1);
1269		    goto call_expr;
1270		  }
1271		break;
1272	      case CALL_EXPR:
1273call_expr:;
1274		if (call_expr_flags (stmt) & ECF_NORETURN)
1275		  predict_paths_leading_to (bb, heads, PRED_NORETURN,
1276		      			    NOT_TAKEN);
1277		break;
1278	      default:
1279		break;
1280	    }
1281	}
1282    }
1283
1284  free (heads);
1285}
1286
1287/* Predict branch probabilities and estimate profile of the tree CFG.  */
1288static unsigned int
1289tree_estimate_probability (void)
1290{
1291  basic_block bb;
1292  struct loops loops_info;
1293
1294  flow_loops_find (&loops_info);
1295  if (dump_file && (dump_flags & TDF_DETAILS))
1296    flow_loops_dump (&loops_info, dump_file, NULL, 0);
1297
1298  add_noreturn_fake_exit_edges ();
1299  connect_infinite_loops_to_exit ();
1300  calculate_dominance_info (CDI_DOMINATORS);
1301  calculate_dominance_info (CDI_POST_DOMINATORS);
1302
1303  tree_bb_level_predictions ();
1304
1305  mark_irreducible_loops (&loops_info);
1306  predict_loops (&loops_info, false);
1307
1308  FOR_EACH_BB (bb)
1309    {
1310      edge e;
1311      edge_iterator ei;
1312
1313      FOR_EACH_EDGE (e, ei, bb->succs)
1314	{
1315	  /* Predict early returns to be probable, as we've already taken
1316	     care for error returns and other cases are often used for
1317	     fast paths through function.  */
1318	  if (e->dest == EXIT_BLOCK_PTR
1319	      && TREE_CODE (last_stmt (bb)) == RETURN_EXPR
1320	      && !single_pred_p (bb))
1321	    {
1322	      edge e1;
1323	      edge_iterator ei1;
1324
1325	      FOR_EACH_EDGE (e1, ei1, bb->preds)
1326	      	if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
1327		    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
1328		    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN)
1329		    && !last_basic_block_p (e1->src))
1330		  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
1331	    }
1332
1333	  /* Look for block we are guarding (ie we dominate it,
1334	     but it doesn't postdominate us).  */
1335	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
1336	      && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
1337	      && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
1338	    {
1339	      block_stmt_iterator bi;
1340
1341	      /* The call heuristic claims that a guarded function call
1342		 is improbable.  This is because such calls are often used
1343		 to signal exceptional situations such as printing error
1344		 messages.  */
1345	      for (bi = bsi_start (e->dest); !bsi_end_p (bi);
1346		   bsi_next (&bi))
1347		{
1348		  tree stmt = bsi_stmt (bi);
1349		  if ((TREE_CODE (stmt) == CALL_EXPR
1350		       || (TREE_CODE (stmt) == MODIFY_EXPR
1351			   && TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR))
1352		      /* Constant and pure calls are hardly used to signalize
1353			 something exceptional.  */
1354		      && TREE_SIDE_EFFECTS (stmt))
1355		    {
1356		      predict_edge_def (e, PRED_CALL, NOT_TAKEN);
1357		      break;
1358		    }
1359		}
1360	    }
1361	}
1362      tree_predict_by_opcode (bb);
1363    }
1364  FOR_EACH_BB (bb)
1365    combine_predictions_for_bb (bb);
1366
1367  strip_builtin_expect ();
1368  estimate_bb_frequencies (&loops_info);
1369  free_dominance_info (CDI_POST_DOMINATORS);
1370  remove_fake_exit_edges ();
1371  flow_loops_free (&loops_info);
1372  if (dump_file && (dump_flags & TDF_DETAILS))
1373    dump_tree_cfg (dump_file, dump_flags);
1374  if (profile_status == PROFILE_ABSENT)
1375    profile_status = PROFILE_GUESSED;
1376  return 0;
1377}
1378
1379/* __builtin_expect dropped tokens into the insn stream describing expected
1380   values of registers.  Generate branch probabilities based off these
1381   values.  */
1382
1383void
1384expected_value_to_br_prob (void)
1385{
1386  rtx insn, cond, ev = NULL_RTX, ev_reg = NULL_RTX;
1387
1388  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
1389    {
1390      switch (GET_CODE (insn))
1391	{
1392	case NOTE:
1393	  /* Look for expected value notes.  */
1394	  if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EXPECTED_VALUE)
1395	    {
1396	      ev = NOTE_EXPECTED_VALUE (insn);
1397	      ev_reg = XEXP (ev, 0);
1398	      delete_insn (insn);
1399	    }
1400	  continue;
1401
1402	case CODE_LABEL:
1403	  /* Never propagate across labels.  */
1404	  ev = NULL_RTX;
1405	  continue;
1406
1407	case JUMP_INSN:
1408	  /* Look for simple conditional branches.  If we haven't got an
1409	     expected value yet, no point going further.  */
1410	  if (!JUMP_P (insn) || ev == NULL_RTX
1411	      || ! any_condjump_p (insn))
1412	    continue;
1413	  break;
1414
1415	default:
1416	  /* Look for insns that clobber the EV register.  */
1417	  if (ev && reg_set_p (ev_reg, insn))
1418	    ev = NULL_RTX;
1419	  continue;
1420	}
1421
1422      /* Collect the branch condition, hopefully relative to EV_REG.  */
1423      /* ???  At present we'll miss things like
1424		(expected_value (eq r70 0))
1425		(set r71 -1)
1426		(set r80 (lt r70 r71))
1427		(set pc (if_then_else (ne r80 0) ...))
1428	 as canonicalize_condition will render this to us as
1429		(lt r70, r71)
1430	 Could use cselib to try and reduce this further.  */
1431      cond = XEXP (SET_SRC (pc_set (insn)), 0);
1432      cond = canonicalize_condition (insn, cond, 0, NULL, ev_reg,
1433				     false, false);
1434      if (! cond || XEXP (cond, 0) != ev_reg
1435	  || GET_CODE (XEXP (cond, 1)) != CONST_INT)
1436	continue;
1437
1438      /* Substitute and simplify.  Given that the expression we're
1439	 building involves two constants, we should wind up with either
1440	 true or false.  */
1441      cond = gen_rtx_fmt_ee (GET_CODE (cond), VOIDmode,
1442			     XEXP (ev, 1), XEXP (cond, 1));
1443      cond = simplify_rtx (cond);
1444
1445      /* Turn the condition into a scaled branch probability.  */
1446      gcc_assert (cond == const_true_rtx || cond == const0_rtx);
1447      predict_insn_def (insn, PRED_BUILTIN_EXPECT,
1448		        cond == const_true_rtx ? TAKEN : NOT_TAKEN);
1449    }
1450}
1451
1452/* Check whether this is the last basic block of function.  Commonly
1453   there is one extra common cleanup block.  */
1454static bool
1455last_basic_block_p (basic_block bb)
1456{
1457  if (bb == EXIT_BLOCK_PTR)
1458    return false;
1459
1460  return (bb->next_bb == EXIT_BLOCK_PTR
1461	  || (bb->next_bb->next_bb == EXIT_BLOCK_PTR
1462	      && single_succ_p (bb)
1463	      && single_succ (bb)->next_bb == EXIT_BLOCK_PTR));
1464}
1465
1466/* Sets branch probabilities according to PREDiction and
1467   FLAGS. HEADS[bb->index] should be index of basic block in that we
1468   need to alter branch predictions (i.e. the first of our dominators
1469   such that we do not post-dominate it) (but we fill this information
1470   on demand, so -1 may be there in case this was not needed yet).  */
1471
1472static void
1473predict_paths_leading_to (basic_block bb, int *heads, enum br_predictor pred,
1474			  enum prediction taken)
1475{
1476  edge e;
1477  edge_iterator ei;
1478  int y;
1479
1480  if (heads[bb->index] == ENTRY_BLOCK)
1481    {
1482      /* This is first time we need this field in heads array; so
1483         find first dominator that we do not post-dominate (we are
1484         using already known members of heads array).  */
1485      basic_block ai = bb;
1486      basic_block next_ai = get_immediate_dominator (CDI_DOMINATORS, bb);
1487      int head;
1488
1489      while (heads[next_ai->index] == ENTRY_BLOCK)
1490	{
1491	  if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
1492	    break;
1493	  heads[next_ai->index] = ai->index;
1494	  ai = next_ai;
1495	  next_ai = get_immediate_dominator (CDI_DOMINATORS, next_ai);
1496	}
1497      if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
1498	head = next_ai->index;
1499      else
1500	head = heads[next_ai->index];
1501      while (next_ai != bb)
1502	{
1503	  next_ai = ai;
1504	  ai = BASIC_BLOCK (heads[ai->index]);
1505	  heads[next_ai->index] = head;
1506	}
1507    }
1508  y = heads[bb->index];
1509
1510  /* Now find the edge that leads to our branch and aply the prediction.  */
1511
1512  if (y == last_basic_block)
1513    return;
1514  FOR_EACH_EDGE (e, ei, BASIC_BLOCK (y)->succs)
1515    if (e->dest->index >= NUM_FIXED_BLOCKS
1516	&& dominated_by_p (CDI_POST_DOMINATORS, e->dest, bb))
1517      predict_edge_def (e, pred, taken);
1518}
1519
1520/* This is used to carry information about basic blocks.  It is
1521   attached to the AUX field of the standard CFG block.  */
1522
1523typedef struct block_info_def
1524{
1525  /* Estimated frequency of execution of basic_block.  */
1526  sreal frequency;
1527
1528  /* To keep queue of basic blocks to process.  */
1529  basic_block next;
1530
1531  /* Number of predecessors we need to visit first.  */
1532  int npredecessors;
1533} *block_info;
1534
1535/* Similar information for edges.  */
1536typedef struct edge_info_def
1537{
1538  /* In case edge is a loopback edge, the probability edge will be reached
1539     in case header is.  Estimated number of iterations of the loop can be
1540     then computed as 1 / (1 - back_edge_prob).  */
1541  sreal back_edge_prob;
1542  /* True if the edge is a loopback edge in the natural loop.  */
1543  unsigned int back_edge:1;
1544} *edge_info;
1545
1546#define BLOCK_INFO(B)	((block_info) (B)->aux)
1547#define EDGE_INFO(E)	((edge_info) (E)->aux)
1548
1549/* Helper function for estimate_bb_frequencies.
1550   Propagate the frequencies for LOOP.  */
1551
1552static void
1553propagate_freq (struct loop *loop, bitmap tovisit)
1554{
1555  basic_block head = loop->header;
1556  basic_block bb;
1557  basic_block last;
1558  unsigned i;
1559  edge e;
1560  basic_block nextbb;
1561  bitmap_iterator bi;
1562
1563  /* For each basic block we need to visit count number of his predecessors
1564     we need to visit first.  */
1565  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
1566    {
1567      edge_iterator ei;
1568      int count = 0;
1569
1570       /* The outermost "loop" includes the exit block, which we can not
1571	  look up via BASIC_BLOCK.  Detect this and use EXIT_BLOCK_PTR
1572	  directly.  Do the same for the entry block.  */
1573      bb = BASIC_BLOCK (i);
1574
1575      FOR_EACH_EDGE (e, ei, bb->preds)
1576	{
1577	  bool visit = bitmap_bit_p (tovisit, e->src->index);
1578
1579	  if (visit && !(e->flags & EDGE_DFS_BACK))
1580	    count++;
1581	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
1582	    fprintf (dump_file,
1583		     "Irreducible region hit, ignoring edge to %i->%i\n",
1584		     e->src->index, bb->index);
1585	}
1586      BLOCK_INFO (bb)->npredecessors = count;
1587    }
1588
1589  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
1590  last = head;
1591  for (bb = head; bb; bb = nextbb)
1592    {
1593      edge_iterator ei;
1594      sreal cyclic_probability, frequency;
1595
1596      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
1597      memcpy (&frequency, &real_zero, sizeof (real_zero));
1598
1599      nextbb = BLOCK_INFO (bb)->next;
1600      BLOCK_INFO (bb)->next = NULL;
1601
1602      /* Compute frequency of basic block.  */
1603      if (bb != head)
1604	{
1605#ifdef ENABLE_CHECKING
1606	  FOR_EACH_EDGE (e, ei, bb->preds)
1607	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
1608			|| (e->flags & EDGE_DFS_BACK));
1609#endif
1610
1611	  FOR_EACH_EDGE (e, ei, bb->preds)
1612	    if (EDGE_INFO (e)->back_edge)
1613	      {
1614		sreal_add (&cyclic_probability, &cyclic_probability,
1615			   &EDGE_INFO (e)->back_edge_prob);
1616	      }
1617	    else if (!(e->flags & EDGE_DFS_BACK))
1618	      {
1619		sreal tmp;
1620
1621		/*  frequency += (e->probability
1622				  * BLOCK_INFO (e->src)->frequency /
1623				  REG_BR_PROB_BASE);  */
1624
1625		sreal_init (&tmp, e->probability, 0);
1626		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
1627		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
1628		sreal_add (&frequency, &frequency, &tmp);
1629	      }
1630
1631	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
1632	    {
1633	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
1634		      sizeof (frequency));
1635	    }
1636	  else
1637	    {
1638	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
1639		{
1640		  memcpy (&cyclic_probability, &real_almost_one,
1641			  sizeof (real_almost_one));
1642		}
1643
1644	      /* BLOCK_INFO (bb)->frequency = frequency
1645					      / (1 - cyclic_probability) */
1646
1647	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
1648	      sreal_div (&BLOCK_INFO (bb)->frequency,
1649			 &frequency, &cyclic_probability);
1650	    }
1651	}
1652
1653      bitmap_clear_bit (tovisit, bb->index);
1654
1655      e = find_edge (bb, head);
1656      if (e)
1657	{
1658	  sreal tmp;
1659
1660	  /* EDGE_INFO (e)->back_edge_prob
1661	     = ((e->probability * BLOCK_INFO (bb)->frequency)
1662	     / REG_BR_PROB_BASE); */
1663
1664	  sreal_init (&tmp, e->probability, 0);
1665	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
1666	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1667		     &tmp, &real_inv_br_prob_base);
1668	}
1669
1670      /* Propagate to successor blocks.  */
1671      FOR_EACH_EDGE (e, ei, bb->succs)
1672	if (!(e->flags & EDGE_DFS_BACK)
1673	    && BLOCK_INFO (e->dest)->npredecessors)
1674	  {
1675	    BLOCK_INFO (e->dest)->npredecessors--;
1676	    if (!BLOCK_INFO (e->dest)->npredecessors)
1677	      {
1678		if (!nextbb)
1679		  nextbb = e->dest;
1680		else
1681		  BLOCK_INFO (last)->next = e->dest;
1682
1683		last = e->dest;
1684	      }
1685	  }
1686    }
1687}
1688
1689/* Estimate probabilities of loopback edges in loops at same nest level.  */
1690
1691static void
1692estimate_loops_at_level (struct loop *first_loop, bitmap tovisit)
1693{
1694  struct loop *loop;
1695
1696  for (loop = first_loop; loop; loop = loop->next)
1697    {
1698      edge e;
1699      basic_block *bbs;
1700      unsigned i;
1701
1702      estimate_loops_at_level (loop->inner, tovisit);
1703
1704      /* Do not do this for dummy function loop.  */
1705      if (EDGE_COUNT (loop->latch->succs) > 0)
1706	{
1707	  /* Find current loop back edge and mark it.  */
1708	  e = loop_latch_edge (loop);
1709	  EDGE_INFO (e)->back_edge = 1;
1710       }
1711
1712      bbs = get_loop_body (loop);
1713      for (i = 0; i < loop->num_nodes; i++)
1714	bitmap_set_bit (tovisit, bbs[i]->index);
1715      free (bbs);
1716      propagate_freq (loop, tovisit);
1717    }
1718}
1719
1720/* Convert counts measured by profile driven feedback to frequencies.
1721   Return nonzero iff there was any nonzero execution count.  */
1722
1723int
1724counts_to_freqs (void)
1725{
1726  gcov_type count_max, true_count_max = 0;
1727  basic_block bb;
1728
1729  FOR_EACH_BB (bb)
1730    true_count_max = MAX (bb->count, true_count_max);
1731
1732  count_max = MAX (true_count_max, 1);
1733  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1734    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
1735  return true_count_max;
1736}
1737
1738/* Return true if function is likely to be expensive, so there is no point to
1739   optimize performance of prologue, epilogue or do inlining at the expense
1740   of code size growth.  THRESHOLD is the limit of number of instructions
1741   function can execute at average to be still considered not expensive.  */
1742
1743bool
1744expensive_function_p (int threshold)
1745{
1746  unsigned int sum = 0;
1747  basic_block bb;
1748  unsigned int limit;
1749
1750  /* We can not compute accurately for large thresholds due to scaled
1751     frequencies.  */
1752  gcc_assert (threshold <= BB_FREQ_MAX);
1753
1754  /* Frequencies are out of range.  This either means that function contains
1755     internal loop executing more than BB_FREQ_MAX times or profile feedback
1756     is available and function has not been executed at all.  */
1757  if (ENTRY_BLOCK_PTR->frequency == 0)
1758    return true;
1759
1760  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
1761  limit = ENTRY_BLOCK_PTR->frequency * threshold;
1762  FOR_EACH_BB (bb)
1763    {
1764      rtx insn;
1765
1766      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
1767	   insn = NEXT_INSN (insn))
1768	if (active_insn_p (insn))
1769	  {
1770	    sum += bb->frequency;
1771	    if (sum > limit)
1772	      return true;
1773	}
1774    }
1775
1776  return false;
1777}
1778
1779/* Estimate basic blocks frequency by given branch probabilities.  */
1780
1781static void
1782estimate_bb_frequencies (struct loops *loops)
1783{
1784  basic_block bb;
1785  sreal freq_max;
1786
1787  if (!flag_branch_probabilities || !counts_to_freqs ())
1788    {
1789      static int real_values_initialized = 0;
1790      bitmap tovisit;
1791
1792      if (!real_values_initialized)
1793        {
1794	  real_values_initialized = 1;
1795	  sreal_init (&real_zero, 0, 0);
1796	  sreal_init (&real_one, 1, 0);
1797	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
1798	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
1799	  sreal_init (&real_one_half, 1, -1);
1800	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
1801	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
1802	}
1803
1804      mark_dfs_back_edges ();
1805
1806      single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
1807
1808      /* Set up block info for each basic block.  */
1809      tovisit = BITMAP_ALLOC (NULL);
1810      alloc_aux_for_blocks (sizeof (struct block_info_def));
1811      alloc_aux_for_edges (sizeof (struct edge_info_def));
1812      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1813	{
1814	  edge e;
1815	  edge_iterator ei;
1816
1817	  FOR_EACH_EDGE (e, ei, bb->succs)
1818	    {
1819	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
1820	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1821			 &EDGE_INFO (e)->back_edge_prob,
1822			 &real_inv_br_prob_base);
1823	    }
1824	}
1825
1826      /* First compute probabilities locally for each loop from innermost
1827         to outermost to examine probabilities for back edges.  */
1828      estimate_loops_at_level (loops->tree_root, tovisit);
1829
1830      memcpy (&freq_max, &real_zero, sizeof (real_zero));
1831      FOR_EACH_BB (bb)
1832	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
1833	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
1834
1835      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
1836      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1837	{
1838	  sreal tmp;
1839
1840	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
1841	  sreal_add (&tmp, &tmp, &real_one_half);
1842	  bb->frequency = sreal_to_int (&tmp);
1843	}
1844
1845      free_aux_for_blocks ();
1846      free_aux_for_edges ();
1847      BITMAP_FREE (tovisit);
1848    }
1849  compute_function_frequency ();
1850  if (flag_reorder_functions)
1851    choose_function_section ();
1852}
1853
1854/* Decide whether function is hot, cold or unlikely executed.  */
1855static void
1856compute_function_frequency (void)
1857{
1858  basic_block bb;
1859
1860  if (!profile_info || !flag_branch_probabilities)
1861    return;
1862  cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
1863  FOR_EACH_BB (bb)
1864    {
1865      if (maybe_hot_bb_p (bb))
1866	{
1867	  cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
1868	  return;
1869	}
1870      if (!probably_never_executed_bb_p (bb))
1871	cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
1872    }
1873}
1874
1875/* Choose appropriate section for the function.  */
1876static void
1877choose_function_section (void)
1878{
1879  if (DECL_SECTION_NAME (current_function_decl)
1880      || !targetm.have_named_sections
1881      /* Theoretically we can split the gnu.linkonce text section too,
1882	 but this requires more work as the frequency needs to match
1883	 for all generated objects so we need to merge the frequency
1884	 of all instances.  For now just never set frequency for these.  */
1885      || DECL_ONE_ONLY (current_function_decl))
1886    return;
1887
1888  /* If we are doing the partitioning optimization, let the optimization
1889     choose the correct section into which to put things.  */
1890
1891  if (flag_reorder_blocks_and_partition)
1892    return;
1893
1894  if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
1895    DECL_SECTION_NAME (current_function_decl) =
1896      build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
1897  if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
1898    DECL_SECTION_NAME (current_function_decl) =
1899      build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
1900		    UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
1901}
1902
1903static bool
1904gate_estimate_probability (void)
1905{
1906  return flag_guess_branch_prob;
1907}
1908
1909struct tree_opt_pass pass_profile =
1910{
1911  "profile",				/* name */
1912  gate_estimate_probability,		/* gate */
1913  tree_estimate_probability,		/* execute */
1914  NULL,					/* sub */
1915  NULL,					/* next */
1916  0,					/* static_pass_number */
1917  TV_BRANCH_PROB,			/* tv_id */
1918  PROP_cfg,				/* properties_required */
1919  0,					/* properties_provided */
1920  0,					/* properties_destroyed */
1921  0,					/* todo_flags_start */
1922  TODO_ggc_collect | TODO_verify_ssa,			/* todo_flags_finish */
1923  0					/* letter */
1924};
1925