1/* Branch prediction routines for the GNU compiler. 2 Copyright (C) 2000-2015 Free Software Foundation, Inc. 3 4This file is part of GCC. 5 6GCC is free software; you can redistribute it and/or modify it under 7the terms of the GNU General Public License as published by the Free 8Software Foundation; either version 3, or (at your option) any later 9version. 10 11GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12WARRANTY; without even the implied warranty of MERCHANTABILITY or 13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14for more details. 15 16You should have received a copy of the GNU General Public License 17along with GCC; see the file COPYING3. If not see 18<http://www.gnu.org/licenses/>. */ 19 20/* References: 21 22 [1] "Branch Prediction for Free" 23 Ball and Larus; PLDI '93. 24 [2] "Static Branch Frequency and Program Profile Analysis" 25 Wu and Larus; MICRO-27. 26 [3] "Corpus-based Static Branch Prediction" 27 Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. 
*/ 28 29 30#include "config.h" 31#include "system.h" 32#include "coretypes.h" 33#include "tm.h" 34#include "hash-set.h" 35#include "machmode.h" 36#include "vec.h" 37#include "double-int.h" 38#include "input.h" 39#include "alias.h" 40#include "symtab.h" 41#include "wide-int.h" 42#include "inchash.h" 43#include "tree.h" 44#include "fold-const.h" 45#include "calls.h" 46#include "rtl.h" 47#include "tm_p.h" 48#include "hard-reg-set.h" 49#include "predict.h" 50#include "function.h" 51#include "dominance.h" 52#include "cfg.h" 53#include "cfganal.h" 54#include "basic-block.h" 55#include "insn-config.h" 56#include "regs.h" 57#include "flags.h" 58#include "profile.h" 59#include "except.h" 60#include "diagnostic-core.h" 61#include "recog.h" 62#include "hashtab.h" 63#include "statistics.h" 64#include "real.h" 65#include "fixed-value.h" 66#include "expmed.h" 67#include "dojump.h" 68#include "explow.h" 69#include "emit-rtl.h" 70#include "varasm.h" 71#include "stmt.h" 72#include "expr.h" 73#include "coverage.h" 74#include "sreal.h" 75#include "params.h" 76#include "target.h" 77#include "cfgloop.h" 78#include "hash-map.h" 79#include "tree-ssa-alias.h" 80#include "internal-fn.h" 81#include "gimple-expr.h" 82#include "is-a.h" 83#include "gimple.h" 84#include "gimple-iterator.h" 85#include "gimple-ssa.h" 86#include "plugin-api.h" 87#include "ipa-ref.h" 88#include "cgraph.h" 89#include "tree-cfg.h" 90#include "tree-phinodes.h" 91#include "ssa-iterators.h" 92#include "tree-ssa-loop-niter.h" 93#include "tree-ssa-loop.h" 94#include "tree-pass.h" 95#include "tree-scalar-evolution.h" 96 97/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 98 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. 
*/

/* NOTE(review): the comment above lists seven constants (including 0
   and 1), but only the five sreal globals below are declared -- the
   list is stale; confirm against the sreal initialization code.  */
static sreal real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;

/* Forward declarations for helpers defined later in this file.  */
static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
static bool can_predict_insn_p (const rtx_insn *);

/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shaffer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Recompute hitrate in percent to our representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)

/* Expand predict.def into a statically-initialized table of predictor
   descriptions, indexed by enum br_predictor.  */
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR

/* Return TRUE if frequency FREQ is considered to be hot.
*/

static inline bool
maybe_hot_frequency_p (struct function *fun, int freq)
{
  struct cgraph_node *node = cgraph_node::get (fun->decl);
  /* Without feedback-directed profile data, fall back on the static
     node frequency recorded in the callgraph.  */
  if (!profile_info
      || !opt_for_fn (fun->decl, flag_branch_probabilities))
    {
      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
	return false;
      if (node->frequency == NODE_FREQUENCY_HOT)
	return true;
    }
  /* With no profile at all, optimistically treat everything as hot.  */
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    return true;
  /* Functions executed once get a stricter bar: two thirds of the
     entry block frequency.  */
  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
    return false;
  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
    return false;
  if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}

/* Cached threshold for hot BB counts; -1 means "not computed yet"
   (see get_hot_bb_threshold).  */
static gcov_type min_count = -1;

/* Determine the threshold for hot BB counts.  */

gcov_type
get_hot_bb_threshold ()
{
  gcov_working_set_t *ws;
  if (min_count == -1)
    {
      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
      gcc_assert (ws);
      min_count = ws->min_counter;
    }
  return min_count;
}

/* Set the threshold for hot BB counts.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}

/* Return TRUE if profile count COUNT is considered to be hot.
   (The previous comment said "frequency FREQ" -- stale; this predicate
   tests an execution count.)  */

bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
    return true;
  /* Code executed at most once is not hot.
     NOTE(review): profile_info is dereferenced unconditionally here;
     presumably it is always non-NULL once a profile was read -- confirm.  */
  if (profile_info->runs >= count)
    return false;
  return (count >= get_hot_bb_threshold ());
}

/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.
*/

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  /* Prefer real profile counts when a profile was read.  */
  if (profile_status_for_fn (fun) == PROFILE_READ)
    return maybe_hot_count_p (fun, bb->count);
  return maybe_hot_frequency_p (fun, bb->frequency);
}

/* Return true in case edge E can be CPU intensive and should be
   optimized for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_READ)
    return maybe_hot_count_p (cfun, e->count);
  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
}

/* Return true if profile COUNT and FREQUENCY, or function FUN static
   node frequency reflects never being executed.  */

static bool
probably_never_executed (struct function *fun,
			 gcov_type count, int frequency)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    {
      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
      /* A block whose scaled count reaches the number of train runs was
	 executed at least once per run on average -- not "never".  */
      if (count * unlikely_count_fraction >= profile_info->runs)
	return false;
      if (!frequency)
	return true;
      if (!ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency)
	return false;
      if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
	{
	  gcov_type computed_count;
	  /* Check for possibility of overflow, in which case entry bb count
	     is large enough to do the division first without losing much
	     precision.  */
	  if (ENTRY_BLOCK_PTR_FOR_FN (fun)->count < REG_BR_PROB_BASE *
	      REG_BR_PROB_BASE)
	    {
	      gcov_type scaled_count
		= frequency * ENTRY_BLOCK_PTR_FOR_FN (fun)->count *
	     unlikely_count_fraction;
	      computed_count = RDIV (scaled_count,
				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
	    }
	  else
	    {
	      /* Divide first to avoid overflowing gcov_type; a little
		 precision is lost in exchange.  */
	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (fun)->count,
				     ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency);
	      computed_count *= frequency * unlikely_count_fraction;
	    }
	  if (computed_count >= profile_info->runs)
	    return false;
	}
      return true;
    }
  /* Without a read profile, fall back on the static callgraph node
     frequency.  */
  if ((!profile_info || !(opt_for_fn (fun->decl, flag_branch_probabilities)))
      && (cgraph_node::get (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}


/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  return probably_never_executed (fun, bb->count, bb->frequency);
}


/* Return true in case edge E is probably never executed.  */

bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
  return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
}

/* Return true when current function should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  if (!fun || !fun->decl)
    return optimize_size;
  cgraph_node *n = cgraph_node::get (fun->decl);
  return n && n->optimize_for_size_p ();
}

/* Return true when current function should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}

/* Return TRUE when BB should be optimized for size.
*/ 312 313bool 314optimize_bb_for_size_p (const_basic_block bb) 315{ 316 return (optimize_function_for_size_p (cfun) 317 || (bb && !maybe_hot_bb_p (cfun, bb))); 318} 319 320/* Return TRUE when BB should be optimized for speed. */ 321 322bool 323optimize_bb_for_speed_p (const_basic_block bb) 324{ 325 return !optimize_bb_for_size_p (bb); 326} 327 328/* Return TRUE when BB should be optimized for size. */ 329 330bool 331optimize_edge_for_size_p (edge e) 332{ 333 return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e); 334} 335 336/* Return TRUE when BB should be optimized for speed. */ 337 338bool 339optimize_edge_for_speed_p (edge e) 340{ 341 return !optimize_edge_for_size_p (e); 342} 343 344/* Return TRUE when BB should be optimized for size. */ 345 346bool 347optimize_insn_for_size_p (void) 348{ 349 return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p; 350} 351 352/* Return TRUE when BB should be optimized for speed. */ 353 354bool 355optimize_insn_for_speed_p (void) 356{ 357 return !optimize_insn_for_size_p (); 358} 359 360/* Return TRUE when LOOP should be optimized for size. */ 361 362bool 363optimize_loop_for_size_p (struct loop *loop) 364{ 365 return optimize_bb_for_size_p (loop->header); 366} 367 368/* Return TRUE when LOOP should be optimized for speed. */ 369 370bool 371optimize_loop_for_speed_p (struct loop *loop) 372{ 373 return optimize_bb_for_speed_p (loop->header); 374} 375 376/* Return TRUE when LOOP nest should be optimized for speed. 
*/

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  /* Walk the loop tree rooted at LOOP in preorder: descend into inner
     loops first, then visit siblings, and climb back towards LOOP when
     a subtree is exhausted.  Any hot member makes the nest hot.  */
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
	return true;
      if (l->inner)
	l = l->inner;
      else if (l->next)
	l = l->next;
      else
	{
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return false;
}

/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}

/* Return true when edge E is likely to be well predictable by branch
   predictor.  */

bool
predictable_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
    return false;
  /* An edge is "predictable" when its probability lies within
     PARAM_PREDICTABLE_BRANCH_OUTCOME percent of either 0% or 100%.  */
  if ((e->probability
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability
	  <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}


/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}

/* Return true if the one of outgoing edges is already predicted by
   PREDICTOR.
*/ 454 455bool 456rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor) 457{ 458 rtx note; 459 if (!INSN_P (BB_END (bb))) 460 return false; 461 for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1)) 462 if (REG_NOTE_KIND (note) == REG_BR_PRED 463 && INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor) 464 return true; 465 return false; 466} 467 468/* Structure representing predictions in tree level. */ 469 470struct edge_prediction { 471 struct edge_prediction *ep_next; 472 edge ep_edge; 473 enum br_predictor ep_predictor; 474 int ep_probability; 475}; 476 477/* This map contains for a basic block the list of predictions for the 478 outgoing edges. */ 479 480static hash_map<const_basic_block, edge_prediction *> *bb_predictions; 481 482/* Return true if the one of outgoing edges is already predicted by 483 PREDICTOR. */ 484 485bool 486gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor) 487{ 488 struct edge_prediction *i; 489 edge_prediction **preds = bb_predictions->get (bb); 490 491 if (!preds) 492 return false; 493 494 for (i = *preds; i; i = i->ep_next) 495 if (i->ep_predictor == predictor) 496 return true; 497 return false; 498} 499 500/* Return true when the probability of edge is reliable. 501 502 The profile guessing code is good at predicting branch outcome (ie. 503 taken/not taken), that is predicted right slightly over 75% of time. 504 It is however notoriously poor on predicting the probability itself. 505 In general the profile appear a lot flatter (with probabilities closer 506 to 50%) than the reality so it is bad idea to use it to drive optimization 507 such as those disabling dynamic branch prediction for well predictable 508 branches. 509 510 There are two exceptions - edges leading to noreturn edges and edges 511 predicted by number of iterations heuristics are predicted well. 
   This macro
   should be able to distinguish those, but at the moment it simply checks for
   the noreturn heuristic that is only one giving probability over 99% or below
   1%.  In future we might want to propagate reliability information across the
   CFG if we find this information useful on multiple places.  */
static bool
probability_reliable_p (int prob)
{
  return (profile_status_for_fn (cfun) == PROFILE_READ
	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}

/* Same predicate as above, working on edges.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (XINT (note, 0));
}

/* Attach a REG_BR_PRED note carrying (PREDICTOR, PROBABILITY) to the
   conditional jump INSN.  No-op unless branch guessing is enabled.  */
static void
predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
		gen_rtx_CONCAT (VOIDmode,
				GEN_INT ((int) predictor),
				GEN_INT ((int) probability)));
}

/* Predict insn by given predictor.  */

void
predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
		  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  /* The table hitrate is the probability of the branch being taken when
     the predictor fires; invert it for a NOT_TAKEN prediction.  */
  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}

/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx_insn *last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}

/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
  /* Only record predictions for real branches (source is not the entry
     block and has more than one successor) and only when guessing is
     enabled.  */
  if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun) && EDGE_COUNT (e->src->succs) >
       1)
      && flag_guess_branch_prob && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      edge_prediction *&preds = bb_predictions->get_or_insert (e->src);

      /* Push the new prediction onto the head of the per-BB list.  */
      i->ep_next = preds;
      preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}

/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  if (!bb_predictions)
    return;

  edge_prediction **preds = bb_predictions->get (e->src);

  if (preds)
    {
      struct edge_prediction **prediction = preds;
      struct edge_prediction *next;

      /* Unlink-and-free via a pointer-to-pointer walk, so removing the
	 list head needs no special case.  */
      while (*prediction)
	{
	  if ((*prediction)->ep_edge == e)
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	  else
	    prediction = &((*prediction)->ep_next);
	}
    }
}

/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  /* Keep the hash-map slot but empty the list.  */
  *preds = NULL;
}

/* Return true when we can store prediction on insn INSN.
655 At the moment we represent predictions only on conditional 656 jumps, not at computed jump or other complicated cases. */ 657static bool 658can_predict_insn_p (const rtx_insn *insn) 659{ 660 return (JUMP_P (insn) 661 && any_condjump_p (insn) 662 && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2); 663} 664 665/* Predict edge E by given predictor if possible. */ 666 667void 668predict_edge_def (edge e, enum br_predictor predictor, 669 enum prediction taken) 670{ 671 int probability = predictor_info[(int) predictor].hitrate; 672 673 if (taken != TAKEN) 674 probability = REG_BR_PROB_BASE - probability; 675 676 predict_edge (e, predictor, probability); 677} 678 679/* Invert all branch predictions or probability notes in the INSN. This needs 680 to be done each time we invert the condition used by the jump. */ 681 682void 683invert_br_probabilities (rtx insn) 684{ 685 rtx note; 686 687 for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) 688 if (REG_NOTE_KIND (note) == REG_BR_PROB) 689 XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0); 690 else if (REG_NOTE_KIND (note) == REG_BR_PRED) 691 XEXP (XEXP (note, 0), 1) 692 = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1))); 693} 694 695/* Dump information about the branch prediction to the output file. */ 696 697static void 698dump_prediction (FILE *file, enum br_predictor predictor, int probability, 699 basic_block bb, int used) 700{ 701 edge e; 702 edge_iterator ei; 703 704 if (!file) 705 return; 706 707 FOR_EACH_EDGE (e, ei, bb->succs) 708 if (! (e->flags & EDGE_FALLTHRU)) 709 break; 710 711 fprintf (file, " %s heuristics%s: %.1f%%", 712 predictor_info[predictor].name, 713 used ? 
"" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE); 714 715 if (bb->count) 716 { 717 fprintf (file, " exec %"PRId64, bb->count); 718 if (e) 719 { 720 fprintf (file, " hit %"PRId64, e->count); 721 fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count); 722 } 723 } 724 725 fprintf (file, "\n"); 726} 727 728/* We can not predict the probabilities of outgoing edges of bb. Set them 729 evenly and hope for the best. */ 730static void 731set_even_probabilities (basic_block bb) 732{ 733 int nedges = 0; 734 edge e; 735 edge_iterator ei; 736 737 FOR_EACH_EDGE (e, ei, bb->succs) 738 if (!(e->flags & (EDGE_EH | EDGE_FAKE))) 739 nedges ++; 740 FOR_EACH_EDGE (e, ei, bb->succs) 741 if (!(e->flags & (EDGE_EH | EDGE_FAKE))) 742 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges; 743 else 744 e->probability = 0; 745} 746 747/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB 748 note if not already present. Remove now useless REG_BR_PRED notes. */ 749 750static void 751combine_predictions_for_insn (rtx_insn *insn, basic_block bb) 752{ 753 rtx prob_note; 754 rtx *pnote; 755 rtx note; 756 int best_probability = PROB_EVEN; 757 enum br_predictor best_predictor = END_PREDICTORS; 758 int combined_probability = REG_BR_PROB_BASE / 2; 759 int d; 760 bool first_match = false; 761 bool found = false; 762 763 if (!can_predict_insn_p (insn)) 764 { 765 set_even_probabilities (bb); 766 return; 767 } 768 769 prob_note = find_reg_note (insn, REG_BR_PROB, 0); 770 pnote = ®_NOTES (insn); 771 if (dump_file) 772 fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn), 773 bb->index); 774 775 /* We implement "first match" heuristics and use probability guessed 776 by predictor with smallest index. 
*/ 777 for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) 778 if (REG_NOTE_KIND (note) == REG_BR_PRED) 779 { 780 enum br_predictor predictor = ((enum br_predictor) 781 INTVAL (XEXP (XEXP (note, 0), 0))); 782 int probability = INTVAL (XEXP (XEXP (note, 0), 1)); 783 784 found = true; 785 if (best_predictor > predictor) 786 best_probability = probability, best_predictor = predictor; 787 788 d = (combined_probability * probability 789 + (REG_BR_PROB_BASE - combined_probability) 790 * (REG_BR_PROB_BASE - probability)); 791 792 /* Use FP math to avoid overflows of 32bit integers. */ 793 if (d == 0) 794 /* If one probability is 0% and one 100%, avoid division by zero. */ 795 combined_probability = REG_BR_PROB_BASE / 2; 796 else 797 combined_probability = (((double) combined_probability) * probability 798 * REG_BR_PROB_BASE / d + 0.5); 799 } 800 801 /* Decide which heuristic to use. In case we didn't match anything, 802 use no_prediction heuristic, in case we did match, use either 803 first match or Dempster-Shaffer theory depending on the flags. 
*/ 804 805 if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH) 806 first_match = true; 807 808 if (!found) 809 dump_prediction (dump_file, PRED_NO_PREDICTION, 810 combined_probability, bb, true); 811 else 812 { 813 dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, 814 bb, !first_match); 815 dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, 816 bb, first_match); 817 } 818 819 if (first_match) 820 combined_probability = best_probability; 821 dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true); 822 823 while (*pnote) 824 { 825 if (REG_NOTE_KIND (*pnote) == REG_BR_PRED) 826 { 827 enum br_predictor predictor = ((enum br_predictor) 828 INTVAL (XEXP (XEXP (*pnote, 0), 0))); 829 int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1)); 830 831 dump_prediction (dump_file, predictor, probability, bb, 832 !first_match || best_predictor == predictor); 833 *pnote = XEXP (*pnote, 1); 834 } 835 else 836 pnote = &XEXP (*pnote, 1); 837 } 838 839 if (!prob_note) 840 { 841 add_int_reg_note (insn, REG_BR_PROB, combined_probability); 842 843 /* Save the prediction into CFG in case we are seeing non-degenerated 844 conditional jump. */ 845 if (!single_succ_p (bb)) 846 { 847 BRANCH_EDGE (bb)->probability = combined_probability; 848 FALLTHRU_EDGE (bb)->probability 849 = REG_BR_PROB_BASE - combined_probability; 850 } 851 } 852 else if (!single_succ_p (bb)) 853 { 854 int prob = XINT (prob_note, 0); 855 856 BRANCH_EDGE (bb)->probability = prob; 857 FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob; 858 } 859 else 860 single_succ_edge (bb)->probability = REG_BR_PROB_BASE; 861} 862 863/* Combine predictions into single probability and store them into CFG. 864 Remove now useless prediction entries. 
*/

static void
combine_predictions_for_bb (basic_block bb)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;

  /* Collect the first two non-EH, non-fake successors.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	nedges ++;
	if (first && !second)
	  second = e;
	if (!first)
	  first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count)
	set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
		 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  edge_prediction **preds = bb_predictions->get (bb);
  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
	 by predictor with smallest index.  */
      for (pred = *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  /* Normalize all probabilities to the FIRST edge's direction.  */
	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor)
	    {
	      struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds;
		   pred2; pred2 = pred2->ep_next)
		if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
		  {
		    /* BUGFIX: read the duplicate prediction's probability
		       from PRED2, not PRED -- the original compared PRED
		       against itself, so conflicting predictions made by
		       the same predictor were never detected.  */
		    int probability2 = pred2->ep_probability;

		    if (pred2->ep_edge != first)
		      probability2 = REG_BR_PROB_BASE - probability2;

		    /* Same predictor predicting both directions: do not
		       trust it for first-match at all.  */
		    if ((probability < REG_BR_PROB_BASE / 2) !=
			(probability2 < REG_BR_PROB_BASE / 2))
		      break;

		    /* If the same predictor later gave better result, go for it! */
		    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
			|| (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
		      prob = probability2;
		  }
	      if (!pred2)
		best_probability = prob, best_predictor = predictor;
	    }

	  /* Dempster-Shaffer combination of the individual predictions.  */
	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
		    		    * REG_BR_PROB_BASE / d + 0.5);
	}
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shaffer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
		       !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
		       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
	    probability = REG_BR_PROB_BASE - probability;
	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	}
    }
  clear_bb_predictions (bb);

  /* Only store the result when no real profile counts exist for BB.  */
  if (!bb->count)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}

/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition satisfies, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3.
   T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      /* Two SSA_NAMEs means neither operand is the small constant.  */
      if (ret)
	return NULL;
      else
	ret = t2;
    }

  /* Only accept constants in the [-4, 4] range (or no constant at all,
     in which case VALUE is still 0).  */
  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}

/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    /* NOTE(review): after the BINARY_CLASS_P check above, operand
       length is presumably always 2, so the "case 1" arm looks
       unreachable -- confirm before relying on it.  */
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}

/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.
*/

static bool
is_comparison_with_loop_invariant_p (gcond *stmt, struct loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  /* Only ordinary comparisons are handled.  */
  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  /* Exactly one side must actually evolve (non-zero step): that side is
     the induction variable, the other is the loop invariant.  */
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      /* The invariant is on the left-hand side; swap the comparison so
	 the IV is conceptually on the left (except for EQ/NE, which are
	 symmetric).  */
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  /* Reduce non-constant bound/base to the underlying SSA_NAME (possibly
     offset by a small constant) via get_base_value.  */
  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}

/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  /* Any two constants are considered coherent; a constant and an
     SSA_NAME are not.  */
  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
	return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
	return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}

/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

   In this loop, we will predict the branch inside the loop to be taken.
 */

static void
predict_iv_comparison (struct loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  gimple stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  /* Do not override more reliable predictors already applied to BB.  */
  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
      || predicted_by_p (bb, PRED_LOOP_EXIT))
    return;

  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (as_a <gcond *> (stmt),
					    loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  /* The compare must run over the same IV base as the loop bound
     comparison for the heuristics below to make sense.  */
  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      bool overflow, overall_overflow = false;
      widest_int compare_count, tem;

      /* (loop_bound - base) / compare_step */
      tem = wi::sub (wi::to_widest (loop_bound_var),
		     wi::to_widest (compare_base), SIGNED, &overflow);
      overall_overflow |= overflow;
      widest_int loop_count = wi::div_trunc (tem,
					     wi::to_widest (compare_step_var),
					     SIGNED, &overflow);
      overall_overflow |= overflow;

      /* Choose which sub-range of the iteration space satisfies the
	 compare, depending on the direction of the step relative to
	 the comparison code.  */
      if (!wi::neg_p (wi::to_widest (compare_step_var))
          ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = wi::sub (wi::to_widest (loop_bound_var),
			 wi::to_widest (compare_var), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      else
        {
	  /* (compare_bound - base) / compare_step */
	  tem = wi::sub (wi::to_widest (compare_var),
			 wi::to_widest (compare_base), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      /* Inclusive comparisons cover one more iteration.  */
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      /* Clamp negative counts to zero before forming the ratio.  */
      if (wi::neg_p (compare_count))
        compare_count = 0;
      if (wi::neg_p (loop_count))
        loop_count = 0;
      if (loop_count == 0)
	probability = 0;
      else if (wi::cmps (compare_count, loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
        {
	  /* Probability = compare_count / loop_count, scaled to
	     REG_BR_PROB_BASE.  */
	  tem = compare_count * REG_BR_PROB_BASE;
	  tem = wi::udiv_trunc (tem, loop_count);
	  probability = tem.to_uhwi ();
	}

      /* Only trust the result if no intermediate step overflowed.  */
      if (!overall_overflow)
        predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      /* The compare bound tracks the loop bound: same direction means
	 the branch is taken on most iterations.  */
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite with compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}

/* Predict for extra loop exits that will lead to EXIT_EDGE.  The extra loop
   exits result from short-circuit conditions that will generate an
   if_tmp.  E.g.:

   if (foo() || global > 10)
     break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra loop
   exits to predict them using PRED_LOOP_EXIT.
 */

static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple lhs_def_stmt;
  gphi *phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple last;
  gcond *cmp_stmt;

  last = last_stmt (exit_edge->src);
  if (!last)
    return;
  cmp_stmt = dyn_cast <gcond *> (last);
  if (!cmp_stmt)
    return;

  /* Only handle conditions of the shape "iftmp ==/!= 0/1" where iftmp
     is an SSA_NAME fed by a PHI of boolean constants.  */
  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  The three XOR-ed terms account for the compared constant,
     the comparison code, and which successor edge is the exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
		      ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		     ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!lhs_def_stmt)
    return;

  phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
  if (!phi_stmt)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      /* Skip PHI arguments that cannot cause the exit.  */
      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      /* If the feeding block has several successors, it is itself the
	 deciding branch; predict the paths to it.  Otherwise the real
	 branches are one level up, in its predecessors.  */
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
    }
}

/* Predict edge probabilities by exploiting loop structure.
 */

static void
predict_loops (void)
{
  struct loop *loop;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gcond *stmt = NULL;

      exits = get_loop_exit_edges (loop);
      n_exits = exits.length ();
      if (!n_exits)
	{
          exits.release ();
	  continue;
	}

      /* Predict each exit edge from the estimated iteration count.  */
      FOR_EACH_VEC_ELT (exits, j, ex)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;

	  predict_extra_loop_exits (ex);

	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      /* Use the exact count when it fits below the cap,
		 otherwise clamp to PARAM_MAX_PREDICTED_ITERATIONS.  */
	      if (tree_fits_uhwi_p (niter)
		  && max
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_to_uhwi (niter) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1)
	    {
	      nitercst = estimated_stmt_executions_int (loop);
	      if (nitercst < 0)
		continue;
	      if (nitercst > max)
		nitercst = max;

	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  else
	    continue;

	  /* If the prediction for number of iterations is zero, do not
	     predict the exit edges.  */
	  if (nitercst == 0)
	    continue;

	  /* Probability of exiting ~ 1/nitercst, rounded.  */
	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
	  predict_edge (ex, predictor, probability);
	}
      exits.release ();

      /* Find information about loop bound variables.  */
      for (nb_iter = loop->bounds; nb_iter;
	   nb_iter = nb_iter->next)
	if (nb_iter->stmt
	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
	  {
	    stmt = as_a <gcond *> (nb_iter->stmt);
	    break;
	  }
      if (!stmt && last_stmt (loop->header)
	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
	stmt = as_a <gcond *> (last_stmt (loop->header));
      if (stmt)
	is_comparison_with_loop_invariant_p (stmt, loop,
					     &loop_bound_var,
					     &loop_bound_code,
					     &loop_bound_step,
					     &loop_iv_base);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
	{
	  int header_found = 0;
	  edge e;
	  edge_iterator ei;

	  bb = bbs[j];

	  /* Bypass loop heuristics on continue statement.  These
	     statements construct loops via "non-loop" constructs
	     in the source language and are better to be handled
	     separately.  */
	  if (predicted_by_p (bb, PRED_CONTINUE))
	    continue;

	  /* Loop branch heuristics - predict an edge back to a
	     loop's head as taken.  */
	  if (bb == loop->latch)
	    {
	      e = find_edge (loop->latch, loop->header);
	      if (e)
		{
		  header_found = 1;
		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
		}
	    }

	  /* Loop exit heuristics - predict an edge exiting the loop if the
	     conditional has no loop header successors as not taken.  */
	  if (!header_found
	      /* If we already used more reliable loop exit predictors, do not
		 bother with PRED_LOOP_EXIT.  */
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
	    {
	      /* For loop with many exits we don't want to predict all exits
	         with the pretty large probability, because if all exits are
		 considered in row, the loop would be predicted to iterate
		 almost never.  The code to divide probability by number of
		 exits is very rough.  It should compute the number of exits
		 taken in each patch through function (not the overall number
		 of exits that might be a lot higher for loops with wide switch
		 statements in them) and compute n-th square root.

		 We limit the minimal probability by 2% to avoid
		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
		 as this was causing regression in perl benchmark containing such
		 a wide loop.  */

	      int probability = ((REG_BR_PROB_BASE
		                  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge (e, PRED_LOOP_EXIT, probability);
	    }
	  if (loop_bound_var)
	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
				   loop_bound_code,
				   tree_to_shwi (loop_bound_step));
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}

/* Attempt to predict probabilities of BB outgoing edges using local
   properties of the RTL condition ending BB.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx_insn *last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
	/* Unconditional branch.  */
	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
	break;

      case EQ:
      case UNEQ:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE:
      case LTGT:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	break;

      case UNORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	break;

      case LE:
      case LT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE:
      case GT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}

/* Set edge->probability for each successor edge of BB.  */
void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}

static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);

/* Helper function for expr_expected_value.  Walks the defining
   statements of OP0 (and OP1 for binary CODEs) to find a constant the
   expression is expected to evaluate to, tracking the weakest
   predictor met along the way in *PREDICTOR.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code,
		       tree op1, bitmap visited, enum br_predictor *predictor)
{
  gimple def;

  if (predictor)
    *predictor = PRED_UNCONDITIONAL;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
	return op0;

      if (code != SSA_NAME)
	return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
	return NULL;

      if (gimple_code (def) == GIMPLE_PHI)
	{
	  /* All the arguments of the PHI node must have the same constant
	     length.  */
	  int i, n = gimple_phi_num_args (def);
	  tree val = NULL, new_val;

	  for (i = 0; i < n; i++)
	    {
	      tree arg = PHI_ARG_DEF (def, i);
	      enum br_predictor predictor2;

	      /* If this PHI has itself as an argument, we cannot
		 determine the string length of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
	      if (arg == PHI_RESULT (def))
		continue;

	      new_val = expr_expected_value (arg, visited, &predictor2);

	      /* It is difficult to combine value predictors.  Simply assume
		 that later predictor is weaker and take its prediction.  */
	      if (predictor && *predictor < predictor2)
		*predictor = predictor2;
	      if (!new_val)
		return NULL;
	      if (!val)
		val = new_val;
	      else if (!operand_equal_p (val, new_val, false))
		return NULL;
	    }
	  return val;
	}
      if (is_gimple_assign (def))
	{
	  if (gimple_assign_lhs (def) != op0)
	    return NULL;

	  /* Recurse into the RHS of the defining assignment.  */
	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
					gimple_assign_rhs1 (def),
					gimple_assign_rhs_code (def),
					gimple_assign_rhs2 (def),
					visited, predictor);
	}

      if (is_gimple_call (def))
	{
	  tree decl = gimple_call_fndecl (def);
	  if (!decl)
	    {
	      /* An internal IFN_BUILTIN_EXPECT carries the expected value
		 as arg 1 and the predictor number as arg 2.  */
	      if (gimple_call_internal_p (def)
		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
		{
		  gcc_assert (gimple_call_num_args (def) == 3);
		  tree val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    {
		      tree val2 = gimple_call_arg (def, 2);
		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
				  && tree_fits_uhwi_p (val2)
				  && tree_to_uhwi (val2) < END_PREDICTORS);
		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
		    }
		  return gimple_call_arg (def, 1);
		}

	      return NULL;
	    }
	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	    switch (DECL_FUNCTION_CODE (decl))
	      {
	      case BUILT_IN_EXPECT:
		{
		  tree val;
		  if (gimple_call_num_args (def) != 2)
		    return NULL;
		  val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    *predictor = PRED_BUILTIN_EXPECT;
		  return gimple_call_arg (def, 1);
		}

	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
		/* Assume that any given atomic operation has low contention,
		   and thus the compare-and-swap operation succeeds.  */
		if (predictor)
		  *predictor = PRED_COMPARE_AND_SWAP;
		return boolean_true_node;
	      default:
		break;
	      }
	}

      return NULL;
    }

  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      tree res;
      enum br_predictor predictor2;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      op1 = expr_expected_value (op1, visited, &predictor2);
      if (predictor && *predictor < predictor2)
	*predictor = predictor2;
      if (!op1)
	return NULL;
      /* Fold the operation on the expected operand values; only a
	 constant result is usable.  */
      res = fold_build2 (code, type, op0, op1);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      res = fold_build1 (code, type, op0);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  return NULL;
}

/* Return constant EXPR will likely have at execution time, NULL if unknown.
   The function is used by builtin_expect branch predictor so the evidence
   must come from this construct and additional possible constant folding.

   We may want to implement more involved value guess (such as value range
   propagation based prediction), but such tricks shall go to new
   implementation.  */

static tree
expr_expected_value (tree expr, bitmap visited,
		     enum br_predictor *predictor)
{
  enum tree_code code;
  tree op0, op1;

  if (TREE_CONSTANT (expr))
    {
      if (predictor)
	*predictor = PRED_UNCONDITIONAL;
      return expr;
    }

  extract_ops_from_tree (expr, &code, &op0, &op1);
  return expr_expected_value_1 (TREE_TYPE (expr),
				op0, code, op1, visited, predictor);
}

/* Predict using opcode of the last statement in basic block.
 */
static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;
  enum br_predictor predictor;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  /* Find the edge taken when the condition is true.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
			       &predictor);
  BITMAP_FREE (visited);
  if (val && TREE_CODE (val) == INTEGER_CST)
    {
      if (predictor == PRED_BUILTIN_EXPECT)
	{
	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);

	  gcc_assert (percent >= 0 && percent <= 100);
	  if (integer_zerop (val))
	    percent = 100 - percent;
	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
	}
      else
	predict_edge (then_edge, predictor,
		      integer_zerop (val) ? NOT_TAKEN : TAKEN);
    }
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0) || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
      case LTGT_EXPR:
	/* Floating point comparisons appears to behave in a very
	   unpredictable way because of special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}

/* Try to guess whether the value of return means error code.
*/ 2085 2086static enum br_predictor 2087return_prediction (tree val, enum prediction *prediction) 2088{ 2089 /* VOID. */ 2090 if (!val) 2091 return PRED_NO_PREDICTION; 2092 /* Different heuristics for pointers and scalars. */ 2093 if (POINTER_TYPE_P (TREE_TYPE (val))) 2094 { 2095 /* NULL is usually not returned. */ 2096 if (integer_zerop (val)) 2097 { 2098 *prediction = NOT_TAKEN; 2099 return PRED_NULL_RETURN; 2100 } 2101 } 2102 else if (INTEGRAL_TYPE_P (TREE_TYPE (val))) 2103 { 2104 /* Negative return values are often used to indicate 2105 errors. */ 2106 if (TREE_CODE (val) == INTEGER_CST 2107 && tree_int_cst_sgn (val) < 0) 2108 { 2109 *prediction = NOT_TAKEN; 2110 return PRED_NEGATIVE_RETURN; 2111 } 2112 /* Constant return values seems to be commonly taken. 2113 Zero/one often represent booleans so exclude them from the 2114 heuristics. */ 2115 if (TREE_CONSTANT (val) 2116 && (!integer_zerop (val) && !integer_onep (val))) 2117 { 2118 *prediction = TAKEN; 2119 return PRED_CONST_RETURN; 2120 } 2121 } 2122 return PRED_NO_PREDICTION; 2123} 2124 2125/* Find the basic block with return expression and look up for possible 2126 return value trying to apply RETURN_PREDICTION heuristics. 
static void
apply_return_prediction (void)
{
  greturn *return_stmt = NULL;
  tree return_val;
  edge e;
  gphi *phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  /* Find a block ending in GIMPLE_RETURN among the exit predecessors;
     E is NULL after the loop if there is none.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      gimple last = last_stmt (e->src);
      if (last
	  && gimple_code (last) == GIMPLE_RETURN)
	{
	  return_stmt = as_a <greturn *> (last);
	  break;
	}
    }
  if (!e)
    return;
  return_val = gimple_return_retval (return_stmt);
  if (!return_val)
    return;
  /* Only a PHI-defined return value lets us predict the individual
     incoming paths separately.  */
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
    return;
  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
  phi_num_args = gimple_phi_num_args (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values from the function
     belong to same category (ie they are all positive constants)
     so we can hardly say something about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
	if (pred != PRED_NO_PREDICTION)
	  predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
					 direction);
      }
}

/* Look for basic blocks that contain unlikely to happen events
   (such as noreturn calls) and mark all paths leading to execution
   of these basic blocks as unlikely.
*/

static void
tree_bb_level_predictions (void)
{
  basic_block bb;
  bool has_return_edges = false;
  edge e;
  edge_iterator ei;

  /* Check whether the function has any genuine (non-abnormal) return
     edge; a function that never returns normally gets no PRED_NORETURN
     predictions below.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
      {
        has_return_edges = true;
        break;
      }

  apply_return_prediction ();

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree decl;

          if (is_gimple_call (stmt))
            {
              /* Paths reaching a noreturn call are unlikely when the
                 function as a whole can return normally.  */
              if ((gimple_call_flags (stmt) & ECF_NORETURN)
                  && has_return_edges)
                predict_paths_leading_to (bb, PRED_NORETURN,
                                          NOT_TAKEN);
              /* Calls to functions carrying the "cold" attribute are
                 unlikely to be executed.  */
              decl = gimple_call_fndecl (stmt);
              if (decl
                  && lookup_attribute ("cold",
                                       DECL_ATTRIBUTES (decl)))
                predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
                                          NOT_TAKEN);
            }
          else if (gimple_code (stmt) == GIMPLE_PREDICT)
            {
              predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
                                        gimple_predict_outcome (stmt));
              /* Keep GIMPLE_PREDICT around so early inlining will propagate
                 hints to callers.  */
            }
        }
    }
}

#ifdef ENABLE_CHECKING

/* Callback for hash_map::traverse, asserts that the pointer map is
   empty.  */

bool
assert_is_empty (const_basic_block const &, edge_prediction *const &value,
                 void *)
{
  gcc_assert (!value);
  return false;
}
#endif

/* Predict branch probabilities and estimate profile for basic block BB.  */

static void
tree_estimate_probability_bb (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple last;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      /* Predict edges to user labels with attributes.  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
        {
          gimple_stmt_iterator gi;
          for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
            {
              glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
              tree decl;

              /* Labels appear only at the start of a block; stop at the
                 first non-label statement.  */
              if (!label_stmt)
                break;
              decl = gimple_label_label (label_stmt);
              if (DECL_ARTIFICIAL (decl))
                continue;

              /* Finally, we have a user-defined label.  */
              if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
                predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
              else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
                predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
            }
        }

      /* Predict early returns to be probable, as we've already taken
         care for error returns and other cases are often used for
         fast paths through function.

         Since we've already removed the return statements, we are
         looking for CFG like:

         if (conditional)
           {
             ..
             goto return_block
           }
         some other blocks
         return_block:
         return_stmt.  */
      if (e->dest != bb->next_bb
          && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
          && single_succ_p (e->dest)
          && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
          && (last = last_stmt (e->dest)) != NULL
          && gimple_code (last) == GIMPLE_RETURN)
        {
          edge e1;
          edge_iterator ei1;

          if (single_succ_p (bb))
            {
              FOR_EACH_EDGE (e1, ei1, bb->preds)
                if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
                    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
                    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
                  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
            }
          else
            if (!predicted_by_p (e->src, PRED_NULL_RETURN)
                && !predicted_by_p (e->src, PRED_CONST_RETURN)
                && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
              predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
        }

      /* Look for block we are guarding (ie we dominate it,
         but it doesn't postdominate us).  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
          && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
          && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
        {
          gimple_stmt_iterator bi;

          /* The call heuristic claims that a guarded function call
             is improbable.  This is because such calls are often used
             to signal exceptional situations such as printing error
             messages.  */
          for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
               gsi_next (&bi))
            {
              gimple stmt = gsi_stmt (bi);
              if (is_gimple_call (stmt)
                  /* Constant and pure calls are hardly used to signalize
                     something exceptional.  */
                  && gimple_has_side_effects (stmt))
                {
                  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
                  break;
                }
            }
        }
    }
  tree_predict_by_opcode (bb);
}

/* Predict branch probabilities and estimate profile of the tree CFG.
   This function can be called from the loop optimizers to recompute
   the profile information.  */

void
tree_estimate_probability (void)
{
  basic_block bb;

  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();
  /* We use loop_niter_by_eval, which requires that the loops have
     preheaders.  */
  create_preheaders (CP_SIMPLE_PREHEADERS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
  tree_bb_level_predictions ();
  record_loop_exits ();

  if (number_of_loops (cfun) > 1)
    predict_loops ();

  FOR_EACH_BB_FN (bb, cfun)
    tree_estimate_probability_bb (bb);

  FOR_EACH_BB_FN (bb, cfun)
    combine_predictions_for_bb (bb);

#ifdef ENABLE_CHECKING
  /* Every recorded prediction must have been consumed by
     combine_predictions_for_bb above.  */
  bb_predictions->traverse<void *, assert_is_empty> (NULL);
#endif
  delete bb_predictions;
  bb_predictions = NULL;

  estimate_bb_frequencies (false);
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
}

/* Predict edges to successors of CUR whose sources are not postdominated by
   BB by PRED and recurse to all postdominators.  */

static void
predict_paths_for_bb (basic_block cur, basic_block bb,
                      enum br_predictor pred,
                      enum prediction taken,
                      bitmap visited)
{
  edge e;
  edge_iterator ei;
  basic_block son;

  /* We are looking for all edges forming edge cut induced by
     set of all blocks postdominated by BB.  */
  FOR_EACH_EDGE (e, ei, cur->preds)
    if (e->src->index >= NUM_FIXED_BLOCKS
        && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
      {
        edge e2;
        edge_iterator ei2;
        bool found = false;

        /* Ignore fake edges and eh, we predict them as not taken anyway.  */
        if (e->flags & (EDGE_EH | EDGE_FAKE))
          continue;
        gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));

        /* See if there is an edge from e->src that is not abnormal
           and does not lead to BB.  */
        FOR_EACH_EDGE (e2, ei2, e->src->succs)
          if (e2 != e
              && !(e2->flags & (EDGE_EH | EDGE_FAKE))
              && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
            {
              found = true;
              break;
            }

        /* If there is non-abnormal path leaving e->src, predict edge
           using predictor.  Otherwise we need to look for paths
           leading to e->src.

           The second may lead to infinite loop in the case we are predicting
           regions that are only reachable by abnormal edges.  We simply
           prevent visiting given BB twice.  */
        if (found)
          predict_edge_def (e, pred, taken);
        else if (bitmap_set_bit (visited, e->src->index))
          predict_paths_for_bb (e->src, e->src, pred, taken, visited);
      }
  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    predict_paths_for_bb (son, bb, pred, taken, visited);
}

/* Sets branch probabilities according to PREDiction and
   FLAGS.  */

static void
predict_paths_leading_to (basic_block bb, enum br_predictor pred,
                          enum prediction taken)
{
  bitmap visited = BITMAP_ALLOC (NULL);
  predict_paths_for_bb (bb, bb, pred, taken, visited);
  BITMAP_FREE (visited);
}

/* Like predict_paths_leading_to but take edge instead of basic block.
*/ 2460 2461static void 2462predict_paths_leading_to_edge (edge e, enum br_predictor pred, 2463 enum prediction taken) 2464{ 2465 bool has_nonloop_edge = false; 2466 edge_iterator ei; 2467 edge e2; 2468 2469 basic_block bb = e->src; 2470 FOR_EACH_EDGE (e2, ei, bb->succs) 2471 if (e2->dest != e->src && e2->dest != e->dest 2472 && !(e->flags & (EDGE_EH | EDGE_FAKE)) 2473 && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest)) 2474 { 2475 has_nonloop_edge = true; 2476 break; 2477 } 2478 if (!has_nonloop_edge) 2479 { 2480 bitmap visited = BITMAP_ALLOC (NULL); 2481 predict_paths_for_bb (bb, bb, pred, taken, visited); 2482 BITMAP_FREE (visited); 2483 } 2484 else 2485 predict_edge_def (e, pred, taken); 2486} 2487 2488/* This is used to carry information about basic blocks. It is 2489 attached to the AUX field of the standard CFG block. */ 2490 2491struct block_info 2492{ 2493 /* Estimated frequency of execution of basic_block. */ 2494 sreal frequency; 2495 2496 /* To keep queue of basic blocks to process. */ 2497 basic_block next; 2498 2499 /* Number of predecessors we need to visit first. */ 2500 int npredecessors; 2501}; 2502 2503/* Similar information for edges. */ 2504struct edge_prob_info 2505{ 2506 /* In case edge is a loopback edge, the probability edge will be reached 2507 in case header is. Estimated number of iterations of the loop can be 2508 then computed as 1 / (1 - back_edge_prob). */ 2509 sreal back_edge_prob; 2510 /* True if the edge is a loopback edge in the natural loop. */ 2511 unsigned int back_edge:1; 2512}; 2513 2514#define BLOCK_INFO(B) ((block_info *) (B)->aux) 2515#undef EDGE_INFO 2516#define EDGE_INFO(E) ((edge_prob_info *) (E)->aux) 2517 2518/* Helper function for estimate_bb_frequencies. 2519 Propagate the frequencies in blocks marked in 2520 TOVISIT, starting in HEAD. 
*/ 2521 2522static void 2523propagate_freq (basic_block head, bitmap tovisit) 2524{ 2525 basic_block bb; 2526 basic_block last; 2527 unsigned i; 2528 edge e; 2529 basic_block nextbb; 2530 bitmap_iterator bi; 2531 2532 /* For each basic block we need to visit count number of his predecessors 2533 we need to visit first. */ 2534 EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi) 2535 { 2536 edge_iterator ei; 2537 int count = 0; 2538 2539 bb = BASIC_BLOCK_FOR_FN (cfun, i); 2540 2541 FOR_EACH_EDGE (e, ei, bb->preds) 2542 { 2543 bool visit = bitmap_bit_p (tovisit, e->src->index); 2544 2545 if (visit && !(e->flags & EDGE_DFS_BACK)) 2546 count++; 2547 else if (visit && dump_file && !EDGE_INFO (e)->back_edge) 2548 fprintf (dump_file, 2549 "Irreducible region hit, ignoring edge to %i->%i\n", 2550 e->src->index, bb->index); 2551 } 2552 BLOCK_INFO (bb)->npredecessors = count; 2553 /* When function never returns, we will never process exit block. */ 2554 if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) 2555 bb->count = bb->frequency = 0; 2556 } 2557 2558 BLOCK_INFO (head)->frequency = 1; 2559 last = head; 2560 for (bb = head; bb; bb = nextbb) 2561 { 2562 edge_iterator ei; 2563 sreal cyclic_probability = 0; 2564 sreal frequency = 0; 2565 2566 nextbb = BLOCK_INFO (bb)->next; 2567 BLOCK_INFO (bb)->next = NULL; 2568 2569 /* Compute frequency of basic block. 
*/ 2570 if (bb != head) 2571 { 2572#ifdef ENABLE_CHECKING 2573 FOR_EACH_EDGE (e, ei, bb->preds) 2574 gcc_assert (!bitmap_bit_p (tovisit, e->src->index) 2575 || (e->flags & EDGE_DFS_BACK)); 2576#endif 2577 2578 FOR_EACH_EDGE (e, ei, bb->preds) 2579 if (EDGE_INFO (e)->back_edge) 2580 { 2581 cyclic_probability += EDGE_INFO (e)->back_edge_prob; 2582 } 2583 else if (!(e->flags & EDGE_DFS_BACK)) 2584 { 2585 /* frequency += (e->probability 2586 * BLOCK_INFO (e->src)->frequency / 2587 REG_BR_PROB_BASE); */ 2588 2589 sreal tmp = e->probability; 2590 tmp *= BLOCK_INFO (e->src)->frequency; 2591 tmp *= real_inv_br_prob_base; 2592 frequency += tmp; 2593 } 2594 2595 if (cyclic_probability == 0) 2596 { 2597 BLOCK_INFO (bb)->frequency = frequency; 2598 } 2599 else 2600 { 2601 if (cyclic_probability > real_almost_one) 2602 cyclic_probability = real_almost_one; 2603 2604 /* BLOCK_INFO (bb)->frequency = frequency 2605 / (1 - cyclic_probability) */ 2606 2607 cyclic_probability = sreal (1) - cyclic_probability; 2608 BLOCK_INFO (bb)->frequency = frequency / cyclic_probability; 2609 } 2610 } 2611 2612 bitmap_clear_bit (tovisit, bb->index); 2613 2614 e = find_edge (bb, head); 2615 if (e) 2616 { 2617 /* EDGE_INFO (e)->back_edge_prob 2618 = ((e->probability * BLOCK_INFO (bb)->frequency) 2619 / REG_BR_PROB_BASE); */ 2620 2621 sreal tmp = e->probability; 2622 tmp *= BLOCK_INFO (bb)->frequency; 2623 EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base; 2624 } 2625 2626 /* Propagate to successor blocks. */ 2627 FOR_EACH_EDGE (e, ei, bb->succs) 2628 if (!(e->flags & EDGE_DFS_BACK) 2629 && BLOCK_INFO (e->dest)->npredecessors) 2630 { 2631 BLOCK_INFO (e->dest)->npredecessors--; 2632 if (!BLOCK_INFO (e->dest)->npredecessors) 2633 { 2634 if (!nextbb) 2635 nextbb = e->dest; 2636 else 2637 BLOCK_INFO (last)->next = e->dest; 2638 2639 last = e->dest; 2640 } 2641 } 2642 } 2643} 2644 2645/* Estimate frequencies in loops at same nest level. 
static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      /* Handle nested loops first so their back edge probabilities are
         known when this loop's body is processed.  */
      estimate_loops_at_level (loop->inner);

      /* Find current loop back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
        bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}

/* Propagates frequencies through structure of loops.  */

static void
estimate_loops (void)
{
  bitmap tovisit = BITMAP_ALLOC (NULL);
  basic_block bb;

  /* Start by estimating the frequencies in the loops.  */
  if (number_of_loops (cfun) > 1)
    estimate_loops_at_level (current_loops->tree_root->inner);

  /* Now propagate the frequencies through all the blocks.  */
  FOR_ALL_BB_FN (bb, cfun)
    {
      bitmap_set_bit (tovisit, bb->index);
    }
  propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
  BITMAP_FREE (tovisit);
}

/* Drop the profile for NODE to guessed, and update its frequency based on
   whether it is expected to be hot given the CALL_COUNT.  */

static void
drop_profile (struct cgraph_node *node, gcov_type call_count)
{
  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
  /* In the case where this was called by another function with a
     dropped profile, call_count will be 0.  Since there are no
     non-zero call counts to this function, we don't know for sure
     whether it is hot, and therefore it will be marked normal below.  */
  bool hot = maybe_hot_count_p (NULL, call_count);

  if (dump_file)
    fprintf (dump_file,
             "Dropping 0 profile for %s/%i. %s based on calls.\n",
             node->name (), node->order,
             hot ? "Function is hot" : "Function is normal");
  /* We only expect to miss profiles for functions that are reached
     via non-zero call edges in cases where the function may have
     been linked from another module or library (COMDATs and extern
     templates).  See the comments below for handle_missing_profiles.
     Also, only warn in cases where the missing counts exceed the
     number of training runs.  In certain cases with an execv followed
     by a no-return call the profile for the no-return call is not
     dumped and there can be a mismatch.  */
  if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
      && call_count > profile_info->runs)
    {
      if (flag_profile_correction)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Missing counts for called function %s/%i\n",
                     node->name (), node->order);
        }
      else
        warning (0, "Missing counts for called function %s/%i",
                 node->name (), node->order);
    }

  profile_status_for_fn (fn)
      = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
  node->frequency
      = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
}

/* In the case of COMDAT routines, multiple object files will contain the same
   function and the linker will select one for the binary.  In that case
   all the other copies from the profile instrument binary will be missing
   profile counts.  Look for cases where this happened, due to non-zero
   call counts going to 0-count functions, and drop the profile to guessed
   so that we can use the estimated probabilities and avoid optimizing only
   for size.

   The other case where the profile may be missing is when the routine
   is not going to be emitted to the object file, e.g. for "extern template"
   class methods.  Those will be marked DECL_EXTERNAL.  Emit a warning in
   all other cases of non-zero calls to 0-count functions.  */
*/ 2754 2755void 2756handle_missing_profiles (void) 2757{ 2758 struct cgraph_node *node; 2759 int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION); 2760 vec<struct cgraph_node *> worklist; 2761 worklist.create (64); 2762 2763 /* See if 0 count function has non-0 count callers. In this case we 2764 lost some profile. Drop its function profile to PROFILE_GUESSED. */ 2765 FOR_EACH_DEFINED_FUNCTION (node) 2766 { 2767 struct cgraph_edge *e; 2768 gcov_type call_count = 0; 2769 gcov_type max_tp_first_run = 0; 2770 struct function *fn = DECL_STRUCT_FUNCTION (node->decl); 2771 2772 if (node->count) 2773 continue; 2774 for (e = node->callers; e; e = e->next_caller) 2775 { 2776 call_count += e->count; 2777 2778 if (e->caller->tp_first_run > max_tp_first_run) 2779 max_tp_first_run = e->caller->tp_first_run; 2780 } 2781 2782 /* If time profile is missing, let assign the maximum that comes from 2783 caller functions. */ 2784 if (!node->tp_first_run && max_tp_first_run) 2785 node->tp_first_run = max_tp_first_run + 1; 2786 2787 if (call_count 2788 && fn && fn->cfg 2789 && (call_count * unlikely_count_fraction >= profile_info->runs)) 2790 { 2791 drop_profile (node, call_count); 2792 worklist.safe_push (node); 2793 } 2794 } 2795 2796 /* Propagate the profile dropping to other 0-count COMDATs that are 2797 potentially called by COMDATs we already dropped the profile on. 
*/ 2798 while (worklist.length () > 0) 2799 { 2800 struct cgraph_edge *e; 2801 2802 node = worklist.pop (); 2803 for (e = node->callees; e; e = e->next_caller) 2804 { 2805 struct cgraph_node *callee = e->callee; 2806 struct function *fn = DECL_STRUCT_FUNCTION (callee->decl); 2807 2808 if (callee->count > 0) 2809 continue; 2810 if (DECL_COMDAT (callee->decl) && fn && fn->cfg 2811 && profile_status_for_fn (fn) == PROFILE_READ) 2812 { 2813 drop_profile (node, 0); 2814 worklist.safe_push (callee); 2815 } 2816 } 2817 } 2818 worklist.release (); 2819} 2820 2821/* Convert counts measured by profile driven feedback to frequencies. 2822 Return nonzero iff there was any nonzero execution count. */ 2823 2824int 2825counts_to_freqs (void) 2826{ 2827 gcov_type count_max, true_count_max = 0; 2828 basic_block bb; 2829 2830 /* Don't overwrite the estimated frequencies when the profile for 2831 the function is missing. We may drop this function PROFILE_GUESSED 2832 later in drop_profile (). */ 2833 if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count) 2834 return 0; 2835 2836 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) 2837 true_count_max = MAX (bb->count, true_count_max); 2838 2839 count_max = MAX (true_count_max, 1); 2840 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) 2841 bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max; 2842 2843 return true_count_max; 2844} 2845 2846/* Return true if function is likely to be expensive, so there is no point to 2847 optimize performance of prologue, epilogue or do inlining at the expense 2848 of code size growth. THRESHOLD is the limit of number of instructions 2849 function can execute at average to be still considered not expensive. */ 2850 2851bool 2852expensive_function_p (int threshold) 2853{ 2854 unsigned int sum = 0; 2855 basic_block bb; 2856 unsigned int limit; 2857 2858 /* We can not compute accurately for large thresholds due to scaled 2859 frequencies. 
*/ 2860 gcc_assert (threshold <= BB_FREQ_MAX); 2861 2862 /* Frequencies are out of range. This either means that function contains 2863 internal loop executing more than BB_FREQ_MAX times or profile feedback 2864 is available and function has not been executed at all. */ 2865 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0) 2866 return true; 2867 2868 /* Maximally BB_FREQ_MAX^2 so overflow won't happen. */ 2869 limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold; 2870 FOR_EACH_BB_FN (bb, cfun) 2871 { 2872 rtx_insn *insn; 2873 2874 FOR_BB_INSNS (bb, insn) 2875 if (active_insn_p (insn)) 2876 { 2877 sum += bb->frequency; 2878 if (sum > limit) 2879 return true; 2880 } 2881 } 2882 2883 return false; 2884} 2885 2886/* Estimate and propagate basic block frequencies using the given branch 2887 probabilities. If FORCE is true, the frequencies are used to estimate 2888 the counts even when there are already non-zero profile counts. */ 2889 2890void 2891estimate_bb_frequencies (bool force) 2892{ 2893 basic_block bb; 2894 sreal freq_max; 2895 2896 if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ()) 2897 { 2898 static int real_values_initialized = 0; 2899 2900 if (!real_values_initialized) 2901 { 2902 real_values_initialized = 1; 2903 real_br_prob_base = REG_BR_PROB_BASE; 2904 real_bb_freq_max = BB_FREQ_MAX; 2905 real_one_half = sreal (1, -1); 2906 real_inv_br_prob_base = sreal (1) / real_br_prob_base; 2907 real_almost_one = sreal (1) - real_inv_br_prob_base; 2908 } 2909 2910 mark_dfs_back_edges (); 2911 2912 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability = 2913 REG_BR_PROB_BASE; 2914 2915 /* Set up block info for each basic block. 
*/ 2916 alloc_aux_for_blocks (sizeof (block_info)); 2917 alloc_aux_for_edges (sizeof (edge_prob_info)); 2918 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) 2919 { 2920 edge e; 2921 edge_iterator ei; 2922 2923 FOR_EACH_EDGE (e, ei, bb->succs) 2924 { 2925 EDGE_INFO (e)->back_edge_prob = e->probability; 2926 EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base; 2927 } 2928 } 2929 2930 /* First compute frequencies locally for each loop from innermost 2931 to outermost to examine frequencies for back edges. */ 2932 estimate_loops (); 2933 2934 freq_max = 0; 2935 FOR_EACH_BB_FN (bb, cfun) 2936 if (freq_max < BLOCK_INFO (bb)->frequency) 2937 freq_max = BLOCK_INFO (bb)->frequency; 2938 2939 freq_max = real_bb_freq_max / freq_max; 2940 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) 2941 { 2942 sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half; 2943 bb->frequency = tmp.to_int (); 2944 } 2945 2946 free_aux_for_blocks (); 2947 free_aux_for_edges (); 2948 } 2949 compute_function_frequency (); 2950} 2951 2952/* Decide whether function is hot, cold or unlikely executed. 
void
compute_function_frequency (void)
{
  basic_block bb;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);

  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
    node->only_called_at_startup = true;
  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
    node->only_called_at_exit = true;

  /* Without profile feedback, classify purely from declaration
     attributes and special roles (main, static ctors/dtors).  */
  if (profile_status_for_fn (cfun) != PROFILE_READ)
    {
      int flags = flags_from_decl_or_type (current_function_decl);
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
          != NULL)
        node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
               != NULL)
        node->frequency = NODE_FREQUENCY_HOT;
      else if (flags & ECF_NORETURN)
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
               || DECL_STATIC_DESTRUCTOR (current_function_decl))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      return;
    }

  /* Only first time try to drop function into unlikely executed.
     After inlining the roundoff errors may confuse us.
     Ipa-profile pass will drop functions only called from unlikely
     functions to unlikely and that is most of what we care about.  */
  if (!cfun->after_inlining)
    node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB_FN (bb, cfun)
    {
      if (maybe_hot_bb_p (cfun, bb))
        {
          node->frequency = NODE_FREQUENCY_HOT;
          return;
        }
      if (!probably_never_executed_bb_p (cfun, bb))
        node->frequency = NODE_FREQUENCY_NORMAL;
    }
}

/* Build PREDICT_EXPR.  */
tree
build_predict_expr (enum br_predictor predictor, enum prediction taken)
{
  tree t = build1 (PREDICT_EXPR, void_type_node,
                   build_int_cst (integer_type_node, predictor));
  SET_PREDICT_EXPR_OUTCOME (t, taken);
  return t;
}

/* Return the human-readable name of PREDICTOR.  */

const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}

/* Predict branch probabilities and estimate profile of the tree CFG.  */

namespace {

const pass_data pass_data_profile =
{
  GIMPLE_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_profile : public gimple_opt_pass
{
public:
  pass_profile (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_profile, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_guess_branch_prob; }
  virtual unsigned int execute (function *);

}; // class pass_profile

unsigned int
pass_profile::execute (function *fun)
{
  unsigned nb_loops;

  /* Nothing to do when the profile was already guessed.  */
  if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
    return 0;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  /* Scalar evolutions are only needed for loop niter analysis when
     there are real loops.  */
  nb_loops = number_of_loops (fun);
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability ();

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    profile_status_for_fn (fun) = PROFILE_GUESSED;
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_profile (gcc::context *ctxt)
{
  return new pass_profile (ctxt);
}

namespace {

const pass_data pass_data_strip_predict_hints =
{
  GIMPLE_PASS, /* type */
  "*strip_predict_hints", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_strip_predict_hints : public gimple_opt_pass
{
public:
  pass_strip_predict_hints (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
  virtual unsigned int execute (function *);

}; // class pass_strip_predict_hints

/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
   we no longer need.
*/
unsigned int
pass_strip_predict_hints::execute (function *fun)
{
  basic_block bb;
  gimple ass_stmt;
  tree var;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator bi;
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
        {
          gimple stmt = gsi_stmt (bi);

          if (gimple_code (stmt) == GIMPLE_PREDICT)
            {
              gsi_remove (&bi, true);
              continue;
            }
          else if (is_gimple_call (stmt))
            {
              tree fndecl = gimple_call_fndecl (stmt);

              /* Match both __builtin_expect and the internal form.  */
              if ((fndecl
                   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
                   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
                   && gimple_call_num_args (stmt) == 2)
                  || (gimple_call_internal_p (stmt)
                      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
                {
                  /* Replace LHS = __builtin_expect (VAL, EXPECTED) by
                     a plain assignment LHS = VAL; drop the call entirely
                     when the result is unused.  */
                  var = gimple_call_lhs (stmt);
                  if (var)
                    {
                      ass_stmt
                        = gimple_build_assign (var, gimple_call_arg (stmt, 0));
                      gsi_replace (&bi, ass_stmt, true);
                    }
                  else
                    {
                      gsi_remove (&bi, true);
                      continue;
                    }
                }
            }
          gsi_next (&bi);
        }
    }
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_strip_predict_hints (gcc::context *ctxt)
{
  return new pass_strip_predict_hints (ctxt);
}

/* Rebuild function frequencies.  Passes are in general expected to
   maintain profile by hand, however in some cases this is not possible:
   for example when inlining several functions with loops frequencies might run
   out of scale and thus needs to be recomputed.  */

void
rebuild_frequencies (void)
{
  timevar_push (TV_REBUILD_FREQUENCIES);

  /* When the max bb count in the function is small, there is a higher
     chance that there were truncation errors in the integer scaling
     of counts by inlining and other optimizations.  This could lead
     to incorrect classification of code as being cold when it isn't.
     In that case, force the estimation of bb counts/frequencies from the
     branch probabilities, rather than computing frequencies from counts,
     which may also lead to frequencies incorrectly reduced to 0.  There
     is less precision in the probabilities, so we only do this for small
     max counts.  */
  gcov_type count_max = 0;
  basic_block bb;
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    count_max = MAX (bb->count, count_max);

  if (profile_status_for_fn (cfun) == PROFILE_GUESSED
      || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
          && count_max < REG_BR_PROB_BASE/10))
    {
      loop_optimizer_init (0);
      add_noreturn_fake_exit_edges ();
      mark_irreducible_loops ();
      connect_infinite_loops_to_exit ();
      estimate_bb_frequencies (true);
      remove_fake_exit_edges ();
      loop_optimizer_finalize ();
    }
  else if (profile_status_for_fn (cfun) == PROFILE_READ)
    counts_to_freqs ();
  else
    gcc_unreachable ();
  timevar_pop (TV_REBUILD_FREQUENCIES);
}