1/* Tail merging for gimple. 2 Copyright (C) 2011-2015 Free Software Foundation, Inc. 3 Contributed by Tom de Vries (tom@codesourcery.com) 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 3, or (at your option) 10any later version. 11 12GCC is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING3. If not see 19<http://www.gnu.org/licenses/>. */ 20 21/* Pass overview. 22 23 24 MOTIVATIONAL EXAMPLE 25 26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at 27 28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601) 29 { 30 struct FILED.1638 * fpD.2605; 31 charD.1 fileNameD.2604[1000]; 32 intD.0 D.3915; 33 const charD.1 * restrict outputFileName.0D.3914; 34 35 # BLOCK 2 freq:10000 36 # PRED: ENTRY [100.0%] (fallthru,exec) 37 # PT = nonlocal { D.3926 } (restr) 38 outputFileName.0D.3914_3 39 = (const charD.1 * restrict) outputFileNameD.2600_2(D); 40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)> 41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3); 44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13> 45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1); 48 if (D.3915_4 == 0) 49 goto <bb 3>; 50 else 51 goto <bb 4>; 52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec) 53 54 # BLOCK 3 freq:1000 55 # PRED: 2 [10.0%] (true,exec) 56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14> 57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 59 freeD.898 (ctxD.2601_5(D)); 60 goto <bb 7>; 61 # SUCC: 7 [100.0%] (fallthru,exec) 62 63 # BLOCK 4 freq:9000 64 # PRED: 2 [90.0%] (false,exec) 65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14> 66 # PT = nonlocal escaped 67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B); 70 if (fpD.2605_8 == 0B) 71 goto <bb 5>; 72 else 73 goto <bb 6>; 74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec) 75 76 # BLOCK 5 freq:173 77 # PRED: 4 [1.9%] (true,exec) 78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16> 79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 81 freeD.898 (ctxD.2601_5(D)); 82 goto <bb 7>; 83 # SUCC: 7 [100.0%] (fallthru,exec) 84 85 # BLOCK 6 freq:8827 86 # PRED: 4 [98.1%] (false,exec) 87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16> 88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8); 91 # SUCC: 7 [100.0%] (fallthru,exec) 92 93 # BLOCK 7 freq:10000 94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec) 95 6 [100.0%] (fallthru,exec) 96 # PT = nonlocal null 97 98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)> 99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5), 100 .MEMD.3923_18(6)> 101 # VUSE <.MEMD.3923_11> 102 return ctxD.2601_1; 103 # SUCC: EXIT [100.0%] 104 } 105 106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the 107 same successors, and the same operations. 108 109 110 CONTEXT 111 112 A technique called tail merging (or cross jumping) can fix the example 113 above. For a block, we look for common code at the end (the tail) of the 114 predecessor blocks, and insert jumps from one block to the other. 115 The example is a special case for tail merging, in that 2 whole blocks 116 can be merged, rather than just the end parts of it. 117 We currently only focus on whole block merging, so in that sense 118 calling this pass tail merge is a bit of a misnomer. 119 120 We distinguish 2 kinds of situations in which blocks can be merged: 121 - same operations, same predecessors. The successor edges coming from one 122 block are redirected to come from the other block. 123 - same operations, same successors. The predecessor edges entering one block 124 are redirected to enter the other block. Note that this operation might 125 involve introducing phi operations. 126 127 For efficient implementation, we would like to value numbers the blocks, and 128 have a comparison operator that tells us whether the blocks are equal. 129 Besides being runtime efficient, block value numbering should also abstract 130 from irrelevant differences in order of operations, much like normal value 131 numbering abstracts from irrelevant order of operations. 132 133 For the first situation (same_operations, same predecessors), normal value 134 numbering fits well. We can calculate a block value number based on the 135 value numbers of the defs and vdefs. 136 137 For the second situation (same operations, same successors), this approach 138 doesn't work so well. We can illustrate this using the example. The calls 139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will 140 remain different in value numbering, since they represent different memory 141 states. So the resulting vdefs of the frees will be different in value 142 numbering, so the block value numbers will be different. 143 144 The reason why we call the blocks equal is not because they define the same 145 values, but because uses in the blocks use (possibly different) defs in the 146 same way. To be able to detect this efficiently, we need to do some kind of 147 reverse value numbering, meaning number the uses rather than the defs, and 148 calculate a block value number based on the value number of the uses. 149 Ideally, a block comparison operator will also indicate which phis are needed 150 to merge the blocks. 151 152 For the moment, we don't do block value numbering, but we do insn-by-insn 153 matching, using scc value numbers to match operations with results, and 154 structural comparison otherwise, while ignoring vop mismatches. 155 156 157 IMPLEMENTATION 158 159 1. The pass first determines all groups of blocks with the same successor 160 blocks. 161 2. Within each group, it tries to determine clusters of equal basic blocks. 162 3. The clusters are applied. 163 4. The same successor groups are updated. 164 5. This process is repeated from 2 onwards, until no more changes. 165 166 167 LIMITATIONS/TODO 168 169 - block only 170 - handles only 'same operations, same successors'. 171 It handles same predecessors as a special subcase though. 172 - does not implement the reverse value numbering and block value numbering. 173 - improve memory allocation: use garbage collected memory, obstacks, 174 allocpools where appropriate. 175 - no insertion of gimple_reg phis, We only introduce vop-phis. 176 - handle blocks with gimple_reg phi_nodes. 177 178 179 PASS PLACEMENT 180 This 'pass' is not a stand-alone gimple pass, but runs as part of 181 pass_pre, in order to share the value numbering. 182 183 184 SWITCHES 185 186 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */ 187 188#include "config.h" 189#include "system.h" 190#include "coretypes.h" 191#include "tm.h" 192#include "hash-set.h" 193#include "machmode.h" 194#include "vec.h" 195#include "double-int.h" 196#include "input.h" 197#include "alias.h" 198#include "symtab.h" 199#include "wide-int.h" 200#include "inchash.h" 201#include "real.h" 202#include "tree.h" 203#include "fold-const.h" 204#include "stor-layout.h" 205#include "trans-mem.h" 206#include "inchash.h" 207#include "tm_p.h" 208#include "predict.h" 209#include "hard-reg-set.h" 210#include "input.h" 211#include "function.h" 212#include "dominance.h" 213#include "cfg.h" 214#include "cfganal.h" 215#include "cfgcleanup.h" 216#include "basic-block.h" 217#include "flags.h" 218#include "hash-table.h" 219#include "tree-ssa-alias.h" 220#include "internal-fn.h" 221#include "tree-eh.h" 222#include "gimple-expr.h" 223#include "is-a.h" 224#include "gimple.h" 225#include "gimple-iterator.h" 226#include "gimple-ssa.h" 227#include "tree-cfg.h" 228#include "tree-phinodes.h" 229#include "ssa-iterators.h" 230#include "tree-into-ssa.h" 231#include "params.h" 232#include "gimple-pretty-print.h" 233#include "tree-ssa-sccvn.h" 234#include "tree-dump.h" 235#include "cfgloop.h" 236#include "tree-pass.h" 237#include "trans-mem.h" 238#include "stringpool.h" 239#include "tree-ssanames.h" 240 241/* Describes a group of bbs with the same successors. The successor bbs are 242 cached in succs, and the successor edge flags are cached in succ_flags. 243 If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags, 244 it's marked in inverse. 245 Additionally, the hash value for the struct is cached in hashval, and 246 in_worklist indicates whether it's currently part of worklist. */ 247 248struct same_succ_def 249{ 250 /* The bbs that have the same successor bbs. */ 251 bitmap bbs; 252 /* The successor bbs. */ 253 bitmap succs; 254 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for 255 bb. */ 256 bitmap inverse; 257 /* The edge flags for each of the successor bbs. */ 258 vec<int> succ_flags; 259 /* Indicates whether the struct is currently in the worklist. */ 260 bool in_worklist; 261 /* The hash value of the struct. */ 262 hashval_t hashval; 263 264 /* hash_table support. */ 265 typedef same_succ_def value_type; 266 typedef same_succ_def compare_type; 267 static inline hashval_t hash (const value_type *); 268 static int equal (const value_type *, const compare_type *); 269 static void remove (value_type *); 270}; 271typedef struct same_succ_def *same_succ; 272typedef const struct same_succ_def *const_same_succ; 273 274/* hash routine for hash_table support, returns hashval of E. */ 275 276inline hashval_t 277same_succ_def::hash (const value_type *e) 278{ 279 return e->hashval; 280} 281 282/* A group of bbs where 1 bb from bbs can replace the other bbs. */ 283 284struct bb_cluster_def 285{ 286 /* The bbs in the cluster. */ 287 bitmap bbs; 288 /* The preds of the bbs in the cluster. */ 289 bitmap preds; 290 /* Index in all_clusters vector. */ 291 int index; 292 /* The bb to replace the cluster with. */ 293 basic_block rep_bb; 294}; 295typedef struct bb_cluster_def *bb_cluster; 296typedef const struct bb_cluster_def *const_bb_cluster; 297 298/* Per bb-info. */ 299 300struct aux_bb_info 301{ 302 /* The number of non-debug statements in the bb. */ 303 int size; 304 /* The same_succ that this bb is a member of. */ 305 same_succ bb_same_succ; 306 /* The cluster that this bb is a member of. */ 307 bb_cluster cluster; 308 /* The vop state at the exit of a bb. This is shortlived data, used to 309 communicate data between update_block_by and update_vuses. */ 310 tree vop_at_exit; 311 /* The bb that either contains or is dominated by the dependencies of the 312 bb. */ 313 basic_block dep_bb; 314}; 315 316/* Macros to access the fields of struct aux_bb_info. */ 317 318#define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size) 319#define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ) 320#define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster) 321#define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit) 322#define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb) 323 324/* Returns true if the only effect a statement STMT has, is to define locally 325 used SSA_NAMEs. */ 326 327static bool 328stmt_local_def (gimple stmt) 329{ 330 basic_block bb, def_bb; 331 imm_use_iterator iter; 332 use_operand_p use_p; 333 tree val; 334 def_operand_p def_p; 335 336 if (gimple_vdef (stmt) != NULL_TREE 337 || gimple_has_side_effects (stmt) 338 || gimple_could_trap_p_1 (stmt, false, false) 339 || gimple_vuse (stmt) != NULL_TREE) 340 return false; 341 342 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF); 343 if (def_p == NULL) 344 return false; 345 346 val = DEF_FROM_PTR (def_p); 347 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME) 348 return false; 349 350 def_bb = gimple_bb (stmt); 351 352 FOR_EACH_IMM_USE_FAST (use_p, iter, val) 353 { 354 if (is_gimple_debug (USE_STMT (use_p))) 355 continue; 356 bb = gimple_bb (USE_STMT (use_p)); 357 if (bb == def_bb) 358 continue; 359 360 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI 361 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb) 362 continue; 363 364 return false; 365 } 366 367 return true; 368} 369 370/* Let GSI skip forwards over local defs. */ 371 372static void 373gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi) 374{ 375 gimple stmt; 376 377 while (true) 378 { 379 if (gsi_end_p (*gsi)) 380 return; 381 stmt = gsi_stmt (*gsi); 382 if (!stmt_local_def (stmt)) 383 return; 384 gsi_next_nondebug (gsi); 385 } 386} 387 388/* VAL1 and VAL2 are either: 389 - uses in BB1 and BB2, or 390 - phi alternatives for BB1 and BB2. 391 Return true if the uses have the same gvn value. */ 392 393static bool 394gvn_uses_equal (tree val1, tree val2) 395{ 396 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE); 397 398 if (val1 == val2) 399 return true; 400 401 if (vn_valueize (val1) != vn_valueize (val2)) 402 return false; 403 404 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1)) 405 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2))); 406} 407 408/* Prints E to FILE. */ 409 410static void 411same_succ_print (FILE *file, const same_succ e) 412{ 413 unsigned int i; 414 bitmap_print (file, e->bbs, "bbs:", "\n"); 415 bitmap_print (file, e->succs, "succs:", "\n"); 416 bitmap_print (file, e->inverse, "inverse:", "\n"); 417 fprintf (file, "flags:"); 418 for (i = 0; i < e->succ_flags.length (); ++i) 419 fprintf (file, " %x", e->succ_flags[i]); 420 fprintf (file, "\n"); 421} 422 423/* Prints same_succ VE to VFILE. */ 424 425inline int 426ssa_same_succ_print_traverse (same_succ *pe, FILE *file) 427{ 428 const same_succ e = *pe; 429 same_succ_print (file, e); 430 return 1; 431} 432 433/* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */ 434 435static void 436update_dep_bb (basic_block use_bb, tree val) 437{ 438 basic_block dep_bb; 439 440 /* Not a dep. */ 441 if (TREE_CODE (val) != SSA_NAME) 442 return; 443 444 /* Skip use of global def. */ 445 if (SSA_NAME_IS_DEFAULT_DEF (val)) 446 return; 447 448 /* Skip use of local def. */ 449 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val)); 450 if (dep_bb == use_bb) 451 return; 452 453 if (BB_DEP_BB (use_bb) == NULL 454 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb))) 455 BB_DEP_BB (use_bb) = dep_bb; 456} 457 458/* Update BB_DEP_BB, given the dependencies in STMT. */ 459 460static void 461stmt_update_dep_bb (gimple stmt) 462{ 463 ssa_op_iter iter; 464 use_operand_p use; 465 466 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE) 467 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use)); 468} 469 470/* Calculates hash value for same_succ VE. */ 471 472static hashval_t 473same_succ_hash (const_same_succ e) 474{ 475 inchash::hash hstate (bitmap_hash (e->succs)); 476 int flags; 477 unsigned int i; 478 unsigned int first = bitmap_first_set_bit (e->bbs); 479 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first); 480 int size = 0; 481 gimple stmt; 482 tree arg; 483 unsigned int s; 484 bitmap_iterator bs; 485 486 for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb); 487 !gsi_end_p (gsi); gsi_next_nondebug (&gsi)) 488 { 489 stmt = gsi_stmt (gsi); 490 stmt_update_dep_bb (stmt); 491 if (stmt_local_def (stmt)) 492 continue; 493 size++; 494 495 hstate.add_int (gimple_code (stmt)); 496 if (is_gimple_assign (stmt)) 497 hstate.add_int (gimple_assign_rhs_code (stmt)); 498 if (!is_gimple_call (stmt)) 499 continue; 500 if (gimple_call_internal_p (stmt)) 501 hstate.add_int (gimple_call_internal_fn (stmt)); 502 else 503 { 504 inchash::add_expr (gimple_call_fn (stmt), hstate); 505 if (gimple_call_chain (stmt)) 506 inchash::add_expr (gimple_call_chain (stmt), hstate); 507 } 508 for (i = 0; i < gimple_call_num_args (stmt); i++) 509 { 510 arg = gimple_call_arg (stmt, i); 511 arg = vn_valueize (arg); 512 inchash::add_expr (arg, hstate); 513 } 514 } 515 516 hstate.add_int (size); 517 BB_SIZE (bb) = size; 518 519 for (i = 0; i < e->succ_flags.length (); ++i) 520 { 521 flags = e->succ_flags[i]; 522 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); 523 hstate.add_int (flags); 524 } 525 526 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs) 527 { 528 int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx; 529 for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s)); 530 !gsi_end_p (gsi); 531 gsi_next (&gsi)) 532 { 533 gphi *phi = gsi.phi (); 534 tree lhs = gimple_phi_result (phi); 535 tree val = gimple_phi_arg_def (phi, n); 536 537 if (virtual_operand_p (lhs)) 538 continue; 539 update_dep_bb (bb, val); 540 } 541 } 542 543 return hstate.end (); 544} 545 546/* Returns true if E1 and E2 have 2 successors, and if the successor flags 547 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for 548 the other edge flags. */ 549 550static bool 551inverse_flags (const_same_succ e1, const_same_succ e2) 552{ 553 int f1a, f1b, f2a, f2b; 554 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); 555 556 if (e1->succ_flags.length () != 2) 557 return false; 558 559 f1a = e1->succ_flags[0]; 560 f1b = e1->succ_flags[1]; 561 f2a = e2->succ_flags[0]; 562 f2b = e2->succ_flags[1]; 563 564 if (f1a == f2a && f1b == f2b) 565 return false; 566 567 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask); 568} 569 570/* Compares SAME_SUCCs E1 and E2. */ 571 572int 573same_succ_def::equal (const value_type *e1, const compare_type *e2) 574{ 575 unsigned int i, first1, first2; 576 gimple_stmt_iterator gsi1, gsi2; 577 gimple s1, s2; 578 basic_block bb1, bb2; 579 580 if (e1->hashval != e2->hashval) 581 return 0; 582 583 if (e1->succ_flags.length () != e2->succ_flags.length ()) 584 return 0; 585 586 if (!bitmap_equal_p (e1->succs, e2->succs)) 587 return 0; 588 589 if (!inverse_flags (e1, e2)) 590 { 591 for (i = 0; i < e1->succ_flags.length (); ++i) 592 if (e1->succ_flags[i] != e2->succ_flags[i]) 593 return 0; 594 } 595 596 first1 = bitmap_first_set_bit (e1->bbs); 597 first2 = bitmap_first_set_bit (e2->bbs); 598 599 bb1 = BASIC_BLOCK_FOR_FN (cfun, first1); 600 bb2 = BASIC_BLOCK_FOR_FN (cfun, first2); 601 602 if (BB_SIZE (bb1) != BB_SIZE (bb2)) 603 return 0; 604 605 gsi1 = gsi_start_nondebug_bb (bb1); 606 gsi2 = gsi_start_nondebug_bb (bb2); 607 gsi_advance_fw_nondebug_nonlocal (&gsi1); 608 gsi_advance_fw_nondebug_nonlocal (&gsi2); 609 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2))) 610 { 611 s1 = gsi_stmt (gsi1); 612 s2 = gsi_stmt (gsi2); 613 if (gimple_code (s1) != gimple_code (s2)) 614 return 0; 615 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2)) 616 return 0; 617 gsi_next_nondebug (&gsi1); 618 gsi_next_nondebug (&gsi2); 619 gsi_advance_fw_nondebug_nonlocal (&gsi1); 620 gsi_advance_fw_nondebug_nonlocal (&gsi2); 621 } 622 623 return 1; 624} 625 626/* Alloc and init a new SAME_SUCC. */ 627 628static same_succ 629same_succ_alloc (void) 630{ 631 same_succ same = XNEW (struct same_succ_def); 632 633 same->bbs = BITMAP_ALLOC (NULL); 634 same->succs = BITMAP_ALLOC (NULL); 635 same->inverse = BITMAP_ALLOC (NULL); 636 same->succ_flags.create (10); 637 same->in_worklist = false; 638 639 return same; 640} 641 642/* Delete same_succ E. */ 643 644void 645same_succ_def::remove (same_succ e) 646{ 647 BITMAP_FREE (e->bbs); 648 BITMAP_FREE (e->succs); 649 BITMAP_FREE (e->inverse); 650 e->succ_flags.release (); 651 652 XDELETE (e); 653} 654 655/* Reset same_succ SAME. */ 656 657static void 658same_succ_reset (same_succ same) 659{ 660 bitmap_clear (same->bbs); 661 bitmap_clear (same->succs); 662 bitmap_clear (same->inverse); 663 same->succ_flags.truncate (0); 664} 665 666static hash_table<same_succ_def> *same_succ_htab; 667 668/* Array that is used to store the edge flags for a successor. */ 669 670static int *same_succ_edge_flags; 671 672/* Bitmap that is used to mark bbs that are recently deleted. */ 673 674static bitmap deleted_bbs; 675 676/* Bitmap that is used to mark predecessors of bbs that are 677 deleted. */ 678 679static bitmap deleted_bb_preds; 680 681/* Prints same_succ_htab to stderr. */ 682 683extern void debug_same_succ (void); 684DEBUG_FUNCTION void 685debug_same_succ ( void) 686{ 687 same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr); 688} 689 690 691/* Vector of bbs to process. */ 692 693static vec<same_succ> worklist; 694 695/* Prints worklist to FILE. */ 696 697static void 698print_worklist (FILE *file) 699{ 700 unsigned int i; 701 for (i = 0; i < worklist.length (); ++i) 702 same_succ_print (file, worklist[i]); 703} 704 705/* Adds SAME to worklist. */ 706 707static void 708add_to_worklist (same_succ same) 709{ 710 if (same->in_worklist) 711 return; 712 713 if (bitmap_count_bits (same->bbs) < 2) 714 return; 715 716 same->in_worklist = true; 717 worklist.safe_push (same); 718} 719 720/* Add BB to same_succ_htab. */ 721 722static void 723find_same_succ_bb (basic_block bb, same_succ *same_p) 724{ 725 unsigned int j; 726 bitmap_iterator bj; 727 same_succ same = *same_p; 728 same_succ *slot; 729 edge_iterator ei; 730 edge e; 731 732 if (bb == NULL 733 /* Be conservative with loop structure. It's not evident that this test 734 is sufficient. Before tail-merge, we've just called 735 loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now 736 set, so there's no guarantee that the loop->latch value is still valid. 737 But we assume that, since we've forced LOOPS_HAVE_SIMPLE_LATCHES at the 738 start of pre, we've kept that property intact throughout pre, and are 739 keeping it throughout tail-merge using this test. */ 740 || bb->loop_father->latch == bb) 741 return; 742 bitmap_set_bit (same->bbs, bb->index); 743 FOR_EACH_EDGE (e, ei, bb->succs) 744 { 745 int index = e->dest->index; 746 bitmap_set_bit (same->succs, index); 747 same_succ_edge_flags[index] = e->flags; 748 } 749 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj) 750 same->succ_flags.safe_push (same_succ_edge_flags[j]); 751 752 same->hashval = same_succ_hash (same); 753 754 slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT); 755 if (*slot == NULL) 756 { 757 *slot = same; 758 BB_SAME_SUCC (bb) = same; 759 add_to_worklist (same); 760 *same_p = NULL; 761 } 762 else 763 { 764 bitmap_set_bit ((*slot)->bbs, bb->index); 765 BB_SAME_SUCC (bb) = *slot; 766 add_to_worklist (*slot); 767 if (inverse_flags (same, *slot)) 768 bitmap_set_bit ((*slot)->inverse, bb->index); 769 same_succ_reset (same); 770 } 771} 772 773/* Find bbs with same successors. */ 774 775static void 776find_same_succ (void) 777{ 778 same_succ same = same_succ_alloc (); 779 basic_block bb; 780 781 FOR_EACH_BB_FN (bb, cfun) 782 { 783 find_same_succ_bb (bb, &same); 784 if (same == NULL) 785 same = same_succ_alloc (); 786 } 787 788 same_succ_def::remove (same); 789} 790 791/* Initializes worklist administration. */ 792 793static void 794init_worklist (void) 795{ 796 alloc_aux_for_blocks (sizeof (struct aux_bb_info)); 797 same_succ_htab = new hash_table<same_succ_def> (n_basic_blocks_for_fn (cfun)); 798 same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun)); 799 deleted_bbs = BITMAP_ALLOC (NULL); 800 deleted_bb_preds = BITMAP_ALLOC (NULL); 801 worklist.create (n_basic_blocks_for_fn (cfun)); 802 find_same_succ (); 803 804 if (dump_file && (dump_flags & TDF_DETAILS)) 805 { 806 fprintf (dump_file, "initial worklist:\n"); 807 print_worklist (dump_file); 808 } 809} 810 811/* Deletes worklist administration. */ 812 813static void 814delete_worklist (void) 815{ 816 free_aux_for_blocks (); 817 delete same_succ_htab; 818 same_succ_htab = NULL; 819 XDELETEVEC (same_succ_edge_flags); 820 same_succ_edge_flags = NULL; 821 BITMAP_FREE (deleted_bbs); 822 BITMAP_FREE (deleted_bb_preds); 823 worklist.release (); 824} 825 826/* Mark BB as deleted, and mark its predecessors. */ 827 828static void 829mark_basic_block_deleted (basic_block bb) 830{ 831 edge e; 832 edge_iterator ei; 833 834 bitmap_set_bit (deleted_bbs, bb->index); 835 836 FOR_EACH_EDGE (e, ei, bb->preds) 837 bitmap_set_bit (deleted_bb_preds, e->src->index); 838} 839 840/* Removes BB from its corresponding same_succ. */ 841 842static void 843same_succ_flush_bb (basic_block bb) 844{ 845 same_succ same = BB_SAME_SUCC (bb); 846 BB_SAME_SUCC (bb) = NULL; 847 if (bitmap_single_bit_set_p (same->bbs)) 848 same_succ_htab->remove_elt_with_hash (same, same->hashval); 849 else 850 bitmap_clear_bit (same->bbs, bb->index); 851} 852 853/* Removes all bbs in BBS from their corresponding same_succ. */ 854 855static void 856same_succ_flush_bbs (bitmap bbs) 857{ 858 unsigned int i; 859 bitmap_iterator bi; 860 861 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi) 862 same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i)); 863} 864 865/* Release the last vdef in BB, either normal or phi result. */ 866 867static void 868release_last_vdef (basic_block bb) 869{ 870 for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i); 871 gsi_prev_nondebug (&i)) 872 { 873 gimple stmt = gsi_stmt (i); 874 if (gimple_vdef (stmt) == NULL_TREE) 875 continue; 876 877 mark_virtual_operand_for_renaming (gimple_vdef (stmt)); 878 return; 879 } 880 881 for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i); 882 gsi_next (&i)) 883 { 884 gphi *phi = i.phi (); 885 tree res = gimple_phi_result (phi); 886 887 if (!virtual_operand_p (res)) 888 continue; 889 890 mark_virtual_phi_result_for_renaming (phi); 891 return; 892 } 893 894} 895 896/* For deleted_bb_preds, find bbs with same successors. */ 897 898static void 899update_worklist (void) 900{ 901 unsigned int i; 902 bitmap_iterator bi; 903 basic_block bb; 904 same_succ same; 905 906 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs); 907 bitmap_clear (deleted_bbs); 908 909 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK); 910 same_succ_flush_bbs (deleted_bb_preds); 911 912 same = same_succ_alloc (); 913 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi) 914 { 915 bb = BASIC_BLOCK_FOR_FN (cfun, i); 916 gcc_assert (bb != NULL); 917 find_same_succ_bb (bb, &same); 918 if (same == NULL) 919 same = same_succ_alloc (); 920 } 921 same_succ_def::remove (same); 922 bitmap_clear (deleted_bb_preds); 923} 924 925/* Prints cluster C to FILE. */ 926 927static void 928print_cluster (FILE *file, bb_cluster c) 929{ 930 if (c == NULL) 931 return; 932 bitmap_print (file, c->bbs, "bbs:", "\n"); 933 bitmap_print (file, c->preds, "preds:", "\n"); 934} 935 936/* Prints cluster C to stderr. */ 937 938extern void debug_cluster (bb_cluster); 939DEBUG_FUNCTION void 940debug_cluster (bb_cluster c) 941{ 942 print_cluster (stderr, c); 943} 944 945/* Update C->rep_bb, given that BB is added to the cluster. */ 946 947static void 948update_rep_bb (bb_cluster c, basic_block bb) 949{ 950 /* Initial. */ 951 if (c->rep_bb == NULL) 952 { 953 c->rep_bb = bb; 954 return; 955 } 956 957 /* Current needs no deps, keep it. */ 958 if (BB_DEP_BB (c->rep_bb) == NULL) 959 return; 960 961 /* Bb needs no deps, change rep_bb. */ 962 if (BB_DEP_BB (bb) == NULL) 963 { 964 c->rep_bb = bb; 965 return; 966 } 967 968 /* Bb needs last deps earlier than current, change rep_bb. A potential 969 problem with this, is that the first deps might also be earlier, which 970 would mean we prefer longer lifetimes for the deps. To be able to check 971 for this, we would have to trace BB_FIRST_DEP_BB as well, besides 972 BB_DEP_BB, which is really BB_LAST_DEP_BB. 973 The benefit of choosing the bb with last deps earlier, is that it can 974 potentially be used as replacement for more bbs. */ 975 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb))) 976 c->rep_bb = bb; 977} 978 979/* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */ 980 981static void 982add_bb_to_cluster (bb_cluster c, basic_block bb) 983{ 984 edge e; 985 edge_iterator ei; 986 987 bitmap_set_bit (c->bbs, bb->index); 988 989 FOR_EACH_EDGE (e, ei, bb->preds) 990 bitmap_set_bit (c->preds, e->src->index); 991 992 update_rep_bb (c, bb); 993} 994 995/* Allocate and init new cluster. */ 996 997static bb_cluster 998new_cluster (void) 999{ 1000 bb_cluster c; 1001 c = XCNEW (struct bb_cluster_def); 1002 c->bbs = BITMAP_ALLOC (NULL); 1003 c->preds = BITMAP_ALLOC (NULL); 1004 c->rep_bb = NULL; 1005 return c; 1006} 1007 1008/* Delete clusters. */ 1009 1010static void 1011delete_cluster (bb_cluster c) 1012{ 1013 if (c == NULL) 1014 return; 1015 BITMAP_FREE (c->bbs); 1016 BITMAP_FREE (c->preds); 1017 XDELETE (c); 1018} 1019 1020 1021/* Array that contains all clusters. */ 1022 1023static vec<bb_cluster> all_clusters; 1024 1025/* Allocate all cluster vectors. */ 1026 1027static void 1028alloc_cluster_vectors (void) 1029{ 1030 all_clusters.create (n_basic_blocks_for_fn (cfun)); 1031} 1032 1033/* Reset all cluster vectors. */ 1034 1035static void 1036reset_cluster_vectors (void) 1037{ 1038 unsigned int i; 1039 basic_block bb; 1040 for (i = 0; i < all_clusters.length (); ++i) 1041 delete_cluster (all_clusters[i]); 1042 all_clusters.truncate (0); 1043 FOR_EACH_BB_FN (bb, cfun) 1044 BB_CLUSTER (bb) = NULL; 1045} 1046 1047/* Delete all cluster vectors. */ 1048 1049static void 1050delete_cluster_vectors (void) 1051{ 1052 unsigned int i; 1053 for (i = 0; i < all_clusters.length (); ++i) 1054 delete_cluster (all_clusters[i]); 1055 all_clusters.release (); 1056} 1057 1058/* Merge cluster C2 into C1. */ 1059 1060static void 1061merge_clusters (bb_cluster c1, bb_cluster c2) 1062{ 1063 bitmap_ior_into (c1->bbs, c2->bbs); 1064 bitmap_ior_into (c1->preds, c2->preds); 1065} 1066 1067/* Register equivalence of BB1 and BB2 (members of cluster C). Store c in 1068 all_clusters, or merge c with existing cluster. */ 1069 1070static void 1071set_cluster (basic_block bb1, basic_block bb2) 1072{ 1073 basic_block merge_bb, other_bb; 1074 bb_cluster merge, old, c; 1075 1076 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL) 1077 { 1078 c = new_cluster (); 1079 add_bb_to_cluster (c, bb1); 1080 add_bb_to_cluster (c, bb2); 1081 BB_CLUSTER (bb1) = c; 1082 BB_CLUSTER (bb2) = c; 1083 c->index = all_clusters.length (); 1084 all_clusters.safe_push (c); 1085 } 1086 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL) 1087 { 1088 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1; 1089 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2; 1090 merge = BB_CLUSTER (merge_bb); 1091 add_bb_to_cluster (merge, other_bb); 1092 BB_CLUSTER (other_bb) = merge; 1093 } 1094 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2)) 1095 { 1096 unsigned int i; 1097 bitmap_iterator bi; 1098 1099 old = BB_CLUSTER (bb2); 1100 merge = BB_CLUSTER (bb1); 1101 merge_clusters (merge, old); 1102 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi) 1103 BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge; 1104 all_clusters[old->index] = NULL; 1105 update_rep_bb (merge, old->rep_bb); 1106 delete_cluster (old); 1107 } 1108 else 1109 gcc_unreachable (); 1110} 1111 1112/* Return true if gimple operands T1 and T2 have the same value. */ 1113 1114static bool 1115gimple_operand_equal_value_p (tree t1, tree t2) 1116{ 1117 if (t1 == t2) 1118 return true; 1119 1120 if (t1 == NULL_TREE 1121 || t2 == NULL_TREE) 1122 return false; 1123 1124 if (operand_equal_p (t1, t2, 0)) 1125 return true; 1126 1127 return gvn_uses_equal (t1, t2); 1128} 1129 1130/* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and 1131 gimple_bb (s2) are members of SAME_SUCC. */ 1132 1133static bool 1134gimple_equal_p (same_succ same_succ, gimple s1, gimple s2) 1135{ 1136 unsigned int i; 1137 tree lhs1, lhs2; 1138 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); 1139 tree t1, t2; 1140 bool inv_cond; 1141 enum tree_code code1, code2; 1142 1143 if (gimple_code (s1) != gimple_code (s2)) 1144 return false; 1145 1146 switch (gimple_code (s1)) 1147 { 1148 case GIMPLE_CALL: 1149 if (!gimple_call_same_target_p (s1, s2)) 1150 return false; 1151 1152 t1 = gimple_call_chain (s1); 1153 t2 = gimple_call_chain (s2); 1154 if (!gimple_operand_equal_value_p (t1, t2)) 1155 return false; 1156 1157 if (gimple_call_num_args (s1) != gimple_call_num_args (s2)) 1158 return false; 1159 1160 for (i = 0; i < gimple_call_num_args (s1); ++i) 1161 { 1162 t1 = gimple_call_arg (s1, i); 1163 t2 = gimple_call_arg (s2, i); 1164 if (!gimple_operand_equal_value_p (t1, t2)) 1165 return false; 1166 } 1167 1168 lhs1 = gimple_get_lhs (s1); 1169 lhs2 = gimple_get_lhs (s2); 1170 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE) 1171 return true; 1172 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE) 1173 return false; 1174 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME) 1175 return vn_valueize (lhs1) == vn_valueize (lhs2); 1176 return operand_equal_p (lhs1, lhs2, 0); 1177 1178 case GIMPLE_ASSIGN: 1179 lhs1 = gimple_get_lhs (s1); 1180 lhs2 = gimple_get_lhs (s2); 1181 if (TREE_CODE (lhs1) != SSA_NAME 1182 && TREE_CODE (lhs2) != SSA_NAME) 1183 return (operand_equal_p (lhs1, lhs2, 0) 1184 && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1), 1185 gimple_assign_rhs1 (s2))); 1186 else if (TREE_CODE (lhs1) == SSA_NAME 1187 && TREE_CODE (lhs2) == SSA_NAME) 1188 return operand_equal_p (gimple_assign_rhs1 (s1), 1189 gimple_assign_rhs1 (s2), 0); 1190 return false; 1191 1192 case GIMPLE_COND: 1193 t1 = gimple_cond_lhs (s1); 1194 t2 = gimple_cond_lhs (s2); 1195 if (!gimple_operand_equal_value_p (t1, t2)) 1196 return false; 1197 1198 t1 = gimple_cond_rhs (s1); 1199 t2 = gimple_cond_rhs (s2); 1200 if (!gimple_operand_equal_value_p (t1, t2)) 1201 return false; 1202 1203 code1 = gimple_expr_code (s1); 1204 code2 = gimple_expr_code (s2); 1205 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index) 1206 != bitmap_bit_p (same_succ->inverse, bb2->index)); 1207 if (inv_cond) 1208 { 1209 bool honor_nans = HONOR_NANS (t1); 1210 code2 = invert_tree_comparison (code2, honor_nans); 1211 } 1212 return code1 == code2; 1213 1214 default: 1215 return false; 1216 } 1217} 1218 1219/* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE. 1220 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the 1221 processed statements. */ 1222 1223static void 1224gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse, 1225 bool *vuse_escaped) 1226{ 1227 gimple stmt; 1228 tree lvuse; 1229 1230 while (true) 1231 { 1232 if (gsi_end_p (*gsi)) 1233 return; 1234 stmt = gsi_stmt (*gsi); 1235 1236 lvuse = gimple_vuse (stmt); 1237 if (lvuse != NULL_TREE) 1238 { 1239 *vuse = lvuse; 1240 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF)) 1241 *vuse_escaped = true; 1242 } 1243 1244 if (!stmt_local_def (stmt)) 1245 return; 1246 gsi_prev_nondebug (gsi); 1247 } 1248} 1249 1250/* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so, 1251 clusters them. */ 1252 1253static void 1254find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2) 1255{ 1256 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1); 1257 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2); 1258 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE; 1259 bool vuse_escaped = false; 1260 1261 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped); 1262 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped); 1263 1264 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2)) 1265 { 1266 gimple stmt1 = gsi_stmt (gsi1); 1267 gimple stmt2 = gsi_stmt (gsi2); 1268 1269 /* What could be better than to this this here is to blacklist the bb 1270 containing the stmt, when encountering the stmt f.i. in 1271 same_succ_hash. */ 1272 if (is_tm_ending (stmt1) 1273 || is_tm_ending (stmt2)) 1274 return; 1275 1276 if (!gimple_equal_p (same_succ, stmt1, stmt2)) 1277 return; 1278 1279 gsi_prev_nondebug (&gsi1); 1280 gsi_prev_nondebug (&gsi2); 1281 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped); 1282 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped); 1283 } 1284 1285 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2))) 1286 return; 1287 1288 /* If the incoming vuses are not the same, and the vuse escaped into an 1289 SSA_OP_DEF, then merging the 2 blocks will change the value of the def, 1290 which potentially means the semantics of one of the blocks will be changed. 1291 TODO: make this check more precise. */ 1292 if (vuse_escaped && vuse1 != vuse2) 1293 return; 1294 1295 if (dump_file) 1296 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n", 1297 bb1->index, bb2->index); 1298 1299 set_cluster (bb1, bb2); 1300} 1301 1302/* Returns whether for all phis in DEST the phi alternatives for E1 and 1303 E2 are equal. */ 1304 1305static bool 1306same_phi_alternatives_1 (basic_block dest, edge e1, edge e2) 1307{ 1308 int n1 = e1->dest_idx, n2 = e2->dest_idx; 1309 gphi_iterator gsi; 1310 1311 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi)) 1312 { 1313 gphi *phi = gsi.phi (); 1314 tree lhs = gimple_phi_result (phi); 1315 tree val1 = gimple_phi_arg_def (phi, n1); 1316 tree val2 = gimple_phi_arg_def (phi, n2); 1317 1318 if (virtual_operand_p (lhs)) 1319 continue; 1320 1321 if (operand_equal_for_phi_arg_p (val1, val2)) 1322 continue; 1323 if (gvn_uses_equal (val1, val2)) 1324 continue; 1325 1326 return false; 1327 } 1328 1329 return true; 1330} 1331 1332/* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the 1333 phi alternatives for BB1 and BB2 are equal. */ 1334 1335static bool 1336same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2) 1337{ 1338 unsigned int s; 1339 bitmap_iterator bs; 1340 edge e1, e2; 1341 basic_block succ; 1342 1343 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs) 1344 { 1345 succ = BASIC_BLOCK_FOR_FN (cfun, s); 1346 e1 = find_edge (bb1, succ); 1347 e2 = find_edge (bb2, succ); 1348 if (e1->flags & EDGE_COMPLEX 1349 || e2->flags & EDGE_COMPLEX) 1350 return false; 1351 1352 /* For all phis in bb, the phi alternatives for e1 and e2 need to have 1353 the same value. */ 1354 if (!same_phi_alternatives_1 (succ, e1, e2)) 1355 return false; 1356 } 1357 1358 return true; 1359} 1360 1361/* Return true if BB has non-vop phis. */ 1362 1363static bool 1364bb_has_non_vop_phi (basic_block bb) 1365{ 1366 gimple_seq phis = phi_nodes (bb); 1367 gimple phi; 1368 1369 if (phis == NULL) 1370 return false; 1371 1372 if (!gimple_seq_singleton_p (phis)) 1373 return true; 1374 1375 phi = gimple_seq_first_stmt (phis); 1376 return !virtual_operand_p (gimple_phi_result (phi)); 1377} 1378 1379/* Returns true if redirecting the incoming edges of FROM to TO maintains the 1380 invariant that uses in FROM are dominates by their defs. */ 1381 1382static bool 1383deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to) 1384{ 1385 basic_block cd, dep_bb = BB_DEP_BB (to); 1386 edge_iterator ei; 1387 edge e; 1388 bitmap from_preds = BITMAP_ALLOC (NULL); 1389 1390 if (dep_bb == NULL) 1391 return true; 1392 1393 FOR_EACH_EDGE (e, ei, from->preds) 1394 bitmap_set_bit (from_preds, e->src->index); 1395 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds); 1396 BITMAP_FREE (from_preds); 1397 1398 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd); 1399} 1400 1401/* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its 1402 replacement bb) and vice versa maintains the invariant that uses in the 1403 replacement are dominates by their defs. */ 1404 1405static bool 1406deps_ok_for_redirect (basic_block bb1, basic_block bb2) 1407{ 1408 if (BB_CLUSTER (bb1) != NULL) 1409 bb1 = BB_CLUSTER (bb1)->rep_bb; 1410 1411 if (BB_CLUSTER (bb2) != NULL) 1412 bb2 = BB_CLUSTER (bb2)->rep_bb; 1413 1414 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2) 1415 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1)); 1416} 1417 1418/* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */ 1419 1420static void 1421find_clusters_1 (same_succ same_succ) 1422{ 1423 basic_block bb1, bb2; 1424 unsigned int i, j; 1425 bitmap_iterator bi, bj; 1426 int nr_comparisons; 1427 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS); 1428 1429 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi) 1430 { 1431 bb1 = BASIC_BLOCK_FOR_FN (cfun, i); 1432 1433 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding 1434 phi-nodes in bb1 and bb2, with the same alternatives for the same 1435 preds. */ 1436 if (bb_has_non_vop_phi (bb1)) 1437 continue; 1438 1439 nr_comparisons = 0; 1440 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj) 1441 { 1442 bb2 = BASIC_BLOCK_FOR_FN (cfun, j); 1443 1444 if (bb_has_non_vop_phi (bb2)) 1445 continue; 1446 1447 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2)) 1448 continue; 1449 1450 /* Limit quadratic behaviour. */ 1451 nr_comparisons++; 1452 if (nr_comparisons > max_comparisons) 1453 break; 1454 1455 /* This is a conservative dependency check. We could test more 1456 precise for allowed replacement direction. */ 1457 if (!deps_ok_for_redirect (bb1, bb2)) 1458 continue; 1459 1460 if (!(same_phi_alternatives (same_succ, bb1, bb2))) 1461 continue; 1462 1463 find_duplicate (same_succ, bb1, bb2); 1464 } 1465 } 1466} 1467 1468/* Find clusters of bbs which can be merged. */ 1469 1470static void 1471find_clusters (void) 1472{ 1473 same_succ same; 1474 1475 while (!worklist.is_empty ()) 1476 { 1477 same = worklist.pop (); 1478 same->in_worklist = false; 1479 if (dump_file && (dump_flags & TDF_DETAILS)) 1480 { 1481 fprintf (dump_file, "processing worklist entry\n"); 1482 same_succ_print (dump_file, same); 1483 } 1484 find_clusters_1 (same); 1485 } 1486} 1487 1488/* Returns the vop phi of BB, if any. */ 1489 1490static gphi * 1491vop_phi (basic_block bb) 1492{ 1493 gphi *stmt; 1494 gphi_iterator gsi; 1495 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1496 { 1497 stmt = gsi.phi (); 1498 if (! virtual_operand_p (gimple_phi_result (stmt))) 1499 continue; 1500 return stmt; 1501 } 1502 return NULL; 1503} 1504 1505/* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */ 1506 1507static void 1508replace_block_by (basic_block bb1, basic_block bb2) 1509{ 1510 edge pred_edge; 1511 edge e1, e2; 1512 edge_iterator ei; 1513 unsigned int i; 1514 gphi *bb2_phi; 1515 1516 bb2_phi = vop_phi (bb2); 1517 1518 /* Mark the basic block as deleted. */ 1519 mark_basic_block_deleted (bb1); 1520 1521 /* Redirect the incoming edges of bb1 to bb2. */ 1522 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i) 1523 { 1524 pred_edge = EDGE_PRED (bb1, i - 1); 1525 pred_edge = redirect_edge_and_branch (pred_edge, bb2); 1526 gcc_assert (pred_edge != NULL); 1527 1528 if (bb2_phi == NULL) 1529 continue; 1530 1531 /* The phi might have run out of capacity when the redirect added an 1532 argument, which means it could have been replaced. Refresh it. */ 1533 bb2_phi = vop_phi (bb2); 1534 1535 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)), 1536 pred_edge, UNKNOWN_LOCATION); 1537 } 1538 1539 bb2->frequency += bb1->frequency; 1540 if (bb2->frequency > BB_FREQ_MAX) 1541 bb2->frequency = BB_FREQ_MAX; 1542 1543 bb2->count += bb1->count; 1544 1545 /* Merge the outgoing edge counts from bb1 onto bb2. */ 1546 gcov_type out_sum = 0; 1547 FOR_EACH_EDGE (e1, ei, bb1->succs) 1548 { 1549 e2 = find_edge (bb2, e1->dest); 1550 gcc_assert (e2); 1551 e2->count += e1->count; 1552 out_sum += e2->count; 1553 } 1554 /* Recompute the edge probabilities from the new merged edge count. 1555 Use the sum of the new merged edge counts computed above instead 1556 of bb2's merged count, in case there are profile count insanities 1557 making the bb count inconsistent with the edge weights. */ 1558 FOR_EACH_EDGE (e2, ei, bb2->succs) 1559 { 1560 e2->probability = GCOV_COMPUTE_SCALE (e2->count, out_sum); 1561 } 1562 1563 /* Clear range info from all stmts in BB2 -- this transformation 1564 could make them out of date. */ 1565 reset_flow_sensitive_info_in_bb (bb2); 1566 1567 /* Do updates that use bb1, before deleting bb1. */ 1568 release_last_vdef (bb1); 1569 same_succ_flush_bb (bb1); 1570 1571 delete_basic_block (bb1); 1572} 1573 1574/* Bbs for which update_debug_stmt need to be called. */ 1575 1576static bitmap update_bbs; 1577 1578/* For each cluster in all_clusters, merge all cluster->bbs. Returns 1579 number of bbs removed. */ 1580 1581static int 1582apply_clusters (void) 1583{ 1584 basic_block bb1, bb2; 1585 bb_cluster c; 1586 unsigned int i, j; 1587 bitmap_iterator bj; 1588 int nr_bbs_removed = 0; 1589 1590 for (i = 0; i < all_clusters.length (); ++i) 1591 { 1592 c = all_clusters[i]; 1593 if (c == NULL) 1594 continue; 1595 1596 bb2 = c->rep_bb; 1597 bitmap_set_bit (update_bbs, bb2->index); 1598 1599 bitmap_clear_bit (c->bbs, bb2->index); 1600 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj) 1601 { 1602 bb1 = BASIC_BLOCK_FOR_FN (cfun, j); 1603 bitmap_clear_bit (update_bbs, bb1->index); 1604 1605 replace_block_by (bb1, bb2); 1606 nr_bbs_removed++; 1607 } 1608 } 1609 1610 return nr_bbs_removed; 1611} 1612 1613/* Resets debug statement STMT if it has uses that are not dominated by their 1614 defs. */ 1615 1616static void 1617update_debug_stmt (gimple stmt) 1618{ 1619 use_operand_p use_p; 1620 ssa_op_iter oi; 1621 basic_block bbuse; 1622 1623 if (!gimple_debug_bind_p (stmt)) 1624 return; 1625 1626 bbuse = gimple_bb (stmt); 1627 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE) 1628 { 1629 tree name = USE_FROM_PTR (use_p); 1630 gimple def_stmt = SSA_NAME_DEF_STMT (name); 1631 basic_block bbdef = gimple_bb (def_stmt); 1632 if (bbdef == NULL || bbuse == bbdef 1633 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)) 1634 continue; 1635 1636 gimple_debug_bind_reset_value (stmt); 1637 update_stmt (stmt); 1638 break; 1639 } 1640} 1641 1642/* Resets all debug statements that have uses that are not 1643 dominated by their defs. */ 1644 1645static void 1646update_debug_stmts (void) 1647{ 1648 basic_block bb; 1649 bitmap_iterator bi; 1650 unsigned int i; 1651 1652 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi) 1653 { 1654 gimple stmt; 1655 gimple_stmt_iterator gsi; 1656 1657 bb = BASIC_BLOCK_FOR_FN (cfun, i); 1658 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1659 { 1660 stmt = gsi_stmt (gsi); 1661 if (!is_gimple_debug (stmt)) 1662 continue; 1663 update_debug_stmt (stmt); 1664 } 1665 } 1666} 1667 1668/* Runs tail merge optimization. */ 1669 1670unsigned int 1671tail_merge_optimize (unsigned int todo) 1672{ 1673 int nr_bbs_removed_total = 0; 1674 int nr_bbs_removed; 1675 bool loop_entered = false; 1676 int iteration_nr = 0; 1677 int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS); 1678 1679 if (!flag_tree_tail_merge 1680 || max_iterations == 0) 1681 return 0; 1682 1683 timevar_push (TV_TREE_TAIL_MERGE); 1684 1685 if (!dom_info_available_p (CDI_DOMINATORS)) 1686 { 1687 /* PRE can leave us with unreachable blocks, remove them now. */ 1688 delete_unreachable_blocks (); 1689 calculate_dominance_info (CDI_DOMINATORS); 1690 } 1691 init_worklist (); 1692 1693 while (!worklist.is_empty ()) 1694 { 1695 if (!loop_entered) 1696 { 1697 loop_entered = true; 1698 alloc_cluster_vectors (); 1699 update_bbs = BITMAP_ALLOC (NULL); 1700 } 1701 else 1702 reset_cluster_vectors (); 1703 1704 iteration_nr++; 1705 if (dump_file && (dump_flags & TDF_DETAILS)) 1706 fprintf (dump_file, "worklist iteration #%d\n", iteration_nr); 1707 1708 find_clusters (); 1709 gcc_assert (worklist.is_empty ()); 1710 if (all_clusters.is_empty ()) 1711 break; 1712 1713 nr_bbs_removed = apply_clusters (); 1714 nr_bbs_removed_total += nr_bbs_removed; 1715 if (nr_bbs_removed == 0) 1716 break; 1717 1718 free_dominance_info (CDI_DOMINATORS); 1719 1720 if (iteration_nr == max_iterations) 1721 break; 1722 1723 calculate_dominance_info (CDI_DOMINATORS); 1724 update_worklist (); 1725 } 1726 1727 if (dump_file && (dump_flags & TDF_DETAILS)) 1728 fprintf (dump_file, "htab collision / search: %f\n", 1729 same_succ_htab->collisions ()); 1730 1731 if (nr_bbs_removed_total > 0) 1732 { 1733 if (MAY_HAVE_DEBUG_STMTS) 1734 { 1735 calculate_dominance_info (CDI_DOMINATORS); 1736 update_debug_stmts (); 1737 } 1738 1739 if (dump_file && (dump_flags & TDF_DETAILS)) 1740 { 1741 fprintf (dump_file, "Before TODOs.\n"); 1742 dump_function_to_file (current_function_decl, dump_file, dump_flags); 1743 } 1744 1745 mark_virtual_operands_for_renaming (cfun); 1746 } 1747 1748 delete_worklist (); 1749 if (loop_entered) 1750 { 1751 delete_cluster_vectors (); 1752 BITMAP_FREE (update_bbs); 1753 } 1754 1755 timevar_pop (TV_TREE_TAIL_MERGE); 1756 1757 return todo; 1758} 1759