1132718Skan/* Loop unswitching for GNU compiler. 2169689Skan Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. 3132718Skan 4132718SkanThis file is part of GCC. 5132718Skan 6132718SkanGCC is free software; you can redistribute it and/or modify it under 7132718Skanthe terms of the GNU General Public License as published by the Free 8132718SkanSoftware Foundation; either version 2, or (at your option) any later 9132718Skanversion. 10132718Skan 11132718SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY 12132718SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or 13132718SkanFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14132718Skanfor more details. 15132718Skan 16132718SkanYou should have received a copy of the GNU General Public License 17132718Skanalong with GCC; see the file COPYING. If not, write to the Free 18169689SkanSoftware Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 19169689Skan02110-1301, USA. */ 20132718Skan 21132718Skan#include "config.h" 22132718Skan#include "system.h" 23132718Skan#include "coretypes.h" 24132718Skan#include "tm.h" 25132718Skan#include "rtl.h" 26132718Skan#include "hard-reg-set.h" 27169689Skan#include "obstack.h" 28132718Skan#include "basic-block.h" 29132718Skan#include "cfgloop.h" 30132718Skan#include "cfglayout.h" 31132718Skan#include "params.h" 32132718Skan#include "output.h" 33132718Skan#include "expr.h" 34132718Skan 35132718Skan/* This pass moves constant conditions out of loops, duplicating the loop 36132718Skan in progress, i.e. this code: 37132718Skan 38132718Skan while (loop_cond) 39132718Skan { 40132718Skan A; 41132718Skan if (cond) 42132718Skan branch1; 43132718Skan else 44132718Skan branch2; 45132718Skan B; 46132718Skan if (cond) 47132718Skan branch3; 48132718Skan C; 49132718Skan } 50132718Skan where nothing inside the loop alters cond is transformed 51132718Skan into 52132718Skan 53132718Skan if (cond) 54132718Skan { 55132718Skan while (loop_cond) 56132718Skan { 57132718Skan A; 58132718Skan branch1; 59132718Skan B; 60132718Skan branch3; 61132718Skan C; 62132718Skan } 63132718Skan } 64132718Skan else 65132718Skan { 66132718Skan while (loop_cond) 67132718Skan { 68132718Skan A; 69132718Skan branch2; 70132718Skan B; 71132718Skan C; 72132718Skan } 73132718Skan } 74132718Skan 75132718Skan Duplicating the loop might lead to code growth exponential in number of 76132718Skan branches inside loop, so we limit the number of unswitchings performed 77132718Skan in a single loop to PARAM_MAX_UNSWITCH_LEVEL. We only perform the 78132718Skan transformation on innermost loops, as the benefit of doing it on loops 79132718Skan containing subloops would not be very large compared to complications 80132718Skan with handling this case. */ 81132718Skan 82132718Skanstatic struct loop *unswitch_loop (struct loops *, struct loop *, 83169689Skan basic_block, rtx, rtx); 84132718Skanstatic void unswitch_single_loop (struct loops *, struct loop *, rtx, int); 85169689Skanstatic rtx may_unswitch_on (basic_block, struct loop *, rtx *); 86132718Skan 87169689Skan/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if 88169689Skan true, with probability PROB. If CINSN is not NULL, it is the insn to copy 89169689Skan in order to create a jump. */ 90169689Skan 91169689Skanrtx 92169689Skancompare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob, 93169689Skan rtx cinsn) 94169689Skan{ 95169689Skan rtx seq, jump, cond; 96169689Skan enum machine_mode mode; 97169689Skan 98169689Skan mode = GET_MODE (op0); 99169689Skan if (mode == VOIDmode) 100169689Skan mode = GET_MODE (op1); 101169689Skan 102169689Skan start_sequence (); 103169689Skan if (GET_MODE_CLASS (mode) == MODE_CC) 104169689Skan { 105169689Skan /* A hack -- there seems to be no easy generic way how to make a 106169689Skan conditional jump from a ccmode comparison. */ 107169689Skan gcc_assert (cinsn); 108169689Skan cond = XEXP (SET_SRC (pc_set (cinsn)), 0); 109169689Skan gcc_assert (GET_CODE (cond) == comp); 110169689Skan gcc_assert (rtx_equal_p (op0, XEXP (cond, 0))); 111169689Skan gcc_assert (rtx_equal_p (op1, XEXP (cond, 1))); 112169689Skan emit_jump_insn (copy_insn (PATTERN (cinsn))); 113169689Skan jump = get_last_insn (); 114169689Skan JUMP_LABEL (jump) = JUMP_LABEL (cinsn); 115169689Skan LABEL_NUSES (JUMP_LABEL (jump))++; 116169689Skan redirect_jump (jump, label, 0); 117169689Skan } 118169689Skan else 119169689Skan { 120169689Skan gcc_assert (!cinsn); 121169689Skan 122169689Skan op0 = force_operand (op0, NULL_RTX); 123169689Skan op1 = force_operand (op1, NULL_RTX); 124169689Skan do_compare_rtx_and_jump (op0, op1, comp, 0, 125169689Skan mode, NULL_RTX, NULL_RTX, label); 126169689Skan jump = get_last_insn (); 127169689Skan JUMP_LABEL (jump) = label; 128169689Skan LABEL_NUSES (label)++; 129169689Skan } 130169689Skan REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob), 131169689Skan REG_NOTES (jump)); 132169689Skan seq = get_insns (); 133169689Skan end_sequence (); 134169689Skan 135169689Skan return seq; 136169689Skan} 137169689Skan 138132718Skan/* Main entry point. Perform loop unswitching on all suitable LOOPS. */ 139132718Skanvoid 140132718Skanunswitch_loops (struct loops *loops) 141132718Skan{ 142132718Skan int i, num; 143132718Skan struct loop *loop; 144132718Skan 145132718Skan /* Go through inner loops (only original ones). */ 146132718Skan num = loops->num; 147132718Skan 148132718Skan for (i = 1; i < num; i++) 149132718Skan { 150132718Skan /* Removed loop? */ 151132718Skan loop = loops->parray[i]; 152132718Skan if (!loop) 153132718Skan continue; 154132718Skan 155132718Skan if (loop->inner) 156132718Skan continue; 157132718Skan 158132718Skan unswitch_single_loop (loops, loop, NULL_RTX, 0); 159132718Skan#ifdef ENABLE_CHECKING 160132718Skan verify_dominators (CDI_DOMINATORS); 161132718Skan verify_loop_structure (loops); 162132718Skan#endif 163132718Skan } 164169689Skan 165169689Skan iv_analysis_done (); 166132718Skan} 167132718Skan 168132718Skan/* Checks whether we can unswitch LOOP on condition at end of BB -- one of its 169169689Skan basic blocks (for what it means see comments below). In case condition 170169689Skan compares loop invariant cc mode register, return the jump in CINSN. */ 171169689Skan 172169689Skanstatic rtx 173169689Skanmay_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn) 174132718Skan{ 175169689Skan rtx test, at, op[2], stest; 176169689Skan struct rtx_iv iv; 177132718Skan unsigned i; 178169689Skan enum machine_mode mode; 179132718Skan 180132718Skan /* BB must end in a simple conditional jump. */ 181169689Skan if (EDGE_COUNT (bb->succs) != 2) 182169689Skan return NULL_RTX; 183132718Skan if (!any_condjump_p (BB_END (bb))) 184169689Skan return NULL_RTX; 185132718Skan 186132718Skan /* With branches inside loop. */ 187169689Skan if (!flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 0)->dest) 188169689Skan || !flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 1)->dest)) 189169689Skan return NULL_RTX; 190132718Skan 191132718Skan /* It must be executed just once each iteration (because otherwise we 192132718Skan are unable to update dominator/irreducible loop information correctly). */ 193132718Skan if (!just_once_each_iteration_p (loop, bb)) 194169689Skan return NULL_RTX; 195132718Skan 196169689Skan /* Condition must be invariant. */ 197169689Skan test = get_condition (BB_END (bb), &at, true, false); 198132718Skan if (!test) 199169689Skan return NULL_RTX; 200132718Skan 201169689Skan for (i = 0; i < 2; i++) 202169689Skan { 203169689Skan op[i] = XEXP (test, i); 204132718Skan 205169689Skan if (CONSTANT_P (op[i])) 206169689Skan continue; 207169689Skan 208169689Skan if (!iv_analyze (at, op[i], &iv)) 209169689Skan return NULL_RTX; 210169689Skan if (iv.step != const0_rtx 211169689Skan || iv.first_special) 212169689Skan return NULL_RTX; 213169689Skan 214169689Skan op[i] = get_iv_value (&iv, const0_rtx); 215169689Skan } 216169689Skan 217169689Skan mode = GET_MODE (op[0]); 218169689Skan if (mode == VOIDmode) 219169689Skan mode = GET_MODE (op[1]); 220169689Skan if (GET_MODE_CLASS (mode) == MODE_CC) 221169689Skan { 222169689Skan if (at != BB_END (bb)) 223169689Skan return NULL_RTX; 224169689Skan 225169689Skan if (!rtx_equal_p (op[0], XEXP (test, 0)) 226169689Skan || !rtx_equal_p (op[1], XEXP (test, 1))) 227169689Skan return NULL_RTX; 228169689Skan 229169689Skan *cinsn = BB_END (bb); 230169689Skan return test; 231169689Skan } 232169689Skan 233169689Skan stest = simplify_gen_relational (GET_CODE (test), SImode, 234169689Skan mode, op[0], op[1]); 235169689Skan if (stest == const0_rtx 236169689Skan || stest == const_true_rtx) 237169689Skan return stest; 238169689Skan 239169689Skan return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode, 240169689Skan op[0], op[1])); 241132718Skan} 242132718Skan 243132718Skan/* Reverses CONDition; returns NULL if we cannot. */ 244169689Skanrtx 245132718Skanreversed_condition (rtx cond) 246132718Skan{ 247132718Skan enum rtx_code reversed; 248132718Skan reversed = reversed_comparison_code (cond, NULL); 249132718Skan if (reversed == UNKNOWN) 250132718Skan return NULL_RTX; 251132718Skan else 252132718Skan return gen_rtx_fmt_ee (reversed, 253132718Skan GET_MODE (cond), XEXP (cond, 0), 254132718Skan XEXP (cond, 1)); 255132718Skan} 256132718Skan 257132718Skan/* Unswitch single LOOP. COND_CHECKED holds list of conditions we already 258132718Skan unswitched on and are therefore known to be true in this LOOP. NUM is 259132718Skan number of unswitchings done; do not allow it to grow too much, it is too 260132718Skan easy to create example on that the code would grow exponentially. */ 261132718Skanstatic void 262132718Skanunswitch_single_loop (struct loops *loops, struct loop *loop, 263132718Skan rtx cond_checked, int num) 264132718Skan{ 265169689Skan basic_block *bbs; 266132718Skan struct loop *nloop; 267132718Skan unsigned i; 268169689Skan rtx cond, rcond = NULL_RTX, conds, rconds, acond, cinsn; 269132718Skan int repeat; 270132718Skan edge e; 271132718Skan 272132718Skan /* Do not unswitch too much. */ 273132718Skan if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL)) 274132718Skan { 275169689Skan if (dump_file) 276169689Skan fprintf (dump_file, ";; Not unswitching anymore, hit max level\n"); 277132718Skan return; 278132718Skan } 279132718Skan 280132718Skan /* Only unswitch innermost loops. */ 281132718Skan if (loop->inner) 282132718Skan { 283169689Skan if (dump_file) 284169689Skan fprintf (dump_file, ";; Not unswitching, not innermost loop\n"); 285132718Skan return; 286132718Skan } 287132718Skan 288132718Skan /* We must be able to duplicate loop body. */ 289132718Skan if (!can_duplicate_loop_p (loop)) 290132718Skan { 291169689Skan if (dump_file) 292169689Skan fprintf (dump_file, ";; Not unswitching, can't duplicate loop\n"); 293132718Skan return; 294132718Skan } 295132718Skan 296132718Skan /* The loop should not be too large, to limit code growth. */ 297132718Skan if (num_loop_insns (loop) > PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS)) 298132718Skan { 299169689Skan if (dump_file) 300169689Skan fprintf (dump_file, ";; Not unswitching, loop too big\n"); 301132718Skan return; 302132718Skan } 303132718Skan 304132718Skan /* Do not unswitch in cold areas. */ 305132718Skan if (!maybe_hot_bb_p (loop->header)) 306132718Skan { 307169689Skan if (dump_file) 308169689Skan fprintf (dump_file, ";; Not unswitching, not hot area\n"); 309132718Skan return; 310132718Skan } 311132718Skan 312132718Skan /* Nor if the loop usually does not roll. */ 313132718Skan if (expected_loop_iterations (loop) < 1) 314132718Skan { 315169689Skan if (dump_file) 316169689Skan fprintf (dump_file, ";; Not unswitching, loop iterations < 1\n"); 317132718Skan return; 318132718Skan } 319132718Skan 320132718Skan do 321132718Skan { 322132718Skan repeat = 0; 323169689Skan cinsn = NULL_RTX; 324132718Skan 325132718Skan /* Find a bb to unswitch on. */ 326132718Skan bbs = get_loop_body (loop); 327169689Skan iv_analysis_loop_init (loop); 328132718Skan for (i = 0; i < loop->num_nodes; i++) 329169689Skan if ((cond = may_unswitch_on (bbs[i], loop, &cinsn))) 330132718Skan break; 331132718Skan 332132718Skan if (i == loop->num_nodes) 333132718Skan { 334132718Skan free (bbs); 335132718Skan return; 336132718Skan } 337132718Skan 338169689Skan if (cond != const0_rtx 339169689Skan && cond != const_true_rtx) 340169689Skan { 341169689Skan rcond = reversed_condition (cond); 342169689Skan if (rcond) 343169689Skan rcond = canon_condition (rcond); 344132718Skan 345169689Skan /* Check whether the result can be predicted. */ 346169689Skan for (acond = cond_checked; acond; acond = XEXP (acond, 1)) 347169689Skan simplify_using_condition (XEXP (acond, 0), &cond, NULL); 348132718Skan } 349132718Skan 350169689Skan if (cond == const_true_rtx) 351132718Skan { 352132718Skan /* Remove false path. */ 353169689Skan e = FALLTHRU_EDGE (bbs[i]); 354132718Skan remove_path (loops, e); 355132718Skan free (bbs); 356132718Skan repeat = 1; 357132718Skan } 358169689Skan else if (cond == const0_rtx) 359132718Skan { 360132718Skan /* Remove true path. */ 361169689Skan e = BRANCH_EDGE (bbs[i]); 362132718Skan remove_path (loops, e); 363132718Skan free (bbs); 364132718Skan repeat = 1; 365132718Skan } 366132718Skan } while (repeat); 367132718Skan 368132718Skan /* We found the condition we can unswitch on. */ 369132718Skan conds = alloc_EXPR_LIST (0, cond, cond_checked); 370132718Skan if (rcond) 371132718Skan rconds = alloc_EXPR_LIST (0, rcond, cond_checked); 372132718Skan else 373132718Skan rconds = cond_checked; 374132718Skan 375169689Skan if (dump_file) 376169689Skan fprintf (dump_file, ";; Unswitching loop\n"); 377132718Skan 378132718Skan /* Unswitch the loop on this condition. */ 379169689Skan nloop = unswitch_loop (loops, loop, bbs[i], cond, cinsn); 380169689Skan gcc_assert (nloop); 381132718Skan 382132718Skan /* Invoke itself on modified loops. */ 383169689Skan unswitch_single_loop (loops, nloop, rconds, num + 1); 384169689Skan unswitch_single_loop (loops, loop, conds, num + 1); 385132718Skan 386132718Skan free_EXPR_LIST_node (conds); 387132718Skan if (rcond) 388132718Skan free_EXPR_LIST_node (rconds); 389169689Skan 390169689Skan free (bbs); 391132718Skan} 392132718Skan 393132718Skan/* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON. We only support 394132718Skan unswitching of innermost loops. UNSWITCH_ON must be executed in every 395169689Skan iteration, i.e. it must dominate LOOP latch. COND is the condition 396169689Skan determining which loop is entered. Returns NULL if impossible, new loop 397169689Skan otherwise. The new loop is entered if COND is true. If CINSN is not 398169689Skan NULL, it is the insn in that COND is compared. */ 399169689Skan 400132718Skanstatic struct loop * 401169689Skanunswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on, 402169689Skan rtx cond, rtx cinsn) 403132718Skan{ 404169689Skan edge entry, latch_edge, true_edge, false_edge, e; 405169689Skan basic_block switch_bb, unswitch_on_alt; 406132718Skan struct loop *nloop; 407132718Skan sbitmap zero_bitmap; 408169689Skan int irred_flag, prob; 409169689Skan rtx seq; 410132718Skan 411132718Skan /* Some sanity checking. */ 412169689Skan gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on)); 413169689Skan gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2); 414169689Skan gcc_assert (just_once_each_iteration_p (loop, unswitch_on)); 415169689Skan gcc_assert (!loop->inner); 416169689Skan gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 0)->dest)); 417169689Skan gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 1)->dest)); 418132718Skan 419132718Skan entry = loop_preheader_edge (loop); 420132718Skan 421132718Skan /* Make a copy. */ 422132718Skan irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP; 423132718Skan entry->flags &= ~EDGE_IRREDUCIBLE_LOOP; 424132718Skan zero_bitmap = sbitmap_alloc (2); 425132718Skan sbitmap_zero (zero_bitmap); 426132718Skan if (!duplicate_loop_to_header_edge (loop, entry, loops, 1, 427132718Skan zero_bitmap, NULL, NULL, NULL, 0)) 428169689Skan { 429169689Skan sbitmap_free (zero_bitmap); 430169689Skan return NULL; 431169689Skan } 432169689Skan sbitmap_free (zero_bitmap); 433132718Skan entry->flags |= irred_flag; 434132718Skan 435132718Skan /* Record the block with condition we unswitch on. */ 436169689Skan unswitch_on_alt = get_bb_copy (unswitch_on); 437169689Skan true_edge = BRANCH_EDGE (unswitch_on_alt); 438169689Skan false_edge = FALLTHRU_EDGE (unswitch_on); 439169689Skan latch_edge = single_succ_edge (get_bb_copy (loop->latch)); 440132718Skan 441169689Skan /* Create a block with the condition. */ 442169689Skan prob = true_edge->probability; 443169689Skan switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); 444169689Skan seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond), 445169689Skan block_label (true_edge->dest), 446169689Skan prob, cinsn); 447169689Skan emit_insn_after (seq, BB_END (switch_bb)); 448169689Skan e = make_edge (switch_bb, true_edge->dest, 0); 449169689Skan e->probability = prob; 450169689Skan e->count = latch_edge->count * prob / REG_BR_PROB_BASE; 451169689Skan e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU); 452169689Skan e->probability = false_edge->probability; 453169689Skan e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE; 454169689Skan 455132718Skan if (irred_flag) 456132718Skan { 457132718Skan switch_bb->flags |= BB_IRREDUCIBLE_LOOP; 458169689Skan EDGE_SUCC (switch_bb, 0)->flags |= EDGE_IRREDUCIBLE_LOOP; 459169689Skan EDGE_SUCC (switch_bb, 1)->flags |= EDGE_IRREDUCIBLE_LOOP; 460132718Skan } 461132718Skan else 462132718Skan { 463132718Skan switch_bb->flags &= ~BB_IRREDUCIBLE_LOOP; 464169689Skan EDGE_SUCC (switch_bb, 0)->flags &= ~EDGE_IRREDUCIBLE_LOOP; 465169689Skan EDGE_SUCC (switch_bb, 1)->flags &= ~EDGE_IRREDUCIBLE_LOOP; 466132718Skan } 467132718Skan 468132718Skan /* Loopify from the copy of LOOP body, constructing the new loop. */ 469132718Skan nloop = loopify (loops, latch_edge, 470169689Skan single_pred_edge (get_bb_copy (loop->header)), switch_bb, 471169689Skan BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true); 472132718Skan 473169689Skan /* Remove branches that are now unreachable in new loops. */ 474169689Skan remove_path (loops, true_edge); 475169689Skan remove_path (loops, false_edge); 476132718Skan 477132718Skan /* One of created loops do not have to be subloop of the outer loop now, 478132718Skan so fix its placement in loop data structure. */ 479132718Skan fix_loop_placement (loop); 480132718Skan fix_loop_placement (nloop); 481132718Skan 482132718Skan /* Preserve the simple loop preheaders. */ 483132718Skan loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); 484132718Skan loop_split_edge_with (loop_preheader_edge (nloop), NULL_RTX); 485132718Skan 486132718Skan return nloop; 487132718Skan} 488