1169689Skan/* Perform doloop optimizations 2169689Skan Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc. 3169689Skan Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz) 4169689Skan 5169689SkanThis file is part of GCC. 6169689Skan 7169689SkanGCC is free software; you can redistribute it and/or modify it under 8169689Skanthe terms of the GNU General Public License as published by the Free 9169689SkanSoftware Foundation; either version 2, or (at your option) any later 10169689Skanversion. 11169689Skan 12169689SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY 13169689SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or 14169689SkanFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15169689Skanfor more details. 16169689Skan 17169689SkanYou should have received a copy of the GNU General Public License 18169689Skanalong with GCC; see the file COPYING. If not, write to the Free 19169689SkanSoftware Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 20169689Skan02110-1301, USA. */ 21169689Skan 22169689Skan#include "config.h" 23169689Skan#include "system.h" 24169689Skan#include "coretypes.h" 25169689Skan#include "tm.h" 26169689Skan#include "rtl.h" 27169689Skan#include "flags.h" 28169689Skan#include "expr.h" 29169689Skan#include "hard-reg-set.h" 30169689Skan#include "basic-block.h" 31169689Skan#include "toplev.h" 32169689Skan#include "tm_p.h" 33169689Skan#include "cfgloop.h" 34169689Skan#include "output.h" 35169689Skan#include "params.h" 36169689Skan#include "target.h" 37169689Skan 38169689Skan/* This module is used to modify loops with a determinable number of 39169689Skan iterations to use special low-overhead looping instructions. 40169689Skan 41169689Skan It first validates whether the loop is well behaved and has a 42169689Skan determinable number of iterations (either at compile or run-time). 43169689Skan It then modifies the loop to use a low-overhead looping pattern as 44169689Skan follows: 45169689Skan 46169689Skan 1. A pseudo register is allocated as the loop iteration counter. 47169689Skan 48169689Skan 2. The number of loop iterations is calculated and is stored 49169689Skan in the loop counter. 50169689Skan 51169689Skan 3. At the end of the loop, the jump insn is replaced by the 52169689Skan doloop_end pattern. The compare must remain because it might be 53169689Skan used elsewhere. If the loop-variable or condition register are 54169689Skan used elsewhere, they will be eliminated by flow. 55169689Skan 56169689Skan 4. An optional doloop_begin pattern is inserted at the top of the 57169689Skan loop. 58169689Skan 59169689Skan TODO The optimization should only performed when either the biv used for exit 60169689Skan condition is unused at all except for the exit test, or if we do not have to 61169689Skan change its value, since otherwise we have to add a new induction variable, 62169689Skan which usually will not pay up (unless the cost of the doloop pattern is 63169689Skan somehow extremely lower than the cost of compare & jump, or unless the bct 64169689Skan register cannot be used for anything else but doloop -- ??? detect these 65169689Skan cases). */ 66169689Skan 67169689Skan#ifdef HAVE_doloop_end 68169689Skan 69169689Skan/* Return the loop termination condition for PATTERN or zero 70169689Skan if it is not a decrement and branch jump insn. */ 71169689Skan 72169689Skanrtx 73169689Skandoloop_condition_get (rtx pattern) 74169689Skan{ 75169689Skan rtx cmp; 76169689Skan rtx inc; 77169689Skan rtx reg; 78169689Skan rtx inc_src; 79169689Skan rtx condition; 80169689Skan 81169689Skan /* The canonical doloop pattern we expect is: 82169689Skan 83169689Skan (parallel [(set (pc) (if_then_else (condition) 84169689Skan (label_ref (label)) 85169689Skan (pc))) 86169689Skan (set (reg) (plus (reg) (const_int -1))) 87169689Skan (additional clobbers and uses)]) 88169689Skan 89169689Skan Some targets (IA-64) wrap the set of the loop counter in 90169689Skan an if_then_else too. 91169689Skan 92169689Skan In summary, the branch must be the first entry of the 93169689Skan parallel (also required by jump.c), and the second 94169689Skan entry of the parallel must be a set of the loop counter 95169689Skan register. */ 96169689Skan 97169689Skan if (GET_CODE (pattern) != PARALLEL) 98169689Skan return 0; 99169689Skan 100169689Skan cmp = XVECEXP (pattern, 0, 0); 101169689Skan inc = XVECEXP (pattern, 0, 1); 102169689Skan 103169689Skan /* Check for (set (reg) (something)). */ 104169689Skan if (GET_CODE (inc) != SET) 105169689Skan return 0; 106169689Skan reg = SET_DEST (inc); 107169689Skan if (! REG_P (reg)) 108169689Skan return 0; 109169689Skan 110169689Skan /* Check if something = (plus (reg) (const_int -1)). 111169689Skan On IA-64, this decrement is wrapped in an if_then_else. */ 112169689Skan inc_src = SET_SRC (inc); 113169689Skan if (GET_CODE (inc_src) == IF_THEN_ELSE) 114169689Skan inc_src = XEXP (inc_src, 1); 115169689Skan if (GET_CODE (inc_src) != PLUS 116169689Skan || XEXP (inc_src, 0) != reg 117169689Skan || XEXP (inc_src, 1) != constm1_rtx) 118169689Skan return 0; 119169689Skan 120169689Skan /* Check for (set (pc) (if_then_else (condition) 121169689Skan (label_ref (label)) 122169689Skan (pc))). */ 123169689Skan if (GET_CODE (cmp) != SET 124169689Skan || SET_DEST (cmp) != pc_rtx 125169689Skan || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE 126169689Skan || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF 127169689Skan || XEXP (SET_SRC (cmp), 2) != pc_rtx) 128169689Skan return 0; 129169689Skan 130169689Skan /* Extract loop termination condition. */ 131169689Skan condition = XEXP (SET_SRC (cmp), 0); 132169689Skan 133169689Skan /* We expect a GE or NE comparison with 0 or 1. */ 134169689Skan if ((GET_CODE (condition) != GE 135169689Skan && GET_CODE (condition) != NE) 136169689Skan || (XEXP (condition, 1) != const0_rtx 137169689Skan && XEXP (condition, 1) != const1_rtx)) 138169689Skan return 0; 139169689Skan 140169689Skan if ((XEXP (condition, 0) == reg) 141169689Skan || (GET_CODE (XEXP (condition, 0)) == PLUS 142169689Skan && XEXP (XEXP (condition, 0), 0) == reg)) 143169689Skan return condition; 144169689Skan 145169689Skan /* ??? If a machine uses a funny comparison, we could return a 146169689Skan canonicalized form here. */ 147169689Skan 148169689Skan return 0; 149169689Skan} 150169689Skan 151169689Skan/* Return nonzero if the loop specified by LOOP is suitable for 152169689Skan the use of special low-overhead looping instructions. DESC 153169689Skan describes the number of iterations of the loop. */ 154169689Skan 155169689Skanstatic bool 156169689Skandoloop_valid_p (struct loop *loop, struct niter_desc *desc) 157169689Skan{ 158169689Skan basic_block *body = get_loop_body (loop), bb; 159169689Skan rtx insn; 160169689Skan unsigned i; 161169689Skan bool result = true; 162169689Skan 163169689Skan /* Check for loops that may not terminate under special conditions. */ 164169689Skan if (!desc->simple_p 165169689Skan || desc->assumptions 166169689Skan || desc->infinite) 167169689Skan { 168169689Skan /* There are some cases that would require a special attention. 169169689Skan For example if the comparison is LEU and the comparison value 170169689Skan is UINT_MAX then the loop will not terminate. Similarly, if the 171169689Skan comparison code is GEU and the comparison value is 0, the 172169689Skan loop will not terminate. 173169689Skan 174169689Skan If the absolute increment is not 1, the loop can be infinite 175169689Skan even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2) 176169689Skan 177169689Skan ??? We could compute these conditions at run-time and have a 178169689Skan additional jump around the loop to ensure an infinite loop. 179169689Skan However, it is very unlikely that this is the intended 180169689Skan behavior of the loop and checking for these rare boundary 181169689Skan conditions would pessimize all other code. 182169689Skan 183169689Skan If the loop is executed only a few times an extra check to 184169689Skan restart the loop could use up most of the benefits of using a 185169689Skan count register loop. Note however, that normally, this 186169689Skan restart branch would never execute, so it could be predicted 187169689Skan well by the CPU. We should generate the pessimistic code by 188169689Skan default, and have an option, e.g. -funsafe-loops that would 189169689Skan enable count-register loops in this case. */ 190169689Skan if (dump_file) 191169689Skan fprintf (dump_file, "Doloop: Possible infinite iteration case.\n"); 192169689Skan result = false; 193169689Skan goto cleanup; 194169689Skan } 195169689Skan 196169689Skan for (i = 0; i < loop->num_nodes; i++) 197169689Skan { 198169689Skan bb = body[i]; 199169689Skan 200169689Skan for (insn = BB_HEAD (bb); 201169689Skan insn != NEXT_INSN (BB_END (bb)); 202169689Skan insn = NEXT_INSN (insn)) 203169689Skan { 204169689Skan /* Different targets have different necessities for low-overhead 205169689Skan looping. Call the back end for each instruction within the loop 206169689Skan to let it decide whether the insn prohibits a low-overhead loop. 207169689Skan It will then return the cause for it to emit to the dump file. */ 208169689Skan const char * invalid = targetm.invalid_within_doloop (insn); 209169689Skan if (invalid) 210169689Skan { 211169689Skan if (dump_file) 212169689Skan fprintf (dump_file, "Doloop: %s\n", invalid); 213169689Skan result = false; 214169689Skan goto cleanup; 215169689Skan } 216169689Skan } 217169689Skan } 218169689Skan result = true; 219169689Skan 220169689Skancleanup: 221169689Skan free (body); 222169689Skan 223169689Skan return result; 224169689Skan} 225169689Skan 226169689Skan/* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru 227169689Skan edge. If the condition is always false, do not do anything. If it is always 228169689Skan true, redirect E to DEST and return false. In all other cases, true is 229169689Skan returned. */ 230169689Skan 231169689Skanstatic bool 232169689Skanadd_test (rtx cond, edge *e, basic_block dest) 233169689Skan{ 234169689Skan rtx seq, jump, label; 235169689Skan enum machine_mode mode; 236169689Skan rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1); 237169689Skan enum rtx_code code = GET_CODE (cond); 238169689Skan basic_block bb; 239169689Skan 240169689Skan mode = GET_MODE (XEXP (cond, 0)); 241169689Skan if (mode == VOIDmode) 242169689Skan mode = GET_MODE (XEXP (cond, 1)); 243169689Skan 244169689Skan start_sequence (); 245169689Skan op0 = force_operand (op0, NULL_RTX); 246169689Skan op1 = force_operand (op1, NULL_RTX); 247169689Skan label = block_label (dest); 248169689Skan do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label); 249169689Skan 250169689Skan jump = get_last_insn (); 251169689Skan if (!JUMP_P (jump)) 252169689Skan { 253169689Skan /* The condition is always false and the jump was optimized out. */ 254169689Skan end_sequence (); 255169689Skan return true; 256169689Skan } 257169689Skan 258169689Skan seq = get_insns (); 259169689Skan end_sequence (); 260169689Skan bb = loop_split_edge_with (*e, seq); 261169689Skan *e = single_succ_edge (bb); 262169689Skan 263169689Skan if (any_uncondjump_p (jump)) 264169689Skan { 265169689Skan /* The condition is always true. */ 266169689Skan delete_insn (jump); 267169689Skan redirect_edge_and_branch_force (*e, dest); 268169689Skan return false; 269169689Skan } 270169689Skan 271169689Skan JUMP_LABEL (jump) = label; 272169689Skan 273169689Skan /* The jump is supposed to handle an unlikely special case. */ 274169689Skan REG_NOTES (jump) 275169689Skan = gen_rtx_EXPR_LIST (REG_BR_PROB, 276169689Skan const0_rtx, REG_NOTES (jump)); 277169689Skan LABEL_NUSES (label)++; 278169689Skan 279169689Skan make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU); 280169689Skan return true; 281169689Skan} 282169689Skan 283169689Skan/* Modify the loop to use the low-overhead looping insn where LOOP 284169689Skan describes the loop, DESC describes the number of iterations of the 285169689Skan loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the 286169689Skan end of the loop. CONDITION is the condition separated from the 287169689Skan DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */ 288169689Skan 289169689Skanstatic void 290169689Skandoloop_modify (struct loop *loop, struct niter_desc *desc, 291169689Skan rtx doloop_seq, rtx condition, rtx count) 292169689Skan{ 293169689Skan rtx counter_reg; 294169689Skan rtx tmp, noloop = NULL_RTX; 295169689Skan rtx sequence; 296169689Skan rtx jump_insn; 297169689Skan rtx jump_label; 298169689Skan int nonneg = 0; 299169689Skan bool increment_count; 300169689Skan basic_block loop_end = desc->out_edge->src; 301169689Skan enum machine_mode mode; 302169689Skan 303169689Skan jump_insn = BB_END (loop_end); 304169689Skan 305169689Skan if (dump_file) 306169689Skan { 307169689Skan fprintf (dump_file, "Doloop: Inserting doloop pattern ("); 308169689Skan if (desc->const_iter) 309169689Skan fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); 310169689Skan else 311169689Skan fputs ("runtime", dump_file); 312169689Skan fputs (" iterations).\n", dump_file); 313169689Skan } 314169689Skan 315169689Skan /* Discard original jump to continue loop. The original compare 316169689Skan result may still be live, so it cannot be discarded explicitly. */ 317169689Skan delete_insn (jump_insn); 318169689Skan 319169689Skan counter_reg = XEXP (condition, 0); 320169689Skan if (GET_CODE (counter_reg) == PLUS) 321169689Skan counter_reg = XEXP (counter_reg, 0); 322169689Skan mode = GET_MODE (counter_reg); 323169689Skan 324169689Skan increment_count = false; 325169689Skan switch (GET_CODE (condition)) 326169689Skan { 327169689Skan case NE: 328169689Skan /* Currently only NE tests against zero and one are supported. */ 329169689Skan noloop = XEXP (condition, 1); 330169689Skan if (noloop != const0_rtx) 331169689Skan { 332169689Skan gcc_assert (noloop == const1_rtx); 333169689Skan increment_count = true; 334169689Skan } 335169689Skan break; 336169689Skan 337169689Skan case GE: 338169689Skan /* Currently only GE tests against zero are supported. */ 339169689Skan gcc_assert (XEXP (condition, 1) == const0_rtx); 340169689Skan 341169689Skan noloop = constm1_rtx; 342169689Skan 343169689Skan /* The iteration count does not need incrementing for a GE test. */ 344169689Skan increment_count = false; 345169689Skan 346169689Skan /* Determine if the iteration counter will be non-negative. 347169689Skan Note that the maximum value loaded is iterations_max - 1. */ 348169689Skan if (desc->niter_max 349169689Skan <= ((unsigned HOST_WIDEST_INT) 1 350169689Skan << (GET_MODE_BITSIZE (mode) - 1))) 351169689Skan nonneg = 1; 352169689Skan break; 353169689Skan 354169689Skan /* Abort if an invalid doloop pattern has been generated. */ 355169689Skan default: 356169689Skan gcc_unreachable (); 357169689Skan } 358169689Skan 359169689Skan if (increment_count) 360169689Skan count = simplify_gen_binary (PLUS, mode, count, const1_rtx); 361169689Skan 362169689Skan /* Insert initialization of the count register into the loop header. */ 363169689Skan start_sequence (); 364169689Skan tmp = force_operand (count, counter_reg); 365169689Skan convert_move (counter_reg, tmp, 1); 366169689Skan sequence = get_insns (); 367169689Skan end_sequence (); 368169689Skan emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); 369169689Skan 370169689Skan if (desc->noloop_assumptions) 371169689Skan { 372169689Skan rtx ass = copy_rtx (desc->noloop_assumptions); 373169689Skan basic_block preheader = loop_preheader_edge (loop)->src; 374169689Skan basic_block set_zero 375169689Skan = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); 376169689Skan basic_block new_preheader 377169689Skan = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); 378169689Skan edge te; 379169689Skan 380169689Skan /* Expand the condition testing the assumptions and if it does not pass, 381169689Skan reset the count register to 0. */ 382169689Skan redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader); 383169689Skan set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader); 384169689Skan 385169689Skan set_zero->count = 0; 386169689Skan set_zero->frequency = 0; 387169689Skan 388169689Skan te = single_succ_edge (preheader); 389169689Skan for (; ass; ass = XEXP (ass, 1)) 390169689Skan if (!add_test (XEXP (ass, 0), &te, set_zero)) 391169689Skan break; 392169689Skan 393169689Skan if (ass) 394169689Skan { 395169689Skan /* We reached a condition that is always true. This is very hard to 396169689Skan reproduce (such a loop does not roll, and thus it would most 397169689Skan likely get optimized out by some of the preceding optimizations). 398169689Skan In fact, I do not have any testcase for it. However, it would 399169689Skan also be very hard to show that it is impossible, so we must 400169689Skan handle this case. */ 401169689Skan set_zero->count = preheader->count; 402169689Skan set_zero->frequency = preheader->frequency; 403169689Skan } 404169689Skan 405169689Skan if (EDGE_COUNT (set_zero->preds) == 0) 406169689Skan { 407169689Skan /* All the conditions were simplified to false, remove the 408169689Skan unreachable set_zero block. */ 409169689Skan remove_bb_from_loops (set_zero); 410169689Skan delete_basic_block (set_zero); 411169689Skan } 412169689Skan else 413169689Skan { 414169689Skan /* Reset the counter to zero in the set_zero block. */ 415169689Skan start_sequence (); 416169689Skan convert_move (counter_reg, noloop, 0); 417169689Skan sequence = get_insns (); 418169689Skan end_sequence (); 419169689Skan emit_insn_after (sequence, BB_END (set_zero)); 420169689Skan 421169689Skan set_immediate_dominator (CDI_DOMINATORS, set_zero, 422169689Skan recount_dominator (CDI_DOMINATORS, 423169689Skan set_zero)); 424169689Skan } 425169689Skan 426169689Skan set_immediate_dominator (CDI_DOMINATORS, new_preheader, 427169689Skan recount_dominator (CDI_DOMINATORS, 428169689Skan new_preheader)); 429169689Skan } 430169689Skan 431169689Skan /* Some targets (eg, C4x) need to initialize special looping 432169689Skan registers. */ 433169689Skan#ifdef HAVE_doloop_begin 434169689Skan { 435169689Skan rtx init; 436169689Skan unsigned level = get_loop_level (loop) + 1; 437169689Skan init = gen_doloop_begin (counter_reg, 438169689Skan desc->const_iter ? desc->niter_expr : const0_rtx, 439169689Skan GEN_INT (desc->niter_max), 440169689Skan GEN_INT (level)); 441169689Skan if (init) 442169689Skan { 443169689Skan start_sequence (); 444169689Skan emit_insn (init); 445169689Skan sequence = get_insns (); 446169689Skan end_sequence (); 447169689Skan emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src)); 448169689Skan } 449169689Skan } 450169689Skan#endif 451169689Skan 452169689Skan /* Insert the new low-overhead looping insn. */ 453169689Skan emit_jump_insn_after (doloop_seq, BB_END (loop_end)); 454169689Skan jump_insn = BB_END (loop_end); 455169689Skan jump_label = block_label (desc->in_edge->dest); 456169689Skan JUMP_LABEL (jump_insn) = jump_label; 457169689Skan LABEL_NUSES (jump_label)++; 458169689Skan 459169689Skan /* Ensure the right fallthru edge is marked, for case we have reversed 460169689Skan the condition. */ 461169689Skan desc->in_edge->flags &= ~EDGE_FALLTHRU; 462169689Skan desc->out_edge->flags |= EDGE_FALLTHRU; 463169689Skan 464169689Skan /* Add a REG_NONNEG note if the actual or estimated maximum number 465169689Skan of iterations is non-negative. */ 466169689Skan if (nonneg) 467169689Skan { 468169689Skan REG_NOTES (jump_insn) 469169689Skan = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn)); 470169689Skan } 471169689Skan} 472169689Skan 473169689Skan/* Process loop described by LOOP validating that the loop is suitable for 474169689Skan conversion to use a low overhead looping instruction, replacing the jump 475169689Skan insn where suitable. Returns true if the loop was successfully 476169689Skan modified. */ 477169689Skan 478169689Skanstatic bool 479169689Skandoloop_optimize (struct loop *loop) 480169689Skan{ 481169689Skan enum machine_mode mode; 482169689Skan rtx doloop_seq, doloop_pat, doloop_reg; 483169689Skan rtx iterations, count; 484169689Skan rtx iterations_max; 485169689Skan rtx start_label; 486169689Skan rtx condition; 487169689Skan unsigned level, est_niter; 488169689Skan int max_cost; 489169689Skan struct niter_desc *desc; 490169689Skan unsigned word_mode_size; 491169689Skan unsigned HOST_WIDE_INT word_mode_max; 492169689Skan 493169689Skan if (dump_file) 494169689Skan fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); 495169689Skan 496169689Skan iv_analysis_loop_init (loop); 497169689Skan 498169689Skan /* Find the simple exit of a LOOP. */ 499169689Skan desc = get_simple_loop_desc (loop); 500169689Skan 501169689Skan /* Check that loop is a candidate for a low-overhead looping insn. */ 502169689Skan if (!doloop_valid_p (loop, desc)) 503169689Skan { 504169689Skan if (dump_file) 505169689Skan fprintf (dump_file, 506169689Skan "Doloop: The loop is not suitable.\n"); 507169689Skan return false; 508169689Skan } 509169689Skan mode = desc->mode; 510169689Skan 511169689Skan est_niter = 3; 512169689Skan if (desc->const_iter) 513169689Skan est_niter = desc->niter; 514169689Skan /* If the estimate on number of iterations is reliable (comes from profile 515169689Skan feedback), use it. Do not use it normally, since the expected number 516169689Skan of iterations of an unrolled loop is 2. */ 517169689Skan if (loop->header->count) 518169689Skan est_niter = expected_loop_iterations (loop); 519169689Skan 520169689Skan if (est_niter < 3) 521169689Skan { 522169689Skan if (dump_file) 523169689Skan fprintf (dump_file, 524169689Skan "Doloop: Too few iterations (%u) to be profitable.\n", 525169689Skan est_niter); 526169689Skan return false; 527169689Skan } 528169689Skan 529169689Skan max_cost 530169689Skan = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST)); 531169689Skan if (rtx_cost (desc->niter_expr, SET) > max_cost) 532169689Skan { 533169689Skan if (dump_file) 534169689Skan fprintf (dump_file, 535169689Skan "Doloop: number of iterations too costly to compute.\n"); 536169689Skan return false; 537169689Skan } 538169689Skan 539169689Skan count = copy_rtx (desc->niter_expr); 540169689Skan iterations = desc->const_iter ? desc->niter_expr : const0_rtx; 541169689Skan iterations_max = GEN_INT (desc->niter_max); 542169689Skan level = get_loop_level (loop) + 1; 543169689Skan 544169689Skan /* Generate looping insn. If the pattern FAILs then give up trying 545169689Skan to modify the loop since there is some aspect the back-end does 546169689Skan not like. */ 547169689Skan start_label = block_label (desc->in_edge->dest); 548169689Skan doloop_reg = gen_reg_rtx (mode); 549169689Skan doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, 550169689Skan GEN_INT (level), start_label); 551169689Skan 552169689Skan word_mode_size = GET_MODE_BITSIZE (word_mode); 553169689Skan word_mode_max 554169689Skan = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1; 555169689Skan if (! doloop_seq 556169689Skan && mode != word_mode 557169689Skan /* Before trying mode different from the one in that # of iterations is 558169689Skan computed, we must be sure that the number of iterations fits into 559169689Skan the new mode. */ 560169689Skan && (word_mode_size >= GET_MODE_BITSIZE (mode) 561169689Skan || desc->niter_max <= word_mode_max)) 562169689Skan { 563169689Skan if (word_mode_size > GET_MODE_BITSIZE (mode)) 564169689Skan { 565169689Skan count = simplify_gen_unary (ZERO_EXTEND, word_mode, 566169689Skan count, mode); 567169689Skan iterations = simplify_gen_unary (ZERO_EXTEND, word_mode, 568169689Skan iterations, mode); 569169689Skan iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode, 570169689Skan iterations_max, mode); 571169689Skan } 572169689Skan else 573169689Skan { 574169689Skan count = lowpart_subreg (word_mode, count, mode); 575169689Skan iterations = lowpart_subreg (word_mode, iterations, mode); 576169689Skan iterations_max = lowpart_subreg (word_mode, iterations_max, mode); 577169689Skan } 578169689Skan PUT_MODE (doloop_reg, word_mode); 579169689Skan doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max, 580169689Skan GEN_INT (level), start_label); 581169689Skan } 582169689Skan if (! doloop_seq) 583169689Skan { 584169689Skan if (dump_file) 585169689Skan fprintf (dump_file, 586169689Skan "Doloop: Target unwilling to use doloop pattern!\n"); 587169689Skan return false; 588169689Skan } 589169689Skan 590169689Skan /* If multiple instructions were created, the last must be the 591169689Skan jump instruction. Also, a raw define_insn may yield a plain 592169689Skan pattern. */ 593169689Skan doloop_pat = doloop_seq; 594169689Skan if (INSN_P (doloop_pat)) 595169689Skan { 596169689Skan while (NEXT_INSN (doloop_pat) != NULL_RTX) 597169689Skan doloop_pat = NEXT_INSN (doloop_pat); 598169689Skan if (JUMP_P (doloop_pat)) 599169689Skan doloop_pat = PATTERN (doloop_pat); 600169689Skan else 601169689Skan doloop_pat = NULL_RTX; 602169689Skan } 603169689Skan 604169689Skan if (! doloop_pat 605169689Skan || ! (condition = doloop_condition_get (doloop_pat))) 606169689Skan { 607169689Skan if (dump_file) 608169689Skan fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n"); 609169689Skan return false; 610169689Skan } 611169689Skan 612169689Skan doloop_modify (loop, desc, doloop_seq, condition, count); 613169689Skan return true; 614169689Skan} 615169689Skan 616169689Skan/* This is the main entry point. Process all LOOPS using doloop_optimize. */ 617169689Skan 618169689Skanvoid 619169689Skandoloop_optimize_loops (struct loops *loops) 620169689Skan{ 621169689Skan unsigned i; 622169689Skan struct loop *loop; 623169689Skan 624169689Skan for (i = 1; i < loops->num; i++) 625169689Skan { 626169689Skan loop = loops->parray[i]; 627169689Skan if (!loop) 628169689Skan continue; 629169689Skan 630169689Skan doloop_optimize (loop); 631169689Skan } 632169689Skan 633169689Skan iv_analysis_done (); 634169689Skan 635169689Skan#ifdef ENABLE_CHECKING 636169689Skan verify_dominators (CDI_DOMINATORS); 637169689Skan verify_loop_structure (loops); 638169689Skan#endif 639169689Skan} 640169689Skan#endif /* HAVE_doloop_end */ 641169689Skan 642