1/* Code to analyze doloop loops in order for targets to perform late 2 optimizations converting doloops to other forms of hardware loops. 3 Copyright (C) 2011-2015 Free Software Foundation, Inc. 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 3, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING3. If not see 19<http://www.gnu.org/licenses/>. */ 20 21#include "config.h" 22#include "system.h" 23#include "coretypes.h" 24#include "tm.h" 25#include "rtl.h" 26#include "flags.h" 27#include "symtab.h" 28#include "hashtab.h" 29#include "hash-set.h" 30#include "vec.h" 31#include "machmode.h" 32#include "hard-reg-set.h" 33#include "input.h" 34#include "function.h" 35#include "statistics.h" 36#include "double-int.h" 37#include "real.h" 38#include "fixed-value.h" 39#include "alias.h" 40#include "wide-int.h" 41#include "inchash.h" 42#include "tree.h" 43#include "insn-config.h" 44#include "expmed.h" 45#include "dojump.h" 46#include "explow.h" 47#include "calls.h" 48#include "emit-rtl.h" 49#include "varasm.h" 50#include "stmt.h" 51#include "expr.h" 52#include "regs.h" 53#include "predict.h" 54#include "dominance.h" 55#include "cfg.h" 56#include "cfgrtl.h" 57#include "basic-block.h" 58#include "tm_p.h" 59#include "df.h" 60#include "cfgloop.h" 61#include "recog.h" 62#include "target.h" 63#include "hw-doloop.h" 64#include "dumpfile.h" 65 66#ifdef HAVE_doloop_end 67 68/* Dump information collected in LOOPS. */ 69static void 70dump_hwloops (hwloop_info loops) 71{ 72 hwloop_info loop; 73 74 for (loop = loops; loop; loop = loop->next) 75 { 76 hwloop_info i; 77 basic_block b; 78 unsigned ix; 79 80 fprintf (dump_file, ";; loop %d: ", loop->loop_no); 81 if (loop->bad) 82 fprintf (dump_file, "(bad) "); 83 fprintf (dump_file, "{head:%d, depth:%d, reg:%u}", 84 loop->head == NULL ? -1 : loop->head->index, 85 loop->depth, REGNO (loop->iter_reg)); 86 87 fprintf (dump_file, " blocks: [ "); 88 for (ix = 0; loop->blocks.iterate (ix, &b); ix++) 89 fprintf (dump_file, "%d ", b->index); 90 fprintf (dump_file, "] "); 91 92 fprintf (dump_file, " inner loops: [ "); 93 for (ix = 0; loop->loops.iterate (ix, &i); ix++) 94 fprintf (dump_file, "%d ", i->loop_no); 95 fprintf (dump_file, "]\n"); 96 } 97 fprintf (dump_file, "\n"); 98} 99 100/* Return true if BB is part of LOOP. */ 101static bool 102bb_in_loop_p (hwloop_info loop, basic_block bb) 103{ 104 return bitmap_bit_p (loop->block_bitmap, bb->index); 105} 106 107/* Scan the blocks of LOOP (and its inferiors), and record things such as 108 hard registers set, jumps out of and within the loop. */ 109static void 110scan_loop (hwloop_info loop) 111{ 112 unsigned ix; 113 basic_block bb; 114 115 if (loop->bad) 116 return; 117 118 if (REGNO_REG_SET_P (df_get_live_in (loop->successor), 119 REGNO (loop->iter_reg))) 120 loop->iter_reg_used_outside = true; 121 122 for (ix = 0; loop->blocks.iterate (ix, &bb); ix++) 123 { 124 rtx_insn *insn; 125 edge e; 126 edge_iterator ei; 127 128 if (bb != loop->tail) 129 FOR_EACH_EDGE (e, ei, bb->succs) 130 { 131 if (bb_in_loop_p (loop, e->dest)) 132 { 133 if (!(e->flags & EDGE_FALLTHRU)) 134 loop->jumps_within = true; 135 } 136 else 137 { 138 loop->jumps_outof = true; 139 if (!loop->bad) 140 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest), 141 REGNO (loop->iter_reg))); 142 } 143 } 144 145 for (insn = BB_HEAD (bb); 146 insn != NEXT_INSN (BB_END (bb)); 147 insn = NEXT_INSN (insn)) 148 { 149 df_ref def; 150 HARD_REG_SET set_this_insn; 151 152 if (!NONDEBUG_INSN_P (insn)) 153 continue; 154 155 if (recog_memoized (insn) < 0 156 && (GET_CODE (PATTERN (insn)) == ASM_INPUT 157 || asm_noperands (PATTERN (insn)) >= 0)) 158 loop->has_asm = true; 159 160 CLEAR_HARD_REG_SET (set_this_insn); 161 FOR_EACH_INSN_DEF (def, insn) 162 { 163 rtx dreg = DF_REF_REG (def); 164 165 if (!REG_P (dreg)) 166 continue; 167 168 add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg), 169 REGNO (dreg)); 170 } 171 172 if (insn == loop->loop_end) 173 CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg)); 174 else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn))) 175 loop->iter_reg_used = true; 176 IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn); 177 } 178 } 179} 180 181/* Compute the incoming_dest and incoming_src members of LOOP by 182 identifying the edges that jump into the loop. If there is more 183 than one block that jumps into the loop, incoming_src will be set 184 to NULL; likewise, if there is more than one block in the loop that 185 is the destination of an incoming edge, incoming_dest will be NULL. 186 187 Return true if either of these two fields is nonnull, false 188 otherwise. */ 189static bool 190process_incoming_edges (hwloop_info loop) 191{ 192 edge e; 193 edge_iterator ei; 194 bool first = true; 195 196 FOR_EACH_EDGE (e, ei, loop->incoming) 197 { 198 if (first) 199 { 200 loop->incoming_src = e->src; 201 loop->incoming_dest = e->dest; 202 first = false; 203 } 204 else 205 { 206 if (e->dest != loop->incoming_dest) 207 loop->incoming_dest = NULL; 208 if (e->src != loop->incoming_src) 209 loop->incoming_src = NULL; 210 } 211 } 212 if (loop->incoming_src == NULL && loop->incoming_dest == NULL) 213 return false; 214 215 return true; 216} 217 218/* Try to identify a forwarder block that jump into LOOP, and add it to 219 the set of blocks in the loop, updating the vector of incoming blocks as 220 well. This transformation gives a second chance to loops we couldn't 221 otherwise handle by increasing the chance that we'll end up with one 222 incoming_src block. 223 Return true if we made a change, false otherwise. */ 224static bool 225add_forwarder_blocks (hwloop_info loop) 226{ 227 edge e; 228 edge_iterator ei; 229 230 FOR_EACH_EDGE (e, ei, loop->incoming) 231 { 232 if (forwarder_block_p (e->src)) 233 { 234 edge e2; 235 edge_iterator ei2; 236 237 if (dump_file) 238 fprintf (dump_file, 239 ";; Adding forwarder block %d to loop %d and retrying\n", 240 e->src->index, loop->loop_no); 241 loop->blocks.safe_push (e->src); 242 bitmap_set_bit (loop->block_bitmap, e->src->index); 243 FOR_EACH_EDGE (e2, ei2, e->src->preds) 244 vec_safe_push (loop->incoming, e2); 245 loop->incoming->unordered_remove (ei.index); 246 return true; 247 } 248 } 249 return false; 250} 251 252/* Called from reorg_loops when a potential loop end is found. LOOP is 253 a newly set up structure describing the loop, it is this function's 254 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the 255 loop_end insn and its enclosing basic block. REG is the loop counter 256 register. 257 For our purposes, a loop is defined by the set of blocks reachable from 258 the loop head in which the loop counter register is live. This matches 259 the expected use; targets that call into this code usually replace the 260 loop counter with a different special register. */ 261static void 262discover_loop (hwloop_info loop, basic_block tail_bb, rtx_insn *tail_insn, rtx reg) 263{ 264 bool found_tail; 265 unsigned dwork = 0; 266 basic_block bb; 267 268 loop->tail = tail_bb; 269 loop->loop_end = tail_insn; 270 loop->iter_reg = reg; 271 vec_alloc (loop->incoming, 2); 272 loop->start_label = as_a <rtx_insn *> (JUMP_LABEL (tail_insn)); 273 274 if (EDGE_COUNT (tail_bb->succs) != 2) 275 { 276 loop->bad = true; 277 return; 278 } 279 loop->head = BRANCH_EDGE (tail_bb)->dest; 280 loop->successor = FALLTHRU_EDGE (tail_bb)->dest; 281 282 auto_vec<basic_block, 20> works; 283 works.safe_push (loop->head); 284 285 found_tail = false; 286 for (dwork = 0; works.iterate (dwork, &bb); dwork++) 287 { 288 edge e; 289 edge_iterator ei; 290 if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) 291 { 292 /* We've reached the exit block. The loop must be bad. */ 293 if (dump_file) 294 fprintf (dump_file, 295 ";; Loop is bad - reached exit block while scanning\n"); 296 loop->bad = true; 297 break; 298 } 299 300 if (bitmap_bit_p (loop->block_bitmap, bb->index)) 301 continue; 302 303 /* We've not seen this block before. Add it to the loop's 304 list and then add each successor to the work list. */ 305 306 loop->blocks.safe_push (bb); 307 bitmap_set_bit (loop->block_bitmap, bb->index); 308 309 if (bb == tail_bb) 310 found_tail = true; 311 else 312 { 313 FOR_EACH_EDGE (e, ei, bb->succs) 314 { 315 basic_block succ = EDGE_SUCC (bb, ei.index)->dest; 316 if (REGNO_REG_SET_P (df_get_live_in (succ), 317 REGNO (loop->iter_reg))) 318 works.safe_push (succ); 319 } 320 } 321 } 322 323 if (!found_tail) 324 loop->bad = true; 325 326 /* Find the predecessor, and make sure nothing else jumps into this loop. */ 327 if (!loop->bad) 328 { 329 FOR_EACH_VEC_ELT (loop->blocks, dwork, bb) 330 { 331 edge e; 332 edge_iterator ei; 333 FOR_EACH_EDGE (e, ei, bb->preds) 334 { 335 basic_block pred = e->src; 336 337 if (!bb_in_loop_p (loop, pred)) 338 { 339 if (dump_file) 340 fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n", 341 loop->loop_no, pred->index, 342 e->dest->index); 343 vec_safe_push (loop->incoming, e); 344 } 345 } 346 } 347 348 if (!process_incoming_edges (loop)) 349 { 350 if (dump_file) 351 fprintf (dump_file, 352 ";; retrying loop %d with forwarder blocks\n", 353 loop->loop_no); 354 if (!add_forwarder_blocks (loop)) 355 { 356 if (dump_file) 357 fprintf (dump_file, ";; No forwarder blocks found\n"); 358 loop->bad = true; 359 } 360 else if (!process_incoming_edges (loop)) 361 { 362 if (dump_file) 363 fprintf (dump_file, 364 ";; can't find suitable entry for loop %d\n", 365 loop->loop_no); 366 } 367 } 368 } 369} 370 371/* Analyze the structure of the loops in the current function. Use 372 LOOP_STACK for bitmap allocations. Returns all the valid candidates for 373 hardware loops found in this function. HOOKS is the argument 374 passed to reorg_loops, used here to find the iteration registers 375 from a loop_end pattern. */ 376static hwloop_info 377discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks) 378{ 379 hwloop_info loops = NULL; 380 hwloop_info loop; 381 basic_block bb; 382 int nloops = 0; 383 384 /* Find all the possible loop tails. This means searching for every 385 loop_end instruction. For each one found, create a hwloop_info 386 structure and add the head block to the work list. */ 387 FOR_EACH_BB_FN (bb, cfun) 388 { 389 rtx_insn *tail = BB_END (bb); 390 rtx_insn *insn; 391 rtx reg; 392 393 while (tail && NOTE_P (tail) && tail != BB_HEAD (bb)) 394 tail = PREV_INSN (tail); 395 396 if (tail == NULL_RTX) 397 continue; 398 399 if (!JUMP_P (tail)) 400 continue; 401 reg = hooks->end_pattern_reg (tail); 402 if (reg == NULL_RTX) 403 continue; 404 405 /* A possible loop end */ 406 407 /* There's a degenerate case we can handle - an empty loop consisting 408 of only a back branch. Handle that by deleting the branch. */ 409 insn = JUMP_LABEL_AS_INSN (tail); 410 while (insn && !NONDEBUG_INSN_P (insn)) 411 insn = NEXT_INSN (insn); 412 if (insn == tail) 413 { 414 basic_block succ = FALLTHRU_EDGE (bb)->dest; 415 if (dump_file) 416 { 417 fprintf (dump_file, ";; degenerate loop ending at\n"); 418 print_rtl_single (dump_file, tail); 419 } 420 if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg))) 421 { 422 if (dump_file) 423 fprintf (dump_file, ";; deleting it\n"); 424 delete_insn_and_edges (tail); 425 continue; 426 } 427 } 428 429 loop = XCNEW (struct hwloop_info_d); 430 loop->next = loops; 431 loops = loop; 432 loop->loop_no = nloops++; 433 loop->blocks.create (20); 434 loop->block_bitmap = BITMAP_ALLOC (loop_stack); 435 436 if (dump_file) 437 { 438 fprintf (dump_file, ";; potential loop %d ending at\n", 439 loop->loop_no); 440 print_rtl_single (dump_file, tail); 441 } 442 443 discover_loop (loop, bb, tail, reg); 444 } 445 446 /* Compute loop nestings. Given two loops A and B, either the sets 447 of their blocks don't intersect at all, or one is the subset of 448 the other, or the blocks don't form a good nesting structure. */ 449 for (loop = loops; loop; loop = loop->next) 450 { 451 hwloop_info other; 452 453 if (loop->bad) 454 continue; 455 456 for (other = loops; other; other = other->next) 457 { 458 if (other->bad) 459 continue; 460 461 if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap)) 462 continue; 463 if (!bitmap_intersect_compl_p (other->block_bitmap, 464 loop->block_bitmap)) 465 loop->loops.safe_push (other); 466 else if (!bitmap_intersect_compl_p (loop->block_bitmap, 467 other->block_bitmap)) 468 other->loops.safe_push (loop); 469 else 470 { 471 if (dump_file) 472 fprintf (dump_file, 473 ";; can't find suitable nesting for loops %d and %d\n", 474 loop->loop_no, other->loop_no); 475 loop->bad = other->bad = true; 476 } 477 } 478 } 479 480 if (dump_file) 481 dump_hwloops (loops); 482 483 return loops; 484} 485 486/* Free up the loop structures in LOOPS. */ 487static void 488free_loops (hwloop_info loops) 489{ 490 while (loops) 491 { 492 hwloop_info loop = loops; 493 loops = loop->next; 494 loop->loops.release (); 495 loop->blocks.release (); 496 BITMAP_FREE (loop->block_bitmap); 497 XDELETE (loop); 498 } 499} 500 501#define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux) 502 503/* Initialize the aux fields to give ascending indices to basic blocks. */ 504static void 505set_bb_indices (void) 506{ 507 basic_block bb; 508 intptr_t index; 509 510 index = 0; 511 FOR_EACH_BB_FN (bb, cfun) 512 bb->aux = (void *) index++; 513} 514 515/* The taken-branch edge from the loop end can actually go forward. 516 If the target's hardware loop support requires that the loop end be 517 after the loop start, try to reorder a loop's basic blocks when we 518 find such a case. 519 This is not very aggressive; it only moves at most one block. It 520 does not introduce new branches into loops; it may remove them, or 521 it may switch fallthru/jump edges. */ 522static void 523reorder_loops (hwloop_info loops) 524{ 525 basic_block bb; 526 hwloop_info loop; 527 528 cfg_layout_initialize (0); 529 530 set_bb_indices (); 531 532 for (loop = loops; loop; loop = loop->next) 533 { 534 edge e; 535 edge_iterator ei; 536 537 if (loop->bad) 538 continue; 539 540 if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail)) 541 continue; 542 543 FOR_EACH_EDGE (e, ei, loop->head->succs) 544 { 545 if (bitmap_bit_p (loop->block_bitmap, e->dest->index) 546 && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail)) 547 { 548 basic_block start_bb = e->dest; 549 basic_block start_prev_bb = start_bb->prev_bb; 550 551 if (dump_file) 552 fprintf (dump_file, ";; Moving block %d before block %d\n", 553 loop->head->index, start_bb->index); 554 loop->head->prev_bb->next_bb = loop->head->next_bb; 555 loop->head->next_bb->prev_bb = loop->head->prev_bb; 556 557 loop->head->prev_bb = start_prev_bb; 558 loop->head->next_bb = start_bb; 559 start_prev_bb->next_bb = start_bb->prev_bb = loop->head; 560 561 set_bb_indices (); 562 break; 563 } 564 } 565 loops = loops->next; 566 } 567 568 FOR_EACH_BB_FN (bb, cfun) 569 { 570 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun)) 571 bb->aux = bb->next_bb; 572 else 573 bb->aux = NULL; 574 } 575 cfg_layout_finalize (); 576 clear_aux_for_blocks (); 577 df_analyze (); 578} 579 580/* Call the OPT function for LOOP and all of its sub-loops. This is 581 done in a depth-first search; innermost loops are visited first. 582 OPTIMIZE and FAIL are the functions passed to reorg_loops by the 583 target's reorg pass. */ 584static void 585optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks) 586{ 587 int ix; 588 hwloop_info inner; 589 int inner_depth = 0; 590 591 if (loop->visited) 592 return; 593 594 loop->visited = 1; 595 596 if (loop->bad) 597 { 598 if (dump_file) 599 fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no); 600 goto bad_loop; 601 } 602 603 /* Every loop contains in its list of inner loops every loop nested inside 604 it, even if there are intermediate loops. This works because we're doing 605 a depth-first search here and never visit a loop more than once. 606 Recursion depth is effectively limited by the number of available 607 hardware registers. */ 608 for (ix = 0; loop->loops.iterate (ix, &inner); ix++) 609 { 610 optimize_loop (inner, hooks); 611 612 if (!inner->bad && inner_depth < inner->depth) 613 inner_depth = inner->depth; 614 /* The set of registers may be changed while optimizing the inner 615 loop. */ 616 IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop); 617 } 618 619 loop->depth = inner_depth + 1; 620 621 if (hooks->opt (loop)) 622 return; 623 624 bad_loop: 625 if (dump_file) 626 fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no); 627 628 loop->bad = true; 629 hooks->fail (loop); 630} 631 632/* This function can be used from a port's machine_dependent_reorg to 633 find and analyze loops that end in loop_end instructions. It uses 634 a set of function pointers in HOOKS to call back into the 635 target-specific functions to perform the actual machine-specific 636 transformations. 637 638 Such transformations typically involve additional set-up 639 instructions before the loop, to define loop bounds or set up a 640 special loop counter register. 641 642 DO_REORDER should be set to true if we should try to use the 643 reorder_loops function to ensure the loop end occurs after the loop 644 start. This is for use by targets where the loop hardware requires 645 this condition. 646 647 HOOKS is used to pass in target specific hooks; see 648 hw-doloop.h. */ 649void 650reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks) 651{ 652 hwloop_info loops = NULL; 653 hwloop_info loop; 654 bitmap_obstack loop_stack; 655 656 df_live_add_problem (); 657 df_live_set_all_dirty (); 658 df_analyze (); 659 660 bitmap_obstack_initialize (&loop_stack); 661 662 if (dump_file) 663 fprintf (dump_file, ";; Find loops, first pass\n\n"); 664 665 loops = discover_loops (&loop_stack, hooks); 666 667 /* We can't enter cfglayout mode anymore if basic block partitioning 668 already happened. */ 669 if (do_reorder && !flag_reorder_blocks_and_partition) 670 { 671 reorder_loops (loops); 672 free_loops (loops); 673 674 if (dump_file) 675 fprintf (dump_file, ";; Find loops, second pass\n\n"); 676 677 loops = discover_loops (&loop_stack, hooks); 678 } 679 680 for (loop = loops; loop; loop = loop->next) 681 scan_loop (loop); 682 683 /* Now apply the optimizations. */ 684 for (loop = loops; loop; loop = loop->next) 685 optimize_loop (loop, hooks); 686 687 if (dump_file) 688 { 689 fprintf (dump_file, ";; After hardware loops optimization:\n\n"); 690 dump_hwloops (loops); 691 } 692 693 free_loops (loops); 694 bitmap_obstack_release (&loop_stack); 695 696 if (dump_file) 697 print_rtl (dump_file, get_insns ()); 698} 699#endif 700