1/* Shrink-wrapping related optimizations. 2 Copyright (C) 1987-2015 Free Software Foundation, Inc. 3 4This file is part of GCC. 5 6GCC is free software; you can redistribute it and/or modify it under 7the terms of the GNU General Public License as published by the Free 8Software Foundation; either version 3, or (at your option) any later 9version. 10 11GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12WARRANTY; without even the implied warranty of MERCHANTABILITY or 13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14for more details. 15 16You should have received a copy of the GNU General Public License 17along with GCC; see the file COPYING3. If not see 18<http://www.gnu.org/licenses/>. */ 19 20/* This file handles shrink-wrapping related optimizations. */ 21 22#include "config.h" 23#include "system.h" 24#include "coretypes.h" 25#include "tm.h" 26#include "rtl-error.h" 27#include "hash-set.h" 28#include "machmode.h" 29#include "vec.h" 30#include "double-int.h" 31#include "input.h" 32#include "alias.h" 33#include "symtab.h" 34#include "wide-int.h" 35#include "inchash.h" 36#include "tree.h" 37#include "fold-const.h" 38#include "stor-layout.h" 39#include "varasm.h" 40#include "stringpool.h" 41#include "flags.h" 42#include "except.h" 43#include "hard-reg-set.h" 44#include "function.h" 45#include "hashtab.h" 46#include "rtl.h" 47#include "statistics.h" 48#include "real.h" 49#include "fixed-value.h" 50#include "insn-config.h" 51#include "expmed.h" 52#include "dojump.h" 53#include "explow.h" 54#include "calls.h" 55#include "emit-rtl.h" 56#include "stmt.h" 57#include "expr.h" 58#include "insn-codes.h" 59#include "optabs.h" 60#include "libfuncs.h" 61#include "regs.h" 62#include "recog.h" 63#include "output.h" 64#include "tm_p.h" 65#include "langhooks.h" 66#include "target.h" 67#include "common/common-target.h" 68#include "gimple-expr.h" 69#include "gimplify.h" 70#include "tree-pass.h" 71#include "predict.h" 72#include 
"dominance.h" 73#include "cfg.h" 74#include "cfgrtl.h" 75#include "basic-block.h" 76#include "df.h" 77#include "params.h" 78#include "bb-reorder.h" 79#include "shrink-wrap.h" 80#include "regcprop.h" 81#include "rtl-iter.h" 82#include "valtrack.h" 83 84#ifdef HAVE_simple_return 85 86/* Return true if INSN requires the stack frame to be set up. 87 PROLOGUE_USED contains the hard registers used in the function 88 prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the 89 prologue to set up for the function. */ 90bool 91requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used, 92 HARD_REG_SET set_up_by_prologue) 93{ 94 df_ref def, use; 95 HARD_REG_SET hardregs; 96 unsigned regno; 97 98 if (CALL_P (insn)) 99 return !SIBLING_CALL_P (insn); 100 101 /* We need a frame to get the unique CFA expected by the unwinder. */ 102 if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn)) 103 return true; 104 105 CLEAR_HARD_REG_SET (hardregs); 106 FOR_EACH_INSN_DEF (def, insn) 107 { 108 rtx dreg = DF_REF_REG (def); 109 110 if (!REG_P (dreg)) 111 continue; 112 113 add_to_hard_reg_set (&hardregs, GET_MODE (dreg), 114 REGNO (dreg)); 115 } 116 if (hard_reg_set_intersect_p (hardregs, prologue_used)) 117 return true; 118 AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set); 119 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 120 if (TEST_HARD_REG_BIT (hardregs, regno) 121 && df_regs_ever_live_p (regno)) 122 return true; 123 124 FOR_EACH_INSN_USE (use, insn) 125 { 126 rtx reg = DF_REF_REG (use); 127 128 if (!REG_P (reg)) 129 continue; 130 131 add_to_hard_reg_set (&hardregs, GET_MODE (reg), 132 REGNO (reg)); 133 } 134 if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue)) 135 return true; 136 137 return false; 138} 139 140/* See whether there has a single live edge from BB, which dest uses 141 [REGNO, END_REGNO). Return the live edge if its dest bb has 142 one or two predecessors. Otherwise return NULL. 
*/

static edge
live_edge_for_reg (basic_block bb, int regno, int end_regno)
{
  edge succ, found = NULL;
  edge_iterator ei;

  /* FOUND ends up as the unique successor edge whose destination has
     one of [REGNO, END_REGNO) live on entry, or NULL if there is no
     such edge or more than one.  */
  FOR_EACH_EDGE (succ, ei, bb->succs)
    {
      bitmap live_in = df_get_live_in (succ->dest);
      bool reg_is_live = false;

      for (int i = regno; i < end_regno; i++)
	if (REGNO_REG_SET_P (live_in, i))
	  {
	    reg_is_live = true;
	    break;
	  }

      if (!reg_is_live)
	continue;

      /* Two distinct destinations want the register: not unique.  */
      if (found && found != succ)
	return NULL;
      found = succ;
    }

  /* We can sometimes encounter dead code.  Don't try to move it
     into the exit block.  */
  if (!found || found->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return NULL;

  /* Reject targets of abnormal edges.  This is needed for correctness
     on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
     exception edges even though it is generally treated as call-saved
     for the majority of the compilation.  Moving across abnormal edges
     isn't going to be interesting for shrink-wrap usage anyway.  */
  if (found->flags & EDGE_ABNORMAL)
    return NULL;

  /* When the destination has exactly two predecessors, we can still
     create a new block on the edge to meet the requirement.  More than
     two is rejected.  */
  if (EDGE_COUNT (found->dest->preds) > 2)
    return NULL;

  return found;
}

/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   is split or not.
*/

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const HARD_REG_SET uses,
			   const HARD_REG_SET defs,
			   bool *split_p,
			   struct dead_debug_local *debug)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses, bb_defs;
  unsigned int i, dregno, end_dregno;
  /* Source register range; stays at FIRST_PSEUDO_REGISTER (an empty
     range) when SRC is not a register.  */
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;
  rtx_insn *dinsn;
  df_ref def;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
     (1) A (non-overlapping) register
     (2) A constant,
     (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      if (can_throw_internal (insn))
	return false;

      /* Walk every sub-rtx of SRC and reject anything other than
	 arithmetic over at most one inner register.  */
      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		case LO_SUM:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      /* From here on, treat the single inner register as "the" source
	 register for the overlap checks.  */
      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      /* The new block's live-in is what flows out of BB and into the
	 old destination.  */
      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once for a function.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  Each iteration sinks INSN one block
     further; NEXT_BLOCK is NULLed once INSN must stay in BB.  */
  do
    {
      /* Record debug uses of DEST between INSN and the end of BB, so
	 debug temps can be inserted for them below.  */
      if (MAY_HAVE_DEBUG_INSNS)
	{
	  FOR_BB_INSNS_REVERSE (bb, dinsn)
	    if (DEBUG_INSN_P (dinsn))
	      {
		df_ref use;
		FOR_EACH_INSN_USE (use, dinsn)
		  if (refers_to_regno_p (dregno, end_dregno,
					 DF_REF_REG (use), (rtx *) NULL))
		    dead_debug_add (debug, use, DF_REF_REGNO (use));
	      }
	    else if (dinsn == insn)
	      break;
	}
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      bb_uses = &DF_LR_BB_INFO (bb)->use;
      bb_defs = &DF_LR_BB_INFO (bb)->def;
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the new created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  /* Insert debug temps for dead REGs used in subsequent debug insns.  */
  if (debug->used && !bitmap_empty_p (debug->used))
    FOR_EACH_INSN_DEF (def, insn)
      dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
			      DEBUG_TEMP_BEFORE_WITH_VALUE);

  /* Finally move the pattern into the chosen block and remove the
     original insn.  */
  emit_insn_after (PATTERN (insn), bb_note (bb));
  delete_insn (insn);
  return true;
}

/* Look for register copies in the first block of the function, and move
   them down into successor blocks if the register is used only on one
   path.  This exposes more opportunities for shrink-wrapping.  These
   kinds of sets often occur when incoming argument registers are moved
   to call-saved registers because their values are live across one or
   more calls during the function.  */

void
prepare_shrink_wrap (basic_block entry_block)
{
  rtx_insn *insn, *curr;
  rtx x;
  HARD_REG_SET uses, defs;
  df_ref def, use;
  bool split_p = false;
  unsigned int i;
  struct dead_debug_local debug;

  if (JUMP_P (BB_END (entry_block)))
    {
      /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
	 to sink the copies from parameter to callee saved register out of
	 entry block.  copyprop_hardreg_forward_bb_without_debug_insn is called
	 to release some dependences.  */
      copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
    }

  dead_debug_local_init (&debug, NULL, NULL);
  CLEAR_HARD_REG_SET (uses);
  CLEAR_HARD_REG_SET (defs);

  /* Walk the entry block backwards; USES/DEFS accumulate the registers
     referenced after the current insn, which move_insn_for_shrink_wrap
     consults when deciding whether an insn can be sunk.  */
  FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
    if (NONDEBUG_INSN_P (insn)
	&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
				       &split_p, &debug))
      {
	/* Add all defined registers to DEFS.  */
	FOR_EACH_INSN_DEF (def, insn)
	  {
	    x = DF_REF_REG (def);
	    if (REG_P (x) && HARD_REGISTER_P (x))
	      for (i = REGNO (x); i < END_REGNO (x); i++)
		SET_HARD_REG_BIT (defs, i);
	  }

	/* Add all used registers to USES.
*/ 504 FOR_EACH_INSN_USE (use, insn) 505 { 506 x = DF_REF_REG (use); 507 if (REG_P (x) && HARD_REGISTER_P (x)) 508 for (i = REGNO (x); i < END_REGNO (x); i++) 509 SET_HARD_REG_BIT (uses, i); 510 } 511 } 512 513 dead_debug_local_finish (&debug, NULL); 514} 515 516/* Create a copy of BB instructions and insert at BEFORE. Redirect 517 preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE. */ 518void 519dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before, 520 bitmap_head *need_prologue) 521{ 522 edge_iterator ei; 523 edge e; 524 rtx_insn *insn = BB_END (bb); 525 526 /* We know BB has a single successor, so there is no need to copy a 527 simple jump at the end of BB. */ 528 if (simplejump_p (insn)) 529 insn = PREV_INSN (insn); 530 531 start_sequence (); 532 duplicate_insn_chain (BB_HEAD (bb), insn); 533 if (dump_file) 534 { 535 unsigned count = 0; 536 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 537 if (active_insn_p (insn)) 538 ++count; 539 fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n", 540 bb->index, copy_bb->index, count); 541 } 542 insn = get_insns (); 543 end_sequence (); 544 emit_insn_before (insn, before); 545 546 /* Redirect all the paths that need no prologue into copy_bb. */ 547 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));) 548 if (!bitmap_bit_p (need_prologue, e->src->index)) 549 { 550 int freq = EDGE_FREQUENCY (e); 551 copy_bb->count += e->count; 552 copy_bb->frequency += EDGE_FREQUENCY (e); 553 e->dest->count -= e->count; 554 if (e->dest->count < 0) 555 e->dest->count = 0; 556 e->dest->frequency -= freq; 557 if (e->dest->frequency < 0) 558 e->dest->frequency = 0; 559 redirect_edge_and_branch_force (e, copy_bb); 560 continue; 561 } 562 else 563 ei_next (&ei); 564} 565 566 567/* Try to perform a kind of shrink-wrapping, making sure the 568 prologue/epilogue is emitted only around those parts of the 569 function that require it. 
*/

void
try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
		     bitmap_head *bb_flags, rtx_insn *prologue_seq)
{
  edge e;
  edge_iterator ei;
  bool nonempty_prologue = false;
  unsigned max_grow_size;
  rtx_insn *seq;

  /* The prologue is nonempty if it contains anything other than the
     NOTE_INSN_PROLOGUE_END marker.  */
  for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
    if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
      {
	nonempty_prologue = true;
	break;
      }

  if (flag_shrink_wrap && HAVE_simple_return
      && (targetm.profile_before_prologue () || !crtl->profile)
      && nonempty_prologue && !crtl->calls_eh_return)
    {
      HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
      struct hard_reg_set_container set_up_by_prologue;
      rtx_insn *p_insn;
      vec<basic_block> vec;
      basic_block bb;
      bitmap_head bb_antic_flags;
      bitmap_head bb_on_list;
      bitmap_head bb_tail;

      if (dump_file)
	fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");

      /* Compute the registers set and used in the prologue.  A register
	 counts as used only if it is read before the prologue itself
	 clobbers it.  */
      CLEAR_HARD_REG_SET (prologue_clobbered);
      CLEAR_HARD_REG_SET (prologue_used);
      for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
	{
	  HARD_REG_SET this_used;
	  if (!NONDEBUG_INSN_P (p_insn))
	    continue;

	  CLEAR_HARD_REG_SET (this_used);
	  note_uses (&PATTERN (p_insn), record_hard_reg_uses,
		     &this_used);
	  AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
	  IOR_HARD_REG_SET (prologue_used, this_used);
	  note_stores (PATTERN (p_insn), record_hard_reg_sets,
		       &prologue_clobbered);
	}

      prepare_shrink_wrap ((*entry_edge)->dest);

      bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
      bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
      bitmap_initialize (&bb_tail, &bitmap_default_obstack);

      /* Find the set of basic blocks that require a stack frame,
	 and blocks that are too big to be duplicated.  */

      vec.create (n_basic_blocks_for_fn (cfun));

      CLEAR_HARD_REG_SET (set_up_by_prologue.set);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			   STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
      if (frame_pointer_needed)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     HARD_FRAME_POINTER_REGNUM);
      if (pic_offset_table_rtx
	  && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
	add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			     PIC_OFFSET_TABLE_REGNUM);
      if (crtl->drap_reg)
	add_to_hard_reg_set (&set_up_by_prologue.set,
			     GET_MODE (crtl->drap_reg),
			     REGNO (crtl->drap_reg));
      if (targetm.set_up_by_prologue)
	targetm.set_up_by_prologue (&set_up_by_prologue);

      /* We don't use a different max size depending on
	 optimize_bb_for_speed_p because increasing shrink-wrapping
	 opportunities by duplicating tail blocks can actually result
	 in an overall decrease in code size.  */
      max_grow_size = get_uncond_jump_length ();
      max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

      /* Mark in BB_FLAGS (and queue in VEC) every block containing an
	 insn that needs the frame; mark in BB_ON_LIST blocks that grow
	 past MAX_GROW_SIZE and so cannot be duplicated as tails.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;
	  unsigned size = 0;

	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn))
	      {
		if (requires_stack_frame_p (insn, prologue_used,
					    set_up_by_prologue.set))
		  {
		    /* The entry successor itself needing a frame means
		       nothing can be shrink-wrapped.  */
		    if (bb == (*entry_edge)->dest)
		      goto fail_shrinkwrap;
		    bitmap_set_bit (bb_flags, bb->index);
		    vec.quick_push (bb);
		    break;
		  }
		else if (size <= max_grow_size)
		  {
		    size += get_attr_min_length (insn);
		    if (size > max_grow_size)
		      bitmap_set_bit (&bb_on_list, bb->index);
		  }
	      }
	}

      /* Blocks that really need a prologue, or are too big for tails.  */
      bitmap_ior_into (&bb_on_list, bb_flags);

      /* For every basic block that needs a prologue, mark all blocks
	 reachable from it, so as to ensure they are also seen as
	 requiring a prologue.  */
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
		&& bitmap_set_bit (bb_flags, e->dest->index))
	      vec.quick_push (e->dest);
	}

      /* Find the set of basic blocks that need no prologue, have a
	 single successor, can be duplicated, meet a max size
	 requirement, and go to the exit via like blocks.  */
      vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();

	  FOR_EACH_EDGE (e, ei, tmp_bb->preds)
	    if (single_succ_p (e->src)
		&& !bitmap_bit_p (&bb_on_list, e->src->index)
		&& can_duplicate_block_p (e->src))
	      {
		edge pe;
		edge_iterator pei;

		/* If there is predecessor of e->src which doesn't
		   need prologue and the edge is complex,
		   we might not be able to redirect the branch
		   to a copy of e->src.  */
		FOR_EACH_EDGE (pe, pei, e->src->preds)
		  if ((pe->flags & EDGE_COMPLEX) != 0
		      && !bitmap_bit_p (bb_flags, pe->src->index))
		    break;
		if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
		  vec.quick_push (e->src);
	      }
	}

      /* Now walk backwards from every block that is marked as needing
	 a prologue to compute the bb_antic_flags bitmap.  Exclude
	 tail blocks; They can be duplicated to be used on paths not
	 needing a prologue.  */
      bitmap_clear (&bb_on_list);
      bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail);
      FOR_EACH_BB_FN (bb, cfun)
	{
	  if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	    continue;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		&& bitmap_set_bit (&bb_on_list, e->src->index))
	      vec.quick_push (e->src);
	}
      /* A block becomes ANTIC when all of its successors are ANTIC;
	 iterate until the worklist is drained.  */
      while (!vec.is_empty ())
	{
	  basic_block tmp_bb = vec.pop ();
	  bool all_set = true;

	  bitmap_clear_bit (&bb_on_list, tmp_bb->index);
	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
	      {
		all_set = false;
		break;
	      }

	  if (all_set)
	    {
	      bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
	      FOR_EACH_EDGE (e, ei, tmp_bb->preds)
		if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
		    && bitmap_set_bit (&bb_on_list, e->src->index))
		  vec.quick_push (e->src);
	    }
	}
      /* Find exactly one edge that leads to a block in ANTIC from
	 a block that isn't.  */
      if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index))
	FOR_EACH_BB_FN (bb, cfun)
	  {
	    if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	      continue;
	    FOR_EACH_EDGE (e, ei, bb->preds)
	      if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
		{
		  if (*entry_edge != orig_entry_edge)
		    {
		      *entry_edge = orig_entry_edge;
		      if (dump_file)
			fprintf (dump_file, "More than one candidate edge.\n");
		      goto fail_shrinkwrap;
		    }
		  if (dump_file)
		    fprintf (dump_file, "Found candidate edge for "
			     "shrink-wrapping, %d->%d.\n", e->src->index,
			     e->dest->index);
		  *entry_edge = e;
		}
	  }

      if (*entry_edge != orig_entry_edge)
	{
	  /* Test whether the prologue is known to clobber any register
	     (other than FP or SP) which are live on the edge.  */
	  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
	  if (frame_pointer_needed)
	    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
	  REG_SET_TO_HARD_REG_SET (live_on_edge,
				   df_get_live_in ((*entry_edge)->dest));
	  if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
	    {
	      *entry_edge = orig_entry_edge;
	      if (dump_file)
		fprintf (dump_file,
			 "Shrink-wrapping aborted due to clobber.\n");
	    }
	}
      if (*entry_edge != orig_entry_edge)
	{
	  crtl->shrink_wrapped = true;
	  if (dump_file)
	    fprintf (dump_file, "Performing shrink-wrapping.\n");

	  /* Find tail blocks reachable from both blocks needing a
	     prologue and blocks not needing a prologue.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_FN (bb, cfun)
	      {
		bool some_pro, some_no_pro;
		if (!bitmap_bit_p (&bb_tail, bb->index))
		  continue;
		some_pro = some_no_pro = false;
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    if (bitmap_bit_p (bb_flags, e->src->index))
		      some_pro = true;
		    else
		      some_no_pro = true;
		  }
		if (some_pro && some_no_pro)
		  vec.quick_push (bb);
		else
		  bitmap_clear_bit (&bb_tail, bb->index);
	      }
	  /* Find the head of each tail.  */
	  while (!vec.is_empty ())
	    {
	      basic_block tbb = vec.pop ();

	      if (!bitmap_bit_p (&bb_tail, tbb->index))
		continue;

	      while (single_succ_p (tbb))
		{
		  tbb = single_succ (tbb);
		  bitmap_clear_bit (&bb_tail, tbb->index);
		}
	    }
	  /* Now duplicate the tails.  */
	  if (!bitmap_empty_p (&bb_tail))
	    FOR_EACH_BB_REVERSE_FN (bb, cfun)
	      {
		basic_block copy_bb, tbb;
		rtx_insn *insert_point;
		int eflags;

		if (!bitmap_clear_bit (&bb_tail, bb->index))
		  continue;

		/* Create a copy of BB, instructions and all, for
		   use on paths that don't need a prologue.
		   Ideal placement of the copy is on a fall-thru edge
		   or after a block that would jump to the copy.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  if (!bitmap_bit_p (bb_flags, e->src->index)
		      && single_succ_p (e->src))
		    break;
		if (e)
		  {
		    /* Make sure we insert after any barriers.  */
		    rtx_insn *end = get_last_bb_insn (e->src);
		    copy_bb = create_basic_block (NEXT_INSN (end),
						  NULL_RTX, e->src);
		    BB_COPY_PARTITION (copy_bb, e->src);
		  }
		else
		  {
		    /* Otherwise put the copy at the end of the function.  */
		    copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
						  EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
		    BB_COPY_PARTITION (copy_bb, bb);
		  }

		insert_point = emit_note_after (NOTE_INSN_DELETED,
						BB_END (copy_bb));
		emit_barrier_after (BB_END (copy_bb));

		/* Copy the whole tail chain, splitting the copy into
		   one block per original block.  */
		tbb = bb;
		while (1)
		  {
		    dup_block_and_redirect (tbb, copy_bb, insert_point,
					    bb_flags);
		    tbb = single_succ (tbb);
		    if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
		      break;
		    e = split_block (copy_bb, PREV_INSN (insert_point));
		    copy_bb = e->dest;
		  }

		/* Quiet verify_flow_info by (ab)using EDGE_FAKE.
		   We have yet to add a simple_return to the tails,
		   as we'd like to first convert_jumps_to_returns in
		   case the block is no longer used after that.  */
		eflags = EDGE_FAKE;
		if (CALL_P (PREV_INSN (insert_point))
		    && SIBLING_CALL_P (PREV_INSN (insert_point)))
		  eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
		make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun),
				       eflags);

		/* verify_flow_info doesn't like a note after a
		   sibling call.  */
		delete_insn (insert_point);
		if (bitmap_empty_p (&bb_tail))
		  break;
	      }
	}

    fail_shrinkwrap:
      bitmap_clear (&bb_tail);
      bitmap_clear (&bb_antic_flags);
      bitmap_clear (&bb_on_list);
      vec.release ();
    }
}

/* If we're allowed to generate a simple return instruction, then by
   definition we don't need a full epilogue.
If the last basic
   block before the exit block does not contain active instructions,
   examine its predecessors and try to emit (conditional) return
   instructions.  */

edge
get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags,
			       vec<edge> *unconverted_simple_returns,
			       rtx_insn **returnjump)
{
  if (optimize)
    {
      unsigned i, last;

      /* convert_jumps_to_returns may add to preds of the exit block
	 (but won't remove).  Stop at end of current preds.  */
      last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
      for (i = 0; i < last; i++)
	{
	  edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i);
	  /* Only consider empty, labelled predecessors that do not
	     need a prologue (not marked in BB_FLAGS).  */
	  if (LABEL_P (BB_HEAD (e->src))
	      && !bitmap_bit_p (&bb_flags, e->src->index)
	      && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
	    *unconverted_simple_returns
	      = convert_jumps_to_returns (e->src, true,
					  *unconverted_simple_returns);
	}
    }

  /* If the fall-through predecessor of the exit needs no prologue,
     emit a return there and report the edge as handled (NULL).  */
  if (exit_fallthru_edge != NULL
      && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
      && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
    {
      basic_block last_bb;

      last_bb = emit_return_for_exit (exit_fallthru_edge, true);
      *returnjump = BB_END (last_bb);
      exit_fallthru_edge = NULL;
    }
  return exit_fallthru_edge;
}

/* If there were branches to an empty LAST_BB which we tried to
   convert to conditional simple_returns, but couldn't for some
   reason, create a block to hold a simple_return insn and redirect
   those remaining edges.
*/

void
convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
			  bitmap_head bb_flags, rtx_insn *returnjump,
			  vec<edge> unconverted_simple_returns)
{
  edge e;
  edge_iterator ei;

  if (!unconverted_simple_returns.is_empty ())
    {
      /* Keep one shared simple_return block per partition so hot and
	 cold edges are never redirected across the partition split.  */
      basic_block simple_return_block_hot = NULL;
      basic_block simple_return_block_cold = NULL;
      edge pending_edge_hot = NULL;
      edge pending_edge_cold = NULL;
      basic_block exit_pred;
      int i;

      gcc_assert (entry_edge != orig_entry_edge);

      /* See if we can reuse the last insn that was emitted for the
	 epilogue.  */
      if (returnjump != NULL_RTX
	  && JUMP_LABEL (returnjump) == simple_return_rtx)
	{
	  e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    simple_return_block_hot = e->dest;
	  else
	    simple_return_block_cold = e->dest;
	}

      /* Also check returns we might need to add to tail blocks.  */
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	      pending_edge_hot = e;
	    else
	      pending_edge_cold = e;
	  }

      /* Save a pointer to the exit's predecessor BB for use in
	 inserting new BBs at the end of the function.  Do this
	 after the call to split_block above which may split
	 the original exit pred.  */
      exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;

      FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
	{
	  basic_block *pdest_bb;
	  edge pending;

	  /* Pick the destination block (and any pending tail edge)
	     matching this edge's partition.  */
	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    {
	      pdest_bb = &simple_return_block_hot;
	      pending = pending_edge_hot;
	    }
	  else
	    {
	      pdest_bb = &simple_return_block_cold;
	      pending = pending_edge_cold;
	    }

	  if (*pdest_bb == NULL && pending != NULL)
	    {
	      /* Reuse a pending tail block by emitting the return
		 directly into it.  */
	      emit_return_into_block (true, pending->src);
	      pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	      *pdest_bb = pending->src;
	    }
	  else if (*pdest_bb == NULL)
	    {
	      /* Otherwise create a fresh block holding only a
		 simple_return.  */
	      basic_block bb;
	      rtx_insn *start;

	      bb = create_basic_block (NULL, NULL, exit_pred);
	      BB_COPY_PARTITION (bb, e->src);
	      start = emit_jump_insn_after (gen_simple_return (),
					    BB_END (bb));
	      JUMP_LABEL (start) = simple_return_rtx;
	      emit_barrier_after (start);

	      *pdest_bb = bb;
	      make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
	    }
	  redirect_edge_and_branch_force (e, *pdest_bb);
	}
      unconverted_simple_returns.release ();
    }

  /* Any remaining fake exit edges from blocks that need no prologue
     get a real return emitted into their source block.  */
  if (entry_edge != orig_entry_edge)
    {
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if (EDGE_COUNT (e->src->preds) != 0
	    && (e->flags & EDGE_FAKE) != 0
	    && !bitmap_bit_p (&bb_flags, e->src->index))
	  {
	    emit_return_into_block (true, e->src);
	    e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
	  }
    }
}

#endif