/* Decompose multiword subregs.
   Copyright (C) 2007-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>
                  Ian Lance Taylor <iant@google.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "machmode.h"
#include "tm.h"
#include "hash-set.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "obstack.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfgbuild.h"
#include "basic-block.h"
#include "recog.h"
#include "bitmap.h"
#include "dce.h"
#include "hashtab.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "except.h"
#include "regs.h"
#include "tree-pass.h"
#include "df.h"
#include "lower-subreg.h"
#include "rtl-iter.h"

/* Normalize STACK_GROWS_DOWNWARD from a target macro that is either
   defined or undefined into a 0/1 value, so it can be used directly
   in ordinary C expressions below (see the push handling in
   resolve_simple_move).  */
#ifdef STACK_GROWS_DOWNWARD
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#else
# define STACK_GROWS_DOWNWARD 0
#endif


/* Decompose multi-word pseudo-registers into individual
   pseudo-registers when possible and profitable.  This is possible
   when all the uses of a multi-word register are via SUBREG, or are
   copies of the register to another location.  Breaking apart the
   register permits more CSE and permits better register allocation.
   This is profitable if the machine does not have move instructions
   to do this.

   This pass only splits moves with modes that are wider than
   word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
   integer modes that are twice the width of word_mode.  The latter
   could be generalized if there was a need to do this, but the trend in
   architectures is to not need this.

   There are two useful preprocessor defines for use by maintainers:

   #define LOG_COSTS 1

   if you wish to see the actual cost estimates that are being used
   for each mode wider than word mode and the cost estimates for zero
   extension and the shifts.   This can be useful when port maintainers
   are tuning insn rtx costs.

   #define FORCE_LOWERING 1

   if you wish to test the pass with all the transformation forced on.
   This can be useful for finding bugs in the transformations.  */

#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it can not be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.
*/
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices

/* RTXes used while computing costs.  These are scratch nodes that are
   mutated in place (PUT_CODE/PUT_MODE) rather than reallocated for
   every query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};

/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
   rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */

static int
shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
            machine_mode mode, int op1)
{
  /* Reuse the scratch shift rtx: overwrite its code, mode and shift
     count, then ask the target for the cost of the resulting source.  */
  PUT_CODE (rtxes->shift, code);
  PUT_MODE (rtxes->shift, mode);
  PUT_MODE (rtxes->source, mode);
  XEXP (rtxes->shift, 1) = GEN_INT (op1);
  return set_src_cost (rtxes->shift, speed_p);
}

/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
   to true if it is profitable to split a double-word CODE shift
   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
   for speed or size profitability.

   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
   is the cost of moving between word registers.  */

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
                         bool *splitting, enum rtx_code code,
                         int word_move_zero_cost, int word_move_cost)
{
  int wide_cost, narrow_cost, upper_cost, i;

  for (i = 0; i < BITS_PER_WORD; i++)
    {
      /* Cost of the unsplit shift in the double-word mode.  */
      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
                              i + BITS_PER_WORD);
      /* Cost of producing the low/result word after splitting: a shift
         count of exactly BITS_PER_WORD (i == 0) degenerates to a plain
         word move.  */
      if (i == 0)
        narrow_cost = word_move_cost;
      else
        narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);

      /* Cost of producing the other word: zero for logical shifts, a
         sign copy (full-width arithmetic shift or a move) for
         ASHIFTRT.  */
      if (code != ASHIFTRT)
        upper_cost = word_move_zero_cost;
      else if (i == BITS_PER_WORD - 1)
        upper_cost = word_move_cost;
      else
        upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
                                 BITS_PER_WORD - 1);

      if (LOG_COSTS)
        fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
                 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
                 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);

      if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
        splitting[i] = true;
    }
}

/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.
*/

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Baseline costs: moving zero into a word register, and moving
     between two word registers.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
             GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every mode wider than a word, decide whether a move in that
     mode should be split into FACTOR word moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
        {
          int mode_move_cost;

          PUT_MODE (rtxes->target, mode);
          PUT_MODE (rtxes->source, mode);
          mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

          if (LOG_COSTS)
            fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
                     GET_MODE_NAME (mode), mode_move_cost,
                     word_move_cost, factor);

          if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
            {
              choices[speed_p].move_modes_to_split[i] = true;
              choices[speed_p].something_to_do = true;
            }
        }
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
         zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, speed_p);

      if (LOG_COSTS)
        fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
                 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
                 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
        choices[speed_p].splitting_zext = true;

      compute_splitting_shift (speed_p, rtxes,
                               choices[speed_p].splitting_ashift, ASHIFT,
                               word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
                               choices[speed_p].splitting_lshiftrt, LSHIFTRT,
                               word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
                               choices[speed_p].splitting_ashiftrt, ASHIFTRT,
                               word_move_zero_cost, word_move_cost);
    }
}

/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.
*/

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);

  /* Build the scratch rtxes once; the cost routines mutate them in
     place.  Register numbers are fresh pseudo numbers so the rtxes do
     not alias any hard register.  */
  rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
  rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
  rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}

/* Return whether X (or, for a SUBREG, its inner register) is an
   operand we can handle when decomposing a move: a plain object that
   is not a symbolic constant and not a volatile or mode-dependent
   MEM.  NOTE(review): this function had no header comment; the
   description above is inferred from the checks below -- confirm
   against upstream.  */

static bool
simple_move_operand (rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  if (!OBJECT_P (x))
    return false;

  /* Symbolic constants would need relocation handling; reject them.  */
  if (GET_CODE (x) == LABEL_REF
      || GET_CODE (x) == SYMBOL_REF
      || GET_CODE (x) == HIGH
      || GET_CODE (x) == CONST)
    return false;

  /* A volatile MEM must not be split into several accesses, and a
     mode-dependent address cannot be re-offset per word.  */
  if (MEM_P (x)
      && (MEM_VOLATILE_P (x)
          || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
    return false;

  return true;
}

/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called.  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x;
  rtx set;
  machine_mode mode;

  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
          == BLKmode))
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Only split moves in modes the cost analysis marked profitable.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}

/* If SET is a copy from one multi-word pseudo-register to another,
   record that in reg_copy_graph.  Return whether it is such a
   copy.
*/

static bool
find_pseudo_copy (rtx set)
{
  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);
  unsigned int rd, rs;
  bitmap b;

  if (!REG_P (dest) || !REG_P (src))
    return false;

  rd = REGNO (dest);
  rs = REGNO (src);
  /* Only track pseudo-to-pseudo copies.  */
  if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
    return false;

  /* Lazily allocate the per-source bitmap of copy destinations.  */
  b = reg_copy_graph[rs];
  if (b == NULL)
    {
      b = BITMAP_ALLOC (NULL);
      reg_copy_graph[rs] = b;
    }

  bitmap_set_bit (b, rd);

  return true;
}

/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */

static void
propagate_pseudo_copies (void)
{
  bitmap queue, propagate;

  queue = BITMAP_ALLOC (NULL);
  propagate = BITMAP_ALLOC (NULL);

  /* Standard worklist iteration: start from the known-decomposable
     registers and follow copy edges until no new register is added.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
        {
          bitmap b = reg_copy_graph[i];
          if (b)
            bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
        }

      /* The next queue is whatever we just discovered that was not
         already known to be decomposable.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));

  BITMAP_FREE (queue);
  BITMAP_FREE (propagate);
}

/* A pointer to one of these values is passed to
   find_decomposable_subregs.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};

/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   PCMI will point at the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
        {
          rtx inner = SUBREG_REG (x);
          unsigned int regno, outer_size, inner_size, outer_words, inner_words;

          if (!REG_P (inner))
            continue;

          regno = REGNO (inner);
          if (HARD_REGISTER_NUM_P (regno))
            {
              iter.skip_subrtxes ();
              continue;
            }

          outer_size = GET_MODE_SIZE (GET_MODE (x));
          inner_size = GET_MODE_SIZE (GET_MODE (inner));
          /* Round sizes up to whole words.  */
          outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
          inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

          /* We only try to decompose single word subregs of multi-word
             registers.  When we find one, we return -1 to avoid iterating
             over the inner register.

             ??? This doesn't allow, e.g., DImode subregs of TImode values
             on 32-bit targets.  We would need to record the way the
             pseudo-register was used, and only decompose if all the uses
             were the same number and size of pieces.  Hopefully this
             doesn't happen much.  */

          if (outer_words == 1 && inner_words > 1)
            {
              bitmap_set_bit (decomposable_context, regno);
              iter.skip_subrtxes ();
              continue;
            }

          /* If this is a cast from one mode to another, where the modes
             have the same size, and they are not tieable, then mark this
             register as non-decomposable.  If we decompose it we are
             likely to mess up whatever the backend is trying to do.  */
          if (outer_words > 1
              && outer_size == inner_size
              && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
            {
              bitmap_set_bit (non_decomposable_context, regno);
              bitmap_set_bit (subreg_context, regno);
              iter.skip_subrtxes ();
              continue;
            }
        }
      else if (REG_P (x))
        {
          unsigned int regno;

          /* We will see an outer SUBREG before we see the inner REG, so
             when we see a plain REG here it means a direct reference to
             the register.

             If this is not a simple copy from one location to another,
             then we can not decompose this register.  If this is a simple
             copy we want to decompose, and the mode is right,
             then we mark the register as decomposable.
             Otherwise we don't say anything about this register --
             it could be decomposed, but whether that would be
             profitable depends upon how it is used elsewhere.

             We only set bits in the bitmap for multi-word
             pseudo-registers, since those are the only ones we care about
             and it keeps the size of the bitmaps down.  */

          regno = REGNO (x);
          if (!HARD_REGISTER_NUM_P (regno)
              && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
            {
              switch (*pcmi)
                {
                case NOT_SIMPLE_MOVE:
                  bitmap_set_bit (non_decomposable_context, regno);
                  break;
                case DECOMPOSABLE_SIMPLE_MOVE:
                  if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
                    bitmap_set_bit (decomposable_context, regno);
                  break;
                case SIMPLE_MOVE:
                  break;
                default:
                  gcc_unreachable ();
                }
            }
        }
      else if (MEM_P (x))
        {
          enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

          /* Any registers used in a MEM do not participate in a
             SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
             here, and return -1 to block the parent's recursion.  */
          find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
          iter.skip_subrtxes ();
        }
    }
}

/* Decompose REGNO into word-sized components.  We smash the REG node
This ensures that (1) something goes wrong quickly if we 603 fail to make some replacement, and (2) the debug information inside 604 the symbol table is automatically kept up to date. */ 605 606static void 607decompose_register (unsigned int regno) 608{ 609 rtx reg; 610 unsigned int words, i; 611 rtvec v; 612 613 reg = regno_reg_rtx[regno]; 614 615 regno_reg_rtx[regno] = NULL_RTX; 616 617 words = GET_MODE_SIZE (GET_MODE (reg)); 618 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 619 620 v = rtvec_alloc (words); 621 for (i = 0; i < words; ++i) 622 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD); 623 624 PUT_CODE (reg, CONCATN); 625 XVEC (reg, 0) = v; 626 627 if (dump_file) 628 { 629 fprintf (dump_file, "; Splitting reg %u ->", regno); 630 for (i = 0; i < words; ++i) 631 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i))); 632 fputc ('\n', dump_file); 633 } 634} 635 636/* Get a SUBREG of a CONCATN. */ 637 638static rtx 639simplify_subreg_concatn (machine_mode outermode, rtx op, 640 unsigned int byte) 641{ 642 unsigned int inner_size; 643 machine_mode innermode, partmode; 644 rtx part; 645 unsigned int final_offset; 646 647 gcc_assert (GET_CODE (op) == CONCATN); 648 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0); 649 650 innermode = GET_MODE (op); 651 gcc_assert (byte < GET_MODE_SIZE (innermode)); 652 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode)); 653 654 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0); 655 part = XVECEXP (op, 0, byte / inner_size); 656 partmode = GET_MODE (part); 657 658 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of 659 regular CONST_VECTORs. They have vector or integer modes, depending 660 on the capabilities of the target. Cope with them. 
*/ 661 if (partmode == VOIDmode && VECTOR_MODE_P (innermode)) 662 partmode = GET_MODE_INNER (innermode); 663 else if (partmode == VOIDmode) 664 { 665 enum mode_class mclass = GET_MODE_CLASS (innermode); 666 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0); 667 } 668 669 final_offset = byte % inner_size; 670 if (final_offset + GET_MODE_SIZE (outermode) > inner_size) 671 return NULL_RTX; 672 673 return simplify_gen_subreg (outermode, part, partmode, final_offset); 674} 675 676/* Wrapper around simplify_gen_subreg which handles CONCATN. */ 677 678static rtx 679simplify_gen_subreg_concatn (machine_mode outermode, rtx op, 680 machine_mode innermode, unsigned int byte) 681{ 682 rtx ret; 683 684 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN. 685 If OP is a SUBREG of a CONCATN, then it must be a simple mode 686 change with the same size and offset 0, or it must extract a 687 part. We shouldn't see anything else here. */ 688 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN) 689 { 690 rtx op2; 691 692 if ((GET_MODE_SIZE (GET_MODE (op)) 693 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))) 694 && SUBREG_BYTE (op) == 0) 695 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op), 696 GET_MODE (SUBREG_REG (op)), byte); 697 698 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op), 699 SUBREG_BYTE (op)); 700 if (op2 == NULL_RTX) 701 { 702 /* We don't handle paradoxical subregs here. 
*/ 703 gcc_assert (GET_MODE_SIZE (outermode) 704 <= GET_MODE_SIZE (GET_MODE (op))); 705 gcc_assert (GET_MODE_SIZE (GET_MODE (op)) 706 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))); 707 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op), 708 byte + SUBREG_BYTE (op)); 709 gcc_assert (op2 != NULL_RTX); 710 return op2; 711 } 712 713 op = op2; 714 gcc_assert (op != NULL_RTX); 715 gcc_assert (innermode == GET_MODE (op)); 716 } 717 718 if (GET_CODE (op) == CONCATN) 719 return simplify_subreg_concatn (outermode, op, byte); 720 721 ret = simplify_gen_subreg (outermode, op, innermode, byte); 722 723 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then 724 resolve_simple_move will ask for the high part of the paradoxical 725 subreg, which does not have a value. Just return a zero. */ 726 if (ret == NULL_RTX 727 && GET_CODE (op) == SUBREG 728 && SUBREG_BYTE (op) == 0 729 && (GET_MODE_SIZE (innermode) 730 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))) 731 return CONST0_RTX (outermode); 732 733 gcc_assert (ret != NULL_RTX); 734 return ret; 735} 736 737/* Return whether we should resolve X into the registers into which it 738 was decomposed. */ 739 740static bool 741resolve_reg_p (rtx x) 742{ 743 return GET_CODE (x) == CONCATN; 744} 745 746/* Return whether X is a SUBREG of a register which we need to 747 resolve. */ 748 749static bool 750resolve_subreg_p (rtx x) 751{ 752 if (GET_CODE (x) != SUBREG) 753 return false; 754 return resolve_reg_p (SUBREG_REG (x)); 755} 756 757/* Look for SUBREGs in *LOC which need to be decomposed. */ 758 759static bool 760resolve_subreg_use (rtx *loc, rtx insn) 761{ 762 subrtx_ptr_iterator::array_type array; 763 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST) 764 { 765 rtx *loc = *iter; 766 rtx x = *loc; 767 if (resolve_subreg_p (x)) 768 { 769 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), 770 SUBREG_BYTE (x)); 771 772 /* It is possible for a note to contain a reference which we can 773 decompose. 
In this case, return 1 to the caller to indicate 774 that the note must be removed. */ 775 if (!x) 776 { 777 gcc_assert (!insn); 778 return true; 779 } 780 781 validate_change (insn, loc, x, 1); 782 iter.skip_subrtxes (); 783 } 784 else if (resolve_reg_p (x)) 785 /* Return 1 to the caller to indicate that we found a direct 786 reference to a register which is being decomposed. This can 787 happen inside notes, multiword shift or zero-extend 788 instructions. */ 789 return true; 790 } 791 792 return false; 793} 794 795/* Resolve any decomposed registers which appear in register notes on 796 INSN. */ 797 798static void 799resolve_reg_notes (rtx_insn *insn) 800{ 801 rtx *pnote, note; 802 803 note = find_reg_equal_equiv_note (insn); 804 if (note) 805 { 806 int old_count = num_validated_changes (); 807 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX)) 808 remove_note (insn, note); 809 else 810 if (old_count != num_validated_changes ()) 811 df_notes_rescan (insn); 812 } 813 814 pnote = ®_NOTES (insn); 815 while (*pnote != NULL_RTX) 816 { 817 bool del = false; 818 819 note = *pnote; 820 switch (REG_NOTE_KIND (note)) 821 { 822 case REG_DEAD: 823 case REG_UNUSED: 824 if (resolve_reg_p (XEXP (note, 0))) 825 del = true; 826 break; 827 828 default: 829 break; 830 } 831 832 if (del) 833 *pnote = XEXP (note, 1); 834 else 835 pnote = &XEXP (note, 1); 836 } 837} 838 839/* Return whether X can be decomposed into subwords. 
*/

static bool
can_decompose_p (rtx x)
{
  if (REG_P (x))
    {
      unsigned int regno = REGNO (x);

      if (HARD_REGISTER_NUM_P (regno))
        {
          unsigned int byte, num_bytes;

          /* A hard register is decomposable only if every word of it
             can be accessed as a word_mode subreg.  */
          num_bytes = GET_MODE_SIZE (GET_MODE (x));
          for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
            if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
              return false;
          return true;
        }
      else
        /* A pseudo is decomposable unless it was flagged as used in a
           same-size, non-tieable mode-changing subreg.  */
        return !bitmap_bit_p (subreg_context, regno);
    }

  return true;
}

/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
          || (GET_MODE_SIZE (orig_mode)
              != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
        REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
          || (GET_MODE_SIZE (orig_mode)
              != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Move SRC into a fresh register first, then recurse to resolve
         that intermediate move; SRC becomes the new register.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
        resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
        resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

#ifdef AUTO_INC_DEC
      {
        rtx move = emit_move_insn (reg, src);
        if (MEM_P (src))
          {
            /* Preserve any auto-inc side effect note on the new move.  */
            rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
            if (note)
              add_reg_note (move, REG_INC, XEXP (note, 0));
          }
      }
#else
      emit_move_insn (reg, src);
#endif
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
          && !resolve_reg_p (dest)
          && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
        real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
        {
          dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
                                     MODE_INT, 0);
          gcc_assert (dest_mode != BLKmode);
        }
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
        REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the word order so pushes land in memory order; J wraps
         via unsigned arithmetic when JINC is -1.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
        {
          j = 0;
          jinc = 1;
        }
      else
        {
          j = words - 1;
          jinc = -1;
        }

      for (i = 0; i < words; ++i, j += jinc)
        {
          rtx temp;

          temp = copy_rtx (XEXP (dest, 0));
          temp = adjust_automodify_address_nv (dest, word_mode, temp,
                                               j * UNITS_PER_WORD);
          emit_move_insn (temp,
                          simplify_gen_subreg_concatn (word_mode, src,
                                                       orig_mode,
                                                       j * UNITS_PER_WORD));
        }
    }
  else
    {
      unsigned int i;

      /* Clobber the whole pseudo first so the word writes are not seen
         as partial updates of an existing value.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
        emit_clobber (dest);

      for (i = 0; i < words; ++i)
        emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
                                                     dest_mode,
                                                     i * UNITS_PER_WORD),
                        simplify_gen_subreg_concatn (word_mode, src,
                                                     orig_mode,
                                                     i * UNITS_PER_WORD));
    }

  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
        mdest = dest;
      else
        mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

#ifdef AUTO_INC_DEC
      if (MEM_P (real_dest)
          && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
        {
          rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
          if (note)
            add_reg_note (minsn, REG_INC, XEXP (note, 0));
        }
#endif

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}

/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.
 */

static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* Only rewrite clobbers of registers that were actually decomposed.  */
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  /* Number of word-mode pieces the clobbered register was split into,
     rounding up for modes whose size is not a multiple of
     UNITS_PER_WORD.  */
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Replace the clobbered operand in place with word 0 of the
     concatenation...  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* ...and emit separate CLOBBERs for each of the remaining words
     right after INSN.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}

/* A USE of a decomposed register is no longer meaningful.  Return
   whether we changed something.  */

static bool
resolve_use (rtx pat, rtx_insn *insn)
{
  /* If the used register was decomposed, the USE carries no useful
     information any more; just delete the insn.  */
  if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
    {
      delete_insn (insn);
      return true;
    }

  resolve_reg_notes (insn);

  return false;
}

/* A VAR_LOCATION can be simplified.
 */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  /* Try to fold the SUBREG of a decomposed register into the
	     matching word of the concatenation.  */
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  if (x)
	    *loc = x;
	  else
	    x = copy_rtx (*loc);
	}
      /* Unshare any reference to a decomposed register so later
	 replacements cannot corrupt shared RTL.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}

/* Check if INSN is a decomposable multiword-shift or zero-extend and
   set the decomposable_context bitmap accordingly.  SPEED_P is true
   if we are optimizing INSN for speed rather than size.  Return true
   if INSN is decomposable.  */

static bool
find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
{
  rtx set;
  rtx op;
  rtx op_operand;

  set = single_set (insn);
  if (!set)
    return false;

  /* Only shifts and zero-extends are handled.  */
  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return false;

  /* Both source and destination must be pseudos, and the operation
     must be in the double-word integer mode.  */
  op_operand = XEXP (op, 0);
  if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
      || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
      || HARD_REGISTER_NUM_P (REGNO (op_operand))
      || GET_MODE (op) != twice_word_mode)
    return false;

  if (GET_CODE (op) == ZERO_EXTEND)
    {
      if (GET_MODE (op_operand) != word_mode
	  || !choices[speed_p].splitting_zext)
	return false;
    }
  else /* left or right shift */
    {
      /* Pick the cost table matching the shift direction; only shifts
	 by a constant in [BITS_PER_WORD, 2 * BITS_PER_WORD - 1] that
	 the cost analysis marked profitable are decomposed.  */
      bool *splitting = (GET_CODE (op) == ASHIFT
			 ? choices[speed_p].splitting_ashift
			 : GET_CODE (op) == ASHIFTRT
			 ? choices[speed_p].splitting_ashiftrt
			 : choices[speed_p].splitting_lshiftrt);
      if (!CONST_INT_P (XEXP (op, 1))
	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
			2 * BITS_PER_WORD - 1)
	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
	return false;

      bitmap_set_bit (decomposable_context, REGNO (op_operand));
    }

  bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));

  return true;
}

/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* offset1 is the live (copied-to) word of the destination, offset2
     the word that is zeroed / sign-filled.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					  GET_MODE (SET_DEST (set)),
					  offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
					 GET_MODE (op_operand),
					 src_offset);
  /* For an arithmetic right shift the upper word is the sign of the
     source word, obtained by shifting in copies of its sign bit.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* A shift by exactly BITS_PER_WORD is a pure word move; larger
	 counts need a residual word-mode shift.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    /* Shifting by all bits duplicates the sign word directly.  */
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}

/* Print to dump_file a description of what we're doing with shift code CODE.
1357 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */ 1358 1359static void 1360dump_shift_choices (enum rtx_code code, bool *splitting) 1361{ 1362 int i; 1363 const char *sep; 1364 1365 fprintf (dump_file, 1366 " Splitting mode %s for %s lowering with shift amounts = ", 1367 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code)); 1368 sep = ""; 1369 for (i = 0; i < BITS_PER_WORD; i++) 1370 if (splitting[i]) 1371 { 1372 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD); 1373 sep = ","; 1374 } 1375 fprintf (dump_file, "\n"); 1376} 1377 1378/* Print to dump_file a description of what we're doing when optimizing 1379 for speed or size; SPEED_P says which. DESCRIPTION is a description 1380 of the SPEED_P choice. */ 1381 1382static void 1383dump_choices (bool speed_p, const char *description) 1384{ 1385 unsigned int i; 1386 1387 fprintf (dump_file, "Choices when optimizing for %s:\n", description); 1388 1389 for (i = 0; i < MAX_MACHINE_MODE; i++) 1390 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD) 1391 fprintf (dump_file, " %s mode %s for copy lowering.\n", 1392 choices[speed_p].move_modes_to_split[i] 1393 ? "Splitting" 1394 : "Skipping", 1395 GET_MODE_NAME ((machine_mode) i)); 1396 1397 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n", 1398 choices[speed_p].splitting_zext ? "Splitting" : "Skipping", 1399 GET_MODE_NAME (twice_word_mode)); 1400 1401 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift); 1402 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt); 1403 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt); 1404 fprintf (dump_file, "\n"); 1405} 1406 1407/* Look for registers which are always accessed via word-sized SUBREGs 1408 or -if DECOMPOSE_COPIES is true- via copies. Decompose these 1409 registers into several word-sized pseudo-registers. 
 */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* First walk: classify every insn and fill the decomposability
     bitmaps; no insn is modified yet.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A register used in a non-decomposable context must not be split,
     even if it was also seen in a decomposable one.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Second walk: rewrite every insn that references a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matched duplicate operands in sync with the
			 operands they duplicate before committing.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release per-pass data structures.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}

/* Implement first lower subreg pass.  */

namespace {

const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  /* First pass: do not decompose pseudo-to-pseudo copies.  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace

rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}

/* Implement second lower subreg pass.
 */

namespace {

const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  /* Second pass: unlike the first, also decompose pseudo-to-pseudo
     copies (decompose_copies == true).  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2

} // anon namespace

rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}