1/* Sign extension elimination optimization for GNU compiler. 2 Copyright (C) 2005 Free Software Foundation, Inc. 3 Contributed by Leehod Baruch <leehod@il.ibm.com> 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9-Software Foundation; either version 2, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING. If not, write to the Free 19Software Foundation, 59 Temple Place - Suite 330, Boston, MA 2002111-1307, USA. 21 22Problem description: 23-------------------- 24In order to support 32bit computations on a 64bit machine, sign 25extension instructions are generated to ensure the correctness of 26the computation. 27A possible policy (as currently implemented) is to generate a sign 28extension right after each 32bit computation. 29Depending on the instruction set of the architecture, some of these 30sign extension instructions may be redundant. 31There are two cases in which the extension may be redundant: 32 33Case1: 34The instruction that uses the 64bit operands that are sign 35extended has a dual mode that works with 32bit operands. 36For example: 37 38 int32 a, b; 39 40 a = .... --> a = .... 41 a = sign extend a --> 42 b = .... --> b = .... 43 b = sign extend a --> 44 --> 45 cmpd a, b --> cmpw a, b //half word compare 46 47Case2: 48The instruction that defines the 64bit operand (which is later sign 49extended) has a dual mode that defines and sign-extends simultaneously 50a 32bit operand. For example: 51 52 int32 a; 53 54 ld a --> lwa a // load half word and sign extend 55 a = sign extend a --> 56 --> 57 return a --> return a 58 59 60General idea for solution: 61-------------------------- 62First, try to merge the sign extension with the instruction that 63defines the source of the extension and (separately) with the 64instructions that uses the extended result. By doing this, both cases 65of redundancies (as described above) will be eliminated. 66 67Then, use partial redundancy elimination to place the non redundant 68ones at optimal placements. 69 70 71Implementation by example: 72-------------------------- 73Note: The instruction stream is not changed till the last phase. 74 75Phase 0: Initial code, as currently generated by gcc. 76 77 def1 def3 78 se1 def2 se3 79 | \ | / | 80 | \ | / | 81 | \ | / | 82 | \ | / | 83 | \ | / | 84 | \|/ | 85 use1 use2 use3 86 use4 87def1 + se1: 88set ((reg:SI 10) (..def1rhs..)) 89set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) 90 91def2: 92set ((reg:DI 100) (const_int 7)) 93 94def3 + se3: 95set ((reg:SI 20) (..def3rhs..)) 96set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) 97 98use1: 99set ((reg:CC...) (compare:CC (reg:DI 100) (...))) 100 101use2, use3, use4: 102set ((...) (reg:DI 100)) 103 104Phase 1: Propagate extensions to uses. 105 106 def1 def3 107 se1 def2 se3 108 | \ | / | 109 | \ | / | 110 | \ | / | 111 | \ | / | 112 | \ | / | 113 | \|/ | 114 se se se 115 use1 use2 use3 116 se 117 use4 118 119From here, all of the subregs are lowpart ! 120 121def1, def2, def3: No change. 122 123use1: 124set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) 125set ((reg:CC...) (compare:CC (reg:DI 100) (...))) 126 127use2, use3, use4: 128set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) 129set ((...) (reg:DI 100)) 130 131 132Phase 2: Merge and eliminate locally redundant extensions. 133 134 135 *def1 def2 *def3 136 [se removed] se se3 137 | \ | / | 138 | \ | / | 139 | \ | / | 140 | \ | / | 141 | \ | / | 142 | \|/ | 143 [se removed] se se 144 *use1 use2 use3 145 [se removed] 146 use4 147 148The instructions that were changed at this phase are marked with 149asterisk. 150 151*def1: Merge failed. 152 Remove the sign extension instruction, modify def1 and 153 insert a move instruction to assure to correctness of the code. 154set ((subreg:SI (reg:DI 100)) (..def1rhs..)) 155set ((reg:SI 10) (subreg:SI (reg:DI 100))) 156 157def2 + se: There is no need for merge. 158 Def2 is not changed but a sign extension instruction is 159 created. 160set ((reg:DI 100) (const_int 7)) 161set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) 162 163*def3 + se3: Merge succeeded. 164set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) 165set ((reg:SI 20) (reg:DI 100)) 166set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) 167(The extension instruction is the original one). 168 169*use1: Merge succeeded. Remove the sign extension instruction. 170set ((reg:CC...) 171 (compare:CC (subreg:SI (reg:DI 100)) (...))) 172 173use2, use3: Merge failed. No change. 174 175use4: The extension is locally redundant, therefore it is eliminated 176 at this point. 177 178 179Phase 3: Eliminate globally redundant extensions. 180 181Following the LCM output: 182 183 def1 def2 def3 184 se se3 185 | \ | / | 186 | \ | / | 187 | se | / | 188 | \ | / | 189 | \ | / | 190 | \|/ | 191 [ses removed] 192 use1 use2 use3 193 use4 194 195se: 196set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) 197 198se3: 199set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) 200 201 202Phase 4: Commit changes to the insn stream. 203 204 205 def1 def3 *def1 def2 *def3 206 se1 def2 se3 [se removed] [se removed] 207 | \ | / | | \ | / | 208 | \ | / | ------> | \ | / | 209 | \ | / | ------> | se | / | 210 | \ | / | | \ | / | 211 | \ | / | | \ | / | 212 | \|/ | | \|/ | 213 use1 use2 use3 *use1 use2 use3 214 use4 use4 215 216The instructions that were changed during the whole optimization are 217marked with asterisk. 218 219The result: 220 221def1 + se1: 222[ set ((reg:SI 10) (..def1rhs..)) ] - Deleted 223[ set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) ] - Deleted 224set ((subreg:SI (reg:DI 100)) (..def3rhs..)) - Inserted 225set ((reg:SI 10) (subreg:SI (reg:DI 100))) - Inserted 226 227def2: 228set ((reg:DI 100) (const_int 7)) - No change 229 230def3 + se3: 231[ set ((reg:SI 20) (..def3rhs..)) ] - Deleted 232[ set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) ] - Deleted 233set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) - Inserted 234set ((reg:SI 20) (reg:DI 100)) - Inserted 235 236use1: 237[ set ((reg:CC...) (compare:CC (reg:DI 100) (...))) ] - Deleted 238set ((reg:CC...) - Inserted 239 (compare:CC (subreg:SI (reg:DI 100)) (...))) 240 241use2, use3, use4: 242set ((...) (reg:DI 100)) - No change 243 244se: - Inserted 245set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) 246 247Note: Most of the simple move instructions that were inserted will be 248 trivially dead and therefore eliminated. 249 250The implementation outline: 251--------------------------- 252Some definitions: 253 A web is RELEVANT if at the end of phase 1, his leader's 254 relevancy is {ZERO, SIGN}_EXTENDED_DEF. The source_mode of 255 the web is the source_mode of his leader. 256 A definition is a candidate for the optimization if it is part 257 of a RELEVANT web and his local source_mode is not narrower 258 then the source_mode of its web. 259 A use is a candidate for the optimization if it is part of a 260 RELEVANT web. 261 A simple explicit extension is a single set instruction that 262 extends a register (or a subregister) to a register (or 263 subregister). 264 A complex explicit extension is an explicit extension instruction 265 that is not simple. 266 A def extension is a simple explicit extension that is 267 also a candidate for the optimization. This extension is part 268 of the instruction stream, it is not generated by this 269 optimization. 270 A use extension is a simple explicit extension that is generated 271 and stored for candidate use during this optimization. It is 272 not emitted to the instruction stream till the last phase of 273 the optimization. 274 A reference is an instruction that satisfy at least on of these 275 criteria: 276 - It contains a definition with EXTENDED_DEF relevancy in a RELEVANT web. 277 - It is followed by a def extension. 278 - It contains a candidate use. 279 280Phase 1: Propagate extensions to uses. 281 In this phase, we find candidate extensions for the optimization 282 and we generate (but not emit) proper extensions "right before the 283 uses". 284 285 a. Build a DF object. 286 b. Traverse over all the instructions that contains a definition 287 and set their local relevancy and local source_mode like this: 288 - If the instruction is a simple explicit extension instruction, 289 mark it as {ZERO, SIGN}_EXTENDED_DEF according to the extension 290 type and mark its source_mode to be the mode of the quantity 291 that is been extended. 292 - Otherwise, If the instruction has an implicit extension, 293 which means that its high part is an extension of its low part, 294 or if it is a complicated explicit extension, mark it as 295 EXTENDED_DEF and set its source_mode to be the narrowest 296 mode that is been extended in the instruction. 297 c. Traverse over all the instructions that contains a use and set 298 their local relevancy to RELEVANT_USE (except for few corner 299 cases). 300 d. Produce the web. During union of two entries, update the 301 relevancy and source_mode of the leader. There are two major 302 guide lines for this update: 303 - If one of the entries is NOT_RELEVANT, mark the leader 304 NOT_RELEVANT. 305 - If one is ZERO_EXTENDED_DEF and the other is SIGN_EXTENDED_DEF 306 (or vice versa) mark the leader as NOT_RELEVANT. We don't 307 handle this kind of mixed webs. 308 (For more details about this update process, 309 see see_update_leader_extra_info ()). 310 e. Generate uses extensions according to the relevancy and 311 source_mode of the webs. 312 313Phase 2: Merge and eliminate locally redundant extensions. 314 In this phase, we try to merge def extensions and use 315 extensions with their references, and eliminate redundant extensions 316 in the same basic block. 317 318 Traverse over all the references. Do this in basic block number and 319 luid number forward order. 320 For each reference do: 321 a. Peephole optimization - try to merge it with all its 322 def extensions and use extensions in the following 323 order: 324 - Try to merge only the def extensions, one by one. 325 - Try to merge only the use extensions, one by one. 326 - Try to merge any couple of use extensions simultaneously. 327 - Try to merge any def extension with one or two uses 328 extensions simultaneously. 329 b. Handle each EXTENDED_DEF in it as if it was already merged with 330 an extension. 331 332 During the merge process we save the following data for each 333 register in each basic block: 334 a. The first instruction that defines the register in the basic 335 block. 336 b. The last instruction that defines the register in the basic 337 block. 338 c. The first extension of this register before the first 339 instruction that defines it in the basic block. 340 c. The first extension of this register after the last 341 instruction that defines it in the basic block. 342 This data will help us eliminate (or more precisely, not generate) 343 locally redundant extensions, and will be useful in the next stage. 344 345 While merging extensions with their reference there are 4 possible 346 situations: 347 a. A use extension was merged with the reference: 348 Delete the extension instruction and save the merged reference 349 for phase 4. (For details, see see_use_extension_merged ()) 350 b. A use extension failed to be merged with the reference: 351 If there is already such an extension in the same basic block 352 and it is not dead at this point, delete the unmerged extension 353 (it is locally redundant), otherwise properly update the above 354 basic block data. 355 (For details, see see_merge_one_use_extension ()) 356 c. A def extension was merged with the reference: 357 Mark this extension as a merged_def extension and properly 358 update the above basic block data. 359 (For details, see see_merge_one_def_extension ()) 360 d. A def extension failed to be merged with the reference: 361 Replace the definition of the NARROWmode register in the 362 reference with the proper subreg of WIDEmode register and save 363 the result as a merged reference. Also, properly update the 364 the above basic block data. 365 (For details, see see_def_extension_not_merged ()) 366 367Phase 3: Eliminate globally redundant extensions. 368In this phase, we set the bit vectors input of the edge based LCM 369using the recorded data on the registers in each basic block. 370We also save pointers for all the anticipatable and available 371occurrences of the relevant extensions. Then we run the LCM. 372 373 a. Initialize the comp, antloc, kill bit vectors to zero and the 374 transp bit vector to ones. 375 376 b. Traverse over all the references. Do this in basic block number 377 and luid number forward order. For each reference: 378 - Go over all its use extensions. For each such extension - 379 If it is not dead from the beginning of the basic block SET 380 the antloc bit of the current extension in the current 381 basic block bits. 382 If it is not dead till the end of the basic block SET the 383 comp bit of the current extension in the current basic 384 block bits. 385 - Go over all its def extensions that were merged with 386 it. For each such extension - 387 If it is not dead till the end of the basic block SET the 388 comp bit of the current extension in the current basic 389 block bits. 390 RESET the proper transp and kill bits. 391 - Go over all its def extensions that were not merged 392 with it. For each such extension - 393 RESET the transp bit and SET the kill bit of the current 394 extension in the current basic block bits. 395 396 c. Run the edge based LCM. 397 398Phase 4: Commit changes to the insn stream. 399This is the only phase that actually changes the instruction stream. 400Up to this point the optimization could be aborted at any time. 401Here we insert extensions at their best placements and delete the 402redundant ones according to the output of the LCM. We also replace 403some of the instructions according to the second phase merges results. 404 405 a. Use the pre_delete_map (from the output of the LCM) in order to 406 delete redundant extensions. This will prevent them from been 407 emitted in the first place. 408 409 b. Insert extensions on edges where needed according to 410 pre_insert_map and edge_list (from the output of the LCM). 411 412 c. For each reference do- 413 - Emit all the uses extensions that were not deleted until now, 414 right before the reference. 415 - Delete all the merged and unmerged def extensions from 416 the instruction stream. 417 - Replace the reference with the merged one, if exist. 418 419The implementation consists of four data structures: 420- Data structure I 421 Purpose: To handle the relevancy of the uses, definitions and webs. 422 Relevant structures: web_entry (from df.h), see_entry_extra_info. 423 Details: This is a disjoint-set data structure. Most of its functions are 424 implemented in web.c. Each definition and use in the code are 425 elements. A web_entry structure is allocated for each element to 426 hold the element's relevancy and source_mode. The union rules are 427 defined in see_update_leader_extra_info (). 428- Data structure II 429 Purpose: To store references and their extensions (uses and defs) 430 and to enable traverse over these references according to basic 431 block order. 432 Relevant structure: see_ref_s. 433 Details: This data structure consists of an array of splay trees. One splay 434 tree for each basic block. The splay tree nodes are references and 435 the keys are the luids of the references. 436 A see_ref_s structure is allocated for each reference. It holds the 437 reference itself, its def and uses extensions and later the merged 438 version of the reference. 439 Using this data structure we can traverse over all the references of 440 a basic block and their extensions in forward order. 441- Data structure III. 442 Purpose: To store local properties of registers for each basic block. 443 This data will later help us build the LCM sbitmap_vectors 444 input. 445 Relevant structure: see_register_properties. 446 Details: This data structure consists of an array of hash tables. One hash 447 for each basic block. The hash node are a register properties 448 and the keys are the numbers of the registers. 449 A see_register_properties structure is allocated for each register 450 that we might be interested in its properties. 451 Using this data structure we can easily find the properties of a 452 register in a specific basic block. This is necessary for locally 453 redundancy elimination and for setting up the LCM input. 454- Data structure IV. 455 Purpose: To store the extensions that are candidate for PRE and their 456 anticipatable and available occurrences. 457 Relevant structure: see_occr, see_pre_extension_expr. 458 Details: This data structure is a hash tables. Its nodes are the extensions 459 that are candidate for PRE. 460 A see_pre_extension_expr structure is allocated for each candidate 461 extension. It holds a copy of the extension and a linked list of all 462 the anticipatable and available occurrences of it. 463 We use this data structure when we read the output of the LCM. */ 464 465#include "config.h" 466#include "system.h" 467#include "coretypes.h" 468#include "tm.h" 469 470#include "obstack.h" 471#include "rtl.h" 472#include "output.h" 473#include "df.h" 474#include "insn-config.h" 475#include "recog.h" 476#include "expr.h" 477#include "splay-tree.h" 478#include "hashtab.h" 479#include "regs.h" 480#include "timevar.h" 481#include "tree-pass.h" 482 483/* Used to classify defs and uses according to relevancy. */ 484enum entry_type { 485 NOT_RELEVANT, 486 SIGN_EXTENDED_DEF, 487 ZERO_EXTENDED_DEF, 488 EXTENDED_DEF, 489 RELEVANT_USE 490}; 491 492/* Used to classify extensions in relevant webs. */ 493enum extension_type { 494 DEF_EXTENSION, 495 EXPLICIT_DEF_EXTENSION, 496 IMPLICIT_DEF_EXTENSION, 497 USE_EXTENSION 498}; 499 500/* Global data structures and flags. */ 501 502/* This structure will be assigned for each web_entry structure (defined 503 in df.h). It is placed in the extra_info field of a web_entry and holds the 504 relevancy and source mode of the web_entry. */ 505 506struct see_entry_extra_info 507{ 508 /* The relevancy of the ref. */ 509 enum entry_type relevancy; 510 /* The relevancy of the ref. 511 This field is updated only once - when this structure is created. */ 512 enum entry_type local_relevancy; 513 /* The source register mode. */ 514 enum machine_mode source_mode; 515 /* This field is used only if the relevancy is ZERO/SIGN_EXTENDED_DEF. 516 It is updated only once when this structure is created. */ 517 enum machine_mode local_source_mode; 518 /* This field is used only if the relevancy is EXTENDED_DEF. 519 It holds the narrowest mode that is sign extended. */ 520 enum machine_mode source_mode_signed; 521 /* This field is used only if the relevancy is EXTENDED_DEF. 522 It holds the narrowest mode that is zero extended. */ 523 enum machine_mode source_mode_unsigned; 524}; 525 526/* There is one such structure for every reference. It stores the reference 527 itself as well as its extensions (uses and definitions). 528 Used as the value in splay_tree see_bb_splay_ar[]. */ 529struct see_ref_s 530{ 531 /* The luid of the insn. */ 532 unsigned int luid; 533 /* The insn of the ref. */ 534 rtx insn; 535 /* The merged insn that was formed from the reference's insn and extensions. 536 If all merges failed, it remains NULL. */ 537 rtx merged_insn; 538 /* The def extensions of the reference that were not merged with 539 it. */ 540 htab_t unmerged_def_se_hash; 541 /* The def extensions of the reference that were merged with 542 it. Implicit extensions of the reference will be stored here too. */ 543 htab_t merged_def_se_hash; 544 /* The uses extensions of reference. */ 545 htab_t use_se_hash; 546}; 547 548/* There is one such structure for every relevant extended register in a 549 specific basic block. This data will help us build the LCM sbitmap_vectors 550 input. */ 551struct see_register_properties 552{ 553 /* The register number. */ 554 unsigned int regno; 555 /* The last luid of the reference that defines this register in this basic 556 block. */ 557 int last_def; 558 /* The luid of the reference that has the first extension of this register 559 that appears before any definition in this basic block. */ 560 int first_se_before_any_def; 561 /* The luid of the reference that has the first extension of this register 562 that appears after the last definition in this basic block. */ 563 int first_se_after_last_def; 564}; 565 566/* Occurrence of an expression. 567 There must be at most one available occurrence and at most one anticipatable 568 occurrence per basic block. */ 569struct see_occr 570{ 571 /* Next occurrence of this expression. */ 572 struct see_occr *next; 573 /* The insn that computes the expression. */ 574 rtx insn; 575 int block_num; 576}; 577 578/* There is one such structure for every relevant extension expression. 579 It holds a copy of this extension instruction as well as a linked lists of 580 pointers to all the antic and avail occurrences of it. */ 581struct see_pre_extension_expr 582{ 583 /* A copy of the extension instruction. */ 584 rtx se_insn; 585 /* Index in the available expression bitmaps. */ 586 int bitmap_index; 587 /* List of anticipatable occurrences in basic blocks in the function. 588 An "anticipatable occurrence" is the first occurrence in the basic block, 589 the operands are not modified in the basic block prior to the occurrence 590 and the output is not used between the start of the block and the 591 occurrence. */ 592 struct see_occr *antic_occr; 593 /* List of available occurrence in basic blocks in the function. 594 An "available occurrence" is the last occurrence in the basic block and 595 the operands are not modified by following statements in the basic block 596 [including this insn]. */ 597 struct see_occr *avail_occr; 598}; 599 600/* Helper structure for the note_uses and see_replace_src functions. */ 601struct see_replace_data 602{ 603 rtx from; 604 rtx to; 605}; 606 607/* Helper structure for the note_uses and see_mentioned_reg functions. */ 608struct see_mentioned_reg_data 609{ 610 rtx reg; 611 bool mentioned; 612}; 613 614/* A data flow object that will be created once and used throughout the 615 optimization. */ 616static struct df *df = NULL; 617/* An array of web_entries. The i'th definition in the df object is associated 618 with def_entry[i] */ 619static struct web_entry *def_entry = NULL; 620/* An array of web_entries. The i'th use in the df object is associated with 621 use_entry[i] */ 622static struct web_entry *use_entry = NULL; 623/* Array of splay_trees. 624 see_bb_splay_ar[i] refers to the splay tree of the i'th basic block. 625 The splay tree will hold see_ref_s structures. The key is the luid 626 of the insn. This way we can traverse over the references of each basic 627 block in forward or backward order. */ 628static splay_tree *see_bb_splay_ar = NULL; 629/* Array of hashes. 630 see_bb_hash_ar[i] refers to the hash of the i'th basic block. 631 The hash will hold see_register_properties structure. The key is regno. */ 632static htab_t *see_bb_hash_ar = NULL; 633/* Hash table that holds a copy of all the extensions. The key is the right 634 hand side of the se_insn field. */ 635static htab_t see_pre_extension_hash = NULL; 636 637/* Local LCM properties of expressions. */ 638/* Nonzero for expressions that are transparent in the block. */ 639static sbitmap *transp = NULL; 640/* Nonzero for expressions that are computed (available) in the block. */ 641static sbitmap *comp = NULL; 642/* Nonzero for expressions that are locally anticipatable in the block. */ 643static sbitmap *antloc = NULL; 644/* Nonzero for expressions that are locally killed in the block. */ 645static sbitmap *ae_kill = NULL; 646/* Nonzero for expressions which should be inserted on a specific edge. */ 647static sbitmap *pre_insert_map = NULL; 648/* Nonzero for expressions which should be deleted in a specific block. */ 649static sbitmap *pre_delete_map = NULL; 650/* Contains the edge_list returned by pre_edge_lcm. */ 651static struct edge_list *edge_list = NULL; 652/* Records the last basic block at the beginning of the optimization. */ 653static int last_bb; 654/* Records the number of uses at the beginning of the optimization. */ 655static unsigned int uses_num; 656/* Records the number of definitions at the beginning of the optimization. */ 657static unsigned int defs_num; 658 659#define ENTRY_EI(ENTRY) ((struct see_entry_extra_info *) (ENTRY)->extra_info) 660 661/* Functions implementation. */ 662 663/* Verifies that EXTENSION's pattern is this: 664 665 set (reg/subreg reg1) (sign/zero_extend:WIDEmode (reg/subreg reg2)) 666 667 If it doesn't have the expected pattern return NULL. 668 Otherwise, if RETURN_DEST_REG is set, return reg1 else return reg2. */ 669 670static rtx 671see_get_extension_reg (rtx extension, bool return_dest_reg) 672{ 673 rtx set, rhs, lhs; 674 rtx reg1 = NULL; 675 rtx reg2 = NULL; 676 677 /* Parallel pattern for extension not supported for the moment. */ 678 if (GET_CODE (PATTERN (extension)) == PARALLEL) 679 return NULL; 680 681 set = single_set (extension); 682 if (!set) 683 return NULL; 684 lhs = SET_DEST (set); 685 rhs = SET_SRC (set); 686 687 if (REG_P (lhs)) 688 reg1 = lhs; 689 else if (REG_P (SUBREG_REG (lhs))) 690 reg1 = SUBREG_REG (lhs); 691 else 692 return NULL; 693 694 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND) 695 return NULL; 696 697 rhs = XEXP (rhs, 0); 698 if (REG_P (rhs)) 699 reg2 = rhs; 700 else if (REG_P (SUBREG_REG (rhs))) 701 reg2 = SUBREG_REG (rhs); 702 else 703 return NULL; 704 705 if (return_dest_reg) 706 return reg1; 707 return reg2; 708} 709 710/* Verifies that EXTENSION's pattern is this: 711 712 set (reg/subreg reg1) (sign/zero_extend: (...expr...) 713 714 If it doesn't have the expected pattern return UNKNOWN. 715 Otherwise, set SOURCE_MODE to be the mode of the extended expr and return 716 the rtx code of the extension. */ 717 718static enum rtx_code 719see_get_extension_data (rtx extension, enum machine_mode *source_mode) 720{ 721 rtx rhs, lhs, set; 722 723 if (!extension || !INSN_P (extension)) 724 return UNKNOWN; 725 726 /* Parallel pattern for extension not supported for the moment. */ 727 if (GET_CODE (PATTERN (extension)) == PARALLEL) 728 return NOT_RELEVANT; 729 730 set = single_set (extension); 731 if (!set) 732 return NOT_RELEVANT; 733 rhs = SET_SRC (set); 734 lhs = SET_DEST (set); 735 736 /* Don't handle extensions to something other then register or 737 subregister. */ 738 if (!REG_P (lhs) && !SUBREG_REG (lhs)) 739 return UNKNOWN; 740 741 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND) 742 return UNKNOWN; 743 744 if (!REG_P (XEXP (rhs, 0)) 745 && !(GET_CODE (XEXP (rhs, 0)) == SUBREG 746 && REG_P (SUBREG_REG (XEXP (rhs, 0))))) 747 return UNKNOWN; 748 749 *source_mode = GET_MODE (XEXP (rhs, 0)); 750 751 if (GET_CODE (rhs) == SIGN_EXTEND) 752 return SIGN_EXTEND; 753 return ZERO_EXTEND; 754} 755 756 757/* Generate instruction with the pattern: 758 set ((reg r) (sign/zero_extend (subreg:mode (reg r)))) 759 (the register r on both sides of the set is the same register). 760 And recognize it. 761 If the recognition failed, this is very bad, return NULL (This will abort 762 the entire optimization). 763 Otherwise, return the generated instruction. */ 764 765static rtx 766see_gen_normalized_extension (rtx reg, enum rtx_code extension_code, 767 enum machine_mode mode) 768{ 769 rtx subreg, insn; 770 rtx extension = NULL; 771 772 if (!reg 773 || !REG_P (reg) 774 || (extension_code != SIGN_EXTEND && extension_code != ZERO_EXTEND)) 775 return NULL; 776 777 subreg = gen_lowpart_SUBREG (mode, reg); 778 if (extension_code == SIGN_EXTEND) 779 extension = gen_rtx_SIGN_EXTEND (GET_MODE (reg), subreg); 780 else 781 extension = gen_rtx_ZERO_EXTEND (GET_MODE (reg), subreg); 782 783 start_sequence (); 784 emit_insn (gen_rtx_SET (VOIDmode, reg, extension)); 785 insn = get_insns (); 786 end_sequence (); 787 788 if (insn_invalid_p (insn)) 789 /* Recognition failed, this is very bad for this optimization. 790 Abort the optimization. */ 791 return NULL; 792 return insn; 793} 794 795/* Hashes and splay_trees related functions implementation. */ 796 797/* Helper functions for the pre_extension hash. 798 This kind of hash will hold see_pre_extension_expr structures. 799 800 The key is the right hand side of the se_insn field. 801 Note that the se_insn is an expression that looks like: 802 803 set ((reg:WIDEmode r1) (sign_extend:WIDEmode 804 (subreg:NARROWmode (reg:WIDEmode r2)))) */ 805 806/* Return TRUE if P1 has the same value in its rhs as P2. 807 Otherwise, return FALSE. 808 P1 and P2 are see_pre_extension_expr structures. */ 809 810static int 811eq_descriptor_pre_extension (const void *p1, const void *p2) 812{ 813 const struct see_pre_extension_expr *extension1 = p1; 814 const struct see_pre_extension_expr *extension2 = p2; 815 rtx set1 = single_set (extension1->se_insn); 816 rtx set2 = single_set (extension2->se_insn); 817 rtx rhs1, rhs2; 818 819 gcc_assert (set1 && set2); 820 rhs1 = SET_SRC (set1); 821 rhs2 = SET_SRC (set2); 822 823 return rtx_equal_p (rhs1, rhs2); 824} 825 826 827/* P is a see_pre_extension_expr struct, use the RHS of the se_insn field. 828 Note that the RHS is an expression that looks like this: 829 (sign_extend:WIDEmode (subreg:NARROWmode (reg:WIDEmode r))) */ 830 831static hashval_t 832hash_descriptor_pre_extension (const void *p) 833{ 834 const struct see_pre_extension_expr *extension = p; 835 rtx set = single_set (extension->se_insn); 836 rtx rhs; 837 838 gcc_assert (set); 839 rhs = SET_SRC (set); 840 841 return hash_rtx (rhs, GET_MODE (rhs), 0, NULL, 0); 842} 843 844 845/* Free the allocated memory of the current see_pre_extension_expr struct. 846 847 It frees the two linked list of the occurrences structures. */ 848 849static void 850hash_del_pre_extension (void *p) 851{ 852 struct see_pre_extension_expr *extension = p; 853 struct see_occr *curr_occr = extension->antic_occr; 854 struct see_occr *next_occr = NULL; 855 856 /* Free the linked list of the anticipatable occurrences. */ 857 while (curr_occr) 858 { 859 next_occr = curr_occr->next; 860 free (curr_occr); 861 curr_occr = next_occr; 862 } 863 864 /* Free the linked list of the available occurrences. */ 865 curr_occr = extension->avail_occr; 866 while (curr_occr) 867 { 868 next_occr = curr_occr->next; 869 free (curr_occr); 870 curr_occr = next_occr; 871 } 872 873 /* Free the see_pre_extension_expr structure itself. */ 874 free (extension); 875} 876 877 878/* Helper functions for the register_properties hash. 879 This kind of hash will hold see_register_properties structures. 880 881 The value of the key is the regno field of the structure. */ 882 883/* Return TRUE if P1 has the same value in the regno field as P2. 884 Otherwise, return FALSE. 885 Where P1 and P2 are see_register_properties structures. */ 886 887static int 888eq_descriptor_properties (const void *p1, const void *p2) 889{ 890 const struct see_register_properties *curr_prop1 = p1; 891 const struct see_register_properties *curr_prop2 = p2; 892 893 return curr_prop1->regno == curr_prop2->regno; 894} 895 896 897/* P is a see_register_properties struct, use the register number in the 898 regno field. */ 899 900static hashval_t 901hash_descriptor_properties (const void *p) 902{ 903 const struct see_register_properties *curr_prop = p; 904 return curr_prop->regno; 905} 906 907 908/* Free the allocated memory of the current see_register_properties struct. */ 909static void 910hash_del_properties (void *p) 911{ 912 struct see_register_properties *curr_prop = p; 913 free (curr_prop); 914} 915 916 917/* Helper functions for an extension hash. 918 This kind of hash will hold insns that look like: 919 920 set ((reg:WIDEmode r1) (sign_extend:WIDEmode 921 (subreg:NARROWmode (reg:WIDEmode r2)))) 922 or 923 set ((reg:WIDEmode r1) (sign_extend:WIDEmode (reg:NARROWmode r2))) 924 925 The value of the key is (REGNO (reg:WIDEmode r1)) 926 It is possible to search this hash in two ways: 927 1. By a register rtx. The Value that is been compared to the keys is the 928 REGNO of it. 929 2. By an insn with the above pattern. The Value that is been compared to 930 the keys is the REGNO of the reg on the lhs. */ 931 932/* Return TRUE if P1 has the same value as P2. Otherwise, return FALSE. 933 Where P1 is an insn and P2 is an insn or a register. */ 934 935static int 936eq_descriptor_extension (const void *p1, const void *p2) 937{ 938 const rtx insn = (rtx) p1; 939 const rtx element = (rtx) p2; 940 rtx set1 = single_set (insn); 941 rtx dest_reg1; 942 rtx set2 = NULL; 943 rtx dest_reg2 = NULL; 944 945 gcc_assert (set1 && element && (REG_P (element) || INSN_P (element))); 946 947 dest_reg1 = SET_DEST (set1); 948 949 if (INSN_P (element)) 950 { 951 set2 = single_set (element); 952 dest_reg2 = SET_DEST (set2); 953 } 954 else 955 dest_reg2 = element; 956 957 return REGNO (dest_reg1) == REGNO (dest_reg2); 958} 959 960 961/* If P is an insn, use the register number of its lhs 962 otherwise, P is a register, use its number. */ 963 964static hashval_t 965hash_descriptor_extension (const void *p) 966{ 967 const rtx r = (rtx) p; 968 rtx set, lhs; 969 970 if (r && REG_P (r)) 971 return REGNO (r); 972 973 gcc_assert (r && INSN_P (r)); 974 set = single_set (r); 975 gcc_assert (set); 976 lhs = SET_DEST (set); 977 return REGNO (lhs); 978} 979 980 981/* Helper function for a see_bb_splay_ar[i] splay tree. 982 It frees all the allocated memory of a struct see_ref_s pointer. 983 984 VALUE is the value of a splay tree node. */ 985 986static void 987see_free_ref_s (splay_tree_value value) 988{ 989 struct see_ref_s *ref_s = (struct see_ref_s *)value; 990 991 if (ref_s->unmerged_def_se_hash) 992 htab_delete (ref_s->unmerged_def_se_hash); 993 if (ref_s->merged_def_se_hash) 994 htab_delete (ref_s->merged_def_se_hash); 995 if (ref_s->use_se_hash) 996 htab_delete (ref_s->use_se_hash); 997 free (ref_s); 998} 999 1000 1001/* Rest of the implementation. */ 1002 1003/* Search the extension hash for a suitable entry for EXTENSION. 1004 TYPE is the type of EXTENSION (USE_EXTENSION or DEF_EXTENSION). 1005 1006 If TYPE is DEF_EXTENSION we need to normalize EXTENSION before searching the 1007 extension hash. 1008 1009 If a suitable entry was found, return the slot. Otherwise, store EXTENSION 1010 in the hash and return NULL. */ 1011 1012static struct see_pre_extension_expr * 1013see_seek_pre_extension_expr (rtx extension, enum extension_type type) 1014{ 1015 struct see_pre_extension_expr **slot_pre_exp, temp_pre_exp; 1016 rtx dest_extension_reg = see_get_extension_reg (extension, 1); 1017 enum rtx_code extension_code; 1018 enum machine_mode source_extension_mode; 1019 1020 if (type == DEF_EXTENSION) 1021 { 1022 extension_code = see_get_extension_data (extension, 1023 &source_extension_mode); 1024 gcc_assert (extension_code != UNKNOWN); 1025 extension = 1026 see_gen_normalized_extension (dest_extension_reg, extension_code, 1027 source_extension_mode); 1028 } 1029 temp_pre_exp.se_insn = extension; 1030 slot_pre_exp = 1031 (struct see_pre_extension_expr **) htab_find_slot (see_pre_extension_hash, 1032 &temp_pre_exp, INSERT); 1033 if (*slot_pre_exp == NULL) 1034 /* This is the first time this extension instruction is encountered. Store 1035 it in the hash. */ 1036 { 1037 (*slot_pre_exp) = xmalloc (sizeof (struct see_pre_extension_expr)); 1038 (*slot_pre_exp)->se_insn = extension; 1039 (*slot_pre_exp)->bitmap_index = 1040 (htab_elements (see_pre_extension_hash) - 1); 1041 (*slot_pre_exp)->antic_occr = NULL; 1042 (*slot_pre_exp)->avail_occr = NULL; 1043 return NULL; 1044 } 1045 return *slot_pre_exp; 1046} 1047 1048 1049/* This function defines how to update the extra_info of the web_entry. 1050 1051 FIRST is the pointer of the extra_info of the first web_entry. 1052 SECOND is the pointer of the extra_info of the second web_entry. 1053 The first web_entry will be the predecessor (leader) of the second web_entry 1054 after the union. 1055 1056 Return true if FIRST and SECOND points to the same web entry structure and 1057 nothing is done. Otherwise, return false. */ 1058 1059static bool 1060see_update_leader_extra_info (struct web_entry *first, struct web_entry *second) 1061{ 1062 struct see_entry_extra_info *first_ei, *second_ei; 1063 1064 first = unionfind_root (first); 1065 second = unionfind_root (second); 1066 1067 if (unionfind_union (first, second)) 1068 return true; 1069 1070 first_ei = (struct see_entry_extra_info *) first->extra_info; 1071 second_ei = (struct see_entry_extra_info *) second->extra_info; 1072 1073 gcc_assert (first_ei && second_ei); 1074 1075 if (second_ei->relevancy == NOT_RELEVANT) 1076 { 1077 first_ei->relevancy = NOT_RELEVANT; 1078 return false; 1079 } 1080 switch (first_ei->relevancy) 1081 { 1082 case NOT_RELEVANT: 1083 break; 1084 case RELEVANT_USE: 1085 switch (second_ei->relevancy) 1086 { 1087 case RELEVANT_USE: 1088 break; 1089 case EXTENDED_DEF: 1090 first_ei->relevancy = second_ei->relevancy; 1091 first_ei->source_mode_signed = second_ei->source_mode_signed; 1092 first_ei->source_mode_unsigned = second_ei->source_mode_unsigned; 1093 break; 1094 case SIGN_EXTENDED_DEF: 1095 case ZERO_EXTENDED_DEF: 1096 first_ei->relevancy = second_ei->relevancy; 1097 first_ei->source_mode = second_ei->source_mode; 1098 break; 1099 default: 1100 gcc_unreachable (); 1101 } 1102 break; 1103 case SIGN_EXTENDED_DEF: 1104 switch (second_ei->relevancy) 1105 { 1106 case SIGN_EXTENDED_DEF: 1107 /* The mode of the root should be the wider one in this case. */ 1108 first_ei->source_mode = 1109 (first_ei->source_mode > second_ei->source_mode) ? 1110 first_ei->source_mode : second_ei->source_mode; 1111 break; 1112 case RELEVANT_USE: 1113 break; 1114 case ZERO_EXTENDED_DEF: 1115 /* Don't mix webs with zero extend and sign extend. */ 1116 first_ei->relevancy = NOT_RELEVANT; 1117 break; 1118 case EXTENDED_DEF: 1119 if (second_ei->source_mode_signed == MAX_MACHINE_MODE) 1120 first_ei->relevancy = NOT_RELEVANT; 1121 else 1122 /* The mode of the root should be the wider one in this case. */ 1123 first_ei->source_mode = 1124 (first_ei->source_mode > second_ei->source_mode_signed) ? 1125 first_ei->source_mode : second_ei->source_mode_signed; 1126 break; 1127 default: 1128 gcc_unreachable (); 1129 } 1130 break; 1131 /* This case is similar to the previous one, with little changes. */ 1132 case ZERO_EXTENDED_DEF: 1133 switch (second_ei->relevancy) 1134 { 1135 case SIGN_EXTENDED_DEF: 1136 /* Don't mix webs with zero extend and sign extend. */ 1137 first_ei->relevancy = NOT_RELEVANT; 1138 break; 1139 case RELEVANT_USE: 1140 break; 1141 case ZERO_EXTENDED_DEF: 1142 /* The mode of the root should be the wider one in this case. */ 1143 first_ei->source_mode = 1144 (first_ei->source_mode > second_ei->source_mode) ? 1145 first_ei->source_mode : second_ei->source_mode; 1146 break; 1147 case EXTENDED_DEF: 1148 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE) 1149 first_ei->relevancy = NOT_RELEVANT; 1150 else 1151 /* The mode of the root should be the wider one in this case. */ 1152 first_ei->source_mode = 1153 (first_ei->source_mode > second_ei->source_mode_unsigned) ? 1154 first_ei->source_mode : second_ei->source_mode_unsigned; 1155 break; 1156 default: 1157 gcc_unreachable (); 1158 } 1159 break; 1160 case EXTENDED_DEF: 1161 if (first_ei->source_mode_signed != MAX_MACHINE_MODE 1162 && first_ei->source_mode_unsigned != MAX_MACHINE_MODE) 1163 { 1164 switch (second_ei->relevancy) 1165 { 1166 case SIGN_EXTENDED_DEF: 1167 first_ei->relevancy = SIGN_EXTENDED_DEF; 1168 first_ei->source_mode = 1169 (first_ei->source_mode_signed > second_ei->source_mode) ? 1170 first_ei->source_mode_signed : second_ei->source_mode; 1171 break; 1172 case RELEVANT_USE: 1173 break; 1174 case ZERO_EXTENDED_DEF: 1175 first_ei->relevancy = ZERO_EXTENDED_DEF; 1176 first_ei->source_mode = 1177 (first_ei->source_mode_unsigned > second_ei->source_mode) ? 1178 first_ei->source_mode_unsigned : second_ei->source_mode; 1179 break; 1180 case EXTENDED_DEF: 1181 if (second_ei->source_mode_unsigned != MAX_MACHINE_MODE) 1182 first_ei->source_mode_unsigned = 1183 (first_ei->source_mode_unsigned > 1184 second_ei->source_mode_unsigned) ? 1185 first_ei->source_mode_unsigned : 1186 second_ei->source_mode_unsigned; 1187 if (second_ei->source_mode_signed != MAX_MACHINE_MODE) 1188 first_ei->source_mode_signed = 1189 (first_ei->source_mode_signed > 1190 second_ei->source_mode_signed) ? 1191 first_ei->source_mode_signed : second_ei->source_mode_signed; 1192 break; 1193 default: 1194 gcc_unreachable (); 1195 } 1196 } 1197 else if (first_ei->source_mode_signed == MAX_MACHINE_MODE) 1198 { 1199 gcc_assert (first_ei->source_mode_unsigned != MAX_MACHINE_MODE); 1200 switch (second_ei->relevancy) 1201 { 1202 case SIGN_EXTENDED_DEF: 1203 first_ei->relevancy = NOT_RELEVANT; 1204 break; 1205 case RELEVANT_USE: 1206 break; 1207 case ZERO_EXTENDED_DEF: 1208 first_ei->relevancy = ZERO_EXTENDED_DEF; 1209 first_ei->source_mode = 1210 (first_ei->source_mode_unsigned > second_ei->source_mode) ? 1211 first_ei->source_mode_unsigned : second_ei->source_mode; 1212 break; 1213 case EXTENDED_DEF: 1214 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE) 1215 first_ei->relevancy = NOT_RELEVANT; 1216 else 1217 first_ei->source_mode_unsigned = 1218 (first_ei->source_mode_unsigned > 1219 second_ei->source_mode_unsigned) ? 1220 first_ei->source_mode_unsigned : 1221 second_ei->source_mode_unsigned; 1222 break; 1223 default: 1224 gcc_unreachable (); 1225 } 1226 } 1227 else 1228 { 1229 gcc_assert (first_ei->source_mode_unsigned == MAX_MACHINE_MODE); 1230 gcc_assert (first_ei->source_mode_signed != MAX_MACHINE_MODE); 1231 switch (second_ei->relevancy) 1232 { 1233 case SIGN_EXTENDED_DEF: 1234 first_ei->relevancy = SIGN_EXTENDED_DEF; 1235 first_ei->source_mode = 1236 (first_ei->source_mode_signed > second_ei->source_mode) ? 1237 first_ei->source_mode_signed : second_ei->source_mode; 1238 break; 1239 case RELEVANT_USE: 1240 break; 1241 case ZERO_EXTENDED_DEF: 1242 first_ei->relevancy = NOT_RELEVANT; 1243 break; 1244 case EXTENDED_DEF: 1245 if (second_ei->source_mode_signed == MAX_MACHINE_MODE) 1246 first_ei->relevancy = NOT_RELEVANT; 1247 else 1248 first_ei->source_mode_signed = 1249 (first_ei->source_mode_signed > 1250 second_ei->source_mode_signed) ? 1251 first_ei->source_mode_signed : second_ei->source_mode_signed; 1252 break; 1253 default: 1254 gcc_unreachable (); 1255 } 1256 } 1257 break; 1258 default: 1259 /* Unknown patern type. */ 1260 gcc_unreachable (); 1261 } 1262 1263 return false; 1264} 1265 1266 1267/* Free global data structures. */ 1268 1269static void 1270see_free_data_structures (void) 1271{ 1272 int i; 1273 unsigned int j; 1274 1275 /* Free the bitmap vectors. */ 1276 if (transp) 1277 { 1278 sbitmap_vector_free (transp); 1279 transp = NULL; 1280 sbitmap_vector_free (comp); 1281 comp = NULL; 1282 sbitmap_vector_free (antloc); 1283 antloc = NULL; 1284 sbitmap_vector_free (ae_kill); 1285 ae_kill = NULL; 1286 } 1287 if (pre_insert_map) 1288 { 1289 sbitmap_vector_free (pre_insert_map); 1290 pre_insert_map = NULL; 1291 } 1292 if (pre_delete_map) 1293 { 1294 sbitmap_vector_free (pre_delete_map); 1295 pre_delete_map = NULL; 1296 } 1297 if (edge_list) 1298 { 1299 free_edge_list (edge_list); 1300 edge_list = NULL; 1301 } 1302 1303 /* Free the extension hash. */ 1304 htab_delete (see_pre_extension_hash); 1305 1306 /* Free the array of hashes. */ 1307 for (i = 0; i < last_bb; i++) 1308 if (see_bb_hash_ar[i]) 1309 htab_delete (see_bb_hash_ar[i]); 1310 free (see_bb_hash_ar); 1311 1312 /* Free the array of splay trees. */ 1313 for (i = 0; i < last_bb; i++) 1314 if (see_bb_splay_ar[i]) 1315 splay_tree_delete (see_bb_splay_ar[i]); 1316 free (see_bb_splay_ar); 1317 1318 /* Free the array of web entries and their extra info field. */ 1319 for (j = 0; j < defs_num; j++) 1320 free (def_entry[j].extra_info); 1321 free (def_entry); 1322 for (j = 0; j < uses_num; j++) 1323 free (use_entry[j].extra_info); 1324 free (use_entry); 1325} 1326 1327 1328/* Initialize global data structures and variables. */ 1329 1330static void 1331see_initialize_data_structures (void) 1332{ 1333 /* Build the df object. */ 1334 df = df_init (DF_HARD_REGS | DF_EQUIV_NOTES | DF_SUBREGS); 1335 df_rd_add_problem (df, 0); 1336 df_chain_add_problem (df, DF_DU_CHAIN | DF_UD_CHAIN); 1337 df_analyze (df); 1338 1339 if (dump_file) 1340 df_dump (df, dump_file); 1341 1342 /* Record the last basic block at the beginning of the optimization. */ 1343 last_bb = last_basic_block; 1344 /* Record the number of uses at the beginning of the optimization. */ 1345 uses_num = DF_USES_SIZE (df); 1346 /* Record the number of definitions at the beginning of the optimization. */ 1347 defs_num = DF_DEFS_SIZE (df); 1348 1349 /* Allocate web entries array for the union-find data structure. */ 1350 def_entry = xcalloc (defs_num, sizeof (struct web_entry)); 1351 use_entry = xcalloc (uses_num, sizeof (struct web_entry)); 1352 1353 /* Allocate an array of splay trees. 1354 One splay tree for each basic block. */ 1355 see_bb_splay_ar = xcalloc (last_bb, sizeof (splay_tree)); 1356 1357 /* Allocate an array of hashes. 1358 One hash for each basic block. */ 1359 see_bb_hash_ar = xcalloc (last_bb, sizeof (htab_t)); 1360 1361 /* Allocate the extension hash. It will hold the extensions that we want 1362 to PRE. */ 1363 see_pre_extension_hash = htab_create (10, 1364 hash_descriptor_pre_extension, 1365 eq_descriptor_pre_extension, 1366 hash_del_pre_extension); 1367} 1368 1369 1370/* Function called by note_uses to check if a register is used in a 1371 subexpressions. 1372 1373 X is a pointer to the subexpression and DATA is a pointer to a 1374 see_mentioned_reg_data structure that contains the register to look for and 1375 a place for the result. */ 1376 1377static void 1378see_mentioned_reg (rtx *x, void *data) 1379{ 1380 struct see_mentioned_reg_data *d 1381 = (struct see_mentioned_reg_data *) data; 1382 1383 if (reg_mentioned_p (d->reg, *x)) 1384 d->mentioned = true; 1385} 1386 1387 1388/* We don't want to merge a use extension with a reference if the extended 1389 register is used only in a simple move instruction. We also don't want to 1390 merge a def extension with a reference if the source register of the 1391 extension is defined only in a simple move in the reference. 1392 1393 REF is the reference instruction. 1394 EXTENSION is the use extension or def extension instruction. 1395 TYPE is the type of the extension (use or def). 1396 1397 Return true if the reference is complicated enough, so we would like to merge 1398 it with the extension. Otherwise, return false. */ 1399 1400static bool 1401see_want_to_be_merged_with_extension (rtx ref, rtx extension, 1402 enum extension_type type) 1403{ 1404 rtx pat; 1405 rtx dest_extension_reg = see_get_extension_reg (extension, 1); 1406 rtx source_extension_reg = see_get_extension_reg (extension, 0); 1407 enum rtx_code code; 1408 struct see_mentioned_reg_data d; 1409 int i; 1410 1411 pat = PATTERN (ref); 1412 code = GET_CODE (pat); 1413 1414 if (code == PARALLEL) 1415 { 1416 for (i = 0; i < XVECLEN (pat, 0); i++) 1417 { 1418 rtx sub = XVECEXP (pat, 0, i); 1419 1420 if (GET_CODE (sub) == SET 1421 && (REG_P (SET_DEST (sub)) 1422 || (GET_CODE (SET_DEST (sub)) == SUBREG 1423 && REG_P (SUBREG_REG (SET_DEST (sub))))) 1424 && (REG_P (SET_SRC (sub)) 1425 || (GET_CODE (SET_SRC (sub)) == SUBREG 1426 && REG_P (SUBREG_REG (SET_SRC (sub)))))) 1427 { 1428 /* This is a simple move SET. */ 1429 if (type == DEF_EXTENSION 1430 && reg_mentioned_p (source_extension_reg, SET_DEST (sub))) 1431 return false; 1432 } 1433 else 1434 { 1435 /* This is not a simple move SET. 1436 Check if it uses the source of the extension. */ 1437 if (type == USE_EXTENSION) 1438 { 1439 d.reg = dest_extension_reg; 1440 d.mentioned = false; 1441 note_uses (&sub, see_mentioned_reg, &d); 1442 if (d.mentioned) 1443 return true; 1444 } 1445 } 1446 } 1447 if (type == USE_EXTENSION) 1448 return false; 1449 } 1450 else 1451 { 1452 if (code == SET 1453 && (REG_P (SET_DEST (pat)) 1454 || (GET_CODE (SET_DEST (pat)) == SUBREG 1455 && REG_P (SUBREG_REG (SET_DEST (pat))))) 1456 && (REG_P (SET_SRC (pat)) 1457 || (GET_CODE (SET_SRC (pat)) == SUBREG 1458 && REG_P (SUBREG_REG (SET_SRC (pat)))))) 1459 /* This is a simple move SET. */ 1460 return false; 1461 } 1462 1463 return true; 1464} 1465 1466 1467/* Print the register number of the current see_register_properties 1468 structure. 1469 1470 This is a subroutine of see_main called via htab_traverse. 1471 SLOT contains the current see_register_properties structure pointer. */ 1472 1473static int 1474see_print_register_properties (void **slot, void *b ATTRIBUTE_UNUSED) 1475{ 1476 struct see_register_properties *prop = *slot; 1477 1478 gcc_assert (prop); 1479 fprintf (dump_file, "Property found for register %d\n", prop->regno); 1480 return 1; 1481} 1482 1483 1484/* Print the extension instruction of the current see_register_properties 1485 structure. 1486 1487 This is a subroutine of see_main called via htab_traverse. 1488 SLOT contains the current see_pre_extension_expr structure pointer. */ 1489 1490static int 1491see_print_pre_extension_expr (void **slot, void *b ATTRIBUTE_UNUSED) 1492{ 1493 struct see_pre_extension_expr *pre_extension = *slot; 1494 1495 gcc_assert (pre_extension 1496 && pre_extension->se_insn 1497 && INSN_P (pre_extension->se_insn)); 1498 1499 fprintf (dump_file, "Index %d for:\n", pre_extension->bitmap_index); 1500 print_rtl_single (dump_file, pre_extension->se_insn); 1501 1502 return 1; 1503} 1504 1505 1506/* Phase 4 implementation: Commit changes to the insn stream. */ 1507 1508/* Delete the merged def extension. 1509 1510 This is a subroutine of see_commit_ref_changes called via htab_traverse. 1511 1512 SLOT contains the current def extension instruction. 1513 B is the see_ref_s structure pointer. */ 1514 1515static int 1516see_delete_merged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED) 1517{ 1518 rtx def_se = *slot; 1519 1520 if (dump_file) 1521 { 1522 fprintf (dump_file, "Deleting merged def extension:\n"); 1523 print_rtl_single (dump_file, def_se); 1524 } 1525 1526 if (INSN_DELETED_P (def_se)) 1527 /* This def extension is an implicit one. No need to delete it since 1528 it is not in the insn stream. */ 1529 return 1; 1530 1531 delete_insn (def_se); 1532 return 1; 1533} 1534 1535 1536/* Delete the unmerged def extension. 1537 1538 This is a subroutine of see_commit_ref_changes called via htab_traverse. 1539 1540 SLOT contains the current def extension instruction. 1541 B is the see_ref_s structure pointer. */ 1542 1543static int 1544see_delete_unmerged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED) 1545{ 1546 rtx def_se = *slot; 1547 1548 if (dump_file) 1549 { 1550 fprintf (dump_file, "Deleting unmerged def extension:\n"); 1551 print_rtl_single (dump_file, def_se); 1552 } 1553 1554 delete_insn (def_se); 1555 return 1; 1556} 1557 1558 1559/* Emit the non-redundant use extension to the instruction stream. 1560 1561 This is a subroutine of see_commit_ref_changes called via htab_traverse. 1562 1563 SLOT contains the current use extension instruction. 1564 B is the see_ref_s structure pointer. */ 1565 1566static int 1567see_emit_use_extension (void **slot, void *b) 1568{ 1569 rtx use_se = *slot; 1570 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; 1571 1572 if (INSN_DELETED_P (use_se)) 1573 /* This use extension was previously removed according to the lcm 1574 output. */ 1575 return 1; 1576 1577 if (dump_file) 1578 { 1579 fprintf (dump_file, "Inserting use extension:\n"); 1580 print_rtl_single (dump_file, use_se); 1581 } 1582 1583 add_insn_before (use_se, curr_ref_s->insn); 1584 1585 return 1; 1586} 1587 1588 1589/* For each relevant reference: 1590 a. Emit the non-redundant use extensions. 1591 b. Delete the def extensions. 1592 c. Replace the original reference with the merged one (if exists) and add the 1593 move instructions that were generated. 1594 1595 This is a subroutine of see_commit_changes called via splay_tree_foreach. 1596 1597 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a 1598 see_ref_s structure. */ 1599 1600static int 1601see_commit_ref_changes (splay_tree_node stn, 1602 void *data ATTRIBUTE_UNUSED) 1603{ 1604 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash; 1605 htab_t unmerged_def_se_hash = 1606 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash; 1607 htab_t merged_def_se_hash = 1608 ((struct see_ref_s *) (stn->value))->merged_def_se_hash; 1609 rtx ref = ((struct see_ref_s *) (stn->value))->insn; 1610 rtx merged_ref = ((struct see_ref_s *) (stn->value))->merged_insn; 1611 1612 /* Emit the non-redundant use extensions. */ 1613 if (use_se_hash) 1614 htab_traverse_noresize (use_se_hash, see_emit_use_extension, 1615 (PTR) (stn->value)); 1616 1617 /* Delete the def extensions. */ 1618 if (unmerged_def_se_hash) 1619 htab_traverse (unmerged_def_se_hash, see_delete_unmerged_def_extension, 1620 (PTR) (stn->value)); 1621 1622 if (merged_def_se_hash) 1623 htab_traverse (merged_def_se_hash, see_delete_merged_def_extension, 1624 (PTR) (stn->value)); 1625 1626 /* Replace the original reference with the merged one (if exists) and add the 1627 move instructions that were generated. */ 1628 if (merged_ref && !INSN_DELETED_P (ref)) 1629 { 1630 if (dump_file) 1631 { 1632 fprintf (dump_file, "Replacing orig reference:\n"); 1633 print_rtl_single (dump_file, ref); 1634 fprintf (dump_file, "With merged reference:\n"); 1635 print_rtl_single (dump_file, merged_ref); 1636 } 1637 emit_insn_after (merged_ref, ref); 1638 delete_insn (ref); 1639 } 1640 1641 /* Continue to the next reference. */ 1642 return 0; 1643} 1644 1645 1646/* Insert partially redundant expressions on edges to make the expressions fully 1647 redundant. 1648 1649 INDEX_MAP is a mapping of an index to an expression. 1650 Return true if an instruction was inserted on an edge. 1651 Otherwise, return false. */ 1652 1653static bool 1654see_pre_insert_extensions (struct see_pre_extension_expr **index_map) 1655{ 1656 int num_edges = NUM_EDGES (edge_list); 1657 int set_size = pre_insert_map[0]->size; 1658 size_t pre_extension_num = htab_elements (see_pre_extension_hash); 1659 1660 int did_insert = 0; 1661 int e; 1662 int i; 1663 int j; 1664 1665 for (e = 0; e < num_edges; e++) 1666 { 1667 int indx; 1668 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e); 1669 1670 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS) 1671 { 1672 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i]; 1673 1674 for (j = indx; insert && j < (int) pre_extension_num; 1675 j++, insert >>= 1) 1676 if (insert & 1) 1677 { 1678 struct see_pre_extension_expr *expr = index_map[j]; 1679 int idx = expr->bitmap_index; 1680 rtx se_insn = NULL; 1681 edge eg = INDEX_EDGE (edge_list, e); 1682 1683 start_sequence (); 1684 emit_insn (PATTERN (expr->se_insn)); 1685 se_insn = get_insns (); 1686 end_sequence (); 1687 1688 if (eg->flags & EDGE_ABNORMAL) 1689 { 1690 rtx new_insn = NULL; 1691 1692 new_insn = insert_insn_end_bb_new (se_insn, bb); 1693 gcc_assert (new_insn && INSN_P (new_insn)); 1694 1695 if (dump_file) 1696 { 1697 fprintf (dump_file, 1698 "PRE: end of bb %d, insn %d, ", 1699 bb->index, INSN_UID (new_insn)); 1700 fprintf (dump_file, 1701 "inserting expression %d\n", idx); 1702 } 1703 } 1704 else 1705 { 1706 insert_insn_on_edge (se_insn, eg); 1707 1708 if (dump_file) 1709 { 1710 fprintf (dump_file, "PRE: edge (%d,%d), ", 1711 bb->index, 1712 INDEX_EDGE_SUCC_BB (edge_list, e)->index); 1713 fprintf (dump_file, "inserting expression %d\n", idx); 1714 } 1715 } 1716 did_insert = true; 1717 } 1718 } 1719 } 1720 return did_insert; 1721} 1722 1723 1724/* Since all the redundant extensions must be anticipatable, they must be a use 1725 extensions. Mark them as deleted. This will prevent them from been emitted 1726 in the first place. 1727 1728 This is a subroutine of see_commit_changes called via htab_traverse. 1729 1730 SLOT contains the current see_pre_extension_expr structure pointer. */ 1731 1732static int 1733see_pre_delete_extension (void **slot, void *b ATTRIBUTE_UNUSED) 1734{ 1735 struct see_pre_extension_expr *expr = *slot; 1736 struct see_occr *occr; 1737 int indx = expr->bitmap_index; 1738 1739 for (occr = expr->antic_occr; occr != NULL; occr = occr->next) 1740 { 1741 if (TEST_BIT (pre_delete_map[occr->block_num], indx)) 1742 { 1743 /* Mark as deleted. */ 1744 INSN_DELETED_P (occr->insn) = 1; 1745 if (dump_file) 1746 { 1747 fprintf (dump_file,"Redundant extension deleted:\n"); 1748 print_rtl_single (dump_file, occr->insn); 1749 } 1750 } 1751 } 1752 return 1; 1753} 1754 1755 1756/* Create the index_map mapping of an index to an expression. 1757 1758 This is a subroutine of see_commit_changes called via htab_traverse. 1759 1760 SLOT contains the current see_pre_extension_expr structure pointer. 1761 B a pointer to see_pre_extension_expr structure pointer. */ 1762 1763static int 1764see_map_extension (void **slot, void *b) 1765{ 1766 struct see_pre_extension_expr *expr = *slot; 1767 struct see_pre_extension_expr **index_map = 1768 (struct see_pre_extension_expr **) b; 1769 1770 index_map[expr->bitmap_index] = expr; 1771 1772 return 1; 1773} 1774 1775 1776/* Phase 4 top level function. 1777 In this phase we finally change the instruction stream. 1778 Here we insert extensions at their best placements and delete the 1779 redundant ones according to the output of the LCM. We also replace 1780 some of the instructions according to phase 2 merges results. */ 1781 1782static void 1783see_commit_changes (void) 1784{ 1785 struct see_pre_extension_expr **index_map; 1786 size_t pre_extension_num = htab_elements (see_pre_extension_hash); 1787 bool did_insert = false; 1788 int i; 1789 1790 index_map = xcalloc (pre_extension_num, 1791 sizeof (struct see_pre_extension_expr *)); 1792 1793 if (dump_file) 1794 fprintf (dump_file, 1795 "* Phase 4: Commit changes to the insn stream. *\n"); 1796 1797 /* Produce a mapping of all the pre_extensions. */ 1798 htab_traverse (see_pre_extension_hash, see_map_extension, (PTR) index_map); 1799 1800 /* Delete redundant extension. This will prevent them from been emitted in 1801 the first place. */ 1802 htab_traverse (see_pre_extension_hash, see_pre_delete_extension, NULL); 1803 1804 /* At this point, we must free the DF object, since the number of basic blocks 1805 may change. */ 1806 df_finish (df); 1807 df = NULL; 1808 1809 /* Insert extensions on edges, according to the LCM result. */ 1810 did_insert = see_pre_insert_extensions (index_map); 1811 1812 if (did_insert) 1813 commit_edge_insertions (); 1814 1815 /* Commit the rest of the changes. */ 1816 for (i = 0; i < last_bb; i++) 1817 { 1818 if (see_bb_splay_ar[i]) 1819 { 1820 /* Traverse over all the references in the basic block in forward 1821 order. */ 1822 splay_tree_foreach (see_bb_splay_ar[i], 1823 see_commit_ref_changes, NULL); 1824 } 1825 } 1826 1827 free (index_map); 1828} 1829 1830 1831/* Phase 3 implementation: Eliminate globally redundant extensions. */ 1832 1833/* Analyze the properties of a merged def extension for the LCM and record avail 1834 occurrences. 1835 1836 This is a subroutine of see_analyze_ref_local_prop called 1837 via htab_traverse. 1838 1839 SLOT contains the current def extension instruction. 1840 B is the see_ref_s structure pointer. */ 1841 1842static int 1843see_analyze_merged_def_local_prop (void **slot, void *b) 1844{ 1845 rtx def_se = *slot; 1846 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; 1847 rtx ref = curr_ref_s->insn; 1848 struct see_pre_extension_expr *extension_expr; 1849 int indx; 1850 int bb_num = BLOCK_NUM (ref); 1851 htab_t curr_bb_hash; 1852 struct see_register_properties *curr_prop, **slot_prop; 1853 struct see_register_properties temp_prop; 1854 rtx dest_extension_reg = see_get_extension_reg (def_se, 1); 1855 struct see_occr *curr_occr = NULL; 1856 struct see_occr *tmp_occr = NULL; 1857 1858 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION); 1859 /* The extension_expr must be found. */ 1860 gcc_assert (extension_expr); 1861 1862 curr_bb_hash = see_bb_hash_ar[bb_num]; 1863 gcc_assert (curr_bb_hash); 1864 temp_prop.regno = REGNO (dest_extension_reg); 1865 slot_prop = 1866 (struct see_register_properties **) htab_find_slot (curr_bb_hash, 1867 &temp_prop, INSERT); 1868 curr_prop = *slot_prop; 1869 gcc_assert (curr_prop); 1870 1871 indx = extension_expr->bitmap_index; 1872 1873 /* Reset the transparency bit. */ 1874 RESET_BIT (transp[bb_num], indx); 1875 /* Reset the killed bit. */ 1876 RESET_BIT (ae_kill[bb_num], indx); 1877 1878 if (curr_prop->first_se_after_last_def == DF_INSN_LUID (df, ref)) 1879 { 1880 /* Set the available bit. */ 1881 SET_BIT (comp[bb_num], indx); 1882 /* Record the available occurrence. */ 1883 curr_occr = xmalloc (sizeof (struct see_occr)); 1884 curr_occr->next = NULL; 1885 curr_occr->insn = def_se; 1886 curr_occr->block_num = bb_num; 1887 tmp_occr = extension_expr->avail_occr; 1888 if (!tmp_occr) 1889 extension_expr->avail_occr = curr_occr; 1890 else 1891 { 1892 while (tmp_occr->next) 1893 tmp_occr = tmp_occr->next; 1894 tmp_occr->next = curr_occr; 1895 } 1896 } 1897 1898 return 1; 1899} 1900 1901 1902/* Analyze the properties of a unmerged def extension for the LCM. 1903 1904 This is a subroutine of see_analyze_ref_local_prop called 1905 via htab_traverse. 1906 1907 SLOT contains the current def extension instruction. 1908 B is the see_ref_s structure pointer. */ 1909 1910static int 1911see_analyze_unmerged_def_local_prop (void **slot, void *b) 1912{ 1913 rtx def_se = *slot; 1914 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; 1915 rtx ref = curr_ref_s->insn; 1916 struct see_pre_extension_expr *extension_expr; 1917 int indx; 1918 int bb_num = BLOCK_NUM (ref); 1919 htab_t curr_bb_hash; 1920 struct see_register_properties *curr_prop, **slot_prop; 1921 struct see_register_properties temp_prop; 1922 rtx dest_extension_reg = see_get_extension_reg (def_se, 1); 1923 1924 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION); 1925 /* The extension_expr must be found. */ 1926 gcc_assert (extension_expr); 1927 1928 curr_bb_hash = see_bb_hash_ar[bb_num]; 1929 gcc_assert (curr_bb_hash); 1930 temp_prop.regno = REGNO (dest_extension_reg); 1931 slot_prop = 1932 (struct see_register_properties **) htab_find_slot (curr_bb_hash, 1933 &temp_prop, INSERT); 1934 curr_prop = *slot_prop; 1935 gcc_assert (curr_prop); 1936 1937 indx = extension_expr->bitmap_index; 1938 1939 /* Reset the transparency bit. */ 1940 RESET_BIT (transp[bb_num], indx); 1941 /* Set the killed bit. */ 1942 SET_BIT (ae_kill[bb_num], indx); 1943 1944 return 1; 1945} 1946 1947 1948/* Analyze the properties of a use extension for the LCM and record anic and 1949 avail occurrences. 1950 1951 This is a subroutine of see_analyze_ref_local_prop called 1952 via htab_traverse. 1953 1954 SLOT contains the current use extension instruction. 1955 B is the see_ref_s structure pointer. */ 1956 1957static int 1958see_analyze_use_local_prop (void **slot, void *b) 1959{ 1960 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; 1961 rtx use_se = *slot; 1962 rtx ref = curr_ref_s->insn; 1963 rtx dest_extension_reg = see_get_extension_reg (use_se, 1); 1964 struct see_pre_extension_expr *extension_expr; 1965 struct see_register_properties *curr_prop, **slot_prop; 1966 struct see_register_properties temp_prop; 1967 struct see_occr *curr_occr = NULL; 1968 struct see_occr *tmp_occr = NULL; 1969 htab_t curr_bb_hash; 1970 int indx; 1971 int bb_num = BLOCK_NUM (ref); 1972 1973 extension_expr = see_seek_pre_extension_expr (use_se, USE_EXTENSION); 1974 /* The extension_expr must be found. */ 1975 gcc_assert (extension_expr); 1976 1977 curr_bb_hash = see_bb_hash_ar[bb_num]; 1978 gcc_assert (curr_bb_hash); 1979 temp_prop.regno = REGNO (dest_extension_reg); 1980 slot_prop = 1981 (struct see_register_properties **) htab_find_slot (curr_bb_hash, 1982 &temp_prop, INSERT); 1983 curr_prop = *slot_prop; 1984 gcc_assert (curr_prop); 1985 1986 indx = extension_expr->bitmap_index; 1987 1988 if (curr_prop->first_se_before_any_def == DF_INSN_LUID (df, ref)) 1989 { 1990 /* Set the anticipatable bit. */ 1991 SET_BIT (antloc[bb_num], indx); 1992 /* Record the anticipatable occurrence. */ 1993 curr_occr = xmalloc (sizeof (struct see_occr)); 1994 curr_occr->next = NULL; 1995 curr_occr->insn = use_se; 1996 curr_occr->block_num = bb_num; 1997 tmp_occr = extension_expr->antic_occr; 1998 if (!tmp_occr) 1999 extension_expr->antic_occr = curr_occr; 2000 else 2001 { 2002 while (tmp_occr->next) 2003 tmp_occr = tmp_occr->next; 2004 tmp_occr->next = curr_occr; 2005 } 2006 if (curr_prop->last_def < 0) 2007 { 2008 /* Set the available bit. */ 2009 SET_BIT (comp[bb_num], indx); 2010 /* Record the available occurrence. */ 2011 curr_occr = xmalloc (sizeof (struct see_occr)); 2012 curr_occr->next = NULL; 2013 curr_occr->insn = use_se; 2014 curr_occr->block_num = bb_num; 2015 tmp_occr = extension_expr->avail_occr; 2016 if (!tmp_occr) 2017 extension_expr->avail_occr = curr_occr; 2018 else 2019 { 2020 while (tmp_occr->next) 2021 tmp_occr = tmp_occr->next; 2022 tmp_occr->next = curr_occr; 2023 } 2024 } 2025 /* Note: there is no need to reset the killed bit since it must be zero at 2026 this point. */ 2027 } 2028 else if (curr_prop->first_se_after_last_def == DF_INSN_LUID (df, ref)) 2029 { 2030 /* Set the available bit. */ 2031 SET_BIT (comp[bb_num], indx); 2032 /* Reset the killed bit. */ 2033 RESET_BIT (ae_kill[bb_num], indx); 2034 /* Record the available occurrence. */ 2035 curr_occr = xmalloc (sizeof (struct see_occr)); 2036 curr_occr->next = NULL; 2037 curr_occr->insn = use_se; 2038 curr_occr->block_num = bb_num; 2039 tmp_occr = extension_expr->avail_occr; 2040 if (!tmp_occr) 2041 extension_expr->avail_occr = curr_occr; 2042 else 2043 { 2044 while (tmp_occr->next) 2045 tmp_occr = tmp_occr->next; 2046 tmp_occr->next = curr_occr; 2047 } 2048 } 2049 return 1; 2050} 2051 2052 2053/* Here we traverse over all the merged and unmerged extensions of the reference 2054 and analyze their properties for the LCM. 2055 2056 This is a subroutine of see_execute_LCM called via splay_tree_foreach. 2057 2058 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a 2059 see_ref_s structure. */ 2060 2061static int 2062see_analyze_ref_local_prop (splay_tree_node stn, 2063 void *data ATTRIBUTE_UNUSED) 2064{ 2065 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash; 2066 htab_t unmerged_def_se_hash = 2067 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash; 2068 htab_t merged_def_se_hash = 2069 ((struct see_ref_s *) (stn->value))->merged_def_se_hash; 2070 2071 /* Analyze use extensions that were not merged with the reference. */ 2072 if (use_se_hash) 2073 htab_traverse_noresize (use_se_hash, see_analyze_use_local_prop, 2074 (PTR) (stn->value)); 2075 2076 /* Analyze def extensions that were not merged with the reference. */ 2077 if (unmerged_def_se_hash) 2078 htab_traverse (unmerged_def_se_hash, see_analyze_unmerged_def_local_prop, 2079 (PTR) (stn->value)); 2080 2081 /* Analyze def extensions that were merged with the reference. */ 2082 if (merged_def_se_hash) 2083 htab_traverse (merged_def_se_hash, see_analyze_merged_def_local_prop, 2084 (PTR) (stn->value)); 2085 2086 /* Continue to the next definition. */ 2087 return 0; 2088} 2089 2090 2091/* Phase 3 top level function. 2092 In this phase, we set the input bit vectors of the LCM according to data 2093 gathered in phase 2. 2094 Then we run the edge based LCM. */ 2095 2096static void 2097see_execute_LCM (void) 2098{ 2099 size_t pre_extension_num = htab_elements (see_pre_extension_hash); 2100 int i = 0; 2101 2102 if (dump_file) 2103 fprintf (dump_file, 2104 "* Phase 3: Eliminate globally redundant extensions. *\n"); 2105 2106 /* Initialize the global sbitmap vectors. */ 2107 transp = sbitmap_vector_alloc (last_bb, pre_extension_num); 2108 comp = sbitmap_vector_alloc (last_bb, pre_extension_num); 2109 antloc = sbitmap_vector_alloc (last_bb, pre_extension_num); 2110 ae_kill = sbitmap_vector_alloc (last_bb, pre_extension_num); 2111 sbitmap_vector_ones (transp, last_bb); 2112 sbitmap_vector_zero (comp, last_bb); 2113 sbitmap_vector_zero (antloc, last_bb); 2114 sbitmap_vector_zero (ae_kill, last_bb); 2115 2116 /* Traverse over all the splay trees of the basic blocks. */ 2117 for (i = 0; i < last_bb; i++) 2118 { 2119 if (see_bb_splay_ar[i]) 2120 { 2121 /* Traverse over all the references in the basic block in forward 2122 order. */ 2123 splay_tree_foreach (see_bb_splay_ar[i], 2124 see_analyze_ref_local_prop, NULL); 2125 } 2126 } 2127 2128 /* Add fake exit edges before running the lcm. */ 2129 add_noreturn_fake_exit_edges (); 2130 2131 /* Run the LCM. */ 2132 edge_list = pre_edge_lcm (pre_extension_num, transp, comp, antloc, 2133 ae_kill, &pre_insert_map, &pre_delete_map); 2134 2135 /* Remove the fake edges. */ 2136 remove_fake_exit_edges (); 2137} 2138 2139 2140/* Phase 2 implementation: Merge and eliminate locally redundant extensions. */ 2141 2142/* In this function we set the register properties for the register that is 2143 defined and extended in the reference. 2144 The properties are defined in see_register_properties structure which is 2145 allocated per basic block and per register. 2146 Later the extension is inserted into the see_pre_extension_hash for the next 2147 phase of the optimization. 2148 2149 This is a subroutine of see_handle_extensions_for_one_ref called 2150 via htab_traverse. 2151 2152 SLOT contains the current def extension instruction. 2153 B is the see_ref_s structure pointer. */ 2154 2155static int 2156see_set_prop_merged_def (void **slot, void *b) 2157{ 2158 rtx def_se = *slot; 2159 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; 2160 rtx insn = curr_ref_s->insn; 2161 rtx dest_extension_reg = see_get_extension_reg (def_se, 1); 2162 htab_t curr_bb_hash; 2163 struct see_register_properties *curr_prop = NULL; 2164 struct see_register_properties **slot_prop; 2165 struct see_register_properties temp_prop; 2166 int ref_luid = DF_INSN_LUID (df, insn); 2167 2168 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)]; 2169 if (!curr_bb_hash) 2170 { 2171 /* The hash doesn't exist yet. Create it. */ 2172 curr_bb_hash = htab_create (10, 2173 hash_descriptor_properties, 2174 eq_descriptor_properties, 2175 hash_del_properties); 2176 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash; 2177 } 2178 2179 /* Find the right register properties in the right basic block. */ 2180 temp_prop.regno = REGNO (dest_extension_reg); 2181 slot_prop = 2182 (struct see_register_properties **) htab_find_slot (curr_bb_hash, 2183 &temp_prop, INSERT); 2184 2185 if (slot_prop && *slot_prop != NULL) 2186 { 2187 /* Property already exists. */ 2188 curr_prop = *slot_prop; 2189 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg)); 2190 2191 curr_prop->last_def = ref_luid; 2192 curr_prop->first_se_after_last_def = ref_luid; 2193 } 2194 else 2195 { 2196 /* Property doesn't exist yet. */ 2197 curr_prop = xmalloc (sizeof (struct see_register_properties)); 2198 curr_prop->regno = REGNO (dest_extension_reg); 2199 curr_prop->last_def = ref_luid; 2200 curr_prop->first_se_before_any_def = -1; 2201 curr_prop->first_se_after_last_def = ref_luid; 2202 *slot_prop = curr_prop; 2203 } 2204 2205 /* Insert the def_se into see_pre_extension_hash if it isn't already 2206 there. */ 2207 see_seek_pre_extension_expr (def_se, DEF_EXTENSION); 2208 2209 return 1; 2210} 2211 2212 2213/* In this function we set the register properties for the register that is 2214 defined but not extended in the reference. 2215 The properties are defined in see_register_properties structure which is 2216 allocated per basic block and per register. 2217 Later the extension is inserted into the see_pre_extension_hash for the next 2218 phase of the optimization. 2219 2220 This is a subroutine of see_handle_extensions_for_one_ref called 2221 via htab_traverse. 2222 2223 SLOT contains the current def extension instruction. 2224 B is the see_ref_s structure pointer. */ 2225 2226static int 2227see_set_prop_unmerged_def (void **slot, void *b) 2228{ 2229 rtx def_se = *slot; 2230 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; 2231 rtx insn = curr_ref_s->insn; 2232 rtx dest_extension_reg = see_get_extension_reg (def_se, 1); 2233 htab_t curr_bb_hash; 2234 struct see_register_properties *curr_prop = NULL; 2235 struct see_register_properties **slot_prop; 2236 struct see_register_properties temp_prop; 2237 int ref_luid = DF_INSN_LUID (df, insn); 2238 2239 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)]; 2240 if (!curr_bb_hash) 2241 { 2242 /* The hash doesn't exist yet. Create it. */ 2243 curr_bb_hash = htab_create (10, 2244 hash_descriptor_properties, 2245 eq_descriptor_properties, 2246 hash_del_properties); 2247 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash; 2248 } 2249 2250 /* Find the right register properties in the right basic block. */ 2251 temp_prop.regno = REGNO (dest_extension_reg); 2252 slot_prop = 2253 (struct see_register_properties **) htab_find_slot (curr_bb_hash, 2254 &temp_prop, INSERT); 2255 2256 if (slot_prop && *slot_prop != NULL) 2257 { 2258 /* Property already exists. */ 2259 curr_prop = *slot_prop; 2260 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg)); 2261 2262 curr_prop->last_def = ref_luid; 2263 curr_prop->first_se_after_last_def = -1; 2264 } 2265 else 2266 { 2267 /* Property doesn't exist yet. */ 2268 curr_prop = xmalloc (sizeof (struct see_register_properties)); 2269 curr_prop->regno = REGNO (dest_extension_reg); 2270 curr_prop->last_def = ref_luid; 2271 curr_prop->first_se_before_any_def = -1; 2272 curr_prop->first_se_after_last_def = -1; 2273 *slot_prop = curr_prop; 2274 } 2275 2276 /* Insert the def_se into see_pre_extension_hash if it isn't already 2277 there. */ 2278 see_seek_pre_extension_expr (def_se, DEF_EXTENSION); 2279 2280 return 1; 2281} 2282 2283 2284/* In this function we set the register properties for the register that is used 2285 in the reference. 2286 The properties are defined in see_register_properties structure which is 2287 allocated per basic block and per register. 2288 When a redundant use extension is found it is removed from the hash of the 2289 reference. 2290 If the extension is non redundant it is inserted into the 2291 see_pre_extension_hash for the next phase of the optimization. 2292 2293 This is a subroutine of see_handle_extensions_for_one_ref called 2294 via htab_traverse. 2295 2296 SLOT contains the current use extension instruction. 2297 B is the see_ref_s structure pointer. */ 2298 2299static int 2300see_set_prop_unmerged_use (void **slot, void *b) 2301{ 2302 rtx use_se = *slot; 2303 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; 2304 rtx insn = curr_ref_s->insn; 2305 rtx dest_extension_reg = see_get_extension_reg (use_se, 1); 2306 htab_t curr_bb_hash; 2307 struct see_register_properties *curr_prop = NULL; 2308 struct see_register_properties **slot_prop; 2309 struct see_register_properties temp_prop; 2310 bool locally_redundant = false; 2311 int ref_luid = DF_INSN_LUID (df, insn); 2312 2313 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)]; 2314 if (!curr_bb_hash) 2315 { 2316 /* The hash doesn't exist yet. Create it. */ 2317 curr_bb_hash = htab_create (10, 2318 hash_descriptor_properties, 2319 eq_descriptor_properties, 2320 hash_del_properties); 2321 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash; 2322 } 2323 2324 /* Find the right register properties in the right basic block. */ 2325 temp_prop.regno = REGNO (dest_extension_reg); 2326 slot_prop = 2327 (struct see_register_properties **) htab_find_slot (curr_bb_hash, 2328 &temp_prop, INSERT); 2329 2330 if (slot_prop && *slot_prop != NULL) 2331 { 2332 /* Property already exists. */ 2333 curr_prop = *slot_prop; 2334 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg)); 2335 2336 2337 if (curr_prop->last_def < 0 && curr_prop->first_se_before_any_def < 0) 2338 curr_prop->first_se_before_any_def = ref_luid; 2339 else if (curr_prop->last_def < 0 2340 && curr_prop->first_se_before_any_def >= 0) 2341 { 2342 /* In this case the extension is locally redundant. */ 2343 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot); 2344 locally_redundant = true; 2345 } 2346 else if (curr_prop->last_def >= 0 2347 && curr_prop->first_se_after_last_def < 0) 2348 curr_prop->first_se_after_last_def = ref_luid; 2349 else if (curr_prop->last_def >= 0 2350 && curr_prop->first_se_after_last_def >= 0) 2351 { 2352 /* In this case the extension is locally redundant. */ 2353 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot); 2354 locally_redundant = true; 2355 } 2356 else 2357 gcc_unreachable (); 2358 } 2359 else 2360 { 2361 /* Property doesn't exist yet. Create a new one. */ 2362 curr_prop = xmalloc (sizeof (struct see_register_properties)); 2363 curr_prop->regno = REGNO (dest_extension_reg); 2364 curr_prop->last_def = -1; 2365 curr_prop->first_se_before_any_def = ref_luid; 2366 curr_prop->first_se_after_last_def = -1; 2367 *slot_prop = curr_prop; 2368 } 2369 2370 /* Insert the use_se into see_pre_extension_hash if it isn't already 2371 there. */ 2372 if (!locally_redundant) 2373 see_seek_pre_extension_expr (use_se, USE_EXTENSION); 2374 if (locally_redundant && dump_file) 2375 { 2376 fprintf (dump_file, "Locally redundant extension:\n"); 2377 print_rtl_single (dump_file, use_se); 2378 } 2379 return 1; 2380} 2381 2382 2383/* Print an extension instruction. 2384 2385 This is a subroutine of see_handle_extensions_for_one_ref called 2386 via htab_traverse. 2387 SLOT contains the extension instruction. */ 2388 2389static int 2390see_print_one_extension (void **slot, void *b ATTRIBUTE_UNUSED) 2391{ 2392 rtx def_se = *slot; 2393 2394 gcc_assert (def_se && INSN_P (def_se)); 2395 print_rtl_single (dump_file, def_se); 2396 2397 return 1; 2398} 2399 2400/* Function called by note_uses to replace used subexpressions. 2401 2402 X is a pointer to the subexpression and DATA is a pointer to a 2403 see_replace_data structure that contains the data for the replacement. */ 2404 2405static void 2406see_replace_src (rtx *x, void *data) 2407{ 2408 struct see_replace_data *d 2409 = (struct see_replace_data *) data; 2410 2411 *x = replace_rtx (*x, d->from, d->to); 2412} 2413 2414 2415/* At this point the pattern is expected to be: 2416 2417 ref: set (dest_reg) (rhs) 2418 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg)) 2419 2420 The merge of these two instructions didn't succeed. 2421 2422 We try to generate the pattern: 2423 set (subreg (dest_extension_reg)) (rhs) 2424 2425 We do this in 4 steps: 2426 a. Replace every use of dest_reg with a new pseudo register. 2427 b. Replace every instance of dest_reg with the subreg. 2428 c. Replace every use of the new pseudo register back to dest_reg. 2429 d. Try to recognize and simplify. 2430 2431 If the manipulation failed, leave the original ref but try to generate and 2432 recognize a simple move instruction: 2433 set (subreg (dest_extension_reg)) (dest_reg) 2434 This move instruction will be emitted right after the ref to the instruction 2435 stream and assure the correctness of the code after def_se will be removed. 2436 2437 CURR_REF_S is the current reference. 2438 DEF_SE is the extension that couldn't be merged. */ 2439 2440static void 2441see_def_extension_not_merged (struct see_ref_s *curr_ref_s, rtx def_se) 2442{ 2443 struct see_replace_data d; 2444 /* If the original insn was already merged with an extension before, 2445 take the merged one. */ 2446 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn : 2447 curr_ref_s->insn; 2448 rtx merged_ref_next = (curr_ref_s->merged_insn) ? 2449 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX; 2450 rtx ref_copy = copy_rtx (ref); 2451 rtx source_extension_reg = see_get_extension_reg (def_se, 0); 2452 rtx dest_extension_reg = see_get_extension_reg (def_se, 1); 2453 rtx move_insn = NULL; 2454 rtx set, rhs; 2455 rtx dest_reg, dest_real_reg; 2456 rtx new_pseudo_reg, subreg; 2457 enum machine_mode source_extension_mode = GET_MODE (source_extension_reg); 2458 enum machine_mode dest_mode; 2459 2460 set = single_set (def_se); 2461 gcc_assert (set); 2462 rhs = SET_SRC (set); 2463 gcc_assert (GET_CODE (rhs) == SIGN_EXTEND 2464 || GET_CODE (rhs) == ZERO_EXTEND); 2465 dest_reg = XEXP (rhs, 0); 2466 gcc_assert (REG_P (dest_reg) 2467 || (GET_CODE (dest_reg) == SUBREG 2468 && REG_P (SUBREG_REG (dest_reg)))); 2469 dest_real_reg = REG_P (dest_reg) ? dest_reg : SUBREG_REG (dest_reg); 2470 dest_mode = GET_MODE (dest_reg); 2471 2472 subreg = gen_lowpart_SUBREG (dest_mode, dest_extension_reg); 2473 new_pseudo_reg = gen_reg_rtx (source_extension_mode); 2474 2475 /* Step a: Replace every use of dest_real_reg with a new pseudo register. */ 2476 d.from = dest_real_reg; 2477 d.to = new_pseudo_reg; 2478 note_uses (&PATTERN (ref_copy), see_replace_src, &d); 2479 /* Step b: Replace every instance of dest_reg with the subreg. */ 2480 ref_copy = replace_rtx (ref_copy, dest_reg, subreg); 2481 2482 /* Step c: Replace every use of the new pseudo register back to 2483 dest_real_reg. */ 2484 d.from = new_pseudo_reg; 2485 d.to = dest_real_reg; 2486 note_uses (&PATTERN (ref_copy), see_replace_src, &d); 2487 2488 if (rtx_equal_p (PATTERN (ref), PATTERN (ref_copy)) 2489 || insn_invalid_p (ref_copy)) 2490 { 2491 /* The manipulation failed. */ 2492 2493 /* Create a new copy. */ 2494 ref_copy = copy_rtx (ref); 2495 2496 /* Create a simple move instruction that will replace the def_se. */ 2497 start_sequence (); 2498 emit_move_insn (subreg, dest_reg); 2499 move_insn = get_insns (); 2500 end_sequence (); 2501 2502 /* Link the manipulated instruction to the newly created move instruction 2503 and to the former created move instructions. */ 2504 PREV_INSN (ref_copy) = NULL_RTX; 2505 NEXT_INSN (ref_copy) = move_insn; 2506 PREV_INSN (move_insn) = ref_copy; 2507 NEXT_INSN (move_insn) = merged_ref_next; 2508 if (merged_ref_next != NULL_RTX) 2509 PREV_INSN (merged_ref_next) = move_insn; 2510 curr_ref_s->merged_insn = ref_copy; 2511 2512 if (dump_file) 2513 { 2514 fprintf (dump_file, "Following def merge failure a move "); 2515 fprintf (dump_file, "insn was added after the ref.\n"); 2516 fprintf (dump_file, "Original ref:\n"); 2517 print_rtl_single (dump_file, ref); 2518 fprintf (dump_file, "Move insn that was added:\n"); 2519 print_rtl_single (dump_file, move_insn); 2520 } 2521 return; 2522 } 2523 2524 /* The manipulation succeeded. Store the new manipulated reference. */ 2525 2526 /* Try to simplify the new manipulated insn. */ 2527 validate_simplify_insn (ref_copy); 2528 2529 /* Create a simple move instruction to assure the correctness of the code. */ 2530 start_sequence (); 2531 emit_move_insn (dest_reg, subreg); 2532 move_insn = get_insns (); 2533 end_sequence (); 2534 2535 /* Link the manipulated instruction to the newly created move instruction and 2536 to the former created move instructions. */ 2537 PREV_INSN (ref_copy) = NULL_RTX; 2538 NEXT_INSN (ref_copy) = move_insn; 2539 PREV_INSN (move_insn) = ref_copy; 2540 NEXT_INSN (move_insn) = merged_ref_next; 2541 if (merged_ref_next != NULL_RTX) 2542 PREV_INSN (merged_ref_next) = move_insn; 2543 curr_ref_s->merged_insn = ref_copy; 2544 2545 if (dump_file) 2546 { 2547 fprintf (dump_file, "Following merge failure the ref was transformed!\n"); 2548 fprintf (dump_file, "Original ref:\n"); 2549 print_rtl_single (dump_file, ref); 2550 fprintf (dump_file, "Transformed ref:\n"); 2551 print_rtl_single (dump_file, ref_copy); 2552 fprintf (dump_file, "Move insn that was added:\n"); 2553 print_rtl_single (dump_file, move_insn); 2554 } 2555} 2556 2557 2558/* Merge the reference instruction (ref) with the current use extension. 2559 2560 use_se extends a NARROWmode register to a WIDEmode register. 2561 ref uses the WIDEmode register. 2562 2563 The pattern we try to merge is this: 2564 use_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg)) 2565 ref: use (dest_extension_reg) 2566 2567 where dest_extension_reg and source_extension_reg can be subregs. 2568 2569 The merge is done by generating, simplifying and recognizing the pattern: 2570 use (sign/zero_extend (source_extension_reg)) 2571 2572 If ref is too simple (according to see_want_to_be_merged_with_extension ()) 2573 we don't try to merge it with use_se and we continue as if the merge failed. 2574 2575 This is a subroutine of see_handle_extensions_for_one_ref called 2576 via htab_traverse. 2577 SLOT contains the current use extension instruction. 2578 B is the see_ref_s structure pointer. */ 2579 2580static int 2581see_merge_one_use_extension (void **slot, void *b) 2582{ 2583 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; 2584 rtx use_se = *slot; 2585 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn : 2586 curr_ref_s->insn; 2587 rtx merged_ref_next = (curr_ref_s->merged_insn) ? 2588 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX; 2589 rtx ref_copy = copy_rtx (ref); 2590 rtx extension_set = single_set (use_se); 2591 rtx extension_rhs = NULL; 2592 rtx dest_extension_reg = see_get_extension_reg (use_se, 1); 2593 rtx note = NULL; 2594 rtx simplified_note = NULL; 2595 2596 gcc_assert (use_se && curr_ref_s && extension_set); 2597 2598 extension_rhs = SET_SRC (extension_set); 2599 2600 /* In REG_EQUIV and REG_EQUAL notes that mention the register we need to 2601 replace the uses of the dest_extension_reg with the rhs of the extension 2602 instruction. This is necessary since there might not be an extension in 2603 the path between the definition and the note when this optimization is 2604 over. */ 2605 note = find_reg_equal_equiv_note (ref_copy); 2606 if (note) 2607 { 2608 simplified_note = simplify_replace_rtx (XEXP (note, 0), 2609 dest_extension_reg, 2610 extension_rhs); 2611 if (rtx_equal_p (XEXP (note, 0), simplified_note)) 2612 /* Replacement failed. Remove the note. */ 2613 remove_note (ref_copy, note); 2614 else 2615 XEXP (note, 0) = simplified_note; 2616 } 2617 2618 if (!see_want_to_be_merged_with_extension (ref, use_se, USE_EXTENSION)) 2619 { 2620 /* The use in the reference is too simple. Don't try to merge. */ 2621 if (dump_file) 2622 { 2623 fprintf (dump_file, "Use merge skipped!\n"); 2624 fprintf (dump_file, "Original instructions:\n"); 2625 print_rtl_single (dump_file, use_se); 2626 print_rtl_single (dump_file, ref); 2627 } 2628 /* Don't remove the current use_se from the use_se_hash and continue to 2629 the next extension. */ 2630 return 1; 2631 } 2632 2633 validate_replace_src_group (dest_extension_reg, extension_rhs, ref_copy); 2634 2635 if (!num_changes_pending ()) 2636 /* In this case this is not a real use (the only use is/was in the notes 2637 list). Remove the use extension from the hash. This will prevent it 2638 from been emitted in the first place. */ 2639 { 2640 if (dump_file) 2641 { 2642 fprintf (dump_file, "Use extension not necessary before:\n"); 2643 print_rtl_single (dump_file, ref); 2644 } 2645 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot); 2646 PREV_INSN (ref_copy) = NULL_RTX; 2647 NEXT_INSN (ref_copy) = merged_ref_next; 2648 if (merged_ref_next != NULL_RTX) 2649 PREV_INSN (merged_ref_next) = ref_copy; 2650 curr_ref_s->merged_insn = ref_copy; 2651 return 1; 2652 } 2653 2654 if (!apply_change_group ()) 2655 { 2656 /* The merge failed. */ 2657 if (dump_file) 2658 { 2659 fprintf (dump_file, "Use merge failed!\n"); 2660 fprintf (dump_file, "Original instructions:\n"); 2661 print_rtl_single (dump_file, use_se); 2662 print_rtl_single (dump_file, ref); 2663 } 2664 /* Don't remove the current use_se from the use_se_hash and continue to 2665 the next extension. */ 2666 return 1; 2667 } 2668 2669 /* The merge succeeded! */ 2670 2671 /* Try to simplify the new merged insn. */ 2672 validate_simplify_insn (ref_copy); 2673 2674 PREV_INSN (ref_copy) = NULL_RTX; 2675 NEXT_INSN (ref_copy) = merged_ref_next; 2676 if (merged_ref_next != NULL_RTX) 2677 PREV_INSN (merged_ref_next) = ref_copy; 2678 curr_ref_s->merged_insn = ref_copy; 2679 2680 if (dump_file) 2681 { 2682 fprintf (dump_file, "Use merge succeeded!\n"); 2683 fprintf (dump_file, "Original instructions:\n"); 2684 print_rtl_single (dump_file, use_se); 2685 print_rtl_single (dump_file, ref); 2686 fprintf (dump_file, "Merged instruction:\n"); 2687 print_rtl_single (dump_file, ref_copy); 2688 } 2689 2690 /* Remove the current use_se from the use_se_hash. This will prevent it from 2691 been emitted in the first place. */ 2692 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot); 2693 return 1; 2694} 2695 2696 2697/* Merge the reference instruction (ref) with the extension that follows it 2698 in the same basic block (def_se). 2699 ref sets a NARROWmode register and def_se extends it to WIDEmode register. 2700 2701 The pattern we try to merge is this: 2702 ref: set (dest_reg) (rhs) 2703 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg)) 2704 2705 where dest_reg and source_extension_reg can both be subregs (together) 2706 and (REGNO (dest_reg) == REGNO (source_extension_reg)) 2707 2708 The merge is done by generating, simplifying and recognizing the pattern: 2709 set (dest_extension_reg) (sign/zero_extend (rhs)) 2710 If ref is a parallel instruction we just replace the relevant set in it. 2711 2712 If ref is too simple (according to see_want_to_be_merged_with_extension ()) 2713 we don't try to merge it with def_se and we continue as if the merge failed. 2714 2715 This is a subroutine of see_handle_extensions_for_one_ref called 2716 via htab_traverse. 2717 2718 SLOT contains the current def extension instruction. 2719 B is the see_ref_s structure pointer. */ 2720 2721static int 2722see_merge_one_def_extension (void **slot, void *b) 2723{ 2724 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; 2725 rtx def_se = *slot; 2726 /* If the original insn was already merged with an extension before, 2727 take the merged one. */ 2728 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn : 2729 curr_ref_s->insn; 2730 rtx merged_ref_next = (curr_ref_s->merged_insn) ? 2731 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX; 2732 rtx ref_copy = copy_rtx (ref); 2733 rtx new_set = NULL; 2734 rtx source_extension_reg = see_get_extension_reg (def_se, 0); 2735 rtx dest_extension_reg = see_get_extension_reg (def_se, 1); 2736 rtx move_insn, *rtx_slot, subreg; 2737 rtx temp_extension = NULL; 2738 rtx simplified_temp_extension = NULL; 2739 rtx *pat; 2740 enum rtx_code code; 2741 enum rtx_code extension_code; 2742 enum machine_mode source_extension_mode; 2743 enum machine_mode source_mode; 2744 enum machine_mode dest_extension_mode; 2745 bool merge_success = false; 2746 int i; 2747 2748 gcc_assert (def_se 2749 && INSN_P (def_se) 2750 && curr_ref_s 2751 && ref 2752 && INSN_P (ref)); 2753 2754 if (!see_want_to_be_merged_with_extension (ref, def_se, DEF_EXTENSION)) 2755 { 2756 /* The definition in the reference is too simple. Don't try to merge. */ 2757 if (dump_file) 2758 { 2759 fprintf (dump_file, "Def merge skipped!\n"); 2760 fprintf (dump_file, "Original instructions:\n"); 2761 print_rtl_single (dump_file, ref); 2762 print_rtl_single (dump_file, def_se); 2763 } 2764 2765 see_def_extension_not_merged (curr_ref_s, def_se); 2766 /* Continue to the next extension. */ 2767 return 1; 2768 } 2769 2770 extension_code = see_get_extension_data (def_se, &source_mode); 2771 2772 /* Try to merge and simplify the extension. */ 2773 source_extension_mode = GET_MODE (source_extension_reg); 2774 dest_extension_mode = GET_MODE (dest_extension_reg); 2775 2776 pat = &PATTERN (ref_copy); 2777 code = GET_CODE (*pat); 2778 2779 if (code == PARALLEL) 2780 { 2781 bool need_to_apply_change = false; 2782 2783 for (i = 0; i < XVECLEN (*pat, 0); i++) 2784 { 2785 rtx *sub = &XVECEXP (*pat, 0, i); 2786 2787 if (GET_CODE (*sub) == SET 2788 && GET_MODE (SET_SRC (*sub)) != VOIDmode 2789 && GET_MODE (SET_DEST (*sub)) == source_mode 2790 && ((REG_P (SET_DEST (*sub)) 2791 && REGNO (SET_DEST (*sub)) == REGNO (source_extension_reg)) 2792 || (GET_CODE (SET_DEST (*sub)) == SUBREG 2793 && REG_P (SUBREG_REG (SET_DEST (*sub))) 2794 && (REGNO (SUBREG_REG (SET_DEST (*sub))) == 2795 REGNO (source_extension_reg))))) 2796 { 2797 rtx orig_src = SET_SRC (*sub); 2798 2799 if (extension_code == SIGN_EXTEND) 2800 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode, 2801 orig_src); 2802 else 2803 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode, 2804 orig_src); 2805 simplified_temp_extension = simplify_rtx (temp_extension); 2806 temp_extension = 2807 (simplified_temp_extension) ? simplified_temp_extension : 2808 temp_extension; 2809 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg, 2810 temp_extension); 2811 validate_change (ref_copy, sub, new_set, 1); 2812 need_to_apply_change = true; 2813 } 2814 } 2815 if (need_to_apply_change) 2816 if (apply_change_group ()) 2817 merge_success = true; 2818 } 2819 else if (code == SET 2820 && GET_MODE (SET_SRC (*pat)) != VOIDmode 2821 && GET_MODE (SET_DEST (*pat)) == source_mode 2822 && ((REG_P (SET_DEST (*pat)) 2823 && REGNO (SET_DEST (*pat)) == REGNO (source_extension_reg)) 2824 || (GET_CODE (SET_DEST (*pat)) == SUBREG 2825 && REG_P (SUBREG_REG (SET_DEST (*pat))) 2826 && (REGNO (SUBREG_REG (SET_DEST (*pat))) == 2827 REGNO (source_extension_reg))))) 2828 { 2829 rtx orig_src = SET_SRC (*pat); 2830 2831 if (extension_code == SIGN_EXTEND) 2832 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode, orig_src); 2833 else 2834 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode, orig_src); 2835 simplified_temp_extension = simplify_rtx (temp_extension); 2836 temp_extension = (simplified_temp_extension) ? simplified_temp_extension : 2837 temp_extension; 2838 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg, temp_extension); 2839 if (validate_change (ref_copy, pat, new_set, 0)) 2840 merge_success = true; 2841 } 2842 if (!merge_success) 2843 { 2844 /* The merge failed. */ 2845 if (dump_file) 2846 { 2847 fprintf (dump_file, "Def merge failed!\n"); 2848 fprintf (dump_file, "Original instructions:\n"); 2849 print_rtl_single (dump_file, ref); 2850 print_rtl_single (dump_file, def_se); 2851 } 2852 2853 see_def_extension_not_merged (curr_ref_s, def_se); 2854 /* Continue to the next extension. */ 2855 return 1; 2856 } 2857 2858 /* The merge succeeded! */ 2859 2860 /* Create a simple move instruction to assure the correctness of the code. */ 2861 subreg = gen_lowpart_SUBREG (source_extension_mode, dest_extension_reg); 2862 start_sequence (); 2863 emit_move_insn (source_extension_reg, subreg); 2864 move_insn = get_insns (); 2865 end_sequence (); 2866 2867 /* Link the merged instruction to the newly created move instruction and 2868 to the former created move instructions. */ 2869 PREV_INSN (ref_copy) = NULL_RTX; 2870 NEXT_INSN (ref_copy) = move_insn; 2871 PREV_INSN (move_insn) = ref_copy; 2872 NEXT_INSN (move_insn) = merged_ref_next; 2873 if (merged_ref_next != NULL_RTX) 2874 PREV_INSN (merged_ref_next) = move_insn; 2875 curr_ref_s->merged_insn = ref_copy; 2876 2877 if (dump_file) 2878 { 2879 fprintf (dump_file, "Def merge succeeded!\n"); 2880 fprintf (dump_file, "Original instructions:\n"); 2881 print_rtl_single (dump_file, ref); 2882 print_rtl_single (dump_file, def_se); 2883 fprintf (dump_file, "Merged instruction:\n"); 2884 print_rtl_single (dump_file, ref_copy); 2885 fprintf (dump_file, "Move instruction that was added:\n"); 2886 print_rtl_single (dump_file, move_insn); 2887 } 2888 2889 /* Remove the current def_se from the unmerged_def_se_hash and insert it to 2890 the merged_def_se_hash. */ 2891 htab_clear_slot (curr_ref_s->unmerged_def_se_hash, (PTR *)slot); 2892 if (!curr_ref_s->merged_def_se_hash) 2893 curr_ref_s->merged_def_se_hash = htab_create (10, 2894 hash_descriptor_extension, 2895 eq_descriptor_extension, 2896 NULL); 2897 rtx_slot = (rtx *) htab_find_slot (curr_ref_s->merged_def_se_hash, 2898 dest_extension_reg, INSERT); 2899 gcc_assert (*rtx_slot == NULL); 2900 *rtx_slot = def_se; 2901 2902 return 1; 2903} 2904 2905 2906/* Try to eliminate extensions in this order: 2907 a. Try to merge only the def extensions, one by one. 2908 b. Try to merge only the use extensions, one by one. 2909 2910 TODO: 2911 Try to merge any couple of use extensions simultaneously. 2912 Try to merge any def extension with one or two uses extensions 2913 simultaneously. 2914 2915 After all the merges are done, update the register properties for the basic 2916 block and eliminate locally redundant use extensions. 2917 2918 This is a subroutine of see_merge_and_eliminate_extensions called 2919 via splay_tree_foreach. 2920 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a 2921 see_ref_s structure. */ 2922 2923static int 2924see_handle_extensions_for_one_ref (splay_tree_node stn, 2925 void *data ATTRIBUTE_UNUSED) 2926{ 2927 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash; 2928 htab_t unmerged_def_se_hash = 2929 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash; 2930 htab_t merged_def_se_hash; 2931 rtx ref = ((struct see_ref_s *) (stn->value))->insn; 2932 2933 if (dump_file) 2934 { 2935 fprintf (dump_file, "Handling ref:\n"); 2936 print_rtl_single (dump_file, ref); 2937 } 2938 2939 /* a. Try to eliminate only def extensions, one by one. */ 2940 if (unmerged_def_se_hash) 2941 htab_traverse_noresize (unmerged_def_se_hash, see_merge_one_def_extension, 2942 (PTR) (stn->value)); 2943 2944 if (use_se_hash) 2945 /* b. Try to eliminate only use extensions, one by one. */ 2946 htab_traverse_noresize (use_se_hash, see_merge_one_use_extension, 2947 (PTR) (stn->value)); 2948 2949 merged_def_se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash; 2950 2951 if (dump_file) 2952 { 2953 fprintf (dump_file, "The hashes of the current reference:\n"); 2954 if (unmerged_def_se_hash) 2955 { 2956 fprintf (dump_file, "unmerged_def_se_hash:\n"); 2957 htab_traverse (unmerged_def_se_hash, see_print_one_extension, NULL); 2958 } 2959 if (merged_def_se_hash) 2960 { 2961 fprintf (dump_file, "merged_def_se_hash:\n"); 2962 htab_traverse (merged_def_se_hash, see_print_one_extension, NULL); 2963 } 2964 if (use_se_hash) 2965 { 2966 fprintf (dump_file, "use_se_hash:\n"); 2967 htab_traverse (use_se_hash, see_print_one_extension, NULL); 2968 } 2969 } 2970 2971 /* Now that all the merges are done, update the register properties of the 2972 basic block and eliminate locally redundant extensions. 2973 It is important that we first traverse the use extensions hash and 2974 afterwards the def extensions hashes. */ 2975 2976 if (use_se_hash) 2977 htab_traverse_noresize (use_se_hash, see_set_prop_unmerged_use, 2978 (PTR) (stn->value)); 2979 2980 if (unmerged_def_se_hash) 2981 htab_traverse (unmerged_def_se_hash, see_set_prop_unmerged_def, 2982 (PTR) (stn->value)); 2983 2984 if (merged_def_se_hash) 2985 htab_traverse (merged_def_se_hash, see_set_prop_merged_def, 2986 (PTR) (stn->value)); 2987 2988 /* Continue to the next definition. */ 2989 return 0; 2990} 2991 2992 2993/* Phase 2 top level function. 2994 In this phase, we try to merge def extensions and use extensions with their 2995 references, and eliminate redundant extensions in the same basic block. 2996 We also gather information for the next phases. */ 2997 2998static void 2999see_merge_and_eliminate_extensions (void) 3000{ 3001 int i = 0; 3002 3003 if (dump_file) 3004 fprintf (dump_file, 3005 "* Phase 2: Merge and eliminate locally redundant extensions. *\n"); 3006 3007 /* Traverse over all the splay trees of the basic blocks. */ 3008 for (i = 0; i < last_bb; i++) 3009 { 3010 if (see_bb_splay_ar[i]) 3011 { 3012 if (dump_file) 3013 fprintf (dump_file, "Handling references for bb %d\n", i); 3014 /* Traverse over all the references in the basic block in forward 3015 order. */ 3016 splay_tree_foreach (see_bb_splay_ar[i], 3017 see_handle_extensions_for_one_ref, NULL); 3018 } 3019 } 3020} 3021 3022 3023/* Phase 1 implementation: Propagate extensions to uses. */ 3024 3025/* Insert REF_INSN into the splay tree of its basic block. 3026 SE_INSN is the extension to store in the proper hash according to TYPE. 3027 3028 Return true if everything went well. 3029 Otherwise, return false (this will cause the optimization to be aborted). */ 3030 3031static bool 3032see_store_reference_and_extension (rtx ref_insn, rtx se_insn, 3033 enum extension_type type) 3034{ 3035 rtx *rtx_slot; 3036 int curr_bb_num; 3037 splay_tree_node stn = NULL; 3038 htab_t se_hash = NULL; 3039 struct see_ref_s *ref_s = NULL; 3040 3041 /* Check the arguments. */ 3042 gcc_assert (ref_insn && se_insn); 3043 if (!see_bb_splay_ar) 3044 return false; 3045 3046 curr_bb_num = BLOCK_NUM (ref_insn); 3047 gcc_assert (curr_bb_num < last_bb && curr_bb_num >= 0); 3048 3049 /* Insert the reference to the splay tree of its basic block. */ 3050 if (!see_bb_splay_ar[curr_bb_num]) 3051 /* The splay tree for this block doesn't exist yet, create it. */ 3052 see_bb_splay_ar[curr_bb_num] = splay_tree_new (splay_tree_compare_ints, 3053 NULL, see_free_ref_s); 3054 else 3055 /* Splay tree already exists, check if the current reference is already 3056 in it. */ 3057 { 3058 stn = splay_tree_lookup (see_bb_splay_ar[curr_bb_num], 3059 DF_INSN_LUID (df, ref_insn)); 3060 if (stn) 3061 switch (type) 3062 { 3063 case EXPLICIT_DEF_EXTENSION: 3064 se_hash = 3065 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash; 3066 if (!se_hash) 3067 { 3068 se_hash = htab_create (10, 3069 hash_descriptor_extension, 3070 eq_descriptor_extension, 3071 NULL); 3072 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash = 3073 se_hash; 3074 } 3075 break; 3076 case IMPLICIT_DEF_EXTENSION: 3077 se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash; 3078 if (!se_hash) 3079 { 3080 se_hash = htab_create (10, 3081 hash_descriptor_extension, 3082 eq_descriptor_extension, 3083 NULL); 3084 ((struct see_ref_s *) (stn->value))->merged_def_se_hash = 3085 se_hash; 3086 } 3087 break; 3088 case USE_EXTENSION: 3089 se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash; 3090 if (!se_hash) 3091 { 3092 se_hash = htab_create (10, 3093 hash_descriptor_extension, 3094 eq_descriptor_extension, 3095 NULL); 3096 ((struct see_ref_s *) (stn->value))->use_se_hash = se_hash; 3097 } 3098 break; 3099 default: 3100 gcc_unreachable (); 3101 } 3102 } 3103 3104 /* Initialize a new see_ref_s structure and insert it to the splay 3105 tree. */ 3106 if (!stn) 3107 { 3108 ref_s = xmalloc (sizeof (struct see_ref_s)); 3109 ref_s->luid = DF_INSN_LUID (df, ref_insn); 3110 ref_s->insn = ref_insn; 3111 ref_s->merged_insn = NULL; 3112 3113 /* Initialize the hashes. */ 3114 switch (type) 3115 { 3116 case EXPLICIT_DEF_EXTENSION: 3117 ref_s->unmerged_def_se_hash = htab_create (10, 3118 hash_descriptor_extension, 3119 eq_descriptor_extension, 3120 NULL); 3121 se_hash = ref_s->unmerged_def_se_hash; 3122 ref_s->merged_def_se_hash = NULL; 3123 ref_s->use_se_hash = NULL; 3124 break; 3125 case IMPLICIT_DEF_EXTENSION: 3126 ref_s->merged_def_se_hash = htab_create (10, 3127 hash_descriptor_extension, 3128 eq_descriptor_extension, 3129 NULL); 3130 se_hash = ref_s->merged_def_se_hash; 3131 ref_s->unmerged_def_se_hash = NULL; 3132 ref_s->use_se_hash = NULL; 3133 break; 3134 case USE_EXTENSION: 3135 ref_s->use_se_hash = htab_create (10, 3136 hash_descriptor_extension, 3137 eq_descriptor_extension, 3138 NULL); 3139 se_hash = ref_s->use_se_hash; 3140 ref_s->unmerged_def_se_hash = NULL; 3141 ref_s->merged_def_se_hash = NULL; 3142 break; 3143 default: 3144 gcc_unreachable (); 3145 } 3146 } 3147 3148 /* Insert the new extension instruction into the correct se_hash of the 3149 current reference. */ 3150 rtx_slot = (rtx *) htab_find_slot (se_hash, se_insn, INSERT); 3151 if (*rtx_slot != NULL) 3152 { 3153 gcc_assert (type == USE_EXTENSION); 3154 gcc_assert (rtx_equal_p (PATTERN (*rtx_slot), PATTERN (se_insn))); 3155 } 3156 else 3157 *rtx_slot = se_insn; 3158 3159 /* If this is a new reference, insert it into the splay_tree. */ 3160 if (!stn) 3161 splay_tree_insert (see_bb_splay_ar[curr_bb_num], 3162 DF_INSN_LUID (df, ref_insn), (splay_tree_value) ref_s); 3163 return true; 3164} 3165 3166 3167/* Go over all the defs, for each relevant definition (defined below) store its 3168 instruction as a reference. 3169 3170 A definition is relevant if its root has 3171 ((entry_type == SIGN_EXTENDED_DEF) || (entry_type == ZERO_EXTENDED_DEF)) and 3172 his source_mode is not narrower then the the roots source_mode. 3173 3174 Return the number of relevant defs or negative number if something bad had 3175 happened and the optimization should be aborted. */ 3176 3177static int 3178see_handle_relevant_defs (void) 3179{ 3180 rtx insn = NULL; 3181 rtx se_insn = NULL; 3182 rtx reg = NULL; 3183 rtx ref_insn = NULL; 3184 struct web_entry *root_entry = NULL; 3185 unsigned int i; 3186 int num_relevant_defs = 0; 3187 enum rtx_code extension_code; 3188 3189 for (i = 0; i < defs_num; i++) 3190 { 3191 insn = DF_REF_INSN (DF_DEFS_GET (df, i)); 3192 reg = DF_REF_REAL_REG (DF_DEFS_GET (df, i)); 3193 3194 if (!insn) 3195 continue; 3196 3197 if (!INSN_P (insn)) 3198 continue; 3199 3200 root_entry = unionfind_root (&def_entry[i]); 3201 3202 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF 3203 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF) 3204 /* The current web is not relevant. Continue to the next def. */ 3205 continue; 3206 3207 if (root_entry->reg) 3208 /* It isn't possible to have two different register for the same 3209 web. */ 3210 gcc_assert (rtx_equal_p (root_entry->reg, reg)); 3211 else 3212 root_entry->reg = reg; 3213 3214 /* The current definition is an EXTENDED_DEF or a definition that its 3215 source_mode is narrower then its web's source_mode. 3216 This means that we need to generate the implicit extension explicitly 3217 and store it in the current reference's merged_def_se_hash. */ 3218 if (ENTRY_EI (&def_entry[i])->local_relevancy == EXTENDED_DEF 3219 || (ENTRY_EI (&def_entry[i])->local_source_mode < 3220 ENTRY_EI (root_entry)->source_mode)) 3221 { 3222 num_relevant_defs++; 3223 3224 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF) 3225 extension_code = SIGN_EXTEND; 3226 else 3227 extension_code = ZERO_EXTEND; 3228 3229 se_insn = 3230 see_gen_normalized_extension (reg, extension_code, 3231 ENTRY_EI (root_entry)->source_mode); 3232 3233 /* This is a dummy extension, mark it as deleted. */ 3234 INSN_DELETED_P (se_insn) = 1; 3235 3236 if (!see_store_reference_and_extension (insn, se_insn, 3237 IMPLICIT_DEF_EXTENSION)) 3238 /* Something bad happened. Abort the optimization. */ 3239 return -1; 3240 continue; 3241 } 3242 3243 ref_insn = PREV_INSN (insn); 3244 gcc_assert (BLOCK_NUM (ref_insn) == BLOCK_NUM (insn)); 3245 3246 num_relevant_defs++; 3247 3248 if (!see_store_reference_and_extension (ref_insn, insn, 3249 EXPLICIT_DEF_EXTENSION)) 3250 /* Something bad happened. Abort the optimization. */ 3251 return -1; 3252 } 3253 return num_relevant_defs; 3254} 3255 3256 3257/* Go over all the uses, for each use in relevant web store its instruction as 3258 a reference and generate an extension before it. 3259 3260 Return the number of relevant uses or negative number if something bad had 3261 happened and the optimization should be aborted. */ 3262 3263static int 3264see_handle_relevant_uses (void) 3265{ 3266 rtx insn = NULL; 3267 rtx reg = NULL; 3268 struct web_entry *root_entry = NULL; 3269 rtx se_insn = NULL; 3270 unsigned int i; 3271 int num_relevant_uses = 0; 3272 enum rtx_code extension_code; 3273 3274 for (i = 0; i < uses_num; i++) 3275 { 3276 insn = DF_REF_INSN (DF_USES_GET (df, i)); 3277 reg = DF_REF_REAL_REG (DF_USES_GET (df, i)); 3278 3279 if (!insn) 3280 continue; 3281 3282 if (!INSN_P (insn)) 3283 continue; 3284 3285 root_entry = unionfind_root (&use_entry[i]); 3286 3287 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF 3288 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF) 3289 /* The current web is not relevant. Continue to the next use. */ 3290 continue; 3291 3292 if (root_entry->reg) 3293 /* It isn't possible to have two different register for the same 3294 web. */ 3295 gcc_assert (rtx_equal_p (root_entry->reg, reg)); 3296 else 3297 root_entry->reg = reg; 3298 3299 /* Generate the use extension. */ 3300 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF) 3301 extension_code = SIGN_EXTEND; 3302 else 3303 extension_code = ZERO_EXTEND; 3304 3305 se_insn = 3306 see_gen_normalized_extension (reg, extension_code, 3307 ENTRY_EI (root_entry)->source_mode); 3308 if (!se_insn) 3309 /* This is very bad, abort the transformation. */ 3310 return -1; 3311 3312 num_relevant_uses++; 3313 3314 if (!see_store_reference_and_extension (insn, se_insn, 3315 USE_EXTENSION)) 3316 /* Something bad happened. Abort the optimization. */ 3317 return -1; 3318 } 3319 3320 return num_relevant_uses; 3321} 3322 3323 3324/* Updates the relevancy of all the uses. 3325 The information of the i'th use is stored in use_entry[i]. 3326 Currently all the uses are relevant for the optimization except for uses that 3327 are in LIBCALL or RETVAL instructions. */ 3328 3329static void 3330see_update_uses_relevancy (void) 3331{ 3332 rtx insn = NULL; 3333 rtx reg = NULL; 3334 struct see_entry_extra_info *curr_entry_extra_info; 3335 enum entry_type et; 3336 unsigned int i; 3337 3338 if (!df || !use_entry) 3339 return; 3340 3341 for (i = 0; i < uses_num; i++) 3342 { 3343 3344 insn = DF_REF_INSN (DF_USES_GET (df, i)); 3345 reg = DF_REF_REAL_REG (DF_USES_GET (df, i)); 3346 3347 et = RELEVANT_USE; 3348 3349 if (insn) 3350 { 3351 if (!INSN_P (insn)) 3352 et = NOT_RELEVANT; 3353 if (insn && find_reg_note (insn, REG_LIBCALL, NULL_RTX)) 3354 et = NOT_RELEVANT; 3355 if (find_reg_note (insn, REG_RETVAL, NULL_RTX)) 3356 et = NOT_RELEVANT; 3357 } 3358 else 3359 et = NOT_RELEVANT; 3360 3361 if (dump_file) 3362 { 3363 fprintf (dump_file, "u%i insn %i reg %i ", 3364 i, (insn ? INSN_UID (insn) : -1), REGNO (reg)); 3365 if (et == NOT_RELEVANT) 3366 fprintf (dump_file, "NOT RELEVANT \n"); 3367 else 3368 fprintf (dump_file, "RELEVANT USE \n"); 3369 } 3370 3371 curr_entry_extra_info = xmalloc (sizeof (struct see_entry_extra_info)); 3372 curr_entry_extra_info->relevancy = et; 3373 curr_entry_extra_info->local_relevancy = et; 3374 use_entry[i].extra_info = curr_entry_extra_info; 3375 use_entry[i].reg = NULL; 3376 use_entry[i].pred = NULL; 3377 } 3378} 3379 3380 3381/* A definition in a candidate for this optimization only if its pattern is 3382 recognized as relevant in this function. 3383 INSN is the instruction to be recognized. 3384 3385- If this is the pattern of a common sign extension after definition: 3386 PREV_INSN (INSN): def (reg:NARROWmode r) 3387 INSN: set ((reg:WIDEmode r') 3388 (sign_extend:WIDEmode (reg:NARROWmode r))) 3389 return SIGN_EXTENDED_DEF and set SOURCE_MODE to NARROWmode. 3390 3391- If this is the pattern of a common zero extension after definition: 3392 PREV_INSN (INSN): def (reg:NARROWmode r) 3393 INSN: set ((reg:WIDEmode r') 3394 (zero_extend:WIDEmode (reg:NARROWmode r))) 3395 return ZERO_EXTENDED_DEF and set SOURCE_MODE to NARROWmode. 3396 3397- Otherwise, 3398 3399 For the pattern: 3400 INSN: set ((reg:WIDEmode r) (sign_extend:WIDEmode (...expr...))) 3401 return EXTENDED_DEF and set SOURCE_MODE to the mode of expr. 3402 3403 For the pattern: 3404 INSN: set ((reg:WIDEmode r) (zero_extend:WIDEmode (...expr...))) 3405 return EXTENDED_DEF and set SOURCE_MODE_UNSIGNED to the mode of expr. 3406 3407 For the pattern: 3408 INSN: set ((reg:WIDEmode r) (CONST_INT (...))) 3409 return EXTENDED_DEF and set SOURCE_MODE(_UNSIGNED) to the narrowest mode that 3410 is implicitly sign(zero) extended to WIDEmode in the INSN. 3411 3412- FIXME: Extensions that are not adjacent to their definition and EXTENDED_DEF 3413 that is part of a PARALLEL instruction are not handled. 3414 These restriction can be relaxed. */ 3415 3416static enum entry_type 3417see_analyze_one_def (rtx insn, enum machine_mode *source_mode, 3418 enum machine_mode *source_mode_unsigned) 3419{ 3420 enum rtx_code extension_code; 3421 rtx rhs = NULL; 3422 rtx lhs = NULL; 3423 rtx set = NULL; 3424 rtx source_register = NULL; 3425 rtx prev_insn = NULL; 3426 rtx next_insn = NULL; 3427 enum machine_mode mode; 3428 enum machine_mode next_source_mode; 3429 HOST_WIDE_INT val = 0; 3430 HOST_WIDE_INT val2 = 0; 3431 int i = 0; 3432 3433 *source_mode = MAX_MACHINE_MODE; 3434 *source_mode_unsigned = MAX_MACHINE_MODE; 3435 3436 if (!insn) 3437 return NOT_RELEVANT; 3438 3439 if (!INSN_P (insn)) 3440 return NOT_RELEVANT; 3441 3442 extension_code = see_get_extension_data (insn, source_mode); 3443 switch (extension_code) 3444 { 3445 case SIGN_EXTEND: 3446 case ZERO_EXTEND: 3447 source_register = see_get_extension_reg (insn, 0); 3448 /* FIXME: This restriction can be relaxed. The only thing that is 3449 important is that the reference would be inside the same basic block 3450 as the extension. */ 3451 prev_insn = PREV_INSN (insn); 3452 if (!prev_insn || !INSN_P (prev_insn)) 3453 return NOT_RELEVANT; 3454 3455 if (!reg_set_between_p (source_register, PREV_INSN (prev_insn), insn)) 3456 return NOT_RELEVANT; 3457 3458 if (find_reg_note (prev_insn, REG_LIBCALL, NULL_RTX)) 3459 return NOT_RELEVANT; 3460 3461 if (find_reg_note (prev_insn, REG_RETVAL, NULL_RTX)) 3462 return NOT_RELEVANT; 3463 3464 /* If we can't use copy_rtx on the reference it can't be a reference. */ 3465 if (GET_CODE (PATTERN (prev_insn)) == PARALLEL 3466 && asm_noperands (PATTERN (prev_insn)) >= 0) 3467 return NOT_RELEVANT; 3468 3469 /* Now, check if this extension is a reference itself. If so, it is not 3470 relevant. Handling this extension as relevant would make things much 3471 more complicated. */ 3472 next_insn = NEXT_INSN (insn); 3473 if (next_insn 3474 && INSN_P (next_insn) 3475 && (see_get_extension_data (next_insn, &next_source_mode) != 3476 NOT_RELEVANT)) 3477 { 3478 rtx curr_dest_register = see_get_extension_reg (insn, 1); 3479 rtx next_source_register = see_get_extension_reg (next_insn, 0); 3480 3481 if (REGNO (curr_dest_register) == REGNO (next_source_register)) 3482 return NOT_RELEVANT; 3483 } 3484 3485 if (extension_code == SIGN_EXTEND) 3486 return SIGN_EXTENDED_DEF; 3487 else 3488 return ZERO_EXTENDED_DEF; 3489 3490 case UNKNOWN: 3491 /* This may still be an EXTENDED_DEF. */ 3492 3493 /* FIXME: This restriction can be relaxed. It is possible to handle 3494 PARALLEL insns too. */ 3495 set = single_set (insn); 3496 if (!set) 3497 return NOT_RELEVANT; 3498 rhs = SET_SRC (set); 3499 lhs = SET_DEST (set); 3500 3501 /* Don't handle extensions to something other then register or 3502 subregister. */ 3503 if (!REG_P (lhs) && !SUBREG_REG (lhs)) 3504 return NOT_RELEVANT; 3505 3506 switch (GET_CODE (rhs)) 3507 { 3508 case SIGN_EXTEND: 3509 *source_mode = GET_MODE (XEXP (rhs, 0)); 3510 *source_mode_unsigned = MAX_MACHINE_MODE; 3511 return EXTENDED_DEF; 3512 case ZERO_EXTEND: 3513 *source_mode = MAX_MACHINE_MODE; 3514 *source_mode_unsigned = GET_MODE (XEXP (rhs, 0)); 3515 return EXTENDED_DEF; 3516 case CONST_INT: 3517 3518 val = INTVAL (rhs); 3519 3520 /* Find the narrowest mode, val could fit into. */ 3521 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT), i = 0; 3522 GET_MODE_BITSIZE (mode) < BITS_PER_WORD; 3523 mode = GET_MODE_WIDER_MODE (mode), i++) 3524 { 3525 val2 = trunc_int_for_mode (val, mode); 3526 if (val2 == val && *source_mode == MAX_MACHINE_MODE) 3527 *source_mode = mode; 3528 if (val == (val & (HOST_WIDE_INT)GET_MODE_MASK (mode)) 3529 && *source_mode_unsigned == MAX_MACHINE_MODE) 3530 *source_mode_unsigned = mode; 3531 if (*source_mode != MAX_MACHINE_MODE 3532 && *source_mode_unsigned !=MAX_MACHINE_MODE) 3533 return EXTENDED_DEF; 3534 } 3535 if (*source_mode != MAX_MACHINE_MODE 3536 || *source_mode_unsigned !=MAX_MACHINE_MODE) 3537 return EXTENDED_DEF; 3538 return NOT_RELEVANT; 3539 default: 3540 return NOT_RELEVANT; 3541 } 3542 default: 3543 gcc_unreachable (); 3544 } 3545} 3546 3547 3548/* Updates the relevancy and source_mode of all the definitions. 3549 The information of the i'th definition is stored in def_entry[i]. */ 3550 3551static void 3552see_update_defs_relevancy (void) 3553{ 3554 struct see_entry_extra_info *curr_entry_extra_info; 3555 unsigned int i; 3556 rtx insn = NULL; 3557 rtx reg = NULL; 3558 enum entry_type et; 3559 enum machine_mode source_mode; 3560 enum machine_mode source_mode_unsigned; 3561 3562 if (!df || !def_entry) 3563 return; 3564 3565 for (i = 0; i < defs_num; i++) 3566 { 3567 insn = DF_REF_INSN (DF_DEFS_GET (df, i)); 3568 reg = DF_REF_REAL_REG (DF_DEFS_GET (df, i)); 3569 3570 et = see_analyze_one_def (insn, &source_mode, &source_mode_unsigned); 3571 3572 curr_entry_extra_info = xmalloc (sizeof (struct see_entry_extra_info)); 3573 curr_entry_extra_info->relevancy = et; 3574 curr_entry_extra_info->local_relevancy = et; 3575 if (et != EXTENDED_DEF) 3576 { 3577 curr_entry_extra_info->source_mode = source_mode; 3578 curr_entry_extra_info->local_source_mode = source_mode; 3579 } 3580 else 3581 { 3582 curr_entry_extra_info->source_mode_signed = source_mode; 3583 curr_entry_extra_info->source_mode_unsigned = source_mode_unsigned; 3584 } 3585 def_entry[i].extra_info = curr_entry_extra_info; 3586 def_entry[i].reg = NULL; 3587 def_entry[i].pred = NULL; 3588 3589 if (dump_file) 3590 { 3591 if (et == NOT_RELEVANT) 3592 { 3593 fprintf (dump_file, "d%i insn %i reg %i ", 3594 i, (insn ? INSN_UID (insn) : -1), REGNO (reg)); 3595 fprintf (dump_file, "NOT RELEVANT \n"); 3596 } 3597 else 3598 { 3599 fprintf (dump_file, "d%i insn %i reg %i ", 3600 i ,INSN_UID (insn), REGNO (reg)); 3601 fprintf (dump_file, "RELEVANT - "); 3602 switch (et) 3603 { 3604 case SIGN_EXTENDED_DEF : 3605 fprintf (dump_file, "SIGN_EXTENDED_DEF, source_mode = %s\n", 3606 GET_MODE_NAME (source_mode)); 3607 break; 3608 case ZERO_EXTENDED_DEF : 3609 fprintf (dump_file, "ZERO_EXTENDED_DEF, source_mode = %s\n", 3610 GET_MODE_NAME (source_mode)); 3611 break; 3612 case EXTENDED_DEF : 3613 fprintf (dump_file, "EXTENDED_DEF, "); 3614 if (source_mode != MAX_MACHINE_MODE 3615 && source_mode_unsigned != MAX_MACHINE_MODE) 3616 { 3617 fprintf (dump_file, "positive const, "); 3618 fprintf (dump_file, "source_mode_signed = %s, ", 3619 GET_MODE_NAME (source_mode)); 3620 fprintf (dump_file, "source_mode_unsigned = %s\n", 3621 GET_MODE_NAME (source_mode_unsigned)); 3622 } 3623 else if (source_mode != MAX_MACHINE_MODE) 3624 fprintf (dump_file, "source_mode_signed = %s\n", 3625 GET_MODE_NAME (source_mode)); 3626 else 3627 fprintf (dump_file, "source_mode_unsigned = %s\n", 3628 GET_MODE_NAME (source_mode_unsigned)); 3629 break; 3630 default : 3631 gcc_unreachable (); 3632 } 3633 } 3634 } 3635 } 3636} 3637 3638 3639/* Phase 1 top level function. 3640 In this phase the relevancy of all the definitions and uses are checked, 3641 later the webs are produces and the extensions are generated. 3642 These extensions are not emitted yet into the insns stream. 3643 3644 returns true if at list one relevant web was found and there were no 3645 problems, otherwise return false. */ 3646 3647static bool 3648see_propagate_extensions_to_uses (void) 3649{ 3650 unsigned int i = 0; 3651 int num_relevant_uses; 3652 int num_relevant_defs; 3653 3654 if (dump_file) 3655 fprintf (dump_file, 3656 "* Phase 1: Propagate extensions to uses. *\n"); 3657 3658 /* Update the relevancy of references using the DF object. */ 3659 see_update_defs_relevancy (); 3660 see_update_uses_relevancy (); 3661 3662 /* Produce the webs and update the extra_info of the root. 3663 In general, a web is relevant if all its definitions and uses are relevant 3664 and there is at least one definition that was marked as SIGN_EXTENDED_DEF 3665 or ZERO_EXTENDED_DEF. */ 3666 for (i = 0; i < uses_num; i++) 3667 union_defs (df, DF_USES_GET (df, i), def_entry, use_entry, 3668 see_update_leader_extra_info); 3669 3670 /* Generate use extensions for references and insert these 3671 references to see_bb_splay_ar data structure. */ 3672 num_relevant_uses = see_handle_relevant_uses (); 3673 3674 if (num_relevant_uses < 0) 3675 return false; 3676 3677 /* Store the def extensions in their references structures and insert these 3678 references to see_bb_splay_ar data structure. */ 3679 num_relevant_defs = see_handle_relevant_defs (); 3680 3681 if (num_relevant_defs < 0) 3682 return false; 3683 3684 return num_relevant_uses > 0 || num_relevant_defs > 0; 3685} 3686 3687 3688/* Main entry point for the sign extension elimination optimization. */ 3689 3690static void 3691see_main (void) 3692{ 3693 bool cont = false; 3694 int i = 0; 3695 3696 /* Initialize global data structures. */ 3697 see_initialize_data_structures (); 3698 3699 /* Phase 1: Propagate extensions to uses. */ 3700 cont = see_propagate_extensions_to_uses (); 3701 3702 if (cont) 3703 { 3704 init_recog (); 3705 3706 /* Phase 2: Merge and eliminate locally redundant extensions. */ 3707 see_merge_and_eliminate_extensions (); 3708 3709 /* Phase 3: Eliminate globally redundant extensions. */ 3710 see_execute_LCM (); 3711 3712 /* Phase 4: Commit changes to the insn stream. */ 3713 see_commit_changes (); 3714 3715 if (dump_file) 3716 { 3717 /* For debug purpose only. */ 3718 fprintf (dump_file, "see_pre_extension_hash:\n"); 3719 htab_traverse (see_pre_extension_hash, see_print_pre_extension_expr, 3720 NULL); 3721 3722 for (i = 0; i < last_bb; i++) 3723 { 3724 if (see_bb_hash_ar[i]) 3725 /* Traverse over all the references in the basic block in 3726 forward order. */ 3727 { 3728 fprintf (dump_file, 3729 "Searching register properties in bb %d\n", i); 3730 htab_traverse (see_bb_hash_ar[i], 3731 see_print_register_properties, NULL); 3732 } 3733 } 3734 } 3735 } 3736 3737 /* Free global data structures. */ 3738 see_free_data_structures (); 3739} 3740 3741 3742static bool 3743gate_handle_see (void) 3744{ 3745 return optimize > 1 && flag_see; 3746} 3747 3748static unsigned int 3749rest_of_handle_see (void) 3750{ 3751 int no_new_pseudos_bcp = no_new_pseudos; 3752 3753 no_new_pseudos = 0; 3754 see_main (); 3755 no_new_pseudos = no_new_pseudos_bcp; 3756 3757 delete_trivially_dead_insns (get_insns (), max_reg_num ()); 3758 update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES, 3759 (PROP_DEATH_NOTES)); 3760 cleanup_cfg (CLEANUP_EXPENSIVE); 3761 reg_scan (get_insns (), max_reg_num ()); 3762 3763 return 0; 3764} 3765 3766struct tree_opt_pass pass_see = 3767{ 3768 "see", /* name */ 3769 gate_handle_see, /* gate */ 3770 rest_of_handle_see, /* execute */ 3771 NULL, /* sub */ 3772 NULL, /* next */ 3773 0, /* static_pass_number */ 3774 TV_SEE, /* tv_id */ 3775 0, /* properties_required */ 3776 0, /* properties_provided */ 3777 0, /* properties_destroyed */ 3778 0, /* todo_flags_start */ 3779 TODO_dump_func, /* todo_flags_finish */ 3780 'u' /* letter */ 3781}; 3782 3783