/* Reassociation for trees.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-table.h"
#include "tm.h"
#include "rtl.h"
#include "tm_p.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "hash-map.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "target.h"
#include "params.h"
#include "diagnostic-core.h"
#include "builtins.h"
#include "gimplify.h"
#include "insn-codes.h"
#include "optabs.h"

/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_t.

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.
   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else currently will clean them up.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this, if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1), b (1), c (1), d (2), e (2)

   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   Because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or fewer ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */
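
/* To make step 1 concrete, consider (this is an illustration, not code
   from the pass itself) a statement chain like

     t1 = a - b;
     t2 = t1 - c;

   which is conceptually broken up into

     t1 = a + (-b);
     t2 = t1 + (-c);

   so that a, -b and -c all land in one PLUS_EXPR operand list that
   steps 2-4 can reorder freely; step 5 then turns surviving negates
   back into subtracts where profitable.  */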

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static alloc_pool operand_entry_pool;

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which, after reassociate_bb is done with
   all basic blocks, the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);

/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}

/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (arg))
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (arg);
          if (gimple_bb (def_stmt)->loop_father == father)
            return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
        }
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
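
/* For illustration, in a one-block innermost loop such as

     <bb 4>  (header == latch):
       sum_1 = PHI <sum_0(3), sum_2(4)>
       sum_2 = sum_1 + a_5;

   sum_1 is recognized as an accumulator phi, so phi_rank returns
   bb_rank[latch->index] + PHI_LOOP_BIAS instead of the plain block
   rank, which makes the chain rewriting below add sum_1 last.  */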

/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */
static bool
loop_carried_phi (tree exp)
{
  gimple phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}

/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}

/* Look up the operand rank structure for expression E.  */

static inline long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E, RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}

/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAMEs have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      gimple stmt;
      long rank;
      int i, n;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        return phi_rank (stmt);

      if (!is_gimple_assign (stmt)
          || gimple_vdef (stmt))
        return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
         exception, remove the bias from loop-carried phis when propagating
         the rank so that dependent operations are not also biased.  */
      rank = 0;
      if (gimple_assign_single_p (stmt))
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          n = TREE_OPERAND_LENGTH (rhs);
          if (n == 0)
            rank = propagate_rank (rank, rhs);
          else
            {
              for (i = 0; i < n; i++)
                {
                  op = TREE_OPERAND (rhs, i);

                  if (op != NULL_TREE)
                    rank = propagate_rank (rank, op);
                }
            }
        }
      else
        {
          n = gimple_num_ops (stmt);
          for (i = 1; i < n; i++)
            {
              op = gimple_op (stmt, i);
              gcc_assert (op);
              rank = propagate_rank (rank, op);
            }
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e, 0);
          fprintf (dump_file, " is %ld\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Globals, etc, are rank 0.  */
  return 0;
}
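
/* As a small worked example, with both statements in a block of rank R,

     a_1 = x_2 + y_3;
     b_4 = a_1 + z_5;

   a_1 gets rank MAX (rank (x_2), rank (y_3)) + 1 and b_4 a rank one
   higher still, so later members of a dependence chain always carry
   strictly larger ranks than earlier ones, which is what the sorting
   below relies on.  */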

/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.
     Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oeb->op) - constant_type (oea->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      /* As SSA_NAME_VERSION is assigned pretty randomly (we reuse
         versions of removed SSA_NAMEs), prefer to sort, if possible,
         based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
         See PR60418.  */
      if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
          && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
          && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        {
          gimple stmta = SSA_NAME_DEF_STMT (oea->op);
          gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
          basic_block bba = gimple_bb (stmta);
          basic_block bbb = gimple_bb (stmtb);
          if (bbb != bba)
            {
              if (bb_rank[bbb->index] != bb_rank[bba->index])
                return bb_rank[bbb->index] - bb_rank[bba->index];
            }
          else
            {
              bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
              bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
              if (da != db)
                return da ? 1 : -1;
            }
        }

      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
        return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}
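
/* E.g. an ops list {a_1 (rank 2), b_2 (rank 3), 5 (rank 0)} sorts under
   this comparator to {b_2, a_1, 5}: descending rank, so that constants
   (rank 0) land at the end, where optimize_ops_list can merge them.  */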

/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
                       HOST_WIDE_INT repeat)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    return true;

  return false;
}


/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple stmt = SSA_NAME_DEF_STMT (name);

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}

/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
                          vec<operand_entry_t> *ops,
                          bool *all_done,
                          unsigned int i,
                          operand_entry_t curr,
                          operand_entry_t last)
{

  /* If we have two of the same op, and the opcode is &, |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
        {
        case MAX_EXPR:
        case MIN_EXPR:
        case BIT_IOR_EXPR:
        case BIT_AND_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " [&|minmax] ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> ");
              print_generic_stmt (dump_file, last->op, 0);
            }

          ops->ordered_remove (i);
          reassociate_stats.ops_eliminated ++;

          return true;

        case BIT_XOR_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " ^ ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> nothing\n");
            }

          reassociate_stats.ops_eliminated += 2;

          if (ops->length () == 2)
            {
              ops->create (0);
              add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
              *all_done = true;
            }
          else
            {
              ops->ordered_remove (i-1);
              ops->ordered_remove (i-1);
            }

          return true;

        default:
          break;
        }
    }
  return false;
}
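
/* E.g. after sorting, a BIT_AND_EXPR list {a_1, a_1, b_2} has the
   duplicate pair eliminated to {a_1, b_2} (a & a == a), while for
   BIT_XOR_EXPR the pair {a_1, a_1} cancels entirely and is replaced
   by a zero constant (a ^ a == 0).  */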

static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
   not expression, look in OPS for a corresponding positive operation to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
                           vec<operand_entry_t> *ops,
                           unsigned int currindex,
                           operand_entry_t curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == negateop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, negateop, 0);
              fprintf (dump_file, " + -");
              print_generic_expr (dump_file, oe->op, 0);
              fprintf (dump_file, " -> 0\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated ++;

          return true;
        }
      else if (oe->op == notop)
        {
          tree op_type = TREE_TYPE (oe->op);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop, 0);
              fprintf (dump_file, " + ~");
              print_generic_expr (dump_file, oe->op, 0);
              fprintf (dump_file, " -> -1\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated ++;

          return true;
        }
    }

  /* CURR->OP is a negate expr in a plus expr: save it for later
     inspection in repropagate_negates().  */
  if (negateop != NULL_TREE)
    plus_negates.safe_push (curr->op);

  return false;
}

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
                     vec<operand_entry_t> *ops,
                     unsigned int currindex,
                     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " & ~");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " | ~");
              print_generic_expr (dump_file, oe->op, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " -> 0\n");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " -> -1\n");
            }

          if (opcode == BIT_AND_EXPR)
            oe->op = build_zero_cst (TREE_TYPE (oe->op));
          else if (opcode == BIT_IOR_EXPR)
            oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

          reassociate_stats.ops_eliminated += ops->length () - 1;
          ops->truncate (0);
          ops->quick_push (oe);
          return true;
        }
    }

  return false;
}
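
/* Examples for the two helpers above: in a PLUS_EXPR list, t_3 = -a_1
   cancels against a_1 to 0, and t_3 = ~a_1 combines with a_1 to -1;
   in a BIT_AND_EXPR list, t_3 = ~a_1 together with a_1 collapses the
   whole list to 0 (to -1 for BIT_IOR_EXPR), since x & ~x == 0 and
   x | ~x == -1.  */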

/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
                           vec<operand_entry_t> *ops)
{
  operand_entry_t oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
        {
        case BIT_AND_EXPR:
          if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & -1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case BIT_IOR_EXPR:
          if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | -1, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case MULT_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && !HONOR_NANS (type)
                  && !HONOR_SIGNED_ZEROS (type)
                  && real_zerop (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;
                  ops->truncate (1);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_onep (oelast->op)
                   || (FLOAT_TYPE_P (type)
                       && !HONOR_SNANS (type)
                       && real_onep (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        case BIT_XOR_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
                  && fold_real_zero_addition_p (type, oelast->op,
                                                opcode == MINUS_EXPR)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found [|^+] 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        default:
          break;
        }
    }
}
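
/* E.g. a sorted ops list {a_1, b_2, 0} for BIT_AND_EXPR is truncated to
   just {0}, while the same list for PLUS_EXPR merely drops the useless
   trailing 0 and becomes {a_1, b_2}.  */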

static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
                                 bool, bool);

/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;
  int id;
  enum tree_code oecode;
  tree op;
} oecount;


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;


/* Oecount hashtable helpers.  */

struct oecount_hasher
{
  typedef int value_type;
  typedef int compare_type;
  typedef int store_values_directly;
  static inline hashval_t hash (const value_type &);
  static inline bool equal (const value_type &, const compare_type &);
  static bool is_deleted (int &v) { return v == 1; }
  static void mark_deleted (int &e) { e = 1; }
  static bool is_empty (int &v) { return v == 0; }
  static void mark_empty (int &e) { e = 0; }
  static void remove (int &) {}
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (const value_type &p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (const value_type &p1, const compare_type &p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return (c1->oecode == c2->oecode
          && c1->op == c2->op);
}
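
/* The values stored in the hash table above by undistribute_ops_list
   are cvec indices biased by 42: the hasher reserves 0 and 1 for empty
   and deleted slots, so an oecount living at cvec[i] is entered into
   the table as i + 42 (pushed as cvec.length () + 41) and mapped back
   with cvec[*slot - 42].  */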

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt - c2->cnt;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id - c2->id;
}

/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple stmt, tree op)
{
  tree fndecl;

  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
    CASE_FLT_FN (BUILT_IN_POWI):
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}

/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_FLT_FN (BUILT_IN_POWI):
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
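
/* E.g. for a call t_1 = __builtin_powi (x_2, 5), decrement_power
   rewrites the call in place to __builtin_powi (x_2, 4) and returns 4;
   a return value of 1 tells the caller the call has degenerated to
   x_2 itself.  */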

/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple stmt, tree *def)
{
  tree lhs;
  gimple use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE, removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  gimple stmt = SSA_NAME_DEF_STMT (*def);

  do
    {
      tree name;

      if (opcode == MULT_EXPR
          && stmt_is_power_of_op (stmt, op))
        {
          if (decrement_power (stmt) == 1)
            propagate_op_to_single_use (op, stmt, def);
          return;
        }

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
         is ours, simply propagate the other operand into the stmt's
         single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
          && (name == op
              || gimple_assign_rhs2 (stmt) == op))
        {
          if (name == op)
            name = gimple_assign_rhs2 (stmt);
          propagate_op_to_single_use (name, stmt, def);
          return;
        }

      /* We might have a multiply of two __builtin_pow* calls, and
         the operand might be hiding in the rightmost one.  */
      if (opcode == MULT_EXPR
          && gimple_assign_rhs_code (stmt) == opcode
          && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
          && has_single_use (gimple_assign_rhs2 (stmt)))
        {
          gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
          if (stmt_is_power_of_op (stmt2, op))
            {
              if (decrement_power (stmt2) == 1)
                propagate_op_to_single_use (op, stmt2, def);
              return;
            }
        }

      /* Continue walking the chain.  */
      gcc_assert (name != op
                  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
    }
  while (1);
}
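
/* As an illustration, for the MULT_EXPR chain

     t_1 = x_2 * a_3;
     t_4 = t_1 * b_5;   (*DEF is t_4)

   zero_one_operation (&def, MULT_EXPR, x_2) finds x_2 in the defining
   statement of t_1 and propagates a_3 into t_1's single use, leaving
   t_4 = a_3 * b_5.  */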

/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of a default
     definition (D) SSA_NAME.  Assume it lives at the beginning of the
     function and thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
         executed all in parallel; if only one stmt is a PHI,
         it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
        return true;

      if (gimple_code (s2) == GIMPLE_PHI)
        return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
        return true;

      if (gimple_uid (s1) > gimple_uid (s2))
        return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple s = gsi_stmt (gsi);
          if (gimple_uid (s) != uid)
            break;
          if (s == s2)
            return true;
        }

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}

/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple stmt, gimple insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
                      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}

/* Builds one statement performing OP1 OPCODE OP2, creating a new SSA
   name of type TYPE for the result.  Places the statement after the
   definition of either OP1 or OP2.  Returns the new statement.  */

static gimple
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple op1def = NULL, op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
        {
          gimple_stmt_iterator gsi2
            = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
          gimple_set_uid (sum,
                          gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
        }
      else
        gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple insert_point;
      if ((!op1def || gimple_nop_p (op1def))
          || (op2def && !gimple_nop_p (op2def)
              && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
        insert_point = op2def;
      else
        insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}

/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

   - First we walk the addition chain *OPS looking for summands that
     are defined by a multiplication or a real division.  This results
     in the candidates bitmap with relevant indices into *OPS.

   - Second we build the chains of multiplications or divisions for
     these candidates, counting the number of occurrences of (operand, code)
     pairs in all of the candidates chains.

   - Third we sort the (operand, code) pairs by number of occurrence and
     process them starting with the pair with the most uses.

     * For each such pair we walk the candidates again to build a
       second candidate bitmap noting all multiplication/division chains
       that have at least one occurrence of (operand, code).

     * We build an alternate addition chain only covering these
       candidates with one (operand, code) operation removed from their
       multiplication/division chain.

     * The first candidate gets replaced by the alternate addition chain
       multiplied/divided by the operand.

     * All candidate chains get disabled for further processing and
       processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */

static bool
undistribute_ops_list (enum tree_code opcode,
                       vec<operand_entry_t> *ops, struct loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry_t oe1;
  unsigned i, j;
  sbitmap candidates, candidates2;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry_t> *subops;
  bool changed = false;
  int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  candidates = sbitmap_alloc (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
        continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
        continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
           && dcode != RDIV_EXPR)
          || !is_reassociable_op (oe1def, dcode, loop))
        continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    {
      sbitmap_free (candidates);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
                          (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to work around that limitation.  */
  typedef vec<operand_entry_t> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
                           associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
        {
          oecount c;
          int *slot;
          int idx;
          c.oecode = oecode;
          c.cnt = 1;
          c.id = next_oecount_id++;
          c.op = oe1->op;
          cvec.safe_push (c);
          idx = cvec.length () + 41;
          slot = ctable.find_slot (idx, INSERT);
          if (!*slot)
            {
              *slot = idx;
            }
          else
            {
              cvec.pop ();
              cvec[*slot - 42].cnt++;
            }
        }
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
        {
          fprintf (dump_file, "  %u %s: ", c->cnt,
                   c->oecode == MULT_EXPR
                   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
          print_generic_expr (dump_file, c->op, 0);
          fprintf (dump_file, "\n");
        }
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  candidates2 = sbitmap_alloc (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
        break;

      /* Now collect the operands in the outer chain that contain
         the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
        {
          gimple oedef;
          enum tree_code oecode;
          unsigned j;
          tree op = (*ops)[i]->op;

          /* If we undistributed in this chain already this may be
             a constant.  */
          if (TREE_CODE (op) != SSA_NAME)
            continue;

          oedef = SSA_NAME_DEF_STMT (op);
          oecode = gimple_assign_rhs_code (oedef);
          if (oecode != c->oecode)
            continue;

          FOR_EACH_VEC_ELT (subops[i], j, oe1)
            {
              if (oe1->op == c->op)
                {
                  bitmap_set_bit (candidates2, i);
                  ++nr_candidates2;
                  break;
                }
            }
        }

      if (nr_candidates2 >= 2)
        {
          operand_entry_t oe1, oe2;
          gimple prod;
          int first = bitmap_first_set_bit (candidates2);

          /* Build the new addition chain.  */
          oe1 = (*ops)[first];
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Building (");
              print_generic_expr (dump_file, oe1->op, 0);
            }
          zero_one_operation (&oe1->op, c->oecode, c->op);
          EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
            {
              gimple sum;
              oe2 = (*ops)[i];
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, " + ");
                  print_generic_expr (dump_file, oe2->op, 0);
                }
              zero_one_operation (&oe2->op, c->oecode, c->op);
              sum = build_and_add_sum (TREE_TYPE (oe1->op),
                                       oe1->op, oe2->op, opcode);
              oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
              oe2->rank = 0;
              oe1->op = gimple_get_lhs (sum);
            }

          /* Apply the multiplication/division.  */
          prod = build_and_add_sum (TREE_TYPE (oe1->op),
                                    oe1->op, c->op, c->oecode);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
              print_generic_expr (dump_file, c->op, 0);
              fprintf (dump_file, "\n");
            }

          /* Record it in the addition chain and disable further
             undistribution with this op.  */
          oe1->op = gimple_assign_lhs (prod);
          oe1->rank = get_rank (oe1->op);
          subops[first].release ();

          changed = true;
        }

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();
  sbitmap_free (candidates);
  sbitmap_free (candidates2);

  return changed;
}
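
/* A sketch of the overall effect: for OPS = {a_1 * x_2, b_3 * x_2, c_4}
   the pair (x_2, MULT_EXPR) occurs twice, so the two candidate chains
   are rewritten into t_5 = a_1 + b_3 and t_6 = t_5 * x_2, and OPS
   becomes {t_6, 0, c_4}; the zero entry is cleaned up later by the
   usual constant elimination.  */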

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */

static bool
eliminate_redundant_comparison (enum tree_code opcode,
                                vec<operand_entry_t> *ops,
                                unsigned int currindex,
                                operand_entry_t curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple def1, def2;
  int i;
  operand_entry_t oe;

  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
        continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
        continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
        continue;

      /* If we got here, we have a match.  See if we can combine the
         two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
        t = maybe_fold_or_comparisons (lcode, op1, op2,
                                       rcode, gimple_assign_rhs1 (def2),
                                       gimple_assign_rhs2 (def2));
      else
        t = maybe_fold_and_comparisons (lcode, op1, op2,
                                        rcode, gimple_assign_rhs1 (def2),
                                        gimple_assign_rhs2 (def2));
      if (!t)
        continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
         always give us a boolean_type_node value back.  If the original
         BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
         we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
        t = fold_convert (TREE_TYPE (curr->op), t);

      if (TREE_CODE (t) != INTEGER_CST
          && !operand_equal_p (t, curr->op, 0))
        {
          enum tree_code subcode;
          tree newop1, newop2;
          if (!COMPARISON_CLASS_P (t))
            continue;
          extract_ops_from_tree (t, &subcode, &newop1, &newop2);
          STRIP_USELESS_TYPE_CONVERSION (newop1);
          STRIP_USELESS_TYPE_CONVERSION (newop2);
          if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
            continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Equivalence: ");
          print_generic_expr (dump_file, curr->op, 0);
          fprintf (dump_file, " %s ", op_symbol_code (opcode));
          print_generic_expr (dump_file, oe->op, 0);
          fprintf (dump_file, " -> ");
          print_generic_expr (dump_file, t, 0);
          fprintf (dump_file, "\n");
        }

      /* Now we can delete oe, as it has been subsumed by the new combined
         expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated ++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
         replace curr->op with t.  Special case is if we got a constant
         back, in which case we add it to the end instead of in place of
         the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
        {
          ops->ordered_remove (currindex);
          add_to_ops_vec (ops, t);
        }
      else if (!operand_equal_p (t, curr->op, 0))
        {
          gimple sum;
          enum tree_code subcode;
          tree newop1;
          tree newop2;
          gcc_assert (COMPARISON_CLASS_P (t));
          extract_ops_from_tree (t, &subcode, &newop1, &newop2);
          STRIP_USELESS_TYPE_CONVERSION (newop1);
          STRIP_USELESS_TYPE_CONVERSION (newop2);
          gcc_checking_assert (is_gimple_val (newop1)
                               && is_gimple_val (newop2));
          sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
          curr->op = gimple_get_lhs (sum);
        }
      return true;
    }

  return false;
}
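
/* E.g. for a BIT_AND_EXPR list containing a_1 < b_2 and a_1 > b_2, the
   pair folds to the constant 0, which takes the INTEGER_CST path above
   and is appended at the end of OPS; for BIT_IOR_EXPR, a_1 < b_2 and
   a_1 == b_2 combine into a new a_1 <= b_2 statement built by
   build_and_add_sum.  */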

/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */

static void
optimize_ops_list (enum tree_code opcode,
                   vec<operand_entry_t> *ops)
{
  unsigned int length = ops->length ();
  unsigned int i;
  operand_entry_t oe;
  operand_entry_t oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = ops->last ();

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry_t oelm1 = (*ops)[length - 2];

      if (oelm1->rank == 0
          && is_gimple_min_invariant (oelm1->op)
          && useless_type_conversion_p (TREE_TYPE (oelm1->op),
                                        TREE_TYPE (oelast->op)))
        {
          tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
                                     oelm1->op, oelast->op);

          if (folded && is_gimple_min_invariant (folded))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Merging constants\n");

              ops->pop ();
              ops->pop ();

              add_to_ops_vec (ops, folded);
              reassociate_stats.constants_eliminated++;

              optimize_ops_list (opcode, ops);
              return;
            }
        }
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  for (i = 0; ops->iterate (i, &oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
        return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
          || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
          || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
        {
          if (done)
            return;
          iterate = true;
          oelast = NULL;
          continue;
        }
      oelast = oe;
      i++;
    }

  length = ops->length ();
  oelast = ops->last ();

  if (iterate)
    optimize_ops_list (opcode, ops);
}
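
/* For instance, a PLUS_EXPR list sorted as {a_1, 3, 5} has its two
   trailing constants folded by fold_binary into 8, giving {a_1, 8};
   the function then recurses in case the shorter list exposes further
   opportunities.  */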

/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, both
        X == 2 || X == 5 || X == 3 || X == 4
   and
        X >= 2 && X <= 5
   are converted to
        (unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.c,
   this implementation is for GIMPLE.  */

struct range_entry
{
  tree exp;
  tree low;
  tree high;
  bool in_p;
  bool strict_overflow_p;
  unsigned int idx, next;
};

/* This is similar to make_range in fold-const.c, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.  */

static void
init_range_entry (struct range_entry *r, tree exp, gimple stmt)
{
  int in_p;
  tree low, high;
  bool is_bool, strict_overflow_p;

  r->exp = NULL_TREE;
  r->in_p = false;
  r->strict_overflow_p = false;
  r->low = NULL_TREE;
  r->high = NULL_TREE;
  if (exp != NULL_TREE
      && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
    return;

  /* Start with simply saying "EXP != 0" and then look at the code of EXP
     and see if we can refine the range.  Some of the cases below may not
     happen, but it doesn't seem worth worrying about this.  We "continue"
     the outer loop when we've changed something; otherwise we "break"
     the switch, which will "break" the while.  */
  low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
  high = low;
  in_p = 0;
  strict_overflow_p = false;
  is_bool = false;
  if (exp == NULL_TREE)
    is_bool = true;
  else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
    {
      if (TYPE_UNSIGNED (TREE_TYPE (exp)))
        is_bool = true;
      else
        return;
    }
  else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
    is_bool = true;

  while (1)
    {
      enum tree_code code;
      tree arg0, arg1, exp_type;
      tree nexp;
      location_t loc;

      if (exp != NULL_TREE)
        {
          if (TREE_CODE (exp) != SSA_NAME
              || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
            break;

          stmt = SSA_NAME_DEF_STMT (exp);
          if (!is_gimple_assign (stmt))
            break;

          code = gimple_assign_rhs_code (stmt);
          arg0 = gimple_assign_rhs1 (stmt);
          arg1 = gimple_assign_rhs2 (stmt);
          exp_type = TREE_TYPE (exp);
        }
      else
        {
          code = gimple_cond_code (stmt);
          arg0 = gimple_cond_lhs (stmt);
          arg1 = gimple_cond_rhs (stmt);
          exp_type = boolean_type_node;
        }

      if (TREE_CODE (arg0) != SSA_NAME)
        break;
      loc = gimple_location (stmt);
      switch (code)
        {
        case BIT_NOT_EXPR:
          if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
              /* Ensure the range is either +[-,0], +[0,0],
                 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
                 -[1,1].  If it is e.g. +[-,-] or -[-,-]
                 or similar expression of unconditional true or
                 false, it should not be negated.  */
              && ((high && integer_zerop (high))
                  || (low && integer_onep (low))))
            {
              in_p = !in_p;
              exp = arg0;
              continue;
            }
          break;
        case SSA_NAME:
          exp = arg0;
          continue;
        CASE_CONVERT:
          if (is_bool)
            goto do_default;
          if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
            {
              if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
                is_bool = true;
              else
                return;
            }
          else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
            is_bool = true;
          goto do_default;
        case EQ_EXPR:
        case NE_EXPR:
        case LT_EXPR:
        case LE_EXPR:
        case GE_EXPR:
        case GT_EXPR:
          is_bool = true;
          /* FALLTHRU */
        default:
          if (!is_bool)
            return;
        do_default:
          nexp = make_range_step (loc, code, arg0, arg1, exp_type,
                                  &low, &high, &in_p,
                                  &strict_overflow_p);
          if (nexp != NULL_TREE)
            {
              exp = nexp;
              gcc_assert (TREE_CODE (exp) == SSA_NAME);
              continue;
            }
          break;
        }
      break;
    }
  if (is_bool)
    {
      r->exp = exp;
      r->in_p = in_p;
      r->low = low;
      r->high = high;
      r->strict_overflow_p = strict_overflow_p;
    }
}
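
/* E.g. for a GIMPLE_COND "if (x_3 > 10)" (EXP == NULL), the loop above
   ends with r->exp == x_3 and the range in +[11, -] form: r->low == 11,
   r->high == NULL_TREE (meaning maximum), r->in_p set.  A plain boolean
   EXP starts out, per the comment above, as "EXP != 0", i.e. -[0, 0].  */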
/* Comparison function for qsort.  Sort entries
   without SSA_NAME exp first, then with SSA_NAMEs sorted
   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
   by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   maximum.  */

static int
range_entry_cmp (const void *a, const void *b)
{
  const struct range_entry *p = (const struct range_entry *) a;
  const struct range_entry *q = (const struct range_entry *) b;

  if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
    {
      if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
        {
          /* Group range_entries for the same SSA_NAME together.  */
          if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
            return -1;
          else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
            return 1;
          /* If ->low is different, NULL low goes first, then by
             ascending low.  */
          if (p->low != NULL_TREE)
            {
              if (q->low != NULL_TREE)
                {
                  tree tem = fold_binary (LT_EXPR, boolean_type_node,
                                          p->low, q->low);
                  if (tem && integer_onep (tem))
                    return -1;
                  tem = fold_binary (GT_EXPR, boolean_type_node,
                                     p->low, q->low);
                  if (tem && integer_onep (tem))
                    return 1;
                }
              else
                return 1;
            }
          else if (q->low != NULL_TREE)
            return -1;
          /* If ->high is different, NULL high goes last, before that by
             ascending high.  */
          if (p->high != NULL_TREE)
            {
              if (q->high != NULL_TREE)
                {
                  tree tem = fold_binary (LT_EXPR, boolean_type_node,
                                          p->high, q->high);
                  if (tem && integer_onep (tem))
                    return -1;
                  tem = fold_binary (GT_EXPR, boolean_type_node,
                                     p->high, q->high);
                  if (tem && integer_onep (tem))
                    return 1;
                }
              else
                return -1;
            }
          else if (q->high != NULL_TREE)
            return 1;
          /* If both ranges are the same, sort below by ascending idx.  */
        }
      else
        return 1;
    }
  else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
    return -1;

  if (p->idx < q->idx)
    return -1;
  else
    {
      gcc_checking_assert (p->idx > q->idx);
      return 1;
    }
}
/* Helper routine of optimize_range_test.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
   is NULL, OTHERRANGEP should not be, and then OTHERRANGEP points to
   an array of COUNT pointers to other ranges.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */

static bool
update_range_test (struct range_entry *range, struct range_entry *otherrange,
                   struct range_entry **otherrangep,
                   unsigned int count, enum tree_code opcode,
                   vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
                   bool in_p, tree low, tree high, bool strict_overflow_p)
{
  operand_entry_t oe = (*ops)[range->idx];
  tree op = oe->op;
  gimple stmt = op ? SSA_NAME_DEF_STMT (op)
                   : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
  location_t loc = gimple_location (stmt);
  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
  tree tem = build_range_check (loc, optype, unshare_expr (exp),
                                in_p, low, high);
  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
  gimple_stmt_iterator gsi;
  unsigned int i, uid;

  if (tem == NULL_TREE)
    return false;

  /* If op is a default def SSA_NAME, there is no place to insert the
     new comparison.  Give up, unless we can use OP itself as the
     range test.  */
  if (op && SSA_NAME_IS_DEFAULT_DEF (op))
    {
      if (op == range->exp
          && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
              || TREE_CODE (optype) == BOOLEAN_TYPE)
          && (op == tem
              || (TREE_CODE (tem) == EQ_EXPR
                  && TREE_OPERAND (tem, 0) == op
                  && integer_onep (TREE_OPERAND (tem, 1))))
          && opcode != BIT_IOR_EXPR
          && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
        {
          stmt = NULL;
          tem = op;
        }
      else
        return false;
    }

  if (strict_overflow_p && issue_strict_overflow_warning (wc))
    warning_at (loc, OPT_Wstrict_overflow,
                "assuming signed overflow does not occur "
                "when simplifying range test");

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct range_entry *r;
      fprintf (dump_file, "Optimizing range tests ");
      print_generic_expr (dump_file, range->exp, 0);
      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
      print_generic_expr (dump_file, range->low, 0);
      fprintf (dump_file, ", ");
      print_generic_expr (dump_file, range->high, 0);
      fprintf (dump_file, "]");
      for (i = 0; i < count; i++)
        {
          if (otherrange)
            r = otherrange + i;
          else
            r = otherrangep[i];
          fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
          print_generic_expr (dump_file, r->low, 0);
          fprintf (dump_file, ", ");
          print_generic_expr (dump_file, r->high, 0);
          fprintf (dump_file, "]");
        }
      fprintf (dump_file, "\n into ");
      print_generic_expr (dump_file, tem, 0);
      fprintf (dump_file, "\n");
    }

  if (opcode == BIT_IOR_EXPR
      || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
    tem = invert_truthvalue_loc (loc, tem);

  tem = fold_convert_loc (loc, optype, tem);
  if (stmt)
    {
      gsi = gsi_for_stmt (stmt);
      uid = gimple_uid (stmt);
    }
  else
    {
      gsi = gsi_none ();
      uid = 0;
    }
  if (stmt == NULL)
    gcc_checking_assert (tem == op);
  /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather than before
     it.  If stmt is a PHI, insert it at the start of the basic block.  */
  else if (op != range->exp)
    {
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
                                      GSI_SAME_STMT);
      gsi_prev (&gsi);
    }
  else if (gimple_code (stmt) != GIMPLE_PHI)
    {
      gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
                                      GSI_CONTINUE_LINKING);
    }
  else
    {
      gsi = gsi_after_labels (gimple_bb (stmt));
      if (!gsi_end_p (gsi))
        uid = gimple_uid (gsi_stmt (gsi));
      else
        {
          gsi = gsi_start_bb (gimple_bb (stmt));
          uid = 1;
          while (!gsi_end_p (gsi))
            {
              uid = gimple_uid (gsi_stmt (gsi));
              gsi_next (&gsi);
            }
        }
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
                                      GSI_SAME_STMT);
      if (gsi_end_p (gsi))
        gsi = gsi_last_bb (gimple_bb (stmt));
      else
        gsi_prev (&gsi);
    }
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    if (gimple_uid (gsi_stmt (gsi)))
      break;
    else
      gimple_set_uid (gsi_stmt (gsi), uid);

  oe->op = tem;
  range->exp = exp;
  range->low = low;
  range->high = high;
  range->in_p = in_p;
  range->strict_overflow_p = false;

  for (i = 0; i < count; i++)
    {
      if (otherrange)
        range = otherrange + i;
      else
        range = otherrangep[i];
      oe = (*ops)[range->idx];
      /* Now change all the other range test immediate uses, so that
         those tests will be optimized away.  */
      if (opcode == ERROR_MARK)
        {
          if (oe->op)
            oe->op = build_int_cst (TREE_TYPE (oe->op),
                                    oe->rank == BIT_IOR_EXPR ? 0 : 1);
          else
            oe->op = (oe->rank == BIT_IOR_EXPR
                      ? boolean_false_node : boolean_true_node);
        }
      else
        oe->op = error_mark_node;
      range->exp = NULL_TREE;
    }
  return true;
}
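/* E.g. when x_1 >= 2 and x_1 <= 5 have been merged into x_1 +[2, 5],
   the entry for the first test gets its oe->op replaced by the SSA_NAME
   computing (unsigned) (x_1 - 2) <= 3, while each other merged entry gets
   oe->op set to error_mark_node, or, in the inter-bb case (OPCODE ==
   ERROR_MARK), to the neutral constant of its operation, so that it later
   drops out of the operand list.  */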
/* Optimize X == CST1 || X == CST2
   if popcount (CST1 ^ CST2) == 1 into
   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
   Similarly for ranges.  E.g.
   X != 2 && X != 3 && X != 10 && X != 11
   will be transformed by the previous optimization into
   !((X - 2U) <= 1U || (X - 10U) <= 1U)
   and this loop can transform that into
   !(((X & ~8) - 2U) <= 1U).  */

static bool
optimize_range_tests_xor (enum tree_code opcode, tree type,
                          tree lowi, tree lowj, tree highi, tree highj,
                          vec<operand_entry_t> *ops,
                          struct range_entry *rangei,
                          struct range_entry *rangej)
{
  tree lowxor, highxor, tem, exp;
  /* Check lowi ^ lowj == highi ^ highj and
     popcount (lowi ^ lowj) == 1.  */
  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
    return false;
  if (!integer_pow2p (lowxor))
    return false;
  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
  if (!tree_int_cst_equal (lowxor, highxor))
    return false;

  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
  exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
                         NULL, rangei->in_p, lowj, highj,
                         rangei->strict_overflow_p
                         || rangej->strict_overflow_p))
    return true;
  return false;
}
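/* E.g. for x_1 == 5 || x_1 == 13 the ranges are [5, 5] and [13, 13];
   5 ^ 13 == 8 has popcount 1, so the pair is replaced by the single
   test (x_1 & ~8) == 5.  */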
/* Optimize X == CST1 || X == CST2
   if popcount (CST2 - CST1) == 1 into
   ((X - CST1) & ~(CST2 - CST1)) == 0.
   Similarly for ranges.  E.g.
   X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
   || X == 75 || X == 45
   will be transformed by the previous optimization into
   (X - 43U) <= 3U || (X - 75U) <= 3U
   and this loop can transform that into
   ((X - 43U) & ~(75U - 43U)) <= 3U.  */

static bool
optimize_range_tests_diff (enum tree_code opcode, tree type,
                           tree lowi, tree lowj, tree highi, tree highj,
                           vec<operand_entry_t> *ops,
                           struct range_entry *rangei,
                           struct range_entry *rangej)
{
  tree tem1, tem2, mask;
  /* Check highi - lowi == highj - lowj.  */
  tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
  if (!tree_int_cst_equal (tem1, tem2))
    return false;
  /* Check popcount (lowj - lowi) == 1.  */
  tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  if (!integer_pow2p (tem1))
    return false;

  type = unsigned_type_for (type);
  tem1 = fold_convert (type, tem1);
  tem2 = fold_convert (type, tem2);
  lowi = fold_convert (type, lowi);
  mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
  tem1 = fold_binary (MINUS_EXPR, type,
                      fold_convert (type, rangei->exp), lowi);
  tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
  lowj = build_int_cst (type, 0);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
                         NULL, rangei->in_p, lowj, tem2,
                         rangei->strict_overflow_p
                         || rangej->strict_overflow_p))
    return true;
  return false;
}
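/* E.g. for x_1 == 5 || x_1 == 13 again: highi - lowi == highj - lowj == 0
   and lowj - lowi == 8 has popcount 1, so the pair becomes
   (((unsigned) x_1 - 5) & ~8) == 0.  */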
/* Perform the checks common to optimize_range_tests_xor and
   optimize_range_tests_diff on each candidate pair of ranges.
   If OPTIMIZE_XOR is true, call optimize_range_tests_xor;
   otherwise call optimize_range_tests_diff.  */

static bool
optimize_range_tests_1 (enum tree_code opcode, int first, int length,
                        bool optimize_xor, vec<operand_entry_t> *ops,
                        struct range_entry *ranges)
{
  int i, j;
  bool any_changes = false;
  for (i = first; i < length; i++)
    {
      tree lowi, highi, lowj, highj, type, tem;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
        continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
        continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
        lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
        continue;
      for (j = i + 1; j < length && j < i + 64; j++)
        {
          bool changes;
          if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
            continue;
          lowj = ranges[j].low;
          if (lowj == NULL_TREE)
            continue;
          highj = ranges[j].high;
          if (highj == NULL_TREE)
            highj = TYPE_MAX_VALUE (type);
          /* Check lowj > highi.  */
          tem = fold_binary (GT_EXPR, boolean_type_node,
                             lowj, highi);
          if (tem == NULL_TREE || !integer_onep (tem))
            continue;
          if (optimize_xor)
            changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
                                                highi, highj, ops,
                                                ranges + i, ranges + j);
          else
            changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
                                                 highi, highj, ops,
                                                 ranges + i, ranges + j);
          if (changes)
            {
              any_changes = true;
              break;
            }
        }
    }
  return any_changes;
}
/* Helper function of optimize_range_tests_to_bit_test.  Handle a single
   range, EXP, LOW, HIGH, compute bit mask of bits to test and return
   EXP on success, NULL otherwise.  */

static tree
extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
                       wide_int *mask, tree *totallowp)
{
  tree tem = int_const_binop (MINUS_EXPR, high, low);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || tree_int_cst_sgn (tem) == -1
      || compare_tree_int (tem, prec) != -1)
    return NULL_TREE;

  unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
  *mask = wi::shifted_mask (0, max, false, prec);
  if (TREE_CODE (exp) == BIT_AND_EXPR
      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
    {
      widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
      msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
      if (wi::popcount (msk) == 1
          && wi::ltu_p (msk, prec - max))
        {
          *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
          max += msk.to_uhwi ();
          exp = TREE_OPERAND (exp, 0);
          if (integer_zerop (low)
              && TREE_CODE (exp) == PLUS_EXPR
              && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
            {
              tree ret = TREE_OPERAND (exp, 0);
              STRIP_NOPS (ret);
              widest_int bias
                = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
                                     TYPE_PRECISION (TREE_TYPE (low))));
              tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
              if (totallowp)
                {
                  *totallowp = tbias;
                  return ret;
                }
              else if (!tree_int_cst_lt (totallow, tbias))
                return NULL_TREE;
              bias = wi::to_widest (tbias);
              bias -= wi::to_widest (totallow);
              if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
                {
                  *mask = wi::lshift (*mask, bias);
                  return ret;
                }
            }
        }
    }
  if (totallowp)
    return exp;
  if (!tree_int_cst_lt (totallow, low))
    return exp;
  tem = int_const_binop (MINUS_EXPR, low, totallow);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || compare_tree_int (tem, prec - max) == 1)
    return NULL_TREE;

  *mask = wi::lshift (*mask, wi::to_widest (tem));
  return exp;
}
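/* E.g. for the single range [43, 46] with prec == 64 this computes
   max = 46 - 43 + 1 == 4 and *mask == 0xf (the low max bits); when
   TOTALLOWP is NULL the mask is additionally shifted left by
   LOW - TOTALLOW, so that all masks are relative to one common base.  */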
/* Attempt to optimize small range tests using bit test.
   E.g.
   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
   has been optimized by the earlier optimizations into:
   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
   As the whole 43 through 82 range covers fewer than 64 values,
   for 64-bit word targets optimize that into:
   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */

static bool
optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
                                  vec<operand_entry_t> *ops,
                                  struct range_entry *ranges)
{
  int i, j;
  bool any_changes = false;
  int prec = GET_MODE_BITSIZE (word_mode);
  auto_vec<struct range_entry *, 64> candidates;

  for (i = first; i < length - 2; i++)
    {
      tree lowi, highi, lowj, highj, type;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
        continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
        continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
        lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
        continue;
      wide_int mask;
      tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
                                        highi, &mask, &lowi);
      if (exp == NULL_TREE)
        continue;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      candidates.truncate (0);
      int end = MIN (i + 64, length);
      for (j = i + 1; j < end; j++)
        {
          tree exp2;
          if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
            continue;
          if (ranges[j].exp == exp)
            ;
          else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
            {
              exp2 = TREE_OPERAND (ranges[j].exp, 0);
              if (exp2 == exp)
                ;
              else if (TREE_CODE (exp2) == PLUS_EXPR)
                {
                  exp2 = TREE_OPERAND (exp2, 0);
                  STRIP_NOPS (exp2);
                  if (exp2 != exp)
                    continue;
                }
              else
                continue;
            }
          else
            continue;
          lowj = ranges[j].low;
          if (lowj == NULL_TREE)
            continue;
          highj = ranges[j].high;
          if (highj == NULL_TREE)
            highj = TYPE_MAX_VALUE (type);
          wide_int mask2;
          exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
                                        highj, &mask2, NULL);
          if (exp2 != exp)
            continue;
          mask |= mask2;
          strict_overflow_p |= ranges[j].strict_overflow_p;
          candidates.safe_push (&ranges[j]);
        }

      /* If we would otherwise need 3 or more comparisons, use a bit test.  */
      if (candidates.length () >= 2)
        {
          tree high = wide_int_to_tree (TREE_TYPE (lowi),
                                        wi::to_widest (lowi)
                                        + prec - 1 - wi::clz (mask));
          operand_entry_t oe = (*ops)[ranges[i].idx];
          tree op = oe->op;
          gimple stmt = op ? SSA_NAME_DEF_STMT (op)
                           : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
          location_t loc = gimple_location (stmt);
          tree optype = op ? TREE_TYPE (op) : boolean_type_node;

          /* See if it isn't cheaper to pretend the minimum value of the
             range is 0, if the maximum value is small enough.
             We can then avoid the subtraction of the minimum value, but
             the mask constant could perhaps be more expensive.  */
          if (compare_tree_int (lowi, 0) > 0
              && compare_tree_int (high, prec) < 0)
            {
              int cost_diff;
              HOST_WIDE_INT m = tree_to_uhwi (lowi);
              rtx reg = gen_raw_REG (word_mode, 10000);
              bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
              cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
                                                      GEN_INT (-m)), speed_p);
              rtx r = immed_wide_int_const (mask, word_mode);
              cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
                                         speed_p);
              r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
              cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
                                         speed_p);
              if (cost_diff > 0)
                {
                  mask = wi::lshift (mask, m);
                  lowi = build_zero_cst (TREE_TYPE (lowi));
                }
            }

          tree tem = build_range_check (loc, optype, unshare_expr (exp),
                                        false, lowi, high);
          if (tem == NULL_TREE || is_gimple_val (tem))
            continue;
          tree etype = unsigned_type_for (TREE_TYPE (exp));
          exp = fold_build2_loc (loc, MINUS_EXPR, etype,
                                 fold_convert_loc (loc, etype, exp),
                                 fold_convert_loc (loc, etype, lowi));
          exp = fold_convert_loc (loc, integer_type_node, exp);
          tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
          exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
                                 build_int_cst (word_type, 1), exp);
          exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
                                 wide_int_to_tree (word_type, mask));
          exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
                                 build_zero_cst (word_type));
          if (is_gimple_val (exp))
            continue;

          /* The shift might have undefined behavior if TEM is true,
             but reassociate_bb isn't prepared to have basic blocks
             split when it is running.  So, temporarily emit a code
             with BIT_IOR_EXPR instead of &&, and fix it up in
             branch_fixup.  */
          gimple_seq seq;
          tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
          gcc_assert (TREE_CODE (tem) == SSA_NAME);
          gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
          gimple_seq seq2;
          exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
          gimple_seq_add_seq_without_update (&seq, seq2);
          gcc_assert (TREE_CODE (exp) == SSA_NAME);
          gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
          gimple g = gimple_build_assign (make_ssa_name (optype),
                                          BIT_IOR_EXPR, tem, exp);
          gimple_set_location (g, loc);
          gimple_seq_add_stmt_without_update (&seq, g);
          exp = gimple_assign_lhs (g);
          tree val = build_zero_cst (optype);
          if (update_range_test (&ranges[i], NULL, candidates.address (),
                                 candidates.length (), opcode, ops, exp,
                                 seq, false, val, val, strict_overflow_p))
            {
              any_changes = true;
              reassoc_branch_fixups.safe_push (tem);
            }
          else
            gimple_seq_discard (seq);
        }
    }
  return any_changes;
}
/* Optimize range tests, similarly to how fold_range_test optimizes
   them on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.  */

static bool
optimize_range_tests (enum tree_code opcode,
                      vec<operand_entry_t> *ops)
{
  unsigned int length = ops->length (), i, j, first;
  operand_entry_t oe;
  struct range_entry *ranges;
  bool any_changes = false;

  if (length == 1)
    return false;

  ranges = XNEWVEC (struct range_entry, length);
  for (i = 0; i < length; i++)
    {
      oe = (*ops)[i];
      ranges[i].idx = i;
      init_range_entry (ranges + i, oe->op,
                        oe->op ? NULL
                               : last_stmt (BASIC_BLOCK_FOR_FN (cfun,
                                                                oe->id)));
      /* For | invert it now, we will invert it again before emitting
         the optimized expression.  */
      if (opcode == BIT_IOR_EXPR
          || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
        ranges[i].in_p = !ranges[i].in_p;
    }

  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
  for (i = 0; i < length; i++)
    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
      break;

  /* Try to merge ranges.  */
  for (first = i; i < length; i++)
    {
      tree low = ranges[i].low;
      tree high = ranges[i].high;
      int in_p = ranges[i].in_p;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      int update_fail_count = 0;

      for (j = i + 1; j < length; j++)
        {
          if (ranges[i].exp != ranges[j].exp)
            break;
          if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
                             ranges[j].in_p, ranges[j].low, ranges[j].high))
            break;
          strict_overflow_p |= ranges[j].strict_overflow_p;
        }

      if (j == i + 1)
        continue;

      if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
                             opcode, ops, ranges[i].exp, NULL, in_p,
                             low, high, strict_overflow_p))
        {
          i = j - 1;
          any_changes = true;
        }
      /* Avoid quadratic complexity if all merge_ranges calls would succeed,
         while update_range_test would fail.  */
      else if (update_fail_count == 64)
        i = j - 1;
      else
        ++update_fail_count;
    }

  any_changes |= optimize_range_tests_1 (opcode, first, length, true,
                                         ops, ranges);

  if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
    any_changes |= optimize_range_tests_1 (opcode, first, length, false,
                                           ops, ranges);
  if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
    any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
                                                     ops, ranges);

  if (any_changes && opcode != ERROR_MARK)
    {
      j = 0;
      FOR_EACH_VEC_ELT (*ops, i, oe)
        {
          if (oe->op == error_mark_node)
            continue;
          else if (i != j)
            (*ops)[j] = oe;
          j++;
        }
      ops->truncate (j);
    }

  XDELETEVEC (ranges);
  return any_changes;
}
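/* E.g. for x_1 == 2 || x_1 == 5 || x_1 == 3 || x_1 == 4 the sorted
   ranges [2, 2], [3, 3], [4, 4] and [5, 5] are combined by merge_ranges
   into [2, 5], and update_range_test replaces the four operands with a
   single one computing (unsigned) (x_1 - 2) <= 3.  */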
/* Return true if STMT is a cast like:
   <bb N>:
   ...
   _123 = (int) _234;

   <bb M>:
   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has a single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */

static bool
final_range_test_p (gimple stmt)
{
  basic_block bb, rhs_bb;
  edge e;
  tree lhs, rhs;
  use_operand_p use_p;
  gimple use_stmt;

  if (!gimple_assign_cast_p (stmt))
    return false;
  bb = gimple_bb (stmt);
  if (!single_succ_p (bb))
    return false;
  e = single_succ_edge (bb);
  if (e->flags & EDGE_COMPLEX)
    return false;

  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
      || TREE_CODE (rhs) != SSA_NAME
      || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
    return false;

  /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return false;

  if (gimple_code (use_stmt) != GIMPLE_PHI
      || gimple_bb (use_stmt) != e->dest)
    return false;

  /* And that the rhs is defined in the same loop.  */
  rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
  if (rhs_bb == NULL
      || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
    return false;

  return true;
}
/* Return true if BB is a suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
   or compared against to find the common basic block to which all the
   conditions branch if true resp. false.  If BACKWARD is false, TEST_BB
   should be the only predecessor of BB.  */

static bool
suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
                  bool backward)
{
  edge_iterator ei, ei2;
  edge e, e2;
  gimple stmt;
  gphi_iterator gsi;
  bool other_edge_seen = false;
  bool is_cond;

  if (test_bb == bb)
    return false;
  /* Check last stmt first.  */
  stmt = last_stmt (bb);
  if (stmt == NULL
      || (gimple_code (stmt) != GIMPLE_COND
          && (backward || !final_range_test_p (stmt)))
      || gimple_visited_p (stmt)
      || stmt_could_throw_p (stmt)
      || *other_bb == bb)
    return false;
  is_cond = gimple_code (stmt) == GIMPLE_COND;
  if (is_cond)
    {
      /* If last stmt is GIMPLE_COND, verify that one of the succ edges
         goes to the next bb (if BACKWARD, it is TEST_BB), and the other
         to *OTHER_BB (if not set yet, try to find it out).  */
      if (EDGE_COUNT (bb->succs) != 2)
        return false;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
            return false;
          if (e->dest == test_bb)
            {
              if (backward)
                continue;
              else
                return false;
            }
          if (e->dest == bb)
            return false;
          if (*other_bb == NULL)
            {
              FOR_EACH_EDGE (e2, ei2, test_bb->succs)
                if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
                  return false;
                else if (e->dest == e2->dest)
                  *other_bb = e->dest;
              if (*other_bb == NULL)
                return false;
            }
          if (e->dest == *other_bb)
            other_edge_seen = true;
          else if (backward)
            return false;
        }
      if (*other_bb == NULL || !other_edge_seen)
        return false;
    }
  else if (single_succ (bb) != *other_bb)
    return false;

  /* Now check all PHIs of *OTHER_BB.  */
  e = find_edge (bb, *other_bb);
  e2 = find_edge (test_bb, *other_bb);
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
         corresponding to BB and TEST_BB predecessor must be the same.  */
      if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
                            gimple_phi_arg_def (phi, e2->dest_idx), 0))
        {
          /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
             one of the PHIs should have the lhs of the last stmt in
             that block as PHI arg and that PHI should have 0 or 1
             corresponding to it in all other range test basic blocks
             considered.  */
          if (!is_cond)
            {
              if (gimple_phi_arg_def (phi, e->dest_idx)
                  == gimple_assign_lhs (stmt)
                  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
                      || integer_onep (gimple_phi_arg_def (phi,
                                                           e2->dest_idx))))
                continue;
            }
          else
            {
              gimple test_last = last_stmt (test_bb);
              if (gimple_code (test_last) != GIMPLE_COND
                  && gimple_phi_arg_def (phi, e2->dest_idx)
                     == gimple_assign_lhs (test_last)
                  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
                      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
                continue;
            }

          return false;
        }
    }
  return true;
}

/* Return true if BB doesn't have side-effects that would disallow
   range test optimization, all SSA_NAMEs set in the bb are consumed
   in the bb and there are no PHIs.  */

static bool
no_side_effect_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  gimple last;

  if (!gimple_seq_empty_p (phi_nodes (bb)))
    return false;
  last = last_stmt (bb);
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      tree lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
        continue;
      if (gimple_has_side_effects (stmt))
        return false;
      if (stmt == last)
        return true;
      if (!is_gimple_assign (stmt))
        return false;
      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
        return false;
      if (gimple_assign_rhs_could_trap_p (stmt))
        return false;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          gimple use_stmt = USE_STMT (use_p);
          if (is_gimple_debug (use_stmt))
            continue;
          if (gimple_bb (use_stmt) != bb)
            return false;
        }
    }
  return false;
}
/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
   return true and fill in *OPS recursively.  */

static bool
get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
         struct loop *loop)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  tree rhs[2];
  int i;

  if (!is_reassociable_op (stmt, code, loop))
    return false;

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  gimple_set_visited (stmt, true);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME
        && !get_ops (rhs[i], code, ops, loop)
        && has_single_use (rhs[i]))
      {
        operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

        oe->op = rhs[i];
        oe->rank = code;
        oe->id = 0;
        oe->count = 1;
        ops->safe_push (oe);
      }
  return true;
}
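/* E.g. given
     _5 = _1 | _2;
     _6 = _3 | _5;
   (with every name having a single use and _1, _2 and _3 not themselves
   defined by reassociable BIT_IOR_EXPRs), get_ops (_6, BIT_IOR_EXPR, ...)
   pushes the three leaf operands _3, _1 and _2 into *OPS.  */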
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if so, create new
   stmts.  */

static tree
update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
            unsigned int *pidx, struct loop *loop)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  tree rhs[4];
  int i;

  if (!is_reassociable_op (stmt, code, loop))
    return NULL;

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  rhs[2] = rhs[0];
  rhs[3] = rhs[1];
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME)
      {
        rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
        if (rhs[2 + i] == NULL_TREE)
          {
            if (has_single_use (rhs[i]))
              rhs[2 + i] = ops[(*pidx)++]->op;
            else
              rhs[2 + i] = rhs[i];
          }
      }
  if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
      && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      var = make_ssa_name (TREE_TYPE (var));
      gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
                                        rhs[2], rhs[3]);
      gimple_set_uid (g, gimple_uid (stmt));
      gimple_set_visited (g, true);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
    }
  return var;
}

/* Structure to track the initial value passed to get_ops and
   the range in the ops vector for each basic block.  */

struct inter_bb_range_test_entry
{
  tree op;
  unsigned int first_idx, last_idx;
};

/* Inter-bb range test optimization.  */

static void
maybe_optimize_range_tests (gimple stmt)
{
  basic_block first_bb = gimple_bb (stmt);
  basic_block last_bb = first_bb;
  basic_block other_bb = NULL;
  basic_block bb;
  edge_iterator ei;
  edge e;
  auto_vec<operand_entry_t> ops;
  auto_vec<inter_bb_range_test_entry> bbinfo;
  bool any_changes = false;

  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      if (EDGE_COUNT (first_bb->succs) != 2)
        return;
    }
  else if (final_range_test_p (stmt))
    other_bb = single_succ (first_bb);
  else
    return;

  if (stmt_could_throw_p (stmt))
    return;

  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
  while (single_pred_p (first_bb))
    {
      basic_block pred_bb = single_pred (first_bb);
      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
        break;
      if (!no_side_effect_bb (first_bb))
        break;
      first_bb = pred_bb;
    }
  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
  if (first_bb == last_bb)
    {
      other_bb = NULL;
      /* As non-GIMPLE_COND last stmt always terminates the range,
         if forward search didn't discover anything, just give up.  */
      if (gimple_code (stmt) != GIMPLE_COND)
        return;
      /* Look at both successors.  Either it ends with a GIMPLE_COND
         and satisfies suitable_cond_bb, or ends with a cast and
         other_bb is that cast's successor.  */
      FOR_EACH_EDGE (e, ei, first_bb->succs)
        if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
            || e->dest == first_bb)
          return;
        else if (single_pred_p (e->dest))
          {
            stmt = last_stmt (e->dest);
            if (stmt
                && gimple_code (stmt) == GIMPLE_COND
                && EDGE_COUNT (e->dest->succs) == 2)
              {
                if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
                  break;
                else
                  other_bb = NULL;
              }
            else if (stmt
                     && final_range_test_p (stmt)
                     && find_edge (first_bb, single_succ (e->dest)))
              {
                other_bb = single_succ (e->dest);
                if (other_bb == first_bb)
                  other_bb = NULL;
              }
          }
      if (other_bb == NULL)
        return;
    }
  /* Now do the forward search, moving last_bb to successor bbs
     that aren't other_bb.  */
  while (EDGE_COUNT (last_bb->succs) == 2)
    {
      FOR_EACH_EDGE (e, ei, last_bb->succs)
        if (e->dest != other_bb)
          break;
      if (e == NULL)
        break;
      if (!single_pred_p (e->dest))
        break;
      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
        break;
      if (!no_side_effect_bb (e->dest))
        break;
      last_bb = e->dest;
    }
  if (first_bb == last_bb)
    return;
  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
  for (bb = last_bb; ; bb = single_pred (bb))
    {
      enum tree_code code;
      tree lhs, rhs;
      inter_bb_range_test_entry bb_ent;

      bb_ent.op = NULL_TREE;
      bb_ent.first_idx = ops.length ();
      bb_ent.last_idx = bb_ent.first_idx;
      e = find_edge (bb, other_bb);
      stmt = last_stmt (bb);
      gimple_set_visited (stmt, true);
      if (gimple_code (stmt) != GIMPLE_COND)
        {
          use_operand_p use_p;
          gimple phi;
          edge e2;
          unsigned int d;

          lhs = gimple_assign_lhs (stmt);
          rhs = gimple_assign_rhs1 (stmt);
          gcc_assert (bb == last_bb);

          /* stmt is
             _123 = (int) _234;

             followed by:
             <bb M>:
             # _345 = PHI <_123(N), 1(...), 1(...)>

             or 0 instead of 1.  If it is 0, the _234
             range test is anded together with all the
             other range tests, if it is 1, it is ored with
             them.  */
          single_imm_use (lhs, &use_p, &phi);
          gcc_assert (gimple_code (phi) == GIMPLE_PHI);
          e2 = find_edge (first_bb, other_bb);
          d = e2->dest_idx;
          gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
          if (integer_zerop (gimple_phi_arg_def (phi, d)))
            code = BIT_AND_EXPR;
          else
            {
              gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
              code = BIT_IOR_EXPR;
            }

          /* If the SSA_NAME_DEF_STMT of _234 is
             _234 = _567 | _789;
             (or &, corresponding to 1/0 in the phi arguments),
             push into ops the individual range test arguments
             of the bitwise or resp. and, recursively.  */
          if (!get_ops (rhs, code, &ops,
                        loop_containing_stmt (stmt))
              && has_single_use (rhs))
            {
              /* Otherwise, push the _234 range test itself.  */
              operand_entry_t oe
                = (operand_entry_t) pool_alloc (operand_entry_pool);

              oe->op = rhs;
              oe->rank = code;
              oe->id = 0;
              oe->count = 1;
              ops.safe_push (oe);
              bb_ent.last_idx++;
            }
          else
            bb_ent.last_idx = ops.length ();
          bb_ent.op = rhs;
          bbinfo.safe_push (bb_ent);
          continue;
        }
      /* Otherwise stmt is GIMPLE_COND.  */
      code = gimple_cond_code (stmt);
      lhs = gimple_cond_lhs (stmt);
      rhs = gimple_cond_rhs (stmt);
      if (TREE_CODE (lhs) == SSA_NAME
          && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
          && ((code != EQ_EXPR && code != NE_EXPR)
              || rhs != boolean_false_node
              /* Either push into ops the individual bitwise
                 or resp. and operands, depending on which
                 edge is other_bb.  */
              || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
                                 ^ (code == EQ_EXPR))
                                ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
                           loop_containing_stmt (stmt))))
        {
          /* Or push the GIMPLE_COND stmt itself.  */
          operand_entry_t oe
            = (operand_entry_t) pool_alloc (operand_entry_pool);

          oe->op = NULL;
          oe->rank = (e->flags & EDGE_TRUE_VALUE)
                     ? BIT_IOR_EXPR : BIT_AND_EXPR;
          /* oe->op == NULL signals that there is no SSA_NAME
             for the range test, and oe->id instead is the
             number of the basic block at whose end the
             GIMPLE_COND is.  */
          oe->id = bb->index;
          oe->count = 1;
          ops.safe_push (oe);
          bb_ent.op = NULL;
          bb_ent.last_idx++;
        }
      else if (ops.length () > bb_ent.first_idx)
        {
          bb_ent.op = lhs;
          bb_ent.last_idx = ops.length ();
        }
      bbinfo.safe_push (bb_ent);
      if (bb == first_bb)
        break;
    }
  if (ops.length () > 1)
    any_changes = optimize_range_tests (ERROR_MARK, &ops);
  if (any_changes)
    {
      unsigned int idx, max_idx = 0;
      /* update_ops relies on has_single_use predicates returning the
         same values as it did during get_ops earlier.  Additionally it
         never removes statements, only adds new ones and it should walk
         from the single imm use and check the predicate already before
         making those changes.
         On the other hand, the handling of GIMPLE_COND directly can turn
         previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
         it needs to be done in a separate loop afterwards.  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
        {
          if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
              && bbinfo[idx].op != NULL_TREE)
            {
              tree new_op;

              max_idx = idx;
              stmt = last_stmt (bb);
              new_op = update_ops (bbinfo[idx].op,
                                   (enum tree_code)
                                   ops[bbinfo[idx].first_idx]->rank,
                                   ops, &bbinfo[idx].first_idx,
                                   loop_containing_stmt (stmt));
              if (new_op == NULL_TREE)
                {
                  gcc_assert (bb == last_bb);
                  new_op = ops[bbinfo[idx].first_idx++]->op;
                }
              if (bbinfo[idx].op != new_op)
                {
                  imm_use_iterator iter;
                  use_operand_p use_p;
                  gimple use_stmt, cast_stmt = NULL;

                  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
                    if (is_gimple_debug (use_stmt))
                      continue;
                    else if (gimple_code (use_stmt) == GIMPLE_COND
                             || gimple_code (use_stmt) == GIMPLE_PHI)
                      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
                        SET_USE (use_p, new_op);
                    else if (gimple_assign_cast_p (use_stmt))
                      cast_stmt = use_stmt;
                    else
                      gcc_unreachable ();
                  if (cast_stmt)
                    {
                      gcc_assert (bb == last_bb);
                      tree lhs = gimple_assign_lhs (cast_stmt);
                      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
                      enum tree_code rhs_code
                        = gimple_assign_rhs_code (cast_stmt);
                      gassign *g;
                      if (is_gimple_min_invariant (new_op))
                        {
                          new_op = fold_convert (TREE_TYPE (lhs), new_op);
                          g = gimple_build_assign (new_lhs, new_op);
                        }
                      else
                        g = gimple_build_assign (new_lhs, rhs_code, new_op);
                      gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
                      gimple_set_uid (g, gimple_uid (cast_stmt));
                      gimple_set_visited (g, true);
                      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
                      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
                        if (is_gimple_debug (use_stmt))
                          continue;
                        else if (gimple_code (use_stmt) == GIMPLE_COND
                                 || gimple_code (use_stmt) == GIMPLE_PHI)
                          FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
                            SET_USE (use_p, new_lhs);
                        else
                          gcc_unreachable ();
                    }
                }
            }
          if (bb == first_bb)
            break;
        }
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
        {
          if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
              && bbinfo[idx].op == NULL_TREE
              && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
            {
              gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));

              if (idx > max_idx)
                max_idx = idx;

              if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
                gimple_cond_make_false (cond_stmt);
              else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
                gimple_cond_make_true (cond_stmt);
              else
                {
                  gimple_cond_set_code (cond_stmt, NE_EXPR);
                  gimple_cond_set_lhs (cond_stmt,
                                       ops[bbinfo[idx].first_idx]->op);
                  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
                }
              update_stmt (cond_stmt);
            }
          if (bb == first_bb)
            break;
        }

      /* The above changes could result in basic blocks after the first
         modified one, up to and including last_bb, to be executed even if
         they would not be in the original program.  If the value ranges of
         assignment lhs' in those bbs were dependent on the conditions
         guarding those basic blocks which now can change, the VRs might
         be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
         are only used within the same bb, it should not be a big deal if
         we just reset all the VRs in those bbs.  See PR68671.  */
      for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
        reset_flow_sensitive_info_in_bb (bb);
    }
}

/* Return true if OPERAND is defined by a PHI node which uses the LHS
   of STMT in its operands.  This is also known as a "destructive
   update" operation.  */

static bool
is_phi_for_stmt (gimple stmt, tree operand)
{
  gimple def_stmt;
  gphi *def_phi;
  tree lhs;
  use_operand_p arg_p;
  ssa_op_iter i;

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  lhs = gimple_assign_lhs (stmt);

  def_stmt = SSA_NAME_DEF_STMT (operand);
  def_phi = dyn_cast <gphi *> (def_stmt);
  if (!def_phi)
    return false;

  FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
    if (lhs == USE_FROM_PTR (arg_p))
      return true;
  return false;
}

/* Remove def stmt of VAR if VAR has zero uses and recurse
   on rhs1 operand if so.  */

static void
remove_visited_stmt_chain (tree var)
{
  gimple stmt;
  gimple_stmt_iterator gsi;

  while (1)
    {
      if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
        return;
      stmt = SSA_NAME_DEF_STMT (var);
      if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
        {
          var = gimple_assign_rhs1 (stmt);
          gsi = gsi_for_stmt (stmt);
          reassoc_remove_stmt (&gsi);
          release_defs (stmt);
        }
      else
        return;
    }
}

/* This function checks three consecutive operands in
   the passed operand vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for the binary operation
   consuming the operands at OPINDEX + 1 and OPINDEX + 2.

   We pair ops with the same rank if possible.

   The alternative we try is to see if STMT is a destructive
   update style statement, which is like:
   b = phi (a, ...)
   a = c + b;
   In that case, we want to use the destructive update form to
   expose the possible vectorizer sum reduction opportunity.
   In that case, the third operand will be the phi node.  This
   check is not performed if STMT is null.

   We could, of course, try to be better as noted above, and do a
   lot of work to try to find these opportunities in >3 operand
   cases, but it is unlikely to be worth it.  */

static void
swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
                          unsigned int opindex, gimple stmt)
{
  operand_entry_t oe1, oe2, oe3;

  oe1 = ops[opindex];
  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];

  if ((oe1->rank == oe2->rank
       && oe2->rank != oe3->rank)
      || (stmt && is_phi_for_stmt (stmt, oe3->op)
          && !is_phi_for_stmt (stmt, oe1->op)
          && !is_phi_for_stmt (stmt, oe2->op)))
    {
      struct operand_entry temp = *oe3;
      oe3->op = oe1->op;
      oe3->rank = oe1->rank;
      oe1->op = temp.op;
      oe1->rank = temp.rank;
    }
  else if ((oe1->rank == oe3->rank
            && oe2->rank != oe3->rank)
           || (stmt && is_phi_for_stmt (stmt, oe2->op)
               && !is_phi_for_stmt (stmt, oe1->op)
               && !is_phi_for_stmt (stmt, oe3->op)))
    {
      struct operand_entry temp = *oe2;
      oe2->op = oe1->op;
      oe2->rank = oe1->rank;
      oe1->op = temp.op;
      oe1->rank = temp.rank;
    }
}
/* If the definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  */

static inline gimple
find_insert_point (gimple stmt, tree rhs1, tree rhs2)
{
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
    stmt = SSA_NAME_DEF_STMT (rhs1);
  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
    stmt = SSA_NAME_DEF_STMT (rhs2);
  return stmt;
}

/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.  */

static tree
rewrite_expr_tree (gimple stmt, unsigned int opindex,
                   vec<operand_entry_t> ops, bool changed)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  operand_entry_t oe;

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
    {
      operand_entry_t oe1, oe2;

      oe1 = ops[opindex];
      oe2 = ops[opindex + 1];

      if (rhs1 != oe1->op || rhs2 != oe2->op)
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
          unsigned int uid = gimple_uid (stmt);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Transforming ");
              print_gimple_stmt (dump_file, stmt, 0, 0);
            }

          /* Even when changed is false, reassociation could have e.g. removed
             some redundant operations, so unless we are just swapping the
             arguments or unless there is no change at all (then we just
             return lhs), force creation of a new SSA_NAME.  */
          if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
            {
              gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
              lhs = make_ssa_name (TREE_TYPE (lhs));
              stmt
                = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
                                       oe1->op, oe2->op);
              gimple_set_uid (stmt, uid);
              gimple_set_visited (stmt, true);
              if (insert_point == gsi_stmt (gsi))
                gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
              else
                insert_stmt_after (stmt, insert_point);
            }
          else
            {
              gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
                                   == stmt);
              gimple_assign_set_rhs1 (stmt, oe1->op);
              gimple_assign_set_rhs2 (stmt, oe2->op);
              update_stmt (stmt);
            }

          if (rhs1 != oe1->op && rhs1 != oe2->op)
            remove_visited_stmt_chain (rhs1);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, " into ");
              print_gimple_stmt (dump_file, stmt, 0, 0);
            }
        }
      return lhs;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */
  oe = ops[opindex];

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
                         changed || oe->op != rhs2);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Transforming ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }

      /* If changed is false, this is either opindex == 0
         or all outer rhs2's were equal to corresponding oe->op,
         and powi_result is NULL.
         That means lhs is equivalent before and after reassociation.
         Otherwise ensure the old lhs SSA_NAME is not reused and
         create a new stmt as well, so that any debug stmts will be
         properly adjusted.  */
      if (changed)
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
          unsigned int uid = gimple_uid (stmt);
          gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);

          lhs = make_ssa_name (TREE_TYPE (lhs));
          stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
                                      new_rhs1, oe->op);
          gimple_set_uid (stmt, uid);
          gimple_set_visited (stmt, true);
          if (insert_point == gsi_stmt (gsi))
            gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
          else
            insert_stmt_after (stmt, insert_point);
        }
      else
        {
          gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
                               == stmt);
          gimple_assign_set_rhs1 (stmt, new_rhs1);
          gimple_assign_set_rhs2 (stmt, oe->op);
          update_stmt (stmt);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }
    }
  return lhs;
}
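/* E.g. rewriting the chain
     t1 = x + y;  t2 = t1 + z;
   with OPS sorted as { b, a, c } rebuilds the innermost statement from
   the last two entries and feeds each outer statement the next entry,
   yielding  t1' = a + c;  t2' = t1' + b.  */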
/* Find out how many cycles we need to compute a chain of statements.
   OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is
   the maximum number of independent statements we may execute per
   cycle.  */

static int
get_required_cycles (int ops_num, int cpu_width)
{
  int res;
  int elog;
  unsigned int rest;

  /* While we have more than 2 * cpu_width operands
     we may reduce the number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* The remaining operand count may be halved each cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}
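/* E.g. get_required_cycles (8, 2) == 4: two cycles of the first phase
   reduce the operand count by CPU_WIDTH each (8 -> 6 -> 4), and the
   remaining four operands are then halved twice (4 -> 2 -> 1).  */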
/* Return an optimal reassociation width (and thus number of registers)
   to use for the computation of the given statement chain.  */

static int
get_reassociation_width (int ops_num, enum tree_code opc,
                         machine_mode mode)
{
  int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
  int width;
  int width_min;
  int cycles_best;

  if (param_width > 0)
    width = param_width;
  else
    width = targetm.sched.reassociation_width (opc, mode);

  if (width == 1)
    return width;

  /* Get the minimal time required for sequence computation.  */
  cycles_best = get_required_cycles (ops_num, width);

  /* Check if we may use a smaller width and still compute the sequence
     in the same number of cycles.  That will allow us to reduce register
     usage.  get_required_cycles is monotonically increasing with lower
     width, so we can perform a binary search for the minimal width that
     still results in the optimal cycle count.  */
  width_min = 1;
  while (width > width_min)
    {
      int width_mid = (width + width_min) / 2;

      if (get_required_cycles (ops_num, width_mid) == cycles_best)
        width = width_mid;
      else if (width_min < width_mid)
        width_min = width_mid;
      else
        break;
    }

  return width;
}

/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in
   parallel.  */

static void
rewrite_expr_tree_parallel (gassign *stmt, int width,
                            vec<operand_entry_t> ops)
{
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  int stmt_num = op_num - 1;
  gimple *stmts = XALLOCAVEC (gimple, stmt_num);
  int op_index = op_num - 1;
  int stmt_index = 0;
  int ready_stmts_end = 0;
  int i = 0;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  for (i = 0; i < stmt_num; i++)
    {
      tree op1, op2;

      /* Determine whether we should use results of
         already handled statements or not.  */
      if (ready_stmts_end == 0
          && (i - stmt_index >= width || op_index < 1))
        ready_stmts_end = i;

      /* Now we choose operands for the next statement.  A nonzero
         value of ready_stmts_end here means that we should use
         the result of already generated statements as a new operand.  */
      if (ready_stmts_end > 0)
        {
          op1 = gimple_assign_lhs (stmts[stmt_index++]);
          if (ready_stmts_end > stmt_index)
            op2 = gimple_assign_lhs (stmts[stmt_index++]);
          else if (op_index >= 0)
            op2 = ops[op_index--]->op;
          else
            {
              gcc_assert (stmt_index < i);
              op2 = gimple_assign_lhs (stmts[stmt_index++]);
            }

          if (stmt_index >= ready_stmts_end)
            ready_stmts_end = 0;
        }
      else
        {
          if (op_index > 1)
            swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
          op2 = ops[op_index--]->op;
          op1 = ops[op_index--]->op;
        }

      /* If we emit the last statement then we should put
         operands into the last statement.  It will also
         break the loop.  */
      if (op_index < 0 && stmt_index == i)
        i = stmt_num - 1;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Transforming ");
          print_gimple_stmt (dump_file, stmts[i], 0, 0);
        }

      /* We keep the original statement only for the last one.  All
         others are recreated.  */
      if (i == stmt_num - 1)
        {
          gimple_assign_set_rhs1 (stmts[i], op1);
          gimple_assign_set_rhs2 (stmts[i], op2);
          update_stmt (stmts[i]);
        }
      else
        stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_gimple_stmt (dump_file, stmts[i], 0, 0);
        }
    }

  remove_visited_stmt_chain (last_rhs1);
}
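/* E.g. for a + b + c + d and WIDTH == 2 this emits
     _1 = a + b;  _2 = c + d;  lhs = _1 + _2;
   so that the first two additions can execute in parallel, instead of
   the serial chain ((a + b) + c) + d that rewrite_expr_tree produces.  */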
/* Transform STMT, which is really (A + B) + (C + D), into the left
   linear form, ((A + B) + C) + D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple stmt)
{
  gimple_stmt_iterator gsi;
  gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
              && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
                                gimple_assign_rhs_code (binrhs),
                                gimple_assign_lhs (binlhs),
                                gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (stmt);

  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}

/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */

static gimple
get_single_immediate_use (tree lhs)
{
  use_operand_p immuse;
  gimple immusestmt;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
    return immusestmt;

  return NULL;
}

/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
{
  gimple negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;
  unsigned int uid;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
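  /* Illustrative GIMPLE sketch of the branch below: for TONEGATE a_5
     with a_5 = b_3 + b_4, we emit
       t_6 = -b_3;  t_7 = -b_4;  a_8 = t_6 + t_7;
     just before the old definition and return a_8; the old a_5
     definition is marked visited and left for later cleanup.  */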
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);
      gimple g;

      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs));
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return lhs;
    }

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
                                             NULL_TREE, true, GSI_SAME_STMT);
  gsi = *gsip;
  uid = gimple_uid (gsi_stmt (gsi));
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
        break;
      gimple_set_uid (stmt, uid);
    }
  return resultofnegate;
}

/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */

static bool
should_break_up_subtract (gimple stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple immusestmt;
  struct loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
          || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
    return true;
  return false;
}

/* Transform STMT from A - B into A + -B.  */

static void
break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Breaking up subtract ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs2 = negate_value (rhs2, gsip);
  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
  update_stmt (stmt);
}
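/* A small illustrative example of the two helpers above: given

     x_1 = a_2 - b_3;
     y_4 = x_1 + c_5;

   x_1 is only used in an addition, so should_break_up_subtract returns
   true and break_up_subtract rewrites the pair into

     t_6 = -b_3;
     x_1 = a_2 + t_6;
     y_4 = x_1 + c_5;

   exposing the chain a_2 + -b_3 + c_5 to reassociation.  */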
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */

static bool
acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
{
  tree fndecl, arg1;
  REAL_VALUE_TYPE c, cint;

  if (!first_pass_instance
      || !flag_unsafe_math_optimizations
      || !is_gimple_call (stmt)
      || !has_single_use (gimple_call_lhs (stmt)))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (fndecl))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      if (flag_errno_math)
        return false;

      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (TREE_CODE (arg1) != REAL_CST)
        return false;

      c = TREE_REAL_CST (arg1);

      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
        return false;

      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
      if (!real_identical (&c, &cint))
        return false;

      break;

    CASE_FLT_FN (BUILT_IN_POWI):
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (!tree_fits_shwi_p (arg1))
        return false;

      *exponent = tree_to_shwi (arg1);
      break;

    default:
      return false;
    }

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
    return false;

  return true;
}
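/* For example (illustrative): with -funsafe-math-optimizations and
   -fno-math-errno, a single-use call t_1 = __builtin_pow (x_2, 4.0) is
   accepted with *BASE = x_2 and *EXPONENT = 4; __builtin_pow (x_2, 0.5)
   is rejected because 0.5 is not an exact integer, and
   __builtin_powi (x_2, -2) is rejected because negative exponents are
   not expanded.  */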
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */

static void
linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
                     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple binlhsdef = NULL, binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);
  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  if (set_visited)
    gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
                         && !stmt_could_throw_p (binlhsdef));
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
                         && !stmt_could_throw_p (binrhsdef));
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      tree temp;

      /* If this is not an associative operation like division, give up.  */
      if (!is_associative)
        {
          add_to_ops_vec (ops, binrhs);
          return;
        }

      if (!binrhsisreassoc)
        {
          if (rhscode == MULT_EXPR
              && TREE_CODE (binrhs) == SSA_NAME
              && acceptable_pow_call (binrhsdef, &base, &exponent))
            {
              add_repeat_to_ops_vec (ops, base, exponent);
              gimple_set_visited (binrhsdef, true);
            }
          else
            add_to_ops_vec (ops, binrhs);

          if (rhscode == MULT_EXPR
              && TREE_CODE (binlhs) == SSA_NAME
              && acceptable_pow_call (binlhsdef, &base, &exponent))
            {
              add_repeat_to_ops_vec (ops, base, exponent);
              gimple_set_visited (binlhsdef, true);
            }
          else
            add_to_ops_vec (ops, binlhs);

          return;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "swapping operands of ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }

      swap_ssa_operands (stmt,
                         gimple_assign_rhs1_ptr (stmt),
                         gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " is now ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }

      /* We want to make it so the lhs is always the reassociative op,
         so swap.  */
      temp = binlhs;
      binlhs = binrhs;
      binrhs = temp;
    }
  else if (binrhsisreassoc)
    {
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
              || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
                                      rhscode, loop));
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
                       is_associative, set_visited);

  if (rhscode == MULT_EXPR
      && TREE_CODE (binrhs) == SSA_NAME
      && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
    {
      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
    }
  else
    add_to_ops_vec (ops, binrhs);
}

/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  FOR_EACH_VEC_ELT (plus_negates, i, negate)
    {
      gimple user = get_single_immediate_use (negate);

      if (!user || !is_gimple_assign (user))
        continue;

      /* The negate operand can be either operand of a PLUS_EXPR
         (it can be the LHS if the RHS is a constant for example).

         Force the negate operand to the RHS of the PLUS_EXPR, then
         transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
        {
          /* If the negated operand appears on the LHS of the
             PLUS_EXPR, exchange the operands of the PLUS_EXPR
             to force the negated operand to the RHS of the PLUS_EXPR.  */
          if (gimple_assign_rhs1 (user) == negate)
            {
              swap_ssa_operands (user,
                                 gimple_assign_rhs1_ptr (user),
                                 gimple_assign_rhs2_ptr (user));
            }

          /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
             the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
          if (gimple_assign_rhs2 (user) == negate)
            {
              tree rhs1 = gimple_assign_rhs1 (user);
              tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
              gimple_stmt_iterator gsi = gsi_for_stmt (user);
              gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
              update_stmt (user);
            }
        }
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
        {
          if (gimple_assign_rhs1 (user) == negate)
            {
              /* We have
                   x = -a
                   y = x - b
                 which we transform into
                   x = a + b
                   y = -x .
                 This pushes down the negate which we possibly can merge
                 into some other operation, hence insert it into the
                 plus_negates vector.  */
              gimple feed = SSA_NAME_DEF_STMT (negate);
              tree a = gimple_assign_rhs1 (feed);
              tree b = gimple_assign_rhs2 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (feed);
              gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
              tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
              gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
              gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
              gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
              user = gsi_stmt (gsi2);
              update_stmt (user);
              reassoc_remove_stmt (&gsi);
              release_defs (feed);
              plus_negates.safe_push (gimple_assign_lhs (user));
            }
          else
            {
              /* Transform "x = -a; y = b - x" into "y = b + a", getting
                 rid of one operation.  */
              gimple feed = SSA_NAME_DEF_STMT (negate);
              tree a = gimple_assign_rhs1 (feed);
              tree rhs1 = gimple_assign_rhs1 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (user);
              gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
              update_stmt (gsi_stmt (gsi));
            }
        }
    }
}

/* Returns true if OP is of a type for which we can do reassociation.
   That is for integral types with wrapping overflow, for non-saturating
   fixed-point types, and for floating-point types when associative-math
   is enabled.  */

static bool
can_reassociate_p (tree op)
{
  tree type = TREE_TYPE (op);
  if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}
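/* For instance (illustrative): an unsigned int operand qualifies, as
   does a signed int operand under -fwrapv, because TYPE_OVERFLOW_WRAPS
   holds; a plain signed int with undefined overflow does not, and a
   double operand qualifies only under -fassociative-math.  */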
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
          || !can_reassociate_p (gimple_assign_lhs (stmt)))
        continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
        {
          if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
              || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
            continue;

          /* Check for a subtract used only in an addition.  If this
             is the case, transform it into add of a negate for better
             reassociation.  IE transform C = A-B into C = A + -B if C
             is only used in an addition.  */
          if (should_break_up_subtract (stmt))
            break_up_subtract (stmt, &gsi);
        }
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
               && can_reassociate_p (gimple_assign_rhs1 (stmt)))
        plus_negates.safe_push (gimple_assign_lhs (stmt));
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}

/* Used for repeated factor analysis.  */
struct repeat_factor_d
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};

typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
typedef const struct repeat_factor_d *const_repeat_factor_t;


static vec<repeat_factor> repeat_factor_vec;

/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */

static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
  const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;

  /* Compare explicitly rather than returning the difference, which
     could truncate or overflow when narrowed to int.  */
  if (rf1->count != rf2->count)
    return rf1->count < rf2->count ? -1 : 1;

  if (rf1->rank != rf2->rank)
    return rf1->rank > rf2->rank ? -1 : 1;
  return 0;
}

/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry_t oe;
  repeat_factor_t rf1, rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple mul_stmt, pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
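  /* E.g. (illustrative) for the linearized product x * x * y * y * y
     the scan below ends up with the entries (x, count 2) and
     (y, count 3), accumulating oe->count over duplicate operands.  */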
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
        {
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->factor == oe->op)
                {
                  rf1->count += oe->count;
                  break;
                }
            }

          if (j >= repeat_factor_vec.length ())
            {
              rfnew.factor = oe->op;
              rfnew.rank = oe->rank;
              rfnew.count = oe->count;
              rfnew.repr = NULL_TREE;
              repeat_factor_vec.safe_push (rfnew);
            }
        }
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = t1 * t3
        result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = z * z
        t5 = t3 * t4
        result = t5 * y  */

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
         preceding iterations.  If found, create a builtin_powi for
         it if the minimum occurrence count for its factors is at
         least 2, or just use this cached product as our next
         multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
        {
          if (rf1->repr && rf1->count > 0)
            break;
        }

      if (j < vec_len)
        {
          power = rf1->count;

          if (power == 1)
            {
              iter_result = rf1->repr;

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor_t rf;
                  fputs ("Multiplying by cached product ", dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor, 0);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fputs ("\n", dump_file);
                }
            }
          else
            {
              iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
              pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                            build_int_cst (integer_type_node,
                                                           power));
              gimple_call_set_lhs (pow_stmt, iter_result);
              gimple_set_location (pow_stmt, gimple_location (stmt));
              gimple_set_uid (pow_stmt, gimple_uid (stmt));
              gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor_t rf;
                  fputs ("Building __builtin_pow call for cached product (",
                         dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor, 0);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
                           power);
                }
            }
        }
      else
        {
          /* Otherwise, find the first factor in the repeated factor
             vector whose occurrence count is at least 2.  If no such
             factor exists, there are no builtin_powi opportunities
             remaining.  */
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->count >= 2)
                break;
            }

          if (j >= vec_len)
            break;

          power = rf1->count;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              unsigned elt;
              repeat_factor_t rf;
              fputs ("Building __builtin_pow call for (", dump_file);
              for (elt = j; elt < vec_len; elt++)
                {
                  rf = &repeat_factor_vec[elt];
                  print_generic_expr (dump_file, rf->factor, 0);
                  if (elt < vec_len - 1)
                    fputs (" * ", dump_file);
                }
              fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
            }

          reassociate_stats.pows_created++;

          /* Visit each element of the vector in reverse order (so that
             high-occurrence elements are visited first, and within the
             same occurrence count, lower-ranked elements are visited
             first).  Form a linear product of all elements in this order
             whose occurrence count is at least that of element J.
             Record the SSA name representing the product of each element
             with all subsequent elements in the vector.  */
          if (j == vec_len - 1)
            rf1->repr = rf1->factor;
          else
            {
              for (ii = vec_len - 2; ii >= (int)j; ii--)
                {
                  tree op1, op2;

                  rf1 = &repeat_factor_vec[ii];
                  rf2 = &repeat_factor_vec[ii + 1];

                  /* Init the last factor's representative to be itself.  */
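                  /* E.g. with factors {x, y, z} remaining, this loop
                     caches z.repr = z, y.repr = y * z, and
                     x.repr = x * (y * z), so later iterations can reuse
                     any suffix product directly (illustrative).  */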
                  if (!rf2->repr)
                    rf2->repr = rf2->factor;

                  op1 = rf1->factor;
                  op2 = rf2->repr;

                  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
                  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
                                                  op1, op2);
                  gimple_set_location (mul_stmt, gimple_location (stmt));
                  gimple_set_uid (mul_stmt, gimple_uid (stmt));
                  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
                  rf1->repr = target_ssa;

                  /* Don't reprocess the multiply we just introduced.  */
                  gimple_set_visited (mul_stmt, true);
                }
            }

          /* Form a call to __builtin_powi for the maximum product
             just formed, raised to the power obtained earlier.  */
          rf1 = &repeat_factor_vec[j];
          iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
          pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                        build_int_cst (integer_type_node,
                                                       power));
          gimple_call_set_lhs (pow_stmt, iter_result);
          gimple_set_location (pow_stmt, gimple_location (stmt));
          gimple_set_uid (pow_stmt, gimple_uid (stmt));
          gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
        }

      /* If we previously formed at least one other builtin_powi call,
         form the product of this one and those others.  */
      if (result)
        {
          tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
          mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
                                          result, iter_result);
          gimple_set_location (mul_stmt, gimple_location (stmt));
          gimple_set_uid (mul_stmt, gimple_uid (stmt));
          gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
          gimple_set_visited (mul_stmt, true);
          result = new_result;
        }
      else
        result = iter_result;

      /* Decrement the occurrence count of each element in the product
         by the count found above, and remove this many copies of each
         factor from OPS.  */
      for (i = j; i < vec_len; i++)
        {
          unsigned k = power;
          unsigned n;

          rf1 = &repeat_factor_vec[i];
          rf1->count -= power;

          FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
            {
              if (oe->op == rf1->factor)
                {
                  if (oe->count <= k)
                    {
                      ops->ordered_remove (n);
                      k -= oe->count;

                      if (k == 0)
                        break;
                    }
                  else
                    {
                      oe->count -= k;
                      break;
                    }
                }
            }
        }
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}

/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (stmt);
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}

/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}

/* Reassociate expressions in basic block BB and its post-dominator
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple stmt = last_stmt (bb);

  if (stmt && !gimple_visited_p (stmt))
    maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above, which
                     places the iterator at the end of the sequence.
                     Reset it to the last stmt in this case, which might
                     be the end of the sequence as well if we removed
                     the last statement of the sequence.  In that case
                     we need to bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For operations other than bitwise and min/max ops we
             cannot associate all types.  Verify that here.  */
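          /* E.g. (explanatory note) a BIT_XOR_EXPR on plain signed int
             is always reassociated, while a PLUS_EXPR on the same type
             is skipped unless can_reassociate_p accepts it, say under
             -fwrapv where overflow is defined.  */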
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry_t> ops;
              tree powi_result = NULL_TREE;

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                optimize_range_tests (rhs_code, &ops);

              if (first_pass_instance
                  && rhs_code == MULT_EXPR
                  && flag_unsafe_math_optimizations)
                powi_result = attempt_builtin_powi (stmt, &ops);

              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code,
                                                       mode);
                  tree new_lhs = lhs;

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n",
                             width);

                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                width, ops);
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocpow");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        target_ssa = new_lhs;
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result,
                                                      target_ssa);
                      gimple_set_location (mul_stmt,
                                           gimple_location (stmt));
                      gimple_set_uid (mul_stmt, gimple_uid (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}

/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
       VAR = ...;  // final stmt of range comparison
       // bit test here...;
       OTHERVAR = ...;  // final stmt of the bit test sequence
       RES = VAR | OTHERVAR;
   Turn the above into:
       VAR = ...;
       if (VAR != 0)
         goto <l3>;
       else
         goto <l2>;
   <l2>:
       // bit test here...;
       OTHERVAR = ...;
   <l3>:
       # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (var);
      gimple use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple g = gimple_build_cond (NE_EXPR, var,
                                    build_zero_cst (TREE_TYPE (var)),
                                    NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = REG_BR_PROB_BASE / 2;
      etrue->count = cond_bb->count / 2;
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      efalse->count -= etrue->count;
      then_bb->count -= etrue->count;

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}

void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
void debug_ops_vector (vec<operand_entry_t> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
{
  operand_entry_t oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry_t> ops)
{
  dump_ops_vector (stderr, ops);
}

/* Break up subtracts and reassociate, walking the dominator tree from
   the CFG entry block and the post-dominator tree from the exit
   block.  */

static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}

/* Initialize the reassociation pass.  */
static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
                                          sizeof (struct operand_entry), 30);
  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
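/* A note on the rank numbering set up above (explanatory): shifting
   the per-block rank left by 16 gives every basic block its own window
   of 65536 rank values, so an SSA name defined in a block that comes
   later in the reverse-RPO order (e.g. inside a deeper loop) always
   outranks any name defined in an earlier block.  */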
/* Gate and execute functions for reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return 0;
}

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *) { return execute_reassoc (); }

}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}