1/* Induction variable optimizations. 2 Copyright (C) 2003-2015 Free Software Foundation, Inc. 3 4This file is part of GCC. 5 6GCC is free software; you can redistribute it and/or modify it 7under the terms of the GNU General Public License as published by the 8Free Software Foundation; either version 3, or (at your option) any 9later version. 10 11GCC is distributed in the hope that it will be useful, but WITHOUT 12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14for more details. 15 16You should have received a copy of the GNU General Public License 17along with GCC; see the file COPYING3. If not see 18<http://www.gnu.org/licenses/>. */ 19 20/* This pass tries to find the optimal set of induction variables for the loop. 21 It optimizes just the basic linear induction variables (although adding 22 support for other types should not be too hard). It includes the 23 optimizations commonly known as strength reduction, induction variable 24 coalescing and induction variable elimination. It does it in the 25 following steps: 26 27 1) The interesting uses of induction variables are found. This includes 28 29 -- uses of induction variables in non-linear expressions 30 -- addresses of arrays 31 -- comparisons of induction variables 32 33 2) Candidates for the induction variables are found. This includes 34 35 -- old induction variables 36 -- the variables defined by expressions derived from the "interesting 37 uses" above 38 39 3) The optimal (w.r. to a cost function) set of variables is chosen. The 40 cost function assigns a cost to sets of induction variables and consists 41 of three parts: 42 43 -- The use costs. Each of the interesting uses chooses the best induction 44 variable in the set and adds its cost to the sum. The cost reflects 45 the time spent on modifying the induction variables value to be usable 46 for the given purpose (adding base and offset for arrays, etc.). 47 -- The variable costs. Each of the variables has a cost assigned that 48 reflects the costs associated with incrementing the value of the 49 variable. The original variables are somewhat preferred. 50 -- The set cost. Depending on the size of the set, extra cost may be 51 added to reflect register pressure. 52 53 All the costs are defined in a machine-specific way, using the target 54 hooks and machine descriptions to determine them. 55 56 4) The trees are transformed to use the new variables, the dead code is 57 removed. 58 59 All of this is done loop by loop. Doing it globally is theoretically 60 possible, it might give a better performance and it might enable us 61 to decide costs more precisely, but getting all the interactions right 62 would be complicated. */ 63 64#include "config.h" 65#include "system.h" 66#include "coretypes.h" 67#include "tm.h" 68#include "hash-set.h" 69#include "machmode.h" 70#include "vec.h" 71#include "double-int.h" 72#include "input.h" 73#include "alias.h" 74#include "symtab.h" 75#include "wide-int.h" 76#include "inchash.h" 77#include "tree.h" 78#include "fold-const.h" 79#include "stor-layout.h" 80#include "tm_p.h" 81#include "predict.h" 82#include "hard-reg-set.h" 83#include "function.h" 84#include "dominance.h" 85#include "cfg.h" 86#include "basic-block.h" 87#include "gimple-pretty-print.h" 88#include "hash-map.h" 89#include "hash-table.h" 90#include "tree-ssa-alias.h" 91#include "internal-fn.h" 92#include "tree-eh.h" 93#include "gimple-expr.h" 94#include "is-a.h" 95#include "gimple.h" 96#include "gimplify.h" 97#include "gimple-iterator.h" 98#include "gimplify-me.h" 99#include "gimple-ssa.h" 100#include "plugin-api.h" 101#include "ipa-ref.h" 102#include "cgraph.h" 103#include "tree-cfg.h" 104#include "tree-phinodes.h" 105#include "ssa-iterators.h" 106#include "stringpool.h" 107#include "tree-ssanames.h" 108#include "tree-ssa-loop-ivopts.h" 109#include "tree-ssa-loop-manip.h" 110#include "tree-ssa-loop-niter.h" 111#include "tree-ssa-loop.h" 112#include "hashtab.h" 113#include "rtl.h" 114#include "flags.h" 115#include "statistics.h" 116#include "real.h" 117#include "fixed-value.h" 118#include "insn-config.h" 119#include "expmed.h" 120#include "dojump.h" 121#include "explow.h" 122#include "calls.h" 123#include "emit-rtl.h" 124#include "varasm.h" 125#include "stmt.h" 126#include "expr.h" 127#include "tree-dfa.h" 128#include "tree-ssa.h" 129#include "cfgloop.h" 130#include "tree-pass.h" 131#include "tree-chrec.h" 132#include "tree-scalar-evolution.h" 133#include "params.h" 134#include "langhooks.h" 135#include "tree-affine.h" 136#include "target.h" 137#include "tree-inline.h" 138#include "tree-ssa-propagate.h" 139#include "tree-ssa-address.h" 140#include "builtins.h" 141#include "tree-vectorizer.h" 142 143/* FIXME: Expressions are expanded to RTL in this pass to determine the 144 cost of different addressing modes. This should be moved to a TBD 145 interface between the GIMPLE and RTL worlds. */ 146#include "recog.h" 147 148/* The infinite cost. */ 149#define INFTY 10000000 150 151#define AVG_LOOP_NITER(LOOP) 5 152 153/* Returns the expected number of loop iterations for LOOP. 154 The average trip count is computed from profile data if it 155 exists. */ 156 157static inline HOST_WIDE_INT 158avg_loop_niter (struct loop *loop) 159{ 160 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop); 161 if (niter == -1) 162 return AVG_LOOP_NITER (loop); 163 164 return niter; 165} 166 167/* Representation of the induction variable. */ 168struct iv 169{ 170 tree base; /* Initial value of the iv. */ 171 tree base_object; /* A memory object to that the induction variable points. */ 172 tree step; /* Step of the iv (constant only). */ 173 tree ssa_name; /* The ssa name with the value. */ 174 bool biv_p; /* Is it a biv? */ 175 bool have_use_for; /* Do we already have a use for it? */ 176 unsigned use_id; /* The identifier in the use if it is the case. */ 177}; 178 179/* Per-ssa version information (induction variable descriptions, etc.). */ 180struct version_info 181{ 182 tree name; /* The ssa name. */ 183 struct iv *iv; /* Induction variable description. */ 184 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in 185 an expression that is not an induction variable. */ 186 bool preserve_biv; /* For the original biv, whether to preserve it. */ 187 unsigned inv_id; /* Id of an invariant. */ 188}; 189 190/* Types of uses. */ 191enum use_type 192{ 193 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */ 194 USE_ADDRESS, /* Use in an address. */ 195 USE_COMPARE /* Use is a compare. */ 196}; 197 198/* Cost of a computation. */ 199typedef struct 200{ 201 int cost; /* The runtime cost. */ 202 unsigned complexity; /* The estimate of the complexity of the code for 203 the computation (in no concrete units -- 204 complexity field should be larger for more 205 complex expressions and addressing modes). */ 206} comp_cost; 207 208static const comp_cost no_cost = {0, 0}; 209static const comp_cost infinite_cost = {INFTY, INFTY}; 210 211/* The candidate - cost pair. */ 212struct cost_pair 213{ 214 struct iv_cand *cand; /* The candidate. */ 215 comp_cost cost; /* The cost. */ 216 bitmap depends_on; /* The list of invariants that have to be 217 preserved. */ 218 tree value; /* For final value elimination, the expression for 219 the final value of the iv. For iv elimination, 220 the new bound to compare with. */ 221 enum tree_code comp; /* For iv elimination, the comparison. */ 222 int inv_expr_id; /* Loop invariant expression id. */ 223}; 224 225/* Use. */ 226struct iv_use 227{ 228 unsigned id; /* The id of the use. */ 229 enum use_type type; /* Type of the use. */ 230 struct iv *iv; /* The induction variable it is based on. */ 231 gimple stmt; /* Statement in that it occurs. */ 232 tree *op_p; /* The place where it occurs. */ 233 bitmap related_cands; /* The set of "related" iv candidates, plus the common 234 important ones. */ 235 236 unsigned n_map_members; /* Number of candidates in the cost_map list. */ 237 struct cost_pair *cost_map; 238 /* The costs wrto the iv candidates. */ 239 240 struct iv_cand *selected; 241 /* The selected candidate. */ 242}; 243 244/* The position where the iv is computed. */ 245enum iv_position 246{ 247 IP_NORMAL, /* At the end, just before the exit condition. */ 248 IP_END, /* At the end of the latch block. */ 249 IP_BEFORE_USE, /* Immediately before a specific use. */ 250 IP_AFTER_USE, /* Immediately after a specific use. */ 251 IP_ORIGINAL /* The original biv. */ 252}; 253 254/* The induction variable candidate. */ 255struct iv_cand 256{ 257 unsigned id; /* The number of the candidate. */ 258 bool important; /* Whether this is an "important" candidate, i.e. such 259 that it should be considered by all uses. */ 260 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */ 261 gimple incremented_at;/* For original biv, the statement where it is 262 incremented. */ 263 tree var_before; /* The variable used for it before increment. */ 264 tree var_after; /* The variable used for it after increment. */ 265 struct iv *iv; /* The value of the candidate. NULL for 266 "pseudocandidate" used to indicate the possibility 267 to replace the final value of an iv by direct 268 computation of the value. */ 269 unsigned cost; /* Cost of the candidate. */ 270 unsigned cost_step; /* Cost of the candidate's increment operation. */ 271 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place 272 where it is incremented. */ 273 bitmap depends_on; /* The list of invariants that are used in step of the 274 biv. */ 275}; 276 277/* Loop invariant expression hashtable entry. */ 278struct iv_inv_expr_ent 279{ 280 tree expr; 281 int id; 282 hashval_t hash; 283}; 284 285/* The data used by the induction variable optimizations. */ 286 287typedef struct iv_use *iv_use_p; 288 289typedef struct iv_cand *iv_cand_p; 290 291/* Hashtable helpers. */ 292 293struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent> 294{ 295 typedef iv_inv_expr_ent value_type; 296 typedef iv_inv_expr_ent compare_type; 297 static inline hashval_t hash (const value_type *); 298 static inline bool equal (const value_type *, const compare_type *); 299}; 300 301/* Hash function for loop invariant expressions. */ 302 303inline hashval_t 304iv_inv_expr_hasher::hash (const value_type *expr) 305{ 306 return expr->hash; 307} 308 309/* Hash table equality function for expressions. */ 310 311inline bool 312iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2) 313{ 314 return expr1->hash == expr2->hash 315 && operand_equal_p (expr1->expr, expr2->expr, 0); 316} 317 318struct ivopts_data 319{ 320 /* The currently optimized loop. */ 321 struct loop *current_loop; 322 source_location loop_loc; 323 324 /* Numbers of iterations for all exits of the current loop. */ 325 hash_map<edge, tree_niter_desc *> *niters; 326 327 /* Number of registers used in it. */ 328 unsigned regs_used; 329 330 /* The size of version_info array allocated. */ 331 unsigned version_info_size; 332 333 /* The array of information for the ssa names. */ 334 struct version_info *version_info; 335 336 /* The hashtable of loop invariant expressions created 337 by ivopt. */ 338 hash_table<iv_inv_expr_hasher> *inv_expr_tab; 339 340 /* Loop invariant expression id. */ 341 int inv_expr_id; 342 343 /* The bitmap of indices in version_info whose value was changed. */ 344 bitmap relevant; 345 346 /* The uses of induction variables. */ 347 vec<iv_use_p> iv_uses; 348 349 /* The candidates. */ 350 vec<iv_cand_p> iv_candidates; 351 352 /* A bitmap of important candidates. */ 353 bitmap important_candidates; 354 355 /* Cache used by tree_to_aff_combination_expand. */ 356 hash_map<tree, name_expansion *> *name_expansion_cache; 357 358 /* The maximum invariant id. */ 359 unsigned max_inv_id; 360 361 /* Whether to consider just related and important candidates when replacing a 362 use. */ 363 bool consider_all_candidates; 364 365 /* Are we optimizing for speed? */ 366 bool speed; 367 368 /* Whether the loop body includes any function calls. */ 369 bool body_includes_call; 370 371 /* Whether the loop body can only be exited via single exit. */ 372 bool loop_single_exit_p; 373}; 374 375/* An assignment of iv candidates to uses. */ 376 377struct iv_ca 378{ 379 /* The number of uses covered by the assignment. */ 380 unsigned upto; 381 382 /* Number of uses that cannot be expressed by the candidates in the set. */ 383 unsigned bad_uses; 384 385 /* Candidate assigned to a use, together with the related costs. */ 386 struct cost_pair **cand_for_use; 387 388 /* Number of times each candidate is used. */ 389 unsigned *n_cand_uses; 390 391 /* The candidates used. */ 392 bitmap cands; 393 394 /* The number of candidates in the set. */ 395 unsigned n_cands; 396 397 /* Total number of registers needed. */ 398 unsigned n_regs; 399 400 /* Total cost of expressing uses. */ 401 comp_cost cand_use_cost; 402 403 /* Total cost of candidates. */ 404 unsigned cand_cost; 405 406 /* Number of times each invariant is used. */ 407 unsigned *n_invariant_uses; 408 409 /* The array holding the number of uses of each loop 410 invariant expressions created by ivopt. */ 411 unsigned *used_inv_expr; 412 413 /* The number of created loop invariants. */ 414 unsigned num_used_inv_expr; 415 416 /* Total cost of the assignment. */ 417 comp_cost cost; 418}; 419 420/* Difference of two iv candidate assignments. */ 421 422struct iv_ca_delta 423{ 424 /* Changed use. */ 425 struct iv_use *use; 426 427 /* An old assignment (for rollback purposes). */ 428 struct cost_pair *old_cp; 429 430 /* A new assignment. */ 431 struct cost_pair *new_cp; 432 433 /* Next change in the list. */ 434 struct iv_ca_delta *next_change; 435}; 436 437/* Bound on number of candidates below that all candidates are considered. */ 438 439#define CONSIDER_ALL_CANDIDATES_BOUND \ 440 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND)) 441 442/* If there are more iv occurrences, we just give up (it is quite unlikely that 443 optimizing such a loop would help, and it would take ages). */ 444 445#define MAX_CONSIDERED_USES \ 446 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES)) 447 448/* If there are at most this number of ivs in the set, try removing unnecessary 449 ivs from the set always. */ 450 451#define ALWAYS_PRUNE_CAND_SET_BOUND \ 452 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND)) 453 454/* The list of trees for that the decl_rtl field must be reset is stored 455 here. */ 456 457static vec<tree> decl_rtl_to_reset; 458 459static comp_cost force_expr_to_var_cost (tree, bool); 460 461/* Number of uses recorded in DATA. */ 462 463static inline unsigned 464n_iv_uses (struct ivopts_data *data) 465{ 466 return data->iv_uses.length (); 467} 468 469/* Ith use recorded in DATA. */ 470 471static inline struct iv_use * 472iv_use (struct ivopts_data *data, unsigned i) 473{ 474 return data->iv_uses[i]; 475} 476 477/* Number of candidates recorded in DATA. */ 478 479static inline unsigned 480n_iv_cands (struct ivopts_data *data) 481{ 482 return data->iv_candidates.length (); 483} 484 485/* Ith candidate recorded in DATA. */ 486 487static inline struct iv_cand * 488iv_cand (struct ivopts_data *data, unsigned i) 489{ 490 return data->iv_candidates[i]; 491} 492 493/* The single loop exit if it dominates the latch, NULL otherwise. */ 494 495edge 496single_dom_exit (struct loop *loop) 497{ 498 edge exit = single_exit (loop); 499 500 if (!exit) 501 return NULL; 502 503 if (!just_once_each_iteration_p (loop, exit->src)) 504 return NULL; 505 506 return exit; 507} 508 509/* Dumps information about the induction variable IV to FILE. */ 510 511void 512dump_iv (FILE *file, struct iv *iv) 513{ 514 if (iv->ssa_name) 515 { 516 fprintf (file, "ssa name "); 517 print_generic_expr (file, iv->ssa_name, TDF_SLIM); 518 fprintf (file, "\n"); 519 } 520 521 fprintf (file, " type "); 522 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM); 523 fprintf (file, "\n"); 524 525 if (iv->step) 526 { 527 fprintf (file, " base "); 528 print_generic_expr (file, iv->base, TDF_SLIM); 529 fprintf (file, "\n"); 530 531 fprintf (file, " step "); 532 print_generic_expr (file, iv->step, TDF_SLIM); 533 fprintf (file, "\n"); 534 } 535 else 536 { 537 fprintf (file, " invariant "); 538 print_generic_expr (file, iv->base, TDF_SLIM); 539 fprintf (file, "\n"); 540 } 541 542 if (iv->base_object) 543 { 544 fprintf (file, " base object "); 545 print_generic_expr (file, iv->base_object, TDF_SLIM); 546 fprintf (file, "\n"); 547 } 548 549 if (iv->biv_p) 550 fprintf (file, " is a biv\n"); 551} 552 553/* Dumps information about the USE to FILE. */ 554 555void 556dump_use (FILE *file, struct iv_use *use) 557{ 558 fprintf (file, "use %d\n", use->id); 559 560 switch (use->type) 561 { 562 case USE_NONLINEAR_EXPR: 563 fprintf (file, " generic\n"); 564 break; 565 566 case USE_ADDRESS: 567 fprintf (file, " address\n"); 568 break; 569 570 case USE_COMPARE: 571 fprintf (file, " compare\n"); 572 break; 573 574 default: 575 gcc_unreachable (); 576 } 577 578 fprintf (file, " in statement "); 579 print_gimple_stmt (file, use->stmt, 0, 0); 580 fprintf (file, "\n"); 581 582 fprintf (file, " at position "); 583 if (use->op_p) 584 print_generic_expr (file, *use->op_p, TDF_SLIM); 585 fprintf (file, "\n"); 586 587 dump_iv (file, use->iv); 588 589 if (use->related_cands) 590 { 591 fprintf (file, " related candidates "); 592 dump_bitmap (file, use->related_cands); 593 } 594} 595 596/* Dumps information about the uses to FILE. */ 597 598void 599dump_uses (FILE *file, struct ivopts_data *data) 600{ 601 unsigned i; 602 struct iv_use *use; 603 604 for (i = 0; i < n_iv_uses (data); i++) 605 { 606 use = iv_use (data, i); 607 608 dump_use (file, use); 609 fprintf (file, "\n"); 610 } 611} 612 613/* Dumps information about induction variable candidate CAND to FILE. */ 614 615void 616dump_cand (FILE *file, struct iv_cand *cand) 617{ 618 struct iv *iv = cand->iv; 619 620 fprintf (file, "candidate %d%s\n", 621 cand->id, cand->important ? " (important)" : ""); 622 623 if (cand->depends_on) 624 { 625 fprintf (file, " depends on "); 626 dump_bitmap (file, cand->depends_on); 627 } 628 629 if (!iv) 630 { 631 fprintf (file, " final value replacement\n"); 632 return; 633 } 634 635 if (cand->var_before) 636 { 637 fprintf (file, " var_before "); 638 print_generic_expr (file, cand->var_before, TDF_SLIM); 639 fprintf (file, "\n"); 640 } 641 if (cand->var_after) 642 { 643 fprintf (file, " var_after "); 644 print_generic_expr (file, cand->var_after, TDF_SLIM); 645 fprintf (file, "\n"); 646 } 647 648 switch (cand->pos) 649 { 650 case IP_NORMAL: 651 fprintf (file, " incremented before exit test\n"); 652 break; 653 654 case IP_BEFORE_USE: 655 fprintf (file, " incremented before use %d\n", cand->ainc_use->id); 656 break; 657 658 case IP_AFTER_USE: 659 fprintf (file, " incremented after use %d\n", cand->ainc_use->id); 660 break; 661 662 case IP_END: 663 fprintf (file, " incremented at end\n"); 664 break; 665 666 case IP_ORIGINAL: 667 fprintf (file, " original biv\n"); 668 break; 669 } 670 671 dump_iv (file, iv); 672} 673 674/* Returns the info for ssa version VER. */ 675 676static inline struct version_info * 677ver_info (struct ivopts_data *data, unsigned ver) 678{ 679 return data->version_info + ver; 680} 681 682/* Returns the info for ssa name NAME. */ 683 684static inline struct version_info * 685name_info (struct ivopts_data *data, tree name) 686{ 687 return ver_info (data, SSA_NAME_VERSION (name)); 688} 689 690/* Returns true if STMT is after the place where the IP_NORMAL ivs will be 691 emitted in LOOP. */ 692 693static bool 694stmt_after_ip_normal_pos (struct loop *loop, gimple stmt) 695{ 696 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt); 697 698 gcc_assert (bb); 699 700 if (sbb == loop->latch) 701 return true; 702 703 if (sbb != bb) 704 return false; 705 706 return stmt == last_stmt (bb); 707} 708 709/* Returns true if STMT if after the place where the original induction 710 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true 711 if the positions are identical. */ 712 713static bool 714stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal) 715{ 716 basic_block cand_bb = gimple_bb (cand->incremented_at); 717 basic_block stmt_bb = gimple_bb (stmt); 718 719 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb)) 720 return false; 721 722 if (stmt_bb != cand_bb) 723 return true; 724 725 if (true_if_equal 726 && gimple_uid (stmt) == gimple_uid (cand->incremented_at)) 727 return true; 728 return gimple_uid (stmt) > gimple_uid (cand->incremented_at); 729} 730 731/* Returns true if STMT if after the place where the induction variable 732 CAND is incremented in LOOP. */ 733 734static bool 735stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt) 736{ 737 switch (cand->pos) 738 { 739 case IP_END: 740 return false; 741 742 case IP_NORMAL: 743 return stmt_after_ip_normal_pos (loop, stmt); 744 745 case IP_ORIGINAL: 746 case IP_AFTER_USE: 747 return stmt_after_inc_pos (cand, stmt, false); 748 749 case IP_BEFORE_USE: 750 return stmt_after_inc_pos (cand, stmt, true); 751 752 default: 753 gcc_unreachable (); 754 } 755} 756 757/* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */ 758 759static bool 760abnormal_ssa_name_p (tree exp) 761{ 762 if (!exp) 763 return false; 764 765 if (TREE_CODE (exp) != SSA_NAME) 766 return false; 767 768 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0; 769} 770 771/* Returns false if BASE or INDEX contains a ssa name that occurs in an 772 abnormal phi node. Callback for for_each_index. */ 773 774static bool 775idx_contains_abnormal_ssa_name_p (tree base, tree *index, 776 void *data ATTRIBUTE_UNUSED) 777{ 778 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 779 { 780 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2))) 781 return false; 782 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3))) 783 return false; 784 } 785 786 return !abnormal_ssa_name_p (*index); 787} 788 789/* Returns true if EXPR contains a ssa name that occurs in an 790 abnormal phi node. */ 791 792bool 793contains_abnormal_ssa_name_p (tree expr) 794{ 795 enum tree_code code; 796 enum tree_code_class codeclass; 797 798 if (!expr) 799 return false; 800 801 code = TREE_CODE (expr); 802 codeclass = TREE_CODE_CLASS (code); 803 804 if (code == SSA_NAME) 805 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0; 806 807 if (code == INTEGER_CST 808 || is_gimple_min_invariant (expr)) 809 return false; 810 811 if (code == ADDR_EXPR) 812 return !for_each_index (&TREE_OPERAND (expr, 0), 813 idx_contains_abnormal_ssa_name_p, 814 NULL); 815 816 if (code == COND_EXPR) 817 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)) 818 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)) 819 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2)); 820 821 switch (codeclass) 822 { 823 case tcc_binary: 824 case tcc_comparison: 825 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))) 826 return true; 827 828 /* Fallthru. */ 829 case tcc_unary: 830 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))) 831 return true; 832 833 break; 834 835 default: 836 gcc_unreachable (); 837 } 838 839 return false; 840} 841 842/* Returns the structure describing number of iterations determined from 843 EXIT of DATA->current_loop, or NULL if something goes wrong. */ 844 845static struct tree_niter_desc * 846niter_for_exit (struct ivopts_data *data, edge exit) 847{ 848 struct tree_niter_desc *desc; 849 tree_niter_desc **slot; 850 851 if (!data->niters) 852 { 853 data->niters = new hash_map<edge, tree_niter_desc *>; 854 slot = NULL; 855 } 856 else 857 slot = data->niters->get (exit); 858 859 if (!slot) 860 { 861 /* Try to determine number of iterations. We cannot safely work with ssa 862 names that appear in phi nodes on abnormal edges, so that we do not 863 create overlapping life ranges for them (PR 27283). */ 864 desc = XNEW (struct tree_niter_desc); 865 if (!number_of_iterations_exit (data->current_loop, 866 exit, desc, true) 867 || contains_abnormal_ssa_name_p (desc->niter)) 868 { 869 XDELETE (desc); 870 desc = NULL; 871 } 872 data->niters->put (exit, desc); 873 } 874 else 875 desc = *slot; 876 877 return desc; 878} 879 880/* Returns the structure describing number of iterations determined from 881 single dominating exit of DATA->current_loop, or NULL if something 882 goes wrong. */ 883 884static struct tree_niter_desc * 885niter_for_single_dom_exit (struct ivopts_data *data) 886{ 887 edge exit = single_dom_exit (data->current_loop); 888 889 if (!exit) 890 return NULL; 891 892 return niter_for_exit (data, exit); 893} 894 895/* Initializes data structures used by the iv optimization pass, stored 896 in DATA. */ 897 898static void 899tree_ssa_iv_optimize_init (struct ivopts_data *data) 900{ 901 data->version_info_size = 2 * num_ssa_names; 902 data->version_info = XCNEWVEC (struct version_info, data->version_info_size); 903 data->relevant = BITMAP_ALLOC (NULL); 904 data->important_candidates = BITMAP_ALLOC (NULL); 905 data->max_inv_id = 0; 906 data->niters = NULL; 907 data->iv_uses.create (20); 908 data->iv_candidates.create (20); 909 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10); 910 data->inv_expr_id = 0; 911 data->name_expansion_cache = NULL; 912 decl_rtl_to_reset.create (20); 913} 914 915/* Returns a memory object to that EXPR points. In case we are able to 916 determine that it does not point to any such object, NULL is returned. */ 917 918static tree 919determine_base_object (tree expr) 920{ 921 enum tree_code code = TREE_CODE (expr); 922 tree base, obj; 923 924 /* If this is a pointer casted to any type, we need to determine 925 the base object for the pointer; so handle conversions before 926 throwing away non-pointer expressions. */ 927 if (CONVERT_EXPR_P (expr)) 928 return determine_base_object (TREE_OPERAND (expr, 0)); 929 930 if (!POINTER_TYPE_P (TREE_TYPE (expr))) 931 return NULL_TREE; 932 933 switch (code) 934 { 935 case INTEGER_CST: 936 return NULL_TREE; 937 938 case ADDR_EXPR: 939 obj = TREE_OPERAND (expr, 0); 940 base = get_base_address (obj); 941 942 if (!base) 943 return expr; 944 945 if (TREE_CODE (base) == MEM_REF) 946 return determine_base_object (TREE_OPERAND (base, 0)); 947 948 return fold_convert (ptr_type_node, 949 build_fold_addr_expr (base)); 950 951 case POINTER_PLUS_EXPR: 952 return determine_base_object (TREE_OPERAND (expr, 0)); 953 954 case PLUS_EXPR: 955 case MINUS_EXPR: 956 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */ 957 gcc_unreachable (); 958 959 default: 960 return fold_convert (ptr_type_node, expr); 961 } 962} 963 964/* Return true if address expression with non-DECL_P operand appears 965 in EXPR. */ 966 967static bool 968contain_complex_addr_expr (tree expr) 969{ 970 bool res = false; 971 972 STRIP_NOPS (expr); 973 switch (TREE_CODE (expr)) 974 { 975 case POINTER_PLUS_EXPR: 976 case PLUS_EXPR: 977 case MINUS_EXPR: 978 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0)); 979 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1)); 980 break; 981 982 case ADDR_EXPR: 983 return (!DECL_P (TREE_OPERAND (expr, 0))); 984 985 default: 986 return false; 987 } 988 989 return res; 990} 991 992/* Allocates an induction variable with given initial value BASE and step STEP 993 for loop LOOP. */ 994 995static struct iv * 996alloc_iv (tree base, tree step) 997{ 998 tree expr = base; 999 struct iv *iv = XCNEW (struct iv); 1000 gcc_assert (step != NULL_TREE); 1001 1002 /* Lower address expression in base except ones with DECL_P as operand. 1003 By doing this: 1004 1) More accurate cost can be computed for address expressions; 1005 2) Duplicate candidates won't be created for bases in different 1006 forms, like &a[0] and &a. */ 1007 STRIP_NOPS (expr); 1008 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0))) 1009 || contain_complex_addr_expr (expr)) 1010 { 1011 aff_tree comb; 1012 tree_to_aff_combination (expr, TREE_TYPE (base), &comb); 1013 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb)); 1014 } 1015 1016 iv->base = base; 1017 iv->base_object = determine_base_object (base); 1018 iv->step = step; 1019 iv->biv_p = false; 1020 iv->have_use_for = false; 1021 iv->use_id = 0; 1022 iv->ssa_name = NULL_TREE; 1023 1024 return iv; 1025} 1026 1027/* Sets STEP and BASE for induction variable IV. */ 1028 1029static void 1030set_iv (struct ivopts_data *data, tree iv, tree base, tree step) 1031{ 1032 struct version_info *info = name_info (data, iv); 1033 1034 gcc_assert (!info->iv); 1035 1036 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv)); 1037 info->iv = alloc_iv (base, step); 1038 info->iv->ssa_name = iv; 1039} 1040 1041/* Finds induction variable declaration for VAR. */ 1042 1043static struct iv * 1044get_iv (struct ivopts_data *data, tree var) 1045{ 1046 basic_block bb; 1047 tree type = TREE_TYPE (var); 1048 1049 if (!POINTER_TYPE_P (type) 1050 && !INTEGRAL_TYPE_P (type)) 1051 return NULL; 1052 1053 if (!name_info (data, var)->iv) 1054 { 1055 bb = gimple_bb (SSA_NAME_DEF_STMT (var)); 1056 1057 if (!bb 1058 || !flow_bb_inside_loop_p (data->current_loop, bb)) 1059 set_iv (data, var, var, build_int_cst (type, 0)); 1060 } 1061 1062 return name_info (data, var)->iv; 1063} 1064 1065/* Determines the step of a biv defined in PHI. Returns NULL if PHI does 1066 not define a simple affine biv with nonzero step. */ 1067 1068static tree 1069determine_biv_step (gphi *phi) 1070{ 1071 struct loop *loop = gimple_bb (phi)->loop_father; 1072 tree name = PHI_RESULT (phi); 1073 affine_iv iv; 1074 1075 if (virtual_operand_p (name)) 1076 return NULL_TREE; 1077 1078 if (!simple_iv (loop, loop, name, &iv, true)) 1079 return NULL_TREE; 1080 1081 return integer_zerop (iv.step) ? NULL_TREE : iv.step; 1082} 1083 1084/* Return the first non-invariant ssa var found in EXPR. */ 1085 1086static tree 1087extract_single_var_from_expr (tree expr) 1088{ 1089 int i, n; 1090 tree tmp; 1091 enum tree_code code; 1092 1093 if (!expr || is_gimple_min_invariant (expr)) 1094 return NULL; 1095 1096 code = TREE_CODE (expr); 1097 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) 1098 { 1099 n = TREE_OPERAND_LENGTH (expr); 1100 for (i = 0; i < n; i++) 1101 { 1102 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i)); 1103 1104 if (tmp) 1105 return tmp; 1106 } 1107 } 1108 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL; 1109} 1110 1111/* Finds basic ivs. */ 1112 1113static bool 1114find_bivs (struct ivopts_data *data) 1115{ 1116 gphi *phi; 1117 tree step, type, base, stop; 1118 bool found = false; 1119 struct loop *loop = data->current_loop; 1120 gphi_iterator psi; 1121 1122 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 1123 { 1124 phi = psi.phi (); 1125 1126 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi))) 1127 continue; 1128 1129 step = determine_biv_step (phi); 1130 if (!step) 1131 continue; 1132 1133 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); 1134 /* Stop expanding iv base at the first ssa var referred by iv step. 1135 Ideally we should stop at any ssa var, because that's expensive 1136 and unusual to happen, we just do it on the first one. 1137 1138 See PR64705 for the rationale. */ 1139 stop = extract_single_var_from_expr (step); 1140 base = expand_simple_operations (base, stop); 1141 if (contains_abnormal_ssa_name_p (base) 1142 || contains_abnormal_ssa_name_p (step)) 1143 continue; 1144 1145 type = TREE_TYPE (PHI_RESULT (phi)); 1146 base = fold_convert (type, base); 1147 if (step) 1148 { 1149 if (POINTER_TYPE_P (type)) 1150 step = convert_to_ptrofftype (step); 1151 else 1152 step = fold_convert (type, step); 1153 } 1154 1155 set_iv (data, PHI_RESULT (phi), base, step); 1156 found = true; 1157 } 1158 1159 return found; 1160} 1161 1162/* Marks basic ivs. */ 1163 1164static void 1165mark_bivs (struct ivopts_data *data) 1166{ 1167 gphi *phi; 1168 gimple def; 1169 tree var; 1170 struct iv *iv, *incr_iv; 1171 struct loop *loop = data->current_loop; 1172 basic_block incr_bb; 1173 gphi_iterator psi; 1174 1175 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 1176 { 1177 phi = psi.phi (); 1178 1179 iv = get_iv (data, PHI_RESULT (phi)); 1180 if (!iv) 1181 continue; 1182 1183 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); 1184 def = SSA_NAME_DEF_STMT (var); 1185 /* Don't mark iv peeled from other one as biv. */ 1186 if (def 1187 && gimple_code (def) == GIMPLE_PHI 1188 && gimple_bb (def) == loop->header) 1189 continue; 1190 1191 incr_iv = get_iv (data, var); 1192 if (!incr_iv) 1193 continue; 1194 1195 /* If the increment is in the subloop, ignore it. */ 1196 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var)); 1197 if (incr_bb->loop_father != data->current_loop 1198 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP)) 1199 continue; 1200 1201 iv->biv_p = true; 1202 incr_iv->biv_p = true; 1203 } 1204} 1205 1206/* Checks whether STMT defines a linear induction variable and stores its 1207 parameters to IV. */ 1208 1209static bool 1210find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv) 1211{ 1212 tree lhs, stop; 1213 struct loop *loop = data->current_loop; 1214 1215 iv->base = NULL_TREE; 1216 iv->step = NULL_TREE; 1217 1218 if (gimple_code (stmt) != GIMPLE_ASSIGN) 1219 return false; 1220 1221 lhs = gimple_assign_lhs (stmt); 1222 if (TREE_CODE (lhs) != SSA_NAME) 1223 return false; 1224 1225 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true)) 1226 return false; 1227 1228 /* Stop expanding iv base at the first ssa var referred by iv step. 1229 Ideally we should stop at any ssa var, because that's expensive 1230 and unusual to happen, we just do it on the first one. 1231 1232 See PR64705 for the rationale. */ 1233 stop = extract_single_var_from_expr (iv->step); 1234 iv->base = expand_simple_operations (iv->base, stop); 1235 if (contains_abnormal_ssa_name_p (iv->base) 1236 || contains_abnormal_ssa_name_p (iv->step)) 1237 return false; 1238 1239 /* If STMT could throw, then do not consider STMT as defining a GIV. 1240 While this will suppress optimizations, we can not safely delete this 1241 GIV and associated statements, even if it appears it is not used. */ 1242 if (stmt_could_throw_p (stmt)) 1243 return false; 1244 1245 return true; 1246} 1247 1248/* Finds general ivs in statement STMT. */ 1249 1250static void 1251find_givs_in_stmt (struct ivopts_data *data, gimple stmt) 1252{ 1253 affine_iv iv; 1254 1255 if (!find_givs_in_stmt_scev (data, stmt, &iv)) 1256 return; 1257 1258 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step); 1259} 1260 1261/* Finds general ivs in basic block BB. */ 1262 1263static void 1264find_givs_in_bb (struct ivopts_data *data, basic_block bb) 1265{ 1266 gimple_stmt_iterator bsi; 1267 1268 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 1269 find_givs_in_stmt (data, gsi_stmt (bsi)); 1270} 1271 1272/* Finds general ivs. */ 1273 1274static void 1275find_givs (struct ivopts_data *data) 1276{ 1277 struct loop *loop = data->current_loop; 1278 basic_block *body = get_loop_body_in_dom_order (loop); 1279 unsigned i; 1280 1281 for (i = 0; i < loop->num_nodes; i++) 1282 find_givs_in_bb (data, body[i]); 1283 free (body); 1284} 1285 1286/* For each ssa name defined in LOOP determines whether it is an induction 1287 variable and if so, its initial value and step. */ 1288 1289static bool 1290find_induction_variables (struct ivopts_data *data) 1291{ 1292 unsigned i; 1293 bitmap_iterator bi; 1294 1295 if (!find_bivs (data)) 1296 return false; 1297 1298 find_givs (data); 1299 mark_bivs (data); 1300 1301 if (dump_file && (dump_flags & TDF_DETAILS)) 1302 { 1303 struct tree_niter_desc *niter = niter_for_single_dom_exit (data); 1304 1305 if (niter) 1306 { 1307 fprintf (dump_file, " number of iterations "); 1308 print_generic_expr (dump_file, niter->niter, TDF_SLIM); 1309 if (!integer_zerop (niter->may_be_zero)) 1310 { 1311 fprintf (dump_file, "; zero if "); 1312 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM); 1313 } 1314 fprintf (dump_file, "\n\n"); 1315 }; 1316 1317 fprintf (dump_file, "Induction variables:\n\n"); 1318 1319 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 1320 { 1321 if (ver_info (data, i)->iv) 1322 dump_iv (dump_file, ver_info (data, i)->iv); 1323 } 1324 } 1325 1326 return true; 1327} 1328 1329/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */ 1330 1331static struct iv_use * 1332record_use (struct ivopts_data *data, tree *use_p, struct iv *iv, 1333 gimple stmt, enum use_type use_type) 1334{ 1335 struct iv_use *use = XCNEW (struct iv_use); 1336 1337 use->id = n_iv_uses (data); 1338 use->type = use_type; 1339 use->iv = iv; 1340 use->stmt = stmt; 1341 use->op_p = use_p; 1342 use->related_cands = BITMAP_ALLOC (NULL); 1343 1344 /* To avoid showing ssa name in the dumps, if it was not reset by the 1345 caller. */ 1346 iv->ssa_name = NULL_TREE; 1347 1348 if (dump_file && (dump_flags & TDF_DETAILS)) 1349 dump_use (dump_file, use); 1350 1351 data->iv_uses.safe_push (use); 1352 1353 return use; 1354} 1355 1356/* Checks whether OP is a loop-level invariant and if so, records it. 1357 NONLINEAR_USE is true if the invariant is used in a way we do not 1358 handle specially. */ 1359 1360static void 1361record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use) 1362{ 1363 basic_block bb; 1364 struct version_info *info; 1365 1366 if (TREE_CODE (op) != SSA_NAME 1367 || virtual_operand_p (op)) 1368 return; 1369 1370 bb = gimple_bb (SSA_NAME_DEF_STMT (op)); 1371 if (bb 1372 && flow_bb_inside_loop_p (data->current_loop, bb)) 1373 return; 1374 1375 info = name_info (data, op); 1376 info->name = op; 1377 info->has_nonlin_use |= nonlinear_use; 1378 if (!info->inv_id) 1379 info->inv_id = ++data->max_inv_id; 1380 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op)); 1381} 1382 1383/* Checks whether the use OP is interesting and if so, records it. */ 1384 1385static struct iv_use * 1386find_interesting_uses_op (struct ivopts_data *data, tree op) 1387{ 1388 struct iv *iv; 1389 struct iv *civ; 1390 gimple stmt; 1391 struct iv_use *use; 1392 1393 if (TREE_CODE (op) != SSA_NAME) 1394 return NULL; 1395 1396 iv = get_iv (data, op); 1397 if (!iv) 1398 return NULL; 1399 1400 if (iv->have_use_for) 1401 { 1402 use = iv_use (data, iv->use_id); 1403 1404 gcc_assert (use->type == USE_NONLINEAR_EXPR); 1405 return use; 1406 } 1407 1408 if (integer_zerop (iv->step)) 1409 { 1410 record_invariant (data, op, true); 1411 return NULL; 1412 } 1413 iv->have_use_for = true; 1414 1415 civ = XNEW (struct iv); 1416 *civ = *iv; 1417 1418 stmt = SSA_NAME_DEF_STMT (op); 1419 gcc_assert (gimple_code (stmt) == GIMPLE_PHI 1420 || is_gimple_assign (stmt)); 1421 1422 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR); 1423 iv->use_id = use->id; 1424 1425 return use; 1426} 1427 1428/* Given a condition in statement STMT, checks whether it is a compare 1429 of an induction variable and an invariant. If this is the case, 1430 CONTROL_VAR is set to location of the iv, BOUND to the location of 1431 the invariant, IV_VAR and IV_BOUND are set to the corresponding 1432 induction variable descriptions, and true is returned. If this is not 1433 the case, CONTROL_VAR and BOUND are set to the arguments of the 1434 condition and false is returned. */ 1435 1436static bool 1437extract_cond_operands (struct ivopts_data *data, gimple stmt, 1438 tree **control_var, tree **bound, 1439 struct iv **iv_var, struct iv **iv_bound) 1440{ 1441 /* The objects returned when COND has constant operands. */ 1442 static struct iv const_iv; 1443 static tree zero; 1444 tree *op0 = &zero, *op1 = &zero, *tmp_op; 1445 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv; 1446 bool ret = false; 1447 1448 if (gimple_code (stmt) == GIMPLE_COND) 1449 { 1450 gcond *cond_stmt = as_a <gcond *> (stmt); 1451 op0 = gimple_cond_lhs_ptr (cond_stmt); 1452 op1 = gimple_cond_rhs_ptr (cond_stmt); 1453 } 1454 else 1455 { 1456 op0 = gimple_assign_rhs1_ptr (stmt); 1457 op1 = gimple_assign_rhs2_ptr (stmt); 1458 } 1459 1460 zero = integer_zero_node; 1461 const_iv.step = integer_zero_node; 1462 1463 if (TREE_CODE (*op0) == SSA_NAME) 1464 iv0 = get_iv (data, *op0); 1465 if (TREE_CODE (*op1) == SSA_NAME) 1466 iv1 = get_iv (data, *op1); 1467 1468 /* Exactly one of the compared values must be an iv, and the other one must 1469 be an invariant. */ 1470 if (!iv0 || !iv1) 1471 goto end; 1472 1473 if (integer_zerop (iv0->step)) 1474 { 1475 /* Control variable may be on the other side. */ 1476 tmp_op = op0; op0 = op1; op1 = tmp_op; 1477 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv; 1478 } 1479 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step); 1480 1481end: 1482 if (control_var) 1483 *control_var = op0;; 1484 if (iv_var) 1485 *iv_var = iv0;; 1486 if (bound) 1487 *bound = op1; 1488 if (iv_bound) 1489 *iv_bound = iv1; 1490 1491 return ret; 1492} 1493 1494/* Checks whether the condition in STMT is interesting and if so, 1495 records it. */ 1496 1497static void 1498find_interesting_uses_cond (struct ivopts_data *data, gimple stmt) 1499{ 1500 tree *var_p, *bound_p; 1501 struct iv *var_iv, *civ; 1502 1503 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL)) 1504 { 1505 find_interesting_uses_op (data, *var_p); 1506 find_interesting_uses_op (data, *bound_p); 1507 return; 1508 } 1509 1510 civ = XNEW (struct iv); 1511 *civ = *var_iv; 1512 record_use (data, NULL, civ, stmt, USE_COMPARE); 1513} 1514 1515/* Returns the outermost loop EXPR is obviously invariant in 1516 relative to the loop LOOP, i.e. if all its operands are defined 1517 outside of the returned loop. Returns NULL if EXPR is not 1518 even obviously invariant in LOOP. */ 1519 1520struct loop * 1521outermost_invariant_loop_for_expr (struct loop *loop, tree expr) 1522{ 1523 basic_block def_bb; 1524 unsigned i, len; 1525 1526 if (is_gimple_min_invariant (expr)) 1527 return current_loops->tree_root; 1528 1529 if (TREE_CODE (expr) == SSA_NAME) 1530 { 1531 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); 1532 if (def_bb) 1533 { 1534 if (flow_bb_inside_loop_p (loop, def_bb)) 1535 return NULL; 1536 return superloop_at_depth (loop, 1537 loop_depth (def_bb->loop_father) + 1); 1538 } 1539 1540 return current_loops->tree_root; 1541 } 1542 1543 if (!EXPR_P (expr)) 1544 return NULL; 1545 1546 unsigned maxdepth = 0; 1547 len = TREE_OPERAND_LENGTH (expr); 1548 for (i = 0; i < len; i++) 1549 { 1550 struct loop *ivloop; 1551 if (!TREE_OPERAND (expr, i)) 1552 continue; 1553 1554 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i)); 1555 if (!ivloop) 1556 return NULL; 1557 maxdepth = MAX (maxdepth, loop_depth (ivloop)); 1558 } 1559 1560 return superloop_at_depth (loop, maxdepth); 1561} 1562 1563/* Returns true if expression EXPR is obviously invariant in LOOP, 1564 i.e. if all its operands are defined outside of the LOOP. LOOP 1565 should not be the function body. */ 1566 1567bool 1568expr_invariant_in_loop_p (struct loop *loop, tree expr) 1569{ 1570 basic_block def_bb; 1571 unsigned i, len; 1572 1573 gcc_assert (loop_depth (loop) > 0); 1574 1575 if (is_gimple_min_invariant (expr)) 1576 return true; 1577 1578 if (TREE_CODE (expr) == SSA_NAME) 1579 { 1580 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); 1581 if (def_bb 1582 && flow_bb_inside_loop_p (loop, def_bb)) 1583 return false; 1584 1585 return true; 1586 } 1587 1588 if (!EXPR_P (expr)) 1589 return false; 1590 1591 len = TREE_OPERAND_LENGTH (expr); 1592 for (i = 0; i < len; i++) 1593 if (TREE_OPERAND (expr, i) 1594 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i))) 1595 return false; 1596 1597 return true; 1598} 1599 1600/* Cumulates the steps of indices into DATA and replaces their values with the 1601 initial ones. Returns false when the value of the index cannot be determined. 1602 Callback for for_each_index. */ 1603 1604struct ifs_ivopts_data 1605{ 1606 struct ivopts_data *ivopts_data; 1607 gimple stmt; 1608 tree step; 1609}; 1610 1611static bool 1612idx_find_step (tree base, tree *idx, void *data) 1613{ 1614 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data; 1615 struct iv *iv; 1616 tree step, iv_base, iv_step, lbound, off; 1617 struct loop *loop = dta->ivopts_data->current_loop; 1618 1619 /* If base is a component ref, require that the offset of the reference 1620 be invariant. */ 1621 if (TREE_CODE (base) == COMPONENT_REF) 1622 { 1623 off = component_ref_field_offset (base); 1624 return expr_invariant_in_loop_p (loop, off); 1625 } 1626 1627 /* If base is array, first check whether we will be able to move the 1628 reference out of the loop (in order to take its address in strength 1629 reduction). In order for this to work we need both lower bound 1630 and step to be loop invariants. */ 1631 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 1632 { 1633 /* Moreover, for a range, the size needs to be invariant as well. */ 1634 if (TREE_CODE (base) == ARRAY_RANGE_REF 1635 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base)))) 1636 return false; 1637 1638 step = array_ref_element_size (base); 1639 lbound = array_ref_low_bound (base); 1640 1641 if (!expr_invariant_in_loop_p (loop, step) 1642 || !expr_invariant_in_loop_p (loop, lbound)) 1643 return false; 1644 } 1645 1646 if (TREE_CODE (*idx) != SSA_NAME) 1647 return true; 1648 1649 iv = get_iv (dta->ivopts_data, *idx); 1650 if (!iv) 1651 return false; 1652 1653 /* XXX We produce for a base of *D42 with iv->base being &x[0] 1654 *&x[0], which is not folded and does not trigger the 1655 ARRAY_REF path below. */ 1656 *idx = iv->base; 1657 1658 if (integer_zerop (iv->step)) 1659 return true; 1660 1661 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 1662 { 1663 step = array_ref_element_size (base); 1664 1665 /* We only handle addresses whose step is an integer constant. */ 1666 if (TREE_CODE (step) != INTEGER_CST) 1667 return false; 1668 } 1669 else 1670 /* The step for pointer arithmetics already is 1 byte. */ 1671 step = size_one_node; 1672 1673 iv_base = iv->base; 1674 iv_step = iv->step; 1675 if (!convert_affine_scev (dta->ivopts_data->current_loop, 1676 sizetype, &iv_base, &iv_step, dta->stmt, 1677 false)) 1678 { 1679 /* The index might wrap. */ 1680 return false; 1681 } 1682 1683 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step); 1684 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step); 1685 1686 return true; 1687} 1688 1689/* Records use in index IDX. Callback for for_each_index. Ivopts data 1690 object is passed to it in DATA. */ 1691 1692static bool 1693idx_record_use (tree base, tree *idx, 1694 void *vdata) 1695{ 1696 struct ivopts_data *data = (struct ivopts_data *) vdata; 1697 find_interesting_uses_op (data, *idx); 1698 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF) 1699 { 1700 find_interesting_uses_op (data, array_ref_element_size (base)); 1701 find_interesting_uses_op (data, array_ref_low_bound (base)); 1702 } 1703 return true; 1704} 1705 1706/* If we can prove that TOP = cst * BOT for some constant cst, 1707 store cst to MUL and return true. Otherwise return false. 1708 The returned value is always sign-extended, regardless of the 1709 signedness of TOP and BOT. */ 1710 1711static bool 1712constant_multiple_of (tree top, tree bot, widest_int *mul) 1713{ 1714 tree mby; 1715 enum tree_code code; 1716 unsigned precision = TYPE_PRECISION (TREE_TYPE (top)); 1717 widest_int res, p0, p1; 1718 1719 STRIP_NOPS (top); 1720 STRIP_NOPS (bot); 1721 1722 if (operand_equal_p (top, bot, 0)) 1723 { 1724 *mul = 1; 1725 return true; 1726 } 1727 1728 code = TREE_CODE (top); 1729 switch (code) 1730 { 1731 case MULT_EXPR: 1732 mby = TREE_OPERAND (top, 1); 1733 if (TREE_CODE (mby) != INTEGER_CST) 1734 return false; 1735 1736 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res)) 1737 return false; 1738 1739 *mul = wi::sext (res * wi::to_widest (mby), precision); 1740 return true; 1741 1742 case PLUS_EXPR: 1743 case MINUS_EXPR: 1744 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0) 1745 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1)) 1746 return false; 1747 1748 if (code == MINUS_EXPR) 1749 p1 = -p1; 1750 *mul = wi::sext (p0 + p1, precision); 1751 return true; 1752 1753 case INTEGER_CST: 1754 if (TREE_CODE (bot) != INTEGER_CST) 1755 return false; 1756 1757 p0 = widest_int::from (top, SIGNED); 1758 p1 = widest_int::from (bot, SIGNED); 1759 if (p1 == 0) 1760 return false; 1761 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision); 1762 return res == 0; 1763 1764 default: 1765 return false; 1766 } 1767} 1768 1769/* Return true if memory reference REF with step STEP may be unaligned. */ 1770 1771static bool 1772may_be_unaligned_p (tree ref, tree step) 1773{ 1774 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target, 1775 thus they are not misaligned. */ 1776 if (TREE_CODE (ref) == TARGET_MEM_REF) 1777 return false; 1778 1779 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref)); 1780 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align) 1781 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))); 1782 1783 unsigned HOST_WIDE_INT bitpos; 1784 unsigned int ref_align; 1785 get_object_alignment_1 (ref, &ref_align, &bitpos); 1786 if (ref_align < align 1787 || (bitpos % align) != 0 1788 || (bitpos % BITS_PER_UNIT) != 0) 1789 return true; 1790 1791 unsigned int trailing_zeros = tree_ctz (step); 1792 if (trailing_zeros < HOST_BITS_PER_INT 1793 && (1U << trailing_zeros) * BITS_PER_UNIT < align) 1794 return true; 1795 1796 return false; 1797} 1798 1799/* Return true if EXPR may be non-addressable. */ 1800 1801bool 1802may_be_nonaddressable_p (tree expr) 1803{ 1804 switch (TREE_CODE (expr)) 1805 { 1806 case TARGET_MEM_REF: 1807 /* TARGET_MEM_REFs are translated directly to valid MEMs on the 1808 target, thus they are always addressable. */ 1809 return false; 1810 1811 case COMPONENT_REF: 1812 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1)) 1813 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 1814 1815 case VIEW_CONVERT_EXPR: 1816 /* This kind of view-conversions may wrap non-addressable objects 1817 and make them look addressable. After some processing the 1818 non-addressability may be uncovered again, causing ADDR_EXPRs 1819 of inappropriate objects to be built. */ 1820 if (is_gimple_reg (TREE_OPERAND (expr, 0)) 1821 || !is_gimple_addressable (TREE_OPERAND (expr, 0))) 1822 return true; 1823 1824 /* ... fall through ... */ 1825 1826 case ARRAY_REF: 1827 case ARRAY_RANGE_REF: 1828 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0)); 1829 1830 CASE_CONVERT: 1831 return true; 1832 1833 default: 1834 break; 1835 } 1836 1837 return false; 1838} 1839 1840/* Finds addresses in *OP_P inside STMT. */ 1841 1842static void 1843find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p) 1844{ 1845 tree base = *op_p, step = size_zero_node; 1846 struct iv *civ; 1847 struct ifs_ivopts_data ifs_ivopts_data; 1848 1849 /* Do not play with volatile memory references. A bit too conservative, 1850 perhaps, but safe. */ 1851 if (gimple_has_volatile_ops (stmt)) 1852 goto fail; 1853 1854 /* Ignore bitfields for now. Not really something terribly complicated 1855 to handle. TODO. */ 1856 if (TREE_CODE (base) == BIT_FIELD_REF) 1857 goto fail; 1858 1859 base = unshare_expr (base); 1860 1861 if (TREE_CODE (base) == TARGET_MEM_REF) 1862 { 1863 tree type = build_pointer_type (TREE_TYPE (base)); 1864 tree astep; 1865 1866 if (TMR_BASE (base) 1867 && TREE_CODE (TMR_BASE (base)) == SSA_NAME) 1868 { 1869 civ = get_iv (data, TMR_BASE (base)); 1870 if (!civ) 1871 goto fail; 1872 1873 TMR_BASE (base) = civ->base; 1874 step = civ->step; 1875 } 1876 if (TMR_INDEX2 (base) 1877 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME) 1878 { 1879 civ = get_iv (data, TMR_INDEX2 (base)); 1880 if (!civ) 1881 goto fail; 1882 1883 TMR_INDEX2 (base) = civ->base; 1884 step = civ->step; 1885 } 1886 if (TMR_INDEX (base) 1887 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME) 1888 { 1889 civ = get_iv (data, TMR_INDEX (base)); 1890 if (!civ) 1891 goto fail; 1892 1893 TMR_INDEX (base) = civ->base; 1894 astep = civ->step; 1895 1896 if (astep) 1897 { 1898 if (TMR_STEP (base)) 1899 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep); 1900 1901 step = fold_build2 (PLUS_EXPR, type, step, astep); 1902 } 1903 } 1904 1905 if (integer_zerop (step)) 1906 goto fail; 1907 base = tree_mem_ref_addr (type, base); 1908 } 1909 else 1910 { 1911 ifs_ivopts_data.ivopts_data = data; 1912 ifs_ivopts_data.stmt = stmt; 1913 ifs_ivopts_data.step = size_zero_node; 1914 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data) 1915 || integer_zerop (ifs_ivopts_data.step)) 1916 goto fail; 1917 step = ifs_ivopts_data.step; 1918 1919 /* Check that the base expression is addressable. This needs 1920 to be done after substituting bases of IVs into it. */ 1921 if (may_be_nonaddressable_p (base)) 1922 goto fail; 1923 1924 /* Moreover, on strict alignment platforms, check that it is 1925 sufficiently aligned. */ 1926 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step)) 1927 goto fail; 1928 1929 base = build_fold_addr_expr (base); 1930 1931 /* Substituting bases of IVs into the base expression might 1932 have caused folding opportunities. */ 1933 if (TREE_CODE (base) == ADDR_EXPR) 1934 { 1935 tree *ref = &TREE_OPERAND (base, 0); 1936 while (handled_component_p (*ref)) 1937 ref = &TREE_OPERAND (*ref, 0); 1938 if (TREE_CODE (*ref) == MEM_REF) 1939 { 1940 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref), 1941 TREE_OPERAND (*ref, 0), 1942 TREE_OPERAND (*ref, 1)); 1943 if (tem) 1944 *ref = tem; 1945 } 1946 } 1947 } 1948 1949 civ = alloc_iv (base, step); 1950 record_use (data, op_p, civ, stmt, USE_ADDRESS); 1951 return; 1952 1953fail: 1954 for_each_index (op_p, idx_record_use, data); 1955} 1956 1957/* Finds and records invariants used in STMT. */ 1958 1959static void 1960find_invariants_stmt (struct ivopts_data *data, gimple stmt) 1961{ 1962 ssa_op_iter iter; 1963 use_operand_p use_p; 1964 tree op; 1965 1966 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) 1967 { 1968 op = USE_FROM_PTR (use_p); 1969 record_invariant (data, op, false); 1970 } 1971} 1972 1973/* Finds interesting uses of induction variables in the statement STMT. */ 1974 1975static void 1976find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt) 1977{ 1978 struct iv *iv; 1979 tree op, *lhs, *rhs; 1980 ssa_op_iter iter; 1981 use_operand_p use_p; 1982 enum tree_code code; 1983 1984 find_invariants_stmt (data, stmt); 1985 1986 if (gimple_code (stmt) == GIMPLE_COND) 1987 { 1988 find_interesting_uses_cond (data, stmt); 1989 return; 1990 } 1991 1992 if (is_gimple_assign (stmt)) 1993 { 1994 lhs = gimple_assign_lhs_ptr (stmt); 1995 rhs = gimple_assign_rhs1_ptr (stmt); 1996 1997 if (TREE_CODE (*lhs) == SSA_NAME) 1998 { 1999 /* If the statement defines an induction variable, the uses are not 2000 interesting by themselves. */ 2001 2002 iv = get_iv (data, *lhs); 2003 2004 if (iv && !integer_zerop (iv->step)) 2005 return; 2006 } 2007 2008 code = gimple_assign_rhs_code (stmt); 2009 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS 2010 && (REFERENCE_CLASS_P (*rhs) 2011 || is_gimple_val (*rhs))) 2012 { 2013 if (REFERENCE_CLASS_P (*rhs)) 2014 find_interesting_uses_address (data, stmt, rhs); 2015 else 2016 find_interesting_uses_op (data, *rhs); 2017 2018 if (REFERENCE_CLASS_P (*lhs)) 2019 find_interesting_uses_address (data, stmt, lhs); 2020 return; 2021 } 2022 else if (TREE_CODE_CLASS (code) == tcc_comparison) 2023 { 2024 find_interesting_uses_cond (data, stmt); 2025 return; 2026 } 2027 2028 /* TODO -- we should also handle address uses of type 2029 2030 memory = call (whatever); 2031 2032 and 2033 2034 call (memory). */ 2035 } 2036 2037 if (gimple_code (stmt) == GIMPLE_PHI 2038 && gimple_bb (stmt) == data->current_loop->header) 2039 { 2040 iv = get_iv (data, PHI_RESULT (stmt)); 2041 2042 if (iv && !integer_zerop (iv->step)) 2043 return; 2044 } 2045 2046 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) 2047 { 2048 op = USE_FROM_PTR (use_p); 2049 2050 if (TREE_CODE (op) != SSA_NAME) 2051 continue; 2052 2053 iv = get_iv (data, op); 2054 if (!iv) 2055 continue; 2056 2057 find_interesting_uses_op (data, op); 2058 } 2059} 2060 2061/* Finds interesting uses of induction variables outside of loops 2062 on loop exit edge EXIT. */ 2063 2064static void 2065find_interesting_uses_outside (struct ivopts_data *data, edge exit) 2066{ 2067 gphi *phi; 2068 gphi_iterator psi; 2069 tree def; 2070 2071 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi)) 2072 { 2073 phi = psi.phi (); 2074 def = PHI_ARG_DEF_FROM_EDGE (phi, exit); 2075 if (!virtual_operand_p (def)) 2076 find_interesting_uses_op (data, def); 2077 } 2078} 2079 2080/* Finds uses of the induction variables that are interesting. */ 2081 2082static void 2083find_interesting_uses (struct ivopts_data *data) 2084{ 2085 basic_block bb; 2086 gimple_stmt_iterator bsi; 2087 basic_block *body = get_loop_body (data->current_loop); 2088 unsigned i; 2089 struct version_info *info; 2090 edge e; 2091 2092 if (dump_file && (dump_flags & TDF_DETAILS)) 2093 fprintf (dump_file, "Uses:\n\n"); 2094 2095 for (i = 0; i < data->current_loop->num_nodes; i++) 2096 { 2097 edge_iterator ei; 2098 bb = body[i]; 2099 2100 FOR_EACH_EDGE (e, ei, bb->succs) 2101 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) 2102 && !flow_bb_inside_loop_p (data->current_loop, e->dest)) 2103 find_interesting_uses_outside (data, e); 2104 2105 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 2106 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 2107 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 2108 if (!is_gimple_debug (gsi_stmt (bsi))) 2109 find_interesting_uses_stmt (data, gsi_stmt (bsi)); 2110 } 2111 2112 if (dump_file && (dump_flags & TDF_DETAILS)) 2113 { 2114 bitmap_iterator bi; 2115 2116 fprintf (dump_file, "\n"); 2117 2118 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 2119 { 2120 info = ver_info (data, i); 2121 if (info->inv_id) 2122 { 2123 fprintf (dump_file, " "); 2124 print_generic_expr (dump_file, info->name, TDF_SLIM); 2125 fprintf (dump_file, " is invariant (%d)%s\n", 2126 info->inv_id, info->has_nonlin_use ? "" : ", eliminable"); 2127 } 2128 } 2129 2130 fprintf (dump_file, "\n"); 2131 } 2132 2133 free (body); 2134} 2135 2136/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR 2137 is true, assume we are inside an address. If TOP_COMPREF is true, assume 2138 we are at the top-level of the processed address. */ 2139 2140static tree 2141strip_offset_1 (tree expr, bool inside_addr, bool top_compref, 2142 HOST_WIDE_INT *offset) 2143{ 2144 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step; 2145 enum tree_code code; 2146 tree type, orig_type = TREE_TYPE (expr); 2147 HOST_WIDE_INT off0, off1, st; 2148 tree orig_expr = expr; 2149 2150 STRIP_NOPS (expr); 2151 2152 type = TREE_TYPE (expr); 2153 code = TREE_CODE (expr); 2154 *offset = 0; 2155 2156 switch (code) 2157 { 2158 case INTEGER_CST: 2159 if (!cst_and_fits_in_hwi (expr) 2160 || integer_zerop (expr)) 2161 return orig_expr; 2162 2163 *offset = int_cst_value (expr); 2164 return build_int_cst (orig_type, 0); 2165 2166 case POINTER_PLUS_EXPR: 2167 case PLUS_EXPR: 2168 case MINUS_EXPR: 2169 op0 = TREE_OPERAND (expr, 0); 2170 op1 = TREE_OPERAND (expr, 1); 2171 2172 op0 = strip_offset_1 (op0, false, false, &off0); 2173 op1 = strip_offset_1 (op1, false, false, &off1); 2174 2175 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1); 2176 if (op0 == TREE_OPERAND (expr, 0) 2177 && op1 == TREE_OPERAND (expr, 1)) 2178 return orig_expr; 2179 2180 if (integer_zerop (op1)) 2181 expr = op0; 2182 else if (integer_zerop (op0)) 2183 { 2184 if (code == MINUS_EXPR) 2185 expr = fold_build1 (NEGATE_EXPR, type, op1); 2186 else 2187 expr = op1; 2188 } 2189 else 2190 expr = fold_build2 (code, type, op0, op1); 2191 2192 return fold_convert (orig_type, expr); 2193 2194 case MULT_EXPR: 2195 op1 = TREE_OPERAND (expr, 1); 2196 if (!cst_and_fits_in_hwi (op1)) 2197 return orig_expr; 2198 2199 op0 = TREE_OPERAND (expr, 0); 2200 op0 = strip_offset_1 (op0, false, false, &off0); 2201 if (op0 == TREE_OPERAND (expr, 0)) 2202 return orig_expr; 2203 2204 *offset = off0 * int_cst_value (op1); 2205 if (integer_zerop (op0)) 2206 expr = op0; 2207 else 2208 expr = fold_build2 (MULT_EXPR, type, op0, op1); 2209 2210 return fold_convert (orig_type, expr); 2211 2212 case ARRAY_REF: 2213 case ARRAY_RANGE_REF: 2214 if (!inside_addr) 2215 return orig_expr; 2216 2217 step = array_ref_element_size (expr); 2218 if (!cst_and_fits_in_hwi (step)) 2219 break; 2220 2221 st = int_cst_value (step); 2222 op1 = TREE_OPERAND (expr, 1); 2223 op1 = strip_offset_1 (op1, false, false, &off1); 2224 *offset = off1 * st; 2225 2226 if (top_compref 2227 && integer_zerop (op1)) 2228 { 2229 /* Strip the component reference completely. */ 2230 op0 = TREE_OPERAND (expr, 0); 2231 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); 2232 *offset += off0; 2233 return op0; 2234 } 2235 break; 2236 2237 case COMPONENT_REF: 2238 { 2239 tree field; 2240 2241 if (!inside_addr) 2242 return orig_expr; 2243 2244 tmp = component_ref_field_offset (expr); 2245 field = TREE_OPERAND (expr, 1); 2246 if (top_compref 2247 && cst_and_fits_in_hwi (tmp) 2248 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field))) 2249 { 2250 HOST_WIDE_INT boffset, abs_off; 2251 2252 /* Strip the component reference completely. */ 2253 op0 = TREE_OPERAND (expr, 0); 2254 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0); 2255 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field)); 2256 abs_off = abs_hwi (boffset) / BITS_PER_UNIT; 2257 if (boffset < 0) 2258 abs_off = -abs_off; 2259 2260 *offset = off0 + int_cst_value (tmp) + abs_off; 2261 return op0; 2262 } 2263 } 2264 break; 2265 2266 case ADDR_EXPR: 2267 op0 = TREE_OPERAND (expr, 0); 2268 op0 = strip_offset_1 (op0, true, true, &off0); 2269 *offset += off0; 2270 2271 if (op0 == TREE_OPERAND (expr, 0)) 2272 return orig_expr; 2273 2274 expr = build_fold_addr_expr (op0); 2275 return fold_convert (orig_type, expr); 2276 2277 case MEM_REF: 2278 /* ??? Offset operand? */ 2279 inside_addr = false; 2280 break; 2281 2282 default: 2283 return orig_expr; 2284 } 2285 2286 /* Default handling of expressions for that we want to recurse into 2287 the first operand. */ 2288 op0 = TREE_OPERAND (expr, 0); 2289 op0 = strip_offset_1 (op0, inside_addr, false, &off0); 2290 *offset += off0; 2291 2292 if (op0 == TREE_OPERAND (expr, 0) 2293 && (!op1 || op1 == TREE_OPERAND (expr, 1))) 2294 return orig_expr; 2295 2296 expr = copy_node (expr); 2297 TREE_OPERAND (expr, 0) = op0; 2298 if (op1) 2299 TREE_OPERAND (expr, 1) = op1; 2300 2301 /* Inside address, we might strip the top level component references, 2302 thus changing type of the expression. Handling of ADDR_EXPR 2303 will fix that. */ 2304 expr = fold_convert (orig_type, expr); 2305 2306 return expr; 2307} 2308 2309/* Strips constant offsets from EXPR and stores them to OFFSET. */ 2310 2311static tree 2312strip_offset (tree expr, unsigned HOST_WIDE_INT *offset) 2313{ 2314 HOST_WIDE_INT off; 2315 tree core = strip_offset_1 (expr, false, false, &off); 2316 *offset = off; 2317 return core; 2318} 2319 2320/* Returns variant of TYPE that can be used as base for different uses. 2321 We return unsigned type with the same precision, which avoids problems 2322 with overflows. */ 2323 2324static tree 2325generic_type_for (tree type) 2326{ 2327 if (POINTER_TYPE_P (type)) 2328 return unsigned_type_for (type); 2329 2330 if (TYPE_UNSIGNED (type)) 2331 return type; 2332 2333 return unsigned_type_for (type); 2334} 2335 2336/* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains 2337 the bitmap to that we should store it. */ 2338 2339static struct ivopts_data *fd_ivopts_data; 2340static tree 2341find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data) 2342{ 2343 bitmap *depends_on = (bitmap *) data; 2344 struct version_info *info; 2345 2346 if (TREE_CODE (*expr_p) != SSA_NAME) 2347 return NULL_TREE; 2348 info = name_info (fd_ivopts_data, *expr_p); 2349 2350 if (!info->inv_id || info->has_nonlin_use) 2351 return NULL_TREE; 2352 2353 if (!*depends_on) 2354 *depends_on = BITMAP_ALLOC (NULL); 2355 bitmap_set_bit (*depends_on, info->inv_id); 2356 2357 return NULL_TREE; 2358} 2359 2360/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 2361 position to POS. If USE is not NULL, the candidate is set as related to 2362 it. If both BASE and STEP are NULL, we add a pseudocandidate for the 2363 replacement of the final value of the iv by a direct computation. */ 2364 2365static struct iv_cand * 2366add_candidate_1 (struct ivopts_data *data, 2367 tree base, tree step, bool important, enum iv_position pos, 2368 struct iv_use *use, gimple incremented_at) 2369{ 2370 unsigned i; 2371 struct iv_cand *cand = NULL; 2372 tree type, orig_type; 2373 2374 /* For non-original variables, make sure their values are computed in a type 2375 that does not invoke undefined behavior on overflows (since in general, 2376 we cannot prove that these induction variables are non-wrapping). */ 2377 if (pos != IP_ORIGINAL) 2378 { 2379 orig_type = TREE_TYPE (base); 2380 type = generic_type_for (orig_type); 2381 if (type != orig_type) 2382 { 2383 base = fold_convert (type, base); 2384 step = fold_convert (type, step); 2385 } 2386 } 2387 2388 for (i = 0; i < n_iv_cands (data); i++) 2389 { 2390 cand = iv_cand (data, i); 2391 2392 if (cand->pos != pos) 2393 continue; 2394 2395 if (cand->incremented_at != incremented_at 2396 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 2397 && cand->ainc_use != use)) 2398 continue; 2399 2400 if (!cand->iv) 2401 { 2402 if (!base && !step) 2403 break; 2404 2405 continue; 2406 } 2407 2408 if (!base && !step) 2409 continue; 2410 2411 if (operand_equal_p (base, cand->iv->base, 0) 2412 && operand_equal_p (step, cand->iv->step, 0) 2413 && (TYPE_PRECISION (TREE_TYPE (base)) 2414 == TYPE_PRECISION (TREE_TYPE (cand->iv->base)))) 2415 break; 2416 } 2417 2418 if (i == n_iv_cands (data)) 2419 { 2420 cand = XCNEW (struct iv_cand); 2421 cand->id = i; 2422 2423 if (!base && !step) 2424 cand->iv = NULL; 2425 else 2426 cand->iv = alloc_iv (base, step); 2427 2428 cand->pos = pos; 2429 if (pos != IP_ORIGINAL && cand->iv) 2430 { 2431 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp"); 2432 cand->var_after = cand->var_before; 2433 } 2434 cand->important = important; 2435 cand->incremented_at = incremented_at; 2436 data->iv_candidates.safe_push (cand); 2437 2438 if (step 2439 && TREE_CODE (step) != INTEGER_CST) 2440 { 2441 fd_ivopts_data = data; 2442 walk_tree (&step, find_depends, &cand->depends_on, NULL); 2443 } 2444 2445 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE) 2446 cand->ainc_use = use; 2447 else 2448 cand->ainc_use = NULL; 2449 2450 if (dump_file && (dump_flags & TDF_DETAILS)) 2451 dump_cand (dump_file, cand); 2452 } 2453 2454 if (important && !cand->important) 2455 { 2456 cand->important = true; 2457 if (dump_file && (dump_flags & TDF_DETAILS)) 2458 fprintf (dump_file, "Candidate %d is important\n", cand->id); 2459 } 2460 2461 if (use) 2462 { 2463 bitmap_set_bit (use->related_cands, i); 2464 if (dump_file && (dump_flags & TDF_DETAILS)) 2465 fprintf (dump_file, "Candidate %d is related to use %d\n", 2466 cand->id, use->id); 2467 } 2468 2469 return cand; 2470} 2471 2472/* Returns true if incrementing the induction variable at the end of the LOOP 2473 is allowed. 2474 2475 The purpose is to avoid splitting latch edge with a biv increment, thus 2476 creating a jump, possibly confusing other optimization passes and leaving 2477 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS 2478 is not available (so we do not have a better alternative), or if the latch 2479 edge is already nonempty. */ 2480 2481static bool 2482allow_ip_end_pos_p (struct loop *loop) 2483{ 2484 if (!ip_normal_pos (loop)) 2485 return true; 2486 2487 if (!empty_block_p (ip_end_pos (loop))) 2488 return true; 2489 2490 return false; 2491} 2492 2493/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE. 2494 Important field is set to IMPORTANT. */ 2495 2496static void 2497add_autoinc_candidates (struct ivopts_data *data, tree base, tree step, 2498 bool important, struct iv_use *use) 2499{ 2500 basic_block use_bb = gimple_bb (use->stmt); 2501 machine_mode mem_mode; 2502 unsigned HOST_WIDE_INT cstepi; 2503 2504 /* If we insert the increment in any position other than the standard 2505 ones, we must ensure that it is incremented once per iteration. 2506 It must not be in an inner nested loop, or one side of an if 2507 statement. */ 2508 if (use_bb->loop_father != data->current_loop 2509 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb) 2510 || stmt_could_throw_p (use->stmt) 2511 || !cst_and_fits_in_hwi (step)) 2512 return; 2513 2514 cstepi = int_cst_value (step); 2515 2516 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p)); 2517 if (((USE_LOAD_PRE_INCREMENT (mem_mode) 2518 || USE_STORE_PRE_INCREMENT (mem_mode)) 2519 && GET_MODE_SIZE (mem_mode) == cstepi) 2520 || ((USE_LOAD_PRE_DECREMENT (mem_mode) 2521 || USE_STORE_PRE_DECREMENT (mem_mode)) 2522 && GET_MODE_SIZE (mem_mode) == -cstepi)) 2523 { 2524 enum tree_code code = MINUS_EXPR; 2525 tree new_base; 2526 tree new_step = step; 2527 2528 if (POINTER_TYPE_P (TREE_TYPE (base))) 2529 { 2530 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); 2531 code = POINTER_PLUS_EXPR; 2532 } 2533 else 2534 new_step = fold_convert (TREE_TYPE (base), new_step); 2535 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step); 2536 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use, 2537 use->stmt); 2538 } 2539 if (((USE_LOAD_POST_INCREMENT (mem_mode) 2540 || USE_STORE_POST_INCREMENT (mem_mode)) 2541 && GET_MODE_SIZE (mem_mode) == cstepi) 2542 || ((USE_LOAD_POST_DECREMENT (mem_mode) 2543 || USE_STORE_POST_DECREMENT (mem_mode)) 2544 && GET_MODE_SIZE (mem_mode) == -cstepi)) 2545 { 2546 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use, 2547 use->stmt); 2548 } 2549} 2550 2551/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 2552 position to POS. If USE is not NULL, the candidate is set as related to 2553 it. The candidate computation is scheduled on all available positions. */ 2554 2555static void 2556add_candidate (struct ivopts_data *data, 2557 tree base, tree step, bool important, struct iv_use *use) 2558{ 2559 if (ip_normal_pos (data->current_loop)) 2560 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL); 2561 if (ip_end_pos (data->current_loop) 2562 && allow_ip_end_pos_p (data->current_loop)) 2563 add_candidate_1 (data, base, step, important, IP_END, use, NULL); 2564 2565 if (use != NULL && use->type == USE_ADDRESS) 2566 add_autoinc_candidates (data, base, step, important, use); 2567} 2568 2569/* Adds standard iv candidates. */ 2570 2571static void 2572add_standard_iv_candidates (struct ivopts_data *data) 2573{ 2574 add_candidate (data, integer_zero_node, integer_one_node, true, NULL); 2575 2576 /* The same for a double-integer type if it is still fast enough. */ 2577 if (TYPE_PRECISION 2578 (long_integer_type_node) > TYPE_PRECISION (integer_type_node) 2579 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD) 2580 add_candidate (data, build_int_cst (long_integer_type_node, 0), 2581 build_int_cst (long_integer_type_node, 1), true, NULL); 2582 2583 /* The same for a double-integer type if it is still fast enough. */ 2584 if (TYPE_PRECISION 2585 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node) 2586 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD) 2587 add_candidate (data, build_int_cst (long_long_integer_type_node, 0), 2588 build_int_cst (long_long_integer_type_node, 1), true, NULL); 2589} 2590 2591 2592/* Adds candidates bases on the old induction variable IV. */ 2593 2594static void 2595add_old_iv_candidates (struct ivopts_data *data, struct iv *iv) 2596{ 2597 gimple phi; 2598 tree def; 2599 struct iv_cand *cand; 2600 2601 add_candidate (data, iv->base, iv->step, true, NULL); 2602 2603 /* The same, but with initial value zero. */ 2604 if (POINTER_TYPE_P (TREE_TYPE (iv->base))) 2605 add_candidate (data, size_int (0), iv->step, true, NULL); 2606 else 2607 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0), 2608 iv->step, true, NULL); 2609 2610 phi = SSA_NAME_DEF_STMT (iv->ssa_name); 2611 if (gimple_code (phi) == GIMPLE_PHI) 2612 { 2613 /* Additionally record the possibility of leaving the original iv 2614 untouched. */ 2615 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop)); 2616 /* Don't add candidate if it's from another PHI node because 2617 it's an affine iv appearing in the form of PEELED_CHREC. */ 2618 phi = SSA_NAME_DEF_STMT (def); 2619 if (gimple_code (phi) != GIMPLE_PHI) 2620 { 2621 cand = add_candidate_1 (data, 2622 iv->base, iv->step, true, IP_ORIGINAL, NULL, 2623 SSA_NAME_DEF_STMT (def)); 2624 cand->var_before = iv->ssa_name; 2625 cand->var_after = def; 2626 } 2627 else 2628 gcc_assert (gimple_bb (phi) == data->current_loop->header); 2629 } 2630} 2631 2632/* Adds candidates based on the old induction variables. */ 2633 2634static void 2635add_old_ivs_candidates (struct ivopts_data *data) 2636{ 2637 unsigned i; 2638 struct iv *iv; 2639 bitmap_iterator bi; 2640 2641 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 2642 { 2643 iv = ver_info (data, i)->iv; 2644 if (iv && iv->biv_p && !integer_zerop (iv->step)) 2645 add_old_iv_candidates (data, iv); 2646 } 2647} 2648 2649/* Adds candidates based on the value of the induction variable IV and USE. */ 2650 2651static void 2652add_iv_value_candidates (struct ivopts_data *data, 2653 struct iv *iv, struct iv_use *use) 2654{ 2655 unsigned HOST_WIDE_INT offset; 2656 tree base; 2657 tree basetype; 2658 2659 add_candidate (data, iv->base, iv->step, false, use); 2660 2661 /* The same, but with initial value zero. Make such variable important, 2662 since it is generic enough so that possibly many uses may be based 2663 on it. */ 2664 basetype = TREE_TYPE (iv->base); 2665 if (POINTER_TYPE_P (basetype)) 2666 basetype = sizetype; 2667 add_candidate (data, build_int_cst (basetype, 0), 2668 iv->step, true, use); 2669 2670 /* Third, try removing the constant offset. Make sure to even 2671 add a candidate for &a[0] vs. (T *)&a. */ 2672 base = strip_offset (iv->base, &offset); 2673 if (offset 2674 || base != iv->base) 2675 add_candidate (data, base, iv->step, false, use); 2676} 2677 2678/* Adds candidates based on the uses. */ 2679 2680static void 2681add_derived_ivs_candidates (struct ivopts_data *data) 2682{ 2683 unsigned i; 2684 2685 for (i = 0; i < n_iv_uses (data); i++) 2686 { 2687 struct iv_use *use = iv_use (data, i); 2688 2689 if (!use) 2690 continue; 2691 2692 switch (use->type) 2693 { 2694 case USE_NONLINEAR_EXPR: 2695 case USE_COMPARE: 2696 case USE_ADDRESS: 2697 /* Just add the ivs based on the value of the iv used here. */ 2698 add_iv_value_candidates (data, use->iv, use); 2699 break; 2700 2701 default: 2702 gcc_unreachable (); 2703 } 2704 } 2705} 2706 2707/* Record important candidates and add them to related_cands bitmaps 2708 if needed. */ 2709 2710static void 2711record_important_candidates (struct ivopts_data *data) 2712{ 2713 unsigned i; 2714 struct iv_use *use; 2715 2716 for (i = 0; i < n_iv_cands (data); i++) 2717 { 2718 struct iv_cand *cand = iv_cand (data, i); 2719 2720 if (cand->important) 2721 bitmap_set_bit (data->important_candidates, i); 2722 } 2723 2724 data->consider_all_candidates = (n_iv_cands (data) 2725 <= CONSIDER_ALL_CANDIDATES_BOUND); 2726 2727 if (data->consider_all_candidates) 2728 { 2729 /* We will not need "related_cands" bitmaps in this case, 2730 so release them to decrease peak memory consumption. */ 2731 for (i = 0; i < n_iv_uses (data); i++) 2732 { 2733 use = iv_use (data, i); 2734 BITMAP_FREE (use->related_cands); 2735 } 2736 } 2737 else 2738 { 2739 /* Add important candidates to the related_cands bitmaps. */ 2740 for (i = 0; i < n_iv_uses (data); i++) 2741 bitmap_ior_into (iv_use (data, i)->related_cands, 2742 data->important_candidates); 2743 } 2744} 2745 2746/* Allocates the data structure mapping the (use, candidate) pairs to costs. 2747 If consider_all_candidates is true, we use a two-dimensional array, otherwise 2748 we allocate a simple list to every use. */ 2749 2750static void 2751alloc_use_cost_map (struct ivopts_data *data) 2752{ 2753 unsigned i, size, s; 2754 2755 for (i = 0; i < n_iv_uses (data); i++) 2756 { 2757 struct iv_use *use = iv_use (data, i); 2758 2759 if (data->consider_all_candidates) 2760 size = n_iv_cands (data); 2761 else 2762 { 2763 s = bitmap_count_bits (use->related_cands); 2764 2765 /* Round up to the power of two, so that moduling by it is fast. */ 2766 size = s ? (1 << ceil_log2 (s)) : 1; 2767 } 2768 2769 use->n_map_members = size; 2770 use->cost_map = XCNEWVEC (struct cost_pair, size); 2771 } 2772} 2773 2774/* Returns description of computation cost of expression whose runtime 2775 cost is RUNTIME and complexity corresponds to COMPLEXITY. */ 2776 2777static comp_cost 2778new_cost (unsigned runtime, unsigned complexity) 2779{ 2780 comp_cost cost; 2781 2782 cost.cost = runtime; 2783 cost.complexity = complexity; 2784 2785 return cost; 2786} 2787 2788/* Adds costs COST1 and COST2. */ 2789 2790static comp_cost 2791add_costs (comp_cost cost1, comp_cost cost2) 2792{ 2793 cost1.cost += cost2.cost; 2794 cost1.complexity += cost2.complexity; 2795 2796 return cost1; 2797} 2798/* Subtracts costs COST1 and COST2. */ 2799 2800static comp_cost 2801sub_costs (comp_cost cost1, comp_cost cost2) 2802{ 2803 cost1.cost -= cost2.cost; 2804 cost1.complexity -= cost2.complexity; 2805 2806 return cost1; 2807} 2808 2809/* Returns a negative number if COST1 < COST2, a positive number if 2810 COST1 > COST2, and 0 if COST1 = COST2. */ 2811 2812static int 2813compare_costs (comp_cost cost1, comp_cost cost2) 2814{ 2815 if (cost1.cost == cost2.cost) 2816 return cost1.complexity - cost2.complexity; 2817 2818 return cost1.cost - cost2.cost; 2819} 2820 2821/* Returns true if COST is infinite. */ 2822 2823static bool 2824infinite_cost_p (comp_cost cost) 2825{ 2826 return cost.cost == INFTY; 2827} 2828 2829/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends 2830 on invariants DEPENDS_ON and that the value used in expressing it 2831 is VALUE, and in case of iv elimination the comparison operator is COMP. */ 2832 2833static void 2834set_use_iv_cost (struct ivopts_data *data, 2835 struct iv_use *use, struct iv_cand *cand, 2836 comp_cost cost, bitmap depends_on, tree value, 2837 enum tree_code comp, int inv_expr_id) 2838{ 2839 unsigned i, s; 2840 2841 if (infinite_cost_p (cost)) 2842 { 2843 BITMAP_FREE (depends_on); 2844 return; 2845 } 2846 2847 if (data->consider_all_candidates) 2848 { 2849 use->cost_map[cand->id].cand = cand; 2850 use->cost_map[cand->id].cost = cost; 2851 use->cost_map[cand->id].depends_on = depends_on; 2852 use->cost_map[cand->id].value = value; 2853 use->cost_map[cand->id].comp = comp; 2854 use->cost_map[cand->id].inv_expr_id = inv_expr_id; 2855 return; 2856 } 2857 2858 /* n_map_members is a power of two, so this computes modulo. */ 2859 s = cand->id & (use->n_map_members - 1); 2860 for (i = s; i < use->n_map_members; i++) 2861 if (!use->cost_map[i].cand) 2862 goto found; 2863 for (i = 0; i < s; i++) 2864 if (!use->cost_map[i].cand) 2865 goto found; 2866 2867 gcc_unreachable (); 2868 2869found: 2870 use->cost_map[i].cand = cand; 2871 use->cost_map[i].cost = cost; 2872 use->cost_map[i].depends_on = depends_on; 2873 use->cost_map[i].value = value; 2874 use->cost_map[i].comp = comp; 2875 use->cost_map[i].inv_expr_id = inv_expr_id; 2876} 2877 2878/* Gets cost of (USE, CANDIDATE) pair. */ 2879 2880static struct cost_pair * 2881get_use_iv_cost (struct ivopts_data *data, struct iv_use *use, 2882 struct iv_cand *cand) 2883{ 2884 unsigned i, s; 2885 struct cost_pair *ret; 2886 2887 if (!cand) 2888 return NULL; 2889 2890 if (data->consider_all_candidates) 2891 { 2892 ret = use->cost_map + cand->id; 2893 if (!ret->cand) 2894 return NULL; 2895 2896 return ret; 2897 } 2898 2899 /* n_map_members is a power of two, so this computes modulo. */ 2900 s = cand->id & (use->n_map_members - 1); 2901 for (i = s; i < use->n_map_members; i++) 2902 if (use->cost_map[i].cand == cand) 2903 return use->cost_map + i; 2904 else if (use->cost_map[i].cand == NULL) 2905 return NULL; 2906 for (i = 0; i < s; i++) 2907 if (use->cost_map[i].cand == cand) 2908 return use->cost_map + i; 2909 else if (use->cost_map[i].cand == NULL) 2910 return NULL; 2911 2912 return NULL; 2913} 2914 2915/* Produce DECL_RTL for object obj so it looks like it is stored in memory. */ 2916static rtx 2917produce_memory_decl_rtl (tree obj, int *regno) 2918{ 2919 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj)); 2920 machine_mode address_mode = targetm.addr_space.address_mode (as); 2921 rtx x; 2922 2923 gcc_assert (obj); 2924 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj)) 2925 { 2926 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj)); 2927 x = gen_rtx_SYMBOL_REF (address_mode, name); 2928 SET_SYMBOL_REF_DECL (x, obj); 2929 x = gen_rtx_MEM (DECL_MODE (obj), x); 2930 set_mem_addr_space (x, as); 2931 targetm.encode_section_info (obj, x, true); 2932 } 2933 else 2934 { 2935 x = gen_raw_REG (address_mode, (*regno)++); 2936 x = gen_rtx_MEM (DECL_MODE (obj), x); 2937 set_mem_addr_space (x, as); 2938 } 2939 2940 return x; 2941} 2942 2943/* Prepares decl_rtl for variables referred in *EXPR_P. Callback for 2944 walk_tree. DATA contains the actual fake register number. */ 2945 2946static tree 2947prepare_decl_rtl (tree *expr_p, int *ws, void *data) 2948{ 2949 tree obj = NULL_TREE; 2950 rtx x = NULL_RTX; 2951 int *regno = (int *) data; 2952 2953 switch (TREE_CODE (*expr_p)) 2954 { 2955 case ADDR_EXPR: 2956 for (expr_p = &TREE_OPERAND (*expr_p, 0); 2957 handled_component_p (*expr_p); 2958 expr_p = &TREE_OPERAND (*expr_p, 0)) 2959 continue; 2960 obj = *expr_p; 2961 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj)) 2962 x = produce_memory_decl_rtl (obj, regno); 2963 break; 2964 2965 case SSA_NAME: 2966 *ws = 0; 2967 obj = SSA_NAME_VAR (*expr_p); 2968 /* Defer handling of anonymous SSA_NAMEs to the expander. */ 2969 if (!obj) 2970 return NULL_TREE; 2971 if (!DECL_RTL_SET_P (obj)) 2972 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 2973 break; 2974 2975 case VAR_DECL: 2976 case PARM_DECL: 2977 case RESULT_DECL: 2978 *ws = 0; 2979 obj = *expr_p; 2980 2981 if (DECL_RTL_SET_P (obj)) 2982 break; 2983 2984 if (DECL_MODE (obj) == BLKmode) 2985 x = produce_memory_decl_rtl (obj, regno); 2986 else 2987 x = gen_raw_REG (DECL_MODE (obj), (*regno)++); 2988 2989 break; 2990 2991 default: 2992 break; 2993 } 2994 2995 if (x) 2996 { 2997 decl_rtl_to_reset.safe_push (obj); 2998 SET_DECL_RTL (obj, x); 2999 } 3000 3001 return NULL_TREE; 3002} 3003 3004/* Determines cost of the computation of EXPR. */ 3005 3006static unsigned 3007computation_cost (tree expr, bool speed) 3008{ 3009 rtx_insn *seq; 3010 rtx rslt; 3011 tree type = TREE_TYPE (expr); 3012 unsigned cost; 3013 /* Avoid using hard regs in ways which may be unsupported. */ 3014 int regno = LAST_VIRTUAL_REGISTER + 1; 3015 struct cgraph_node *node = cgraph_node::get (current_function_decl); 3016 enum node_frequency real_frequency = node->frequency; 3017 3018 node->frequency = NODE_FREQUENCY_NORMAL; 3019 crtl->maybe_hot_insn_p = speed; 3020 walk_tree (&expr, prepare_decl_rtl, ®no, NULL); 3021 start_sequence (); 3022 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL); 3023 seq = get_insns (); 3024 end_sequence (); 3025 default_rtl_profile (); 3026 node->frequency = real_frequency; 3027 3028 cost = seq_cost (seq, speed); 3029 if (MEM_P (rslt)) 3030 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), 3031 TYPE_ADDR_SPACE (type), speed); 3032 else if (!REG_P (rslt)) 3033 cost += set_src_cost (rslt, speed); 3034 3035 return cost; 3036} 3037 3038/* Returns variable containing the value of candidate CAND at statement AT. */ 3039 3040static tree 3041var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt) 3042{ 3043 if (stmt_after_increment (loop, cand, stmt)) 3044 return cand->var_after; 3045 else 3046 return cand->var_before; 3047} 3048 3049/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the 3050 same precision that is at least as wide as the precision of TYPE, stores 3051 BA to A and BB to B, and returns the type of BA. Otherwise, returns the 3052 type of A and B. */ 3053 3054static tree 3055determine_common_wider_type (tree *a, tree *b) 3056{ 3057 tree wider_type = NULL; 3058 tree suba, subb; 3059 tree atype = TREE_TYPE (*a); 3060 3061 if (CONVERT_EXPR_P (*a)) 3062 { 3063 suba = TREE_OPERAND (*a, 0); 3064 wider_type = TREE_TYPE (suba); 3065 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype)) 3066 return atype; 3067 } 3068 else 3069 return atype; 3070 3071 if (CONVERT_EXPR_P (*b)) 3072 { 3073 subb = TREE_OPERAND (*b, 0); 3074 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb))) 3075 return atype; 3076 } 3077 else 3078 return atype; 3079 3080 *a = suba; 3081 *b = subb; 3082 return wider_type; 3083} 3084 3085/* Determines the expression by that USE is expressed from induction variable 3086 CAND at statement AT in LOOP. The expression is stored in a decomposed 3087 form into AFF. Returns false if USE cannot be expressed using CAND. */ 3088 3089static bool 3090get_computation_aff (struct loop *loop, 3091 struct iv_use *use, struct iv_cand *cand, gimple at, 3092 struct aff_tree *aff) 3093{ 3094 tree ubase = use->iv->base; 3095 tree ustep = use->iv->step; 3096 tree cbase = cand->iv->base; 3097 tree cstep = cand->iv->step, cstep_common; 3098 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); 3099 tree common_type, var; 3100 tree uutype; 3101 aff_tree cbase_aff, var_aff; 3102 widest_int rat; 3103 3104 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) 3105 { 3106 /* We do not have a precision to express the values of use. */ 3107 return false; 3108 } 3109 3110 var = var_at_stmt (loop, cand, at); 3111 uutype = unsigned_type_for (utype); 3112 3113 /* If the conversion is not noop, perform it. */ 3114 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) 3115 { 3116 cstep = fold_convert (uutype, cstep); 3117 cbase = fold_convert (uutype, cbase); 3118 var = fold_convert (uutype, var); 3119 } 3120 3121 if (!constant_multiple_of (ustep, cstep, &rat)) 3122 return false; 3123 3124 /* In case both UBASE and CBASE are shortened to UUTYPE from some common 3125 type, we achieve better folding by computing their difference in this 3126 wider type, and cast the result to UUTYPE. We do not need to worry about 3127 overflows, as all the arithmetics will in the end be performed in UUTYPE 3128 anyway. */ 3129 common_type = determine_common_wider_type (&ubase, &cbase); 3130 3131 /* use = ubase - ratio * cbase + ratio * var. */ 3132 tree_to_aff_combination (ubase, common_type, aff); 3133 tree_to_aff_combination (cbase, common_type, &cbase_aff); 3134 tree_to_aff_combination (var, uutype, &var_aff); 3135 3136 /* We need to shift the value if we are after the increment. */ 3137 if (stmt_after_increment (loop, cand, at)) 3138 { 3139 aff_tree cstep_aff; 3140 3141 if (common_type != uutype) 3142 cstep_common = fold_convert (common_type, cstep); 3143 else 3144 cstep_common = cstep; 3145 3146 tree_to_aff_combination (cstep_common, common_type, &cstep_aff); 3147 aff_combination_add (&cbase_aff, &cstep_aff); 3148 } 3149 3150 aff_combination_scale (&cbase_aff, -rat); 3151 aff_combination_add (aff, &cbase_aff); 3152 if (common_type != uutype) 3153 aff_combination_convert (aff, uutype); 3154 3155 aff_combination_scale (&var_aff, rat); 3156 aff_combination_add (aff, &var_aff); 3157 3158 return true; 3159} 3160 3161/* Return the type of USE. */ 3162 3163static tree 3164get_use_type (struct iv_use *use) 3165{ 3166 tree base_type = TREE_TYPE (use->iv->base); 3167 tree type; 3168 3169 if (use->type == USE_ADDRESS) 3170 { 3171 /* The base_type may be a void pointer. Create a pointer type based on 3172 the mem_ref instead. */ 3173 type = build_pointer_type (TREE_TYPE (*use->op_p)); 3174 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type)) 3175 == TYPE_ADDR_SPACE (TREE_TYPE (base_type))); 3176 } 3177 else 3178 type = base_type; 3179 3180 return type; 3181} 3182 3183/* Determines the expression by that USE is expressed from induction variable 3184 CAND at statement AT in LOOP. The computation is unshared. */ 3185 3186static tree 3187get_computation_at (struct loop *loop, 3188 struct iv_use *use, struct iv_cand *cand, gimple at) 3189{ 3190 aff_tree aff; 3191 tree type = get_use_type (use); 3192 3193 if (!get_computation_aff (loop, use, cand, at, &aff)) 3194 return NULL_TREE; 3195 unshare_aff_combination (&aff); 3196 return fold_convert (type, aff_combination_to_tree (&aff)); 3197} 3198 3199/* Determines the expression by that USE is expressed from induction variable 3200 CAND in LOOP. The computation is unshared. */ 3201 3202static tree 3203get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand) 3204{ 3205 return get_computation_at (loop, use, cand, use->stmt); 3206} 3207 3208/* Adjust the cost COST for being in loop setup rather than loop body. 3209 If we're optimizing for space, the loop setup overhead is constant; 3210 if we're optimizing for speed, amortize it over the per-iteration cost. */ 3211static unsigned 3212adjust_setup_cost (struct ivopts_data *data, unsigned cost) 3213{ 3214 if (cost == INFTY) 3215 return cost; 3216 else if (optimize_loop_for_speed_p (data->current_loop)) 3217 return cost / avg_loop_niter (data->current_loop); 3218 else 3219 return cost; 3220} 3221 3222/* Returns true if multiplying by RATIO is allowed in an address. Test the 3223 validity for a memory reference accessing memory of mode MODE in 3224 address space AS. */ 3225 3226 3227bool 3228multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode, 3229 addr_space_t as) 3230{ 3231#define MAX_RATIO 128 3232 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode; 3233 static vec<sbitmap> valid_mult_list; 3234 sbitmap valid_mult; 3235 3236 if (data_index >= valid_mult_list.length ()) 3237 valid_mult_list.safe_grow_cleared (data_index + 1); 3238 3239 valid_mult = valid_mult_list[data_index]; 3240 if (!valid_mult) 3241 { 3242 machine_mode address_mode = targetm.addr_space.address_mode (as); 3243 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); 3244 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2); 3245 rtx addr, scaled; 3246 HOST_WIDE_INT i; 3247 3248 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1); 3249 bitmap_clear (valid_mult); 3250 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX); 3251 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2); 3252 for (i = -MAX_RATIO; i <= MAX_RATIO; i++) 3253 { 3254 XEXP (scaled, 1) = gen_int_mode (i, address_mode); 3255 if (memory_address_addr_space_p (mode, addr, as) 3256 || memory_address_addr_space_p (mode, scaled, as)) 3257 bitmap_set_bit (valid_mult, i + MAX_RATIO); 3258 } 3259 3260 if (dump_file && (dump_flags & TDF_DETAILS)) 3261 { 3262 fprintf (dump_file, " allowed multipliers:"); 3263 for (i = -MAX_RATIO; i <= MAX_RATIO; i++) 3264 if (bitmap_bit_p (valid_mult, i + MAX_RATIO)) 3265 fprintf (dump_file, " %d", (int) i); 3266 fprintf (dump_file, "\n"); 3267 fprintf (dump_file, "\n"); 3268 } 3269 3270 valid_mult_list[data_index] = valid_mult; 3271 } 3272 3273 if (ratio > MAX_RATIO || ratio < -MAX_RATIO) 3274 return false; 3275 3276 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO); 3277} 3278 3279/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index. 3280 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false, 3281 variable is omitted. Compute the cost for a memory reference that accesses 3282 a memory location of mode MEM_MODE in address space AS. 3283 3284 MAY_AUTOINC is set to true if the autoincrement (increasing index by 3285 size of MEM_MODE / RATIO) is available. To make this determination, we 3286 look at the size of the increment to be made, which is given in CSTEP. 3287 CSTEP may be zero if the step is unknown. 3288 STMT_AFTER_INC is true iff the statement we're looking at is after the 3289 increment of the original biv. 3290 3291 TODO -- there must be some better way. This all is quite crude. */ 3292 3293enum ainc_type 3294{ 3295 AINC_PRE_INC, /* Pre increment. */ 3296 AINC_PRE_DEC, /* Pre decrement. */ 3297 AINC_POST_INC, /* Post increment. */ 3298 AINC_POST_DEC, /* Post decrement. */ 3299 AINC_NONE /* Also the number of auto increment types. */ 3300}; 3301 3302typedef struct address_cost_data_s 3303{ 3304 HOST_WIDE_INT min_offset, max_offset; 3305 unsigned costs[2][2][2][2]; 3306 unsigned ainc_costs[AINC_NONE]; 3307} *address_cost_data; 3308 3309 3310static comp_cost 3311get_address_cost (bool symbol_present, bool var_present, 3312 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio, 3313 HOST_WIDE_INT cstep, machine_mode mem_mode, 3314 addr_space_t as, bool speed, 3315 bool stmt_after_inc, bool *may_autoinc) 3316{ 3317 machine_mode address_mode = targetm.addr_space.address_mode (as); 3318 static vec<address_cost_data> address_cost_data_list; 3319 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode; 3320 address_cost_data data; 3321 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE]; 3322 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE]; 3323 unsigned cost, acost, complexity; 3324 enum ainc_type autoinc_type; 3325 bool offset_p, ratio_p, autoinc; 3326 HOST_WIDE_INT s_offset, autoinc_offset, msize; 3327 unsigned HOST_WIDE_INT mask; 3328 unsigned bits; 3329 3330 if (data_index >= address_cost_data_list.length ()) 3331 address_cost_data_list.safe_grow_cleared (data_index + 1); 3332 3333 data = address_cost_data_list[data_index]; 3334 if (!data) 3335 { 3336 HOST_WIDE_INT i; 3337 HOST_WIDE_INT rat, off = 0; 3338 int old_cse_not_expected, width; 3339 unsigned sym_p, var_p, off_p, rat_p, add_c; 3340 rtx_insn *seq; 3341 rtx addr, base; 3342 rtx reg0, reg1; 3343 3344 data = (address_cost_data) xcalloc (1, sizeof (*data)); 3345 3346 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); 3347 3348 width = GET_MODE_BITSIZE (address_mode) - 1; 3349 if (width > (HOST_BITS_PER_WIDE_INT - 1)) 3350 width = HOST_BITS_PER_WIDE_INT - 1; 3351 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX); 3352 3353 for (i = width; i >= 0; i--) 3354 { 3355 off = -((unsigned HOST_WIDE_INT) 1 << i); 3356 XEXP (addr, 1) = gen_int_mode (off, address_mode); 3357 if (memory_address_addr_space_p (mem_mode, addr, as)) 3358 break; 3359 } 3360 data->min_offset = (i == -1? 0 : off); 3361 3362 for (i = width; i >= 0; i--) 3363 { 3364 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1; 3365 XEXP (addr, 1) = gen_int_mode (off, address_mode); 3366 if (memory_address_addr_space_p (mem_mode, addr, as)) 3367 break; 3368 /* For some strict-alignment targets, the offset must be naturally 3369 aligned. Try an aligned offset if mem_mode is not QImode. */ 3370 off = mem_mode != QImode 3371 ? ((unsigned HOST_WIDE_INT) 1 << i) 3372 - GET_MODE_SIZE (mem_mode) 3373 : 0; 3374 if (off > 0) 3375 { 3376 XEXP (addr, 1) = gen_int_mode (off, address_mode); 3377 if (memory_address_addr_space_p (mem_mode, addr, as)) 3378 break; 3379 } 3380 } 3381 if (i == -1) 3382 off = 0; 3383 data->max_offset = off; 3384 3385 if (dump_file && (dump_flags & TDF_DETAILS)) 3386 { 3387 fprintf (dump_file, "get_address_cost:\n"); 3388 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n", 3389 GET_MODE_NAME (mem_mode), 3390 data->min_offset); 3391 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n", 3392 GET_MODE_NAME (mem_mode), 3393 data->max_offset); 3394 } 3395 3396 rat = 1; 3397 for (i = 2; i <= MAX_RATIO; i++) 3398 if (multiplier_allowed_in_address_p (i, mem_mode, as)) 3399 { 3400 rat = i; 3401 break; 3402 } 3403 3404 /* Compute the cost of various addressing modes. */ 3405 acost = 0; 3406 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); 3407 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2); 3408 3409 if (USE_LOAD_PRE_DECREMENT (mem_mode) 3410 || USE_STORE_PRE_DECREMENT (mem_mode)) 3411 { 3412 addr = gen_rtx_PRE_DEC (address_mode, reg0); 3413 has_predec[mem_mode] 3414 = memory_address_addr_space_p (mem_mode, addr, as); 3415 3416 if (has_predec[mem_mode]) 3417 data->ainc_costs[AINC_PRE_DEC] 3418 = address_cost (addr, mem_mode, as, speed); 3419 } 3420 if (USE_LOAD_POST_DECREMENT (mem_mode) 3421 || USE_STORE_POST_DECREMENT (mem_mode)) 3422 { 3423 addr = gen_rtx_POST_DEC (address_mode, reg0); 3424 has_postdec[mem_mode] 3425 = memory_address_addr_space_p (mem_mode, addr, as); 3426 3427 if (has_postdec[mem_mode]) 3428 data->ainc_costs[AINC_POST_DEC] 3429 = address_cost (addr, mem_mode, as, speed); 3430 } 3431 if (USE_LOAD_PRE_INCREMENT (mem_mode) 3432 || USE_STORE_PRE_DECREMENT (mem_mode)) 3433 { 3434 addr = gen_rtx_PRE_INC (address_mode, reg0); 3435 has_preinc[mem_mode] 3436 = memory_address_addr_space_p (mem_mode, addr, as); 3437 3438 if (has_preinc[mem_mode]) 3439 data->ainc_costs[AINC_PRE_INC] 3440 = address_cost (addr, mem_mode, as, speed); 3441 } 3442 if (USE_LOAD_POST_INCREMENT (mem_mode) 3443 || USE_STORE_POST_INCREMENT (mem_mode)) 3444 { 3445 addr = gen_rtx_POST_INC (address_mode, reg0); 3446 has_postinc[mem_mode] 3447 = memory_address_addr_space_p (mem_mode, addr, as); 3448 3449 if (has_postinc[mem_mode]) 3450 data->ainc_costs[AINC_POST_INC] 3451 = address_cost (addr, mem_mode, as, speed); 3452 } 3453 for (i = 0; i < 16; i++) 3454 { 3455 sym_p = i & 1; 3456 var_p = (i >> 1) & 1; 3457 off_p = (i >> 2) & 1; 3458 rat_p = (i >> 3) & 1; 3459 3460 addr = reg0; 3461 if (rat_p) 3462 addr = gen_rtx_fmt_ee (MULT, address_mode, addr, 3463 gen_int_mode (rat, address_mode)); 3464 3465 if (var_p) 3466 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1); 3467 3468 if (sym_p) 3469 { 3470 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup ("")); 3471 /* ??? We can run into trouble with some backends by presenting 3472 it with symbols which haven't been properly passed through 3473 targetm.encode_section_info. By setting the local bit, we 3474 enhance the probability of things working. */ 3475 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL; 3476 3477 if (off_p) 3478 base = gen_rtx_fmt_e (CONST, address_mode, 3479 gen_rtx_fmt_ee 3480 (PLUS, address_mode, base, 3481 gen_int_mode (off, address_mode))); 3482 } 3483 else if (off_p) 3484 base = gen_int_mode (off, address_mode); 3485 else 3486 base = NULL_RTX; 3487 3488 if (base) 3489 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base); 3490 3491 start_sequence (); 3492 /* To avoid splitting addressing modes, pretend that no cse will 3493 follow. */ 3494 old_cse_not_expected = cse_not_expected; 3495 cse_not_expected = true; 3496 addr = memory_address_addr_space (mem_mode, addr, as); 3497 cse_not_expected = old_cse_not_expected; 3498 seq = get_insns (); 3499 end_sequence (); 3500 3501 acost = seq_cost (seq, speed); 3502 acost += address_cost (addr, mem_mode, as, speed); 3503 3504 if (!acost) 3505 acost = 1; 3506 data->costs[sym_p][var_p][off_p][rat_p] = acost; 3507 } 3508 3509 /* On some targets, it is quite expensive to load symbol to a register, 3510 which makes addresses that contain symbols look much more expensive. 3511 However, the symbol will have to be loaded in any case before the 3512 loop (and quite likely we have it in register already), so it does not 3513 make much sense to penalize them too heavily. So make some final 3514 tweaks for the SYMBOL_PRESENT modes: 3515 3516 If VAR_PRESENT is false, and the mode obtained by changing symbol to 3517 var is cheaper, use this mode with small penalty. 3518 If VAR_PRESENT is true, try whether the mode with 3519 SYMBOL_PRESENT = false is cheaper even with cost of addition, and 3520 if this is the case, use it. */ 3521 add_c = add_cost (speed, address_mode); 3522 for (i = 0; i < 8; i++) 3523 { 3524 var_p = i & 1; 3525 off_p = (i >> 1) & 1; 3526 rat_p = (i >> 2) & 1; 3527 3528 acost = data->costs[0][1][off_p][rat_p] + 1; 3529 if (var_p) 3530 acost += add_c; 3531 3532 if (acost < data->costs[1][var_p][off_p][rat_p]) 3533 data->costs[1][var_p][off_p][rat_p] = acost; 3534 } 3535 3536 if (dump_file && (dump_flags & TDF_DETAILS)) 3537 { 3538 fprintf (dump_file, "Address costs:\n"); 3539 3540 for (i = 0; i < 16; i++) 3541 { 3542 sym_p = i & 1; 3543 var_p = (i >> 1) & 1; 3544 off_p = (i >> 2) & 1; 3545 rat_p = (i >> 3) & 1; 3546 3547 fprintf (dump_file, " "); 3548 if (sym_p) 3549 fprintf (dump_file, "sym + "); 3550 if (var_p) 3551 fprintf (dump_file, "var + "); 3552 if (off_p) 3553 fprintf (dump_file, "cst + "); 3554 if (rat_p) 3555 fprintf (dump_file, "rat * "); 3556 3557 acost = data->costs[sym_p][var_p][off_p][rat_p]; 3558 fprintf (dump_file, "index costs %d\n", acost); 3559 } 3560 if (has_predec[mem_mode] || has_postdec[mem_mode] 3561 || has_preinc[mem_mode] || has_postinc[mem_mode]) 3562 fprintf (dump_file, " May include autoinc/dec\n"); 3563 fprintf (dump_file, "\n"); 3564 } 3565 3566 address_cost_data_list[data_index] = data; 3567 } 3568 3569 bits = GET_MODE_BITSIZE (address_mode); 3570 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1); 3571 offset &= mask; 3572 if ((offset >> (bits - 1) & 1)) 3573 offset |= ~mask; 3574 s_offset = offset; 3575 3576 autoinc = false; 3577 autoinc_type = AINC_NONE; 3578 msize = GET_MODE_SIZE (mem_mode); 3579 autoinc_offset = offset; 3580 if (stmt_after_inc) 3581 autoinc_offset += ratio * cstep; 3582 if (symbol_present || var_present || ratio != 1) 3583 autoinc = false; 3584 else 3585 { 3586 if (has_postinc[mem_mode] && autoinc_offset == 0 3587 && msize == cstep) 3588 autoinc_type = AINC_POST_INC; 3589 else if (has_postdec[mem_mode] && autoinc_offset == 0 3590 && msize == -cstep) 3591 autoinc_type = AINC_POST_DEC; 3592 else if (has_preinc[mem_mode] && autoinc_offset == msize 3593 && msize == cstep) 3594 autoinc_type = AINC_PRE_INC; 3595 else if (has_predec[mem_mode] && autoinc_offset == -msize 3596 && msize == -cstep) 3597 autoinc_type = AINC_PRE_DEC; 3598 3599 if (autoinc_type != AINC_NONE) 3600 autoinc = true; 3601 } 3602 3603 cost = 0; 3604 offset_p = (s_offset != 0 3605 && data->min_offset <= s_offset 3606 && s_offset <= data->max_offset); 3607 ratio_p = (ratio != 1 3608 && multiplier_allowed_in_address_p (ratio, mem_mode, as)); 3609 3610 if (ratio != 1 && !ratio_p) 3611 cost += mult_by_coeff_cost (ratio, address_mode, speed); 3612 3613 if (s_offset && !offset_p && !symbol_present) 3614 cost += add_cost (speed, address_mode); 3615 3616 if (may_autoinc) 3617 *may_autoinc = autoinc; 3618 if (autoinc) 3619 acost = data->ainc_costs[autoinc_type]; 3620 else 3621 acost = data->costs[symbol_present][var_present][offset_p][ratio_p]; 3622 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p; 3623 return new_cost (cost + acost, complexity); 3624} 3625 3626 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the 3627 the EXPR operand holding the shift. COST0 and COST1 are the costs for 3628 calculating the operands of EXPR. Returns true if successful, and returns 3629 the cost in COST. */ 3630 3631static bool 3632get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0, 3633 comp_cost cost1, tree mult, bool speed, comp_cost *cost) 3634{ 3635 comp_cost res; 3636 tree op1 = TREE_OPERAND (expr, 1); 3637 tree cst = TREE_OPERAND (mult, 1); 3638 tree multop = TREE_OPERAND (mult, 0); 3639 int m = exact_log2 (int_cst_value (cst)); 3640 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); 3641 int as_cost, sa_cost; 3642 bool mult_in_op1; 3643 3644 if (!(m >= 0 && m < maxm)) 3645 return false; 3646 3647 mult_in_op1 = operand_equal_p (op1, mult, 0); 3648 3649 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 3650 3651 /* If the target has a cheap shift-and-add or shift-and-sub instruction, 3652 use that in preference to a shift insn followed by an add insn. */ 3653 sa_cost = (TREE_CODE (expr) != MINUS_EXPR 3654 ? shiftadd_cost (speed, mode, m) 3655 : (mult_in_op1 3656 ? shiftsub1_cost (speed, mode, m) 3657 : shiftsub0_cost (speed, mode, m))); 3658 3659 res = new_cost (MIN (as_cost, sa_cost), 0); 3660 res = add_costs (res, mult_in_op1 ? cost0 : cost1); 3661 3662 STRIP_NOPS (multop); 3663 if (!is_gimple_val (multop)) 3664 res = add_costs (res, force_expr_to_var_cost (multop, speed)); 3665 3666 *cost = res; 3667 return true; 3668} 3669 3670/* Estimates cost of forcing expression EXPR into a variable. */ 3671 3672static comp_cost 3673force_expr_to_var_cost (tree expr, bool speed) 3674{ 3675 static bool costs_initialized = false; 3676 static unsigned integer_cost [2]; 3677 static unsigned symbol_cost [2]; 3678 static unsigned address_cost [2]; 3679 tree op0, op1; 3680 comp_cost cost0, cost1, cost; 3681 machine_mode mode; 3682 3683 if (!costs_initialized) 3684 { 3685 tree type = build_pointer_type (integer_type_node); 3686 tree var, addr; 3687 rtx x; 3688 int i; 3689 3690 var = create_tmp_var_raw (integer_type_node, "test_var"); 3691 TREE_STATIC (var) = 1; 3692 x = produce_memory_decl_rtl (var, NULL); 3693 SET_DECL_RTL (var, x); 3694 3695 addr = build1 (ADDR_EXPR, type, var); 3696 3697 3698 for (i = 0; i < 2; i++) 3699 { 3700 integer_cost[i] = computation_cost (build_int_cst (integer_type_node, 3701 2000), i); 3702 3703 symbol_cost[i] = computation_cost (addr, i) + 1; 3704 3705 address_cost[i] 3706 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1; 3707 if (dump_file && (dump_flags & TDF_DETAILS)) 3708 { 3709 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size"); 3710 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]); 3711 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]); 3712 fprintf (dump_file, " address %d\n", (int) address_cost[i]); 3713 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]); 3714 fprintf (dump_file, "\n"); 3715 } 3716 } 3717 3718 costs_initialized = true; 3719 } 3720 3721 STRIP_NOPS (expr); 3722 3723 if (SSA_VAR_P (expr)) 3724 return no_cost; 3725 3726 if (is_gimple_min_invariant (expr)) 3727 { 3728 if (TREE_CODE (expr) == INTEGER_CST) 3729 return new_cost (integer_cost [speed], 0); 3730 3731 if (TREE_CODE (expr) == ADDR_EXPR) 3732 { 3733 tree obj = TREE_OPERAND (expr, 0); 3734 3735 if (TREE_CODE (obj) == VAR_DECL 3736 || TREE_CODE (obj) == PARM_DECL 3737 || TREE_CODE (obj) == RESULT_DECL) 3738 return new_cost (symbol_cost [speed], 0); 3739 } 3740 3741 return new_cost (address_cost [speed], 0); 3742 } 3743 3744 switch (TREE_CODE (expr)) 3745 { 3746 case POINTER_PLUS_EXPR: 3747 case PLUS_EXPR: 3748 case MINUS_EXPR: 3749 case MULT_EXPR: 3750 op0 = TREE_OPERAND (expr, 0); 3751 op1 = TREE_OPERAND (expr, 1); 3752 STRIP_NOPS (op0); 3753 STRIP_NOPS (op1); 3754 break; 3755 3756 CASE_CONVERT: 3757 case NEGATE_EXPR: 3758 op0 = TREE_OPERAND (expr, 0); 3759 STRIP_NOPS (op0); 3760 op1 = NULL_TREE; 3761 break; 3762 3763 default: 3764 /* Just an arbitrary value, FIXME. */ 3765 return new_cost (target_spill_cost[speed], 0); 3766 } 3767 3768 if (op0 == NULL_TREE 3769 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0)) 3770 cost0 = no_cost; 3771 else 3772 cost0 = force_expr_to_var_cost (op0, speed); 3773 3774 if (op1 == NULL_TREE 3775 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1)) 3776 cost1 = no_cost; 3777 else 3778 cost1 = force_expr_to_var_cost (op1, speed); 3779 3780 mode = TYPE_MODE (TREE_TYPE (expr)); 3781 switch (TREE_CODE (expr)) 3782 { 3783 case POINTER_PLUS_EXPR: 3784 case PLUS_EXPR: 3785 case MINUS_EXPR: 3786 case NEGATE_EXPR: 3787 cost = new_cost (add_cost (speed, mode), 0); 3788 if (TREE_CODE (expr) != NEGATE_EXPR) 3789 { 3790 tree mult = NULL_TREE; 3791 comp_cost sa_cost; 3792 if (TREE_CODE (op1) == MULT_EXPR) 3793 mult = op1; 3794 else if (TREE_CODE (op0) == MULT_EXPR) 3795 mult = op0; 3796 3797 if (mult != NULL_TREE 3798 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1)) 3799 && get_shiftadd_cost (expr, mode, cost0, cost1, mult, 3800 speed, &sa_cost)) 3801 return sa_cost; 3802 } 3803 break; 3804 3805 CASE_CONVERT: 3806 { 3807 tree inner_mode, outer_mode; 3808 outer_mode = TREE_TYPE (expr); 3809 inner_mode = TREE_TYPE (op0); 3810 cost = new_cost (convert_cost (TYPE_MODE (outer_mode), 3811 TYPE_MODE (inner_mode), speed), 0); 3812 } 3813 break; 3814 3815 case MULT_EXPR: 3816 if (cst_and_fits_in_hwi (op0)) 3817 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0), 3818 mode, speed), 0); 3819 else if (cst_and_fits_in_hwi (op1)) 3820 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1), 3821 mode, speed), 0); 3822 else 3823 return new_cost (target_spill_cost [speed], 0); 3824 break; 3825 3826 default: 3827 gcc_unreachable (); 3828 } 3829 3830 cost = add_costs (cost, cost0); 3831 cost = add_costs (cost, cost1); 3832 3833 /* Bound the cost by target_spill_cost. The parts of complicated 3834 computations often are either loop invariant or at least can 3835 be shared between several iv uses, so letting this grow without 3836 limits would not give reasonable results. */ 3837 if (cost.cost > (int) target_spill_cost [speed]) 3838 cost.cost = target_spill_cost [speed]; 3839 3840 return cost; 3841} 3842 3843/* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the 3844 invariants the computation depends on. */ 3845 3846static comp_cost 3847force_var_cost (struct ivopts_data *data, 3848 tree expr, bitmap *depends_on) 3849{ 3850 if (depends_on) 3851 { 3852 fd_ivopts_data = data; 3853 walk_tree (&expr, find_depends, depends_on, NULL); 3854 } 3855 3856 return force_expr_to_var_cost (expr, data->speed); 3857} 3858 3859/* Estimates cost of expressing address ADDR as var + symbol + offset. The 3860 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set 3861 to false if the corresponding part is missing. DEPENDS_ON is a set of the 3862 invariants the computation depends on. */ 3863 3864static comp_cost 3865split_address_cost (struct ivopts_data *data, 3866 tree addr, bool *symbol_present, bool *var_present, 3867 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 3868{ 3869 tree core; 3870 HOST_WIDE_INT bitsize; 3871 HOST_WIDE_INT bitpos; 3872 tree toffset; 3873 machine_mode mode; 3874 int unsignedp, volatilep; 3875 3876 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode, 3877 &unsignedp, &volatilep, false); 3878 3879 if (toffset != 0 3880 || bitpos % BITS_PER_UNIT != 0 3881 || TREE_CODE (core) != VAR_DECL) 3882 { 3883 *symbol_present = false; 3884 *var_present = true; 3885 fd_ivopts_data = data; 3886 walk_tree (&addr, find_depends, depends_on, NULL); 3887 return new_cost (target_spill_cost[data->speed], 0); 3888 } 3889 3890 *offset += bitpos / BITS_PER_UNIT; 3891 if (TREE_STATIC (core) 3892 || DECL_EXTERNAL (core)) 3893 { 3894 *symbol_present = true; 3895 *var_present = false; 3896 return no_cost; 3897 } 3898 3899 *symbol_present = false; 3900 *var_present = true; 3901 return no_cost; 3902} 3903 3904/* Estimates cost of expressing difference of addresses E1 - E2 as 3905 var + symbol + offset. The value of offset is added to OFFSET, 3906 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding 3907 part is missing. DEPENDS_ON is a set of the invariants the computation 3908 depends on. */ 3909 3910static comp_cost 3911ptr_difference_cost (struct ivopts_data *data, 3912 tree e1, tree e2, bool *symbol_present, bool *var_present, 3913 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 3914{ 3915 HOST_WIDE_INT diff = 0; 3916 aff_tree aff_e1, aff_e2; 3917 tree type; 3918 3919 gcc_assert (TREE_CODE (e1) == ADDR_EXPR); 3920 3921 if (ptr_difference_const (e1, e2, &diff)) 3922 { 3923 *offset += diff; 3924 *symbol_present = false; 3925 *var_present = false; 3926 return no_cost; 3927 } 3928 3929 if (integer_zerop (e2)) 3930 return split_address_cost (data, TREE_OPERAND (e1, 0), 3931 symbol_present, var_present, offset, depends_on); 3932 3933 *symbol_present = false; 3934 *var_present = true; 3935 3936 type = signed_type_for (TREE_TYPE (e1)); 3937 tree_to_aff_combination (e1, type, &aff_e1); 3938 tree_to_aff_combination (e2, type, &aff_e2); 3939 aff_combination_scale (&aff_e2, -1); 3940 aff_combination_add (&aff_e1, &aff_e2); 3941 3942 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on); 3943} 3944 3945/* Estimates cost of expressing difference E1 - E2 as 3946 var + symbol + offset. The value of offset is added to OFFSET, 3947 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding 3948 part is missing. DEPENDS_ON is a set of the invariants the computation 3949 depends on. */ 3950 3951static comp_cost 3952difference_cost (struct ivopts_data *data, 3953 tree e1, tree e2, bool *symbol_present, bool *var_present, 3954 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 3955{ 3956 machine_mode mode = TYPE_MODE (TREE_TYPE (e1)); 3957 unsigned HOST_WIDE_INT off1, off2; 3958 aff_tree aff_e1, aff_e2; 3959 tree type; 3960 3961 e1 = strip_offset (e1, &off1); 3962 e2 = strip_offset (e2, &off2); 3963 *offset += off1 - off2; 3964 3965 STRIP_NOPS (e1); 3966 STRIP_NOPS (e2); 3967 3968 if (TREE_CODE (e1) == ADDR_EXPR) 3969 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, 3970 offset, depends_on); 3971 *symbol_present = false; 3972 3973 if (operand_equal_p (e1, e2, 0)) 3974 { 3975 *var_present = false; 3976 return no_cost; 3977 } 3978 3979 *var_present = true; 3980 3981 if (integer_zerop (e2)) 3982 return force_var_cost (data, e1, depends_on); 3983 3984 if (integer_zerop (e1)) 3985 { 3986 comp_cost cost = force_var_cost (data, e2, depends_on); 3987 cost.cost += mult_by_coeff_cost (-1, mode, data->speed); 3988 return cost; 3989 } 3990 3991 type = signed_type_for (TREE_TYPE (e1)); 3992 tree_to_aff_combination (e1, type, &aff_e1); 3993 tree_to_aff_combination (e2, type, &aff_e2); 3994 aff_combination_scale (&aff_e2, -1); 3995 aff_combination_add (&aff_e1, &aff_e2); 3996 3997 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on); 3998} 3999 4000/* Returns true if AFF1 and AFF2 are identical. */ 4001 4002static bool 4003compare_aff_trees (aff_tree *aff1, aff_tree *aff2) 4004{ 4005 unsigned i; 4006 4007 if (aff1->n != aff2->n) 4008 return false; 4009 4010 for (i = 0; i < aff1->n; i++) 4011 { 4012 if (aff1->elts[i].coef != aff2->elts[i].coef) 4013 return false; 4014 4015 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0)) 4016 return false; 4017 } 4018 return true; 4019} 4020 4021/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */ 4022 4023static int 4024get_expr_id (struct ivopts_data *data, tree expr) 4025{ 4026 struct iv_inv_expr_ent ent; 4027 struct iv_inv_expr_ent **slot; 4028 4029 ent.expr = expr; 4030 ent.hash = iterative_hash_expr (expr, 0); 4031 slot = data->inv_expr_tab->find_slot (&ent, INSERT); 4032 if (*slot) 4033 return (*slot)->id; 4034 4035 *slot = XNEW (struct iv_inv_expr_ent); 4036 (*slot)->expr = expr; 4037 (*slot)->hash = ent.hash; 4038 (*slot)->id = data->inv_expr_id++; 4039 return (*slot)->id; 4040} 4041 4042/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE 4043 requires a new compiler generated temporary. Returns -1 otherwise. 4044 ADDRESS_P is a flag indicating if the expression is for address 4045 computation. */ 4046 4047static int 4048get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase, 4049 tree cbase, HOST_WIDE_INT ratio, 4050 bool address_p) 4051{ 4052 aff_tree ubase_aff, cbase_aff; 4053 tree expr, ub, cb; 4054 4055 STRIP_NOPS (ubase); 4056 STRIP_NOPS (cbase); 4057 ub = ubase; 4058 cb = cbase; 4059 4060 if ((TREE_CODE (ubase) == INTEGER_CST) 4061 && (TREE_CODE (cbase) == INTEGER_CST)) 4062 return -1; 4063 4064 /* Strips the constant part. */ 4065 if (TREE_CODE (ubase) == PLUS_EXPR 4066 || TREE_CODE (ubase) == MINUS_EXPR 4067 || TREE_CODE (ubase) == POINTER_PLUS_EXPR) 4068 { 4069 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST) 4070 ubase = TREE_OPERAND (ubase, 0); 4071 } 4072 4073 /* Strips the constant part. */ 4074 if (TREE_CODE (cbase) == PLUS_EXPR 4075 || TREE_CODE (cbase) == MINUS_EXPR 4076 || TREE_CODE (cbase) == POINTER_PLUS_EXPR) 4077 { 4078 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST) 4079 cbase = TREE_OPERAND (cbase, 0); 4080 } 4081 4082 if (address_p) 4083 { 4084 if (((TREE_CODE (ubase) == SSA_NAME) 4085 || (TREE_CODE (ubase) == ADDR_EXPR 4086 && is_gimple_min_invariant (ubase))) 4087 && (TREE_CODE (cbase) == INTEGER_CST)) 4088 return -1; 4089 4090 if (((TREE_CODE (cbase) == SSA_NAME) 4091 || (TREE_CODE (cbase) == ADDR_EXPR 4092 && is_gimple_min_invariant (cbase))) 4093 && (TREE_CODE (ubase) == INTEGER_CST)) 4094 return -1; 4095 } 4096 4097 if (ratio == 1) 4098 { 4099 if (operand_equal_p (ubase, cbase, 0)) 4100 return -1; 4101 4102 if (TREE_CODE (ubase) == ADDR_EXPR 4103 && TREE_CODE (cbase) == ADDR_EXPR) 4104 { 4105 tree usym, csym; 4106 4107 usym = TREE_OPERAND (ubase, 0); 4108 csym = TREE_OPERAND (cbase, 0); 4109 if (TREE_CODE (usym) == ARRAY_REF) 4110 { 4111 tree ind = TREE_OPERAND (usym, 1); 4112 if (TREE_CODE (ind) == INTEGER_CST 4113 && tree_fits_shwi_p (ind) 4114 && tree_to_shwi (ind) == 0) 4115 usym = TREE_OPERAND (usym, 0); 4116 } 4117 if (TREE_CODE (csym) == ARRAY_REF) 4118 { 4119 tree ind = TREE_OPERAND (csym, 1); 4120 if (TREE_CODE (ind) == INTEGER_CST 4121 && tree_fits_shwi_p (ind) 4122 && tree_to_shwi (ind) == 0) 4123 csym = TREE_OPERAND (csym, 0); 4124 } 4125 if (operand_equal_p (usym, csym, 0)) 4126 return -1; 4127 } 4128 /* Now do more complex comparison */ 4129 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff); 4130 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff); 4131 if (compare_aff_trees (&ubase_aff, &cbase_aff)) 4132 return -1; 4133 } 4134 4135 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff); 4136 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff); 4137 4138 aff_combination_scale (&cbase_aff, -1 * ratio); 4139 aff_combination_add (&ubase_aff, &cbase_aff); 4140 expr = aff_combination_to_tree (&ubase_aff); 4141 return get_expr_id (data, expr); 4142} 4143 4144 4145 4146/* Determines the cost of the computation by that USE is expressed 4147 from induction variable CAND. If ADDRESS_P is true, we just need 4148 to create an address from it, otherwise we want to get it into 4149 register. A set of invariants we depend on is stored in 4150 DEPENDS_ON. AT is the statement at that the value is computed. 4151 If CAN_AUTOINC is nonnull, use it to record whether autoinc 4152 addressing is likely. */ 4153 4154static comp_cost 4155get_computation_cost_at (struct ivopts_data *data, 4156 struct iv_use *use, struct iv_cand *cand, 4157 bool address_p, bitmap *depends_on, gimple at, 4158 bool *can_autoinc, 4159 int *inv_expr_id) 4160{ 4161 tree ubase = use->iv->base, ustep = use->iv->step; 4162 tree cbase, cstep; 4163 tree utype = TREE_TYPE (ubase), ctype; 4164 unsigned HOST_WIDE_INT cstepi, offset = 0; 4165 HOST_WIDE_INT ratio, aratio; 4166 bool var_present, symbol_present, stmt_is_after_inc; 4167 comp_cost cost; 4168 widest_int rat; 4169 bool speed = optimize_bb_for_speed_p (gimple_bb (at)); 4170 machine_mode mem_mode = (address_p 4171 ? TYPE_MODE (TREE_TYPE (*use->op_p)) 4172 : VOIDmode); 4173 4174 *depends_on = NULL; 4175 4176 /* Only consider real candidates. */ 4177 if (!cand->iv) 4178 return infinite_cost; 4179 4180 cbase = cand->iv->base; 4181 cstep = cand->iv->step; 4182 ctype = TREE_TYPE (cbase); 4183 4184 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) 4185 { 4186 /* We do not have a precision to express the values of use. */ 4187 return infinite_cost; 4188 } 4189 4190 if (address_p 4191 || (use->iv->base_object 4192 && cand->iv->base_object 4193 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object)) 4194 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object)))) 4195 { 4196 /* Do not try to express address of an object with computation based 4197 on address of a different object. This may cause problems in rtl 4198 level alias analysis (that does not expect this to be happening, 4199 as this is illegal in C), and would be unlikely to be useful 4200 anyway. */ 4201 if (use->iv->base_object 4202 && cand->iv->base_object 4203 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0)) 4204 return infinite_cost; 4205 } 4206 4207 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) 4208 { 4209 /* TODO -- add direct handling of this case. */ 4210 goto fallback; 4211 } 4212 4213 /* CSTEPI is removed from the offset in case statement is after the 4214 increment. If the step is not constant, we use zero instead. 4215 This is a bit imprecise (there is the extra addition), but 4216 redundancy elimination is likely to transform the code so that 4217 it uses value of the variable before increment anyway, 4218 so it is not that much unrealistic. */ 4219 if (cst_and_fits_in_hwi (cstep)) 4220 cstepi = int_cst_value (cstep); 4221 else 4222 cstepi = 0; 4223 4224 if (!constant_multiple_of (ustep, cstep, &rat)) 4225 return infinite_cost; 4226 4227 if (wi::fits_shwi_p (rat)) 4228 ratio = rat.to_shwi (); 4229 else 4230 return infinite_cost; 4231 4232 STRIP_NOPS (cbase); 4233 ctype = TREE_TYPE (cbase); 4234 4235 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at); 4236 4237 /* use = ubase + ratio * (var - cbase). If either cbase is a constant 4238 or ratio == 1, it is better to handle this like 4239 4240 ubase - ratio * cbase + ratio * var 4241 4242 (also holds in the case ratio == -1, TODO. */ 4243 4244 if (cst_and_fits_in_hwi (cbase)) 4245 { 4246 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase); 4247 cost = difference_cost (data, 4248 ubase, build_int_cst (utype, 0), 4249 &symbol_present, &var_present, &offset, 4250 depends_on); 4251 cost.cost /= avg_loop_niter (data->current_loop); 4252 } 4253 else if (ratio == 1) 4254 { 4255 tree real_cbase = cbase; 4256 4257 /* Check to see if any adjustment is needed. */ 4258 if (cstepi == 0 && stmt_is_after_inc) 4259 { 4260 aff_tree real_cbase_aff; 4261 aff_tree cstep_aff; 4262 4263 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase), 4264 &real_cbase_aff); 4265 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff); 4266 4267 aff_combination_add (&real_cbase_aff, &cstep_aff); 4268 real_cbase = aff_combination_to_tree (&real_cbase_aff); 4269 } 4270 4271 cost = difference_cost (data, 4272 ubase, real_cbase, 4273 &symbol_present, &var_present, &offset, 4274 depends_on); 4275 cost.cost /= avg_loop_niter (data->current_loop); 4276 } 4277 else if (address_p 4278 && !POINTER_TYPE_P (ctype) 4279 && multiplier_allowed_in_address_p 4280 (ratio, mem_mode, 4281 TYPE_ADDR_SPACE (TREE_TYPE (utype)))) 4282 { 4283 cbase 4284 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio)); 4285 cost = difference_cost (data, 4286 ubase, cbase, 4287 &symbol_present, &var_present, &offset, 4288 depends_on); 4289 cost.cost /= avg_loop_niter (data->current_loop); 4290 } 4291 else 4292 { 4293 cost = force_var_cost (data, cbase, depends_on); 4294 cost = add_costs (cost, 4295 difference_cost (data, 4296 ubase, build_int_cst (utype, 0), 4297 &symbol_present, &var_present, 4298 &offset, depends_on)); 4299 cost.cost /= avg_loop_niter (data->current_loop); 4300 cost.cost += add_cost (data->speed, TYPE_MODE (ctype)); 4301 } 4302 4303 if (inv_expr_id) 4304 { 4305 *inv_expr_id = 4306 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p); 4307 /* Clear depends on. */ 4308 if (*inv_expr_id != -1 && depends_on && *depends_on) 4309 bitmap_clear (*depends_on); 4310 } 4311 4312 /* If we are after the increment, the value of the candidate is higher by 4313 one iteration. */ 4314 if (stmt_is_after_inc) 4315 offset -= ratio * cstepi; 4316 4317 /* Now the computation is in shape symbol + var1 + const + ratio * var2. 4318 (symbol/var1/const parts may be omitted). If we are looking for an 4319 address, find the cost of addressing this. */ 4320 if (address_p) 4321 return add_costs (cost, 4322 get_address_cost (symbol_present, var_present, 4323 offset, ratio, cstepi, 4324 mem_mode, 4325 TYPE_ADDR_SPACE (TREE_TYPE (utype)), 4326 speed, stmt_is_after_inc, 4327 can_autoinc)); 4328 4329 /* Otherwise estimate the costs for computing the expression. */ 4330 if (!symbol_present && !var_present && !offset) 4331 { 4332 if (ratio != 1) 4333 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed); 4334 return cost; 4335 } 4336 4337 /* Symbol + offset should be compile-time computable so consider that they 4338 are added once to the variable, if present. */ 4339 if (var_present && (symbol_present || offset)) 4340 cost.cost += adjust_setup_cost (data, 4341 add_cost (speed, TYPE_MODE (ctype))); 4342 4343 /* Having offset does not affect runtime cost in case it is added to 4344 symbol, but it increases complexity. */ 4345 if (offset) 4346 cost.complexity++; 4347 4348 cost.cost += add_cost (speed, TYPE_MODE (ctype)); 4349 4350 aratio = ratio > 0 ? ratio : -ratio; 4351 if (aratio != 1) 4352 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed); 4353 return cost; 4354 4355fallback: 4356 if (can_autoinc) 4357 *can_autoinc = false; 4358 4359 { 4360 /* Just get the expression, expand it and measure the cost. */ 4361 tree comp = get_computation_at (data->current_loop, use, cand, at); 4362 4363 if (!comp) 4364 return infinite_cost; 4365 4366 if (address_p) 4367 comp = build_simple_mem_ref (comp); 4368 4369 return new_cost (computation_cost (comp, speed), 0); 4370 } 4371} 4372 4373/* Determines the cost of the computation by that USE is expressed 4374 from induction variable CAND. If ADDRESS_P is true, we just need 4375 to create an address from it, otherwise we want to get it into 4376 register. A set of invariants we depend on is stored in 4377 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether 4378 autoinc addressing is likely. */ 4379 4380static comp_cost 4381get_computation_cost (struct ivopts_data *data, 4382 struct iv_use *use, struct iv_cand *cand, 4383 bool address_p, bitmap *depends_on, 4384 bool *can_autoinc, int *inv_expr_id) 4385{ 4386 return get_computation_cost_at (data, 4387 use, cand, address_p, depends_on, use->stmt, 4388 can_autoinc, inv_expr_id); 4389} 4390 4391/* Determines cost of basing replacement of USE on CAND in a generic 4392 expression. */ 4393 4394static bool 4395determine_use_iv_cost_generic (struct ivopts_data *data, 4396 struct iv_use *use, struct iv_cand *cand) 4397{ 4398 bitmap depends_on; 4399 comp_cost cost; 4400 int inv_expr_id = -1; 4401 4402 /* The simple case first -- if we need to express value of the preserved 4403 original biv, the cost is 0. This also prevents us from counting the 4404 cost of increment twice -- once at this use and once in the cost of 4405 the candidate. */ 4406 if (cand->pos == IP_ORIGINAL 4407 && cand->incremented_at == use->stmt) 4408 { 4409 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE, 4410 ERROR_MARK, -1); 4411 return true; 4412 } 4413 4414 cost = get_computation_cost (data, use, cand, false, &depends_on, 4415 NULL, &inv_expr_id); 4416 4417 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK, 4418 inv_expr_id); 4419 4420 return !infinite_cost_p (cost); 4421} 4422 4423/* Determines cost of basing replacement of USE on CAND in an address. */ 4424 4425static bool 4426determine_use_iv_cost_address (struct ivopts_data *data, 4427 struct iv_use *use, struct iv_cand *cand) 4428{ 4429 bitmap depends_on; 4430 bool can_autoinc; 4431 int inv_expr_id = -1; 4432 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on, 4433 &can_autoinc, &inv_expr_id); 4434 4435 if (cand->ainc_use == use) 4436 { 4437 if (can_autoinc) 4438 cost.cost -= cand->cost_step; 4439 /* If we generated the candidate solely for exploiting autoincrement 4440 opportunities, and it turns out it can't be used, set the cost to 4441 infinity to make sure we ignore it. */ 4442 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE) 4443 cost = infinite_cost; 4444 } 4445 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK, 4446 inv_expr_id); 4447 4448 return !infinite_cost_p (cost); 4449} 4450 4451/* Computes value of candidate CAND at position AT in iteration NITER, and 4452 stores it to VAL. */ 4453 4454static void 4455cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter, 4456 aff_tree *val) 4457{ 4458 aff_tree step, delta, nit; 4459 struct iv *iv = cand->iv; 4460 tree type = TREE_TYPE (iv->base); 4461 tree steptype = type; 4462 if (POINTER_TYPE_P (type)) 4463 steptype = sizetype; 4464 steptype = unsigned_type_for (type); 4465 4466 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step); 4467 aff_combination_convert (&step, steptype); 4468 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit); 4469 aff_combination_convert (&nit, steptype); 4470 aff_combination_mult (&nit, &step, &delta); 4471 if (stmt_after_increment (loop, cand, at)) 4472 aff_combination_add (&delta, &step); 4473 4474 tree_to_aff_combination (iv->base, type, val); 4475 if (!POINTER_TYPE_P (type)) 4476 aff_combination_convert (val, steptype); 4477 aff_combination_add (val, &delta); 4478} 4479 4480/* Returns period of induction variable iv. */ 4481 4482static tree 4483iv_period (struct iv *iv) 4484{ 4485 tree step = iv->step, period, type; 4486 tree pow2div; 4487 4488 gcc_assert (step && TREE_CODE (step) == INTEGER_CST); 4489 4490 type = unsigned_type_for (TREE_TYPE (step)); 4491 /* Period of the iv is lcm (step, type_range)/step -1, 4492 i.e., N*type_range/step - 1. Since type range is power 4493 of two, N == (step >> num_of_ending_zeros_binary (step), 4494 so the final result is 4495 4496 (type_range >> num_of_ending_zeros_binary (step)) - 1 4497 4498 */ 4499 pow2div = num_ending_zeros (step); 4500 4501 period = build_low_bits_mask (type, 4502 (TYPE_PRECISION (type) 4503 - tree_to_uhwi (pow2div))); 4504 4505 return period; 4506} 4507 4508/* Returns the comparison operator used when eliminating the iv USE. */ 4509 4510static enum tree_code 4511iv_elimination_compare (struct ivopts_data *data, struct iv_use *use) 4512{ 4513 struct loop *loop = data->current_loop; 4514 basic_block ex_bb; 4515 edge exit; 4516 4517 ex_bb = gimple_bb (use->stmt); 4518 exit = EDGE_SUCC (ex_bb, 0); 4519 if (flow_bb_inside_loop_p (loop, exit->dest)) 4520 exit = EDGE_SUCC (ex_bb, 1); 4521 4522 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR); 4523} 4524 4525/* Returns true if we can prove that BASE - OFFSET does not overflow. For now, 4526 we only detect the situation that BASE = SOMETHING + OFFSET, where the 4527 calculation is performed in non-wrapping type. 4528 4529 TODO: More generally, we could test for the situation that 4530 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero. 4531 This would require knowing the sign of OFFSET. */ 4532 4533static bool 4534difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset) 4535{ 4536 enum tree_code code; 4537 tree e1, e2; 4538 aff_tree aff_e1, aff_e2, aff_offset; 4539 4540 if (!nowrap_type_p (TREE_TYPE (base))) 4541 return false; 4542 4543 base = expand_simple_operations (base); 4544 4545 if (TREE_CODE (base) == SSA_NAME) 4546 { 4547 gimple stmt = SSA_NAME_DEF_STMT (base); 4548 4549 if (gimple_code (stmt) != GIMPLE_ASSIGN) 4550 return false; 4551 4552 code = gimple_assign_rhs_code (stmt); 4553 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) 4554 return false; 4555 4556 e1 = gimple_assign_rhs1 (stmt); 4557 e2 = gimple_assign_rhs2 (stmt); 4558 } 4559 else 4560 { 4561 code = TREE_CODE (base); 4562 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) 4563 return false; 4564 e1 = TREE_OPERAND (base, 0); 4565 e2 = TREE_OPERAND (base, 1); 4566 } 4567 4568 /* Use affine expansion as deeper inspection to prove the equality. */ 4569 tree_to_aff_combination_expand (e2, TREE_TYPE (e2), 4570 &aff_e2, &data->name_expansion_cache); 4571 tree_to_aff_combination_expand (offset, TREE_TYPE (offset), 4572 &aff_offset, &data->name_expansion_cache); 4573 aff_combination_scale (&aff_offset, -1); 4574 switch (code) 4575 { 4576 case PLUS_EXPR: 4577 aff_combination_add (&aff_e2, &aff_offset); 4578 if (aff_combination_zero_p (&aff_e2)) 4579 return true; 4580 4581 tree_to_aff_combination_expand (e1, TREE_TYPE (e1), 4582 &aff_e1, &data->name_expansion_cache); 4583 aff_combination_add (&aff_e1, &aff_offset); 4584 return aff_combination_zero_p (&aff_e1); 4585 4586 case POINTER_PLUS_EXPR: 4587 aff_combination_add (&aff_e2, &aff_offset); 4588 return aff_combination_zero_p (&aff_e2); 4589 4590 default: 4591 return false; 4592 } 4593} 4594 4595/* Tries to replace loop exit by one formulated in terms of a LT_EXPR 4596 comparison with CAND. NITER describes the number of iterations of 4597 the loops. If successful, the comparison in COMP_P is altered accordingly. 4598 4599 We aim to handle the following situation: 4600 4601 sometype *base, *p; 4602 int a, b, i; 4603 4604 i = a; 4605 p = p_0 = base + a; 4606 4607 do 4608 { 4609 bla (*p); 4610 p++; 4611 i++; 4612 } 4613 while (i < b); 4614 4615 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1. 4616 We aim to optimize this to 4617 4618 p = p_0 = base + a; 4619 do 4620 { 4621 bla (*p); 4622 p++; 4623 } 4624 while (p < p_0 - a + b); 4625 4626 This preserves the correctness, since the pointer arithmetics does not 4627 overflow. More precisely: 4628 4629 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no 4630 overflow in computing it or the values of p. 4631 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not 4632 overflow. To prove this, we use the fact that p_0 = base + a. */ 4633 4634static bool 4635iv_elimination_compare_lt (struct ivopts_data *data, 4636 struct iv_cand *cand, enum tree_code *comp_p, 4637 struct tree_niter_desc *niter) 4638{ 4639 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset; 4640 struct aff_tree nit, tmpa, tmpb; 4641 enum tree_code comp; 4642 HOST_WIDE_INT step; 4643 4644 /* We need to know that the candidate induction variable does not overflow. 4645 While more complex analysis may be used to prove this, for now just 4646 check that the variable appears in the original program and that it 4647 is computed in a type that guarantees no overflows. */ 4648 cand_type = TREE_TYPE (cand->iv->base); 4649 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type)) 4650 return false; 4651 4652 /* Make sure that the loop iterates till the loop bound is hit, as otherwise 4653 the calculation of the BOUND could overflow, making the comparison 4654 invalid. */ 4655 if (!data->loop_single_exit_p) 4656 return false; 4657 4658 /* We need to be able to decide whether candidate is increasing or decreasing 4659 in order to choose the right comparison operator. */ 4660 if (!cst_and_fits_in_hwi (cand->iv->step)) 4661 return false; 4662 step = int_cst_value (cand->iv->step); 4663 4664 /* Check that the number of iterations matches the expected pattern: 4665 a + 1 > b ? 0 : b - a - 1. */ 4666 mbz = niter->may_be_zero; 4667 if (TREE_CODE (mbz) == GT_EXPR) 4668 { 4669 /* Handle a + 1 > b. */ 4670 tree op0 = TREE_OPERAND (mbz, 0); 4671 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1))) 4672 { 4673 a = TREE_OPERAND (op0, 0); 4674 b = TREE_OPERAND (mbz, 1); 4675 } 4676 else 4677 return false; 4678 } 4679 else if (TREE_CODE (mbz) == LT_EXPR) 4680 { 4681 tree op1 = TREE_OPERAND (mbz, 1); 4682 4683 /* Handle b < a + 1. */ 4684 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1))) 4685 { 4686 a = TREE_OPERAND (op1, 0); 4687 b = TREE_OPERAND (mbz, 0); 4688 } 4689 else 4690 return false; 4691 } 4692 else 4693 return false; 4694 4695 /* Expected number of iterations is B - A - 1. Check that it matches 4696 the actual number, i.e., that B - A - NITER = 1. */ 4697 tree_to_aff_combination (niter->niter, nit_type, &nit); 4698 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa); 4699 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb); 4700 aff_combination_scale (&nit, -1); 4701 aff_combination_scale (&tmpa, -1); 4702 aff_combination_add (&tmpb, &tmpa); 4703 aff_combination_add (&tmpb, &nit); 4704 if (tmpb.n != 0 || tmpb.offset != 1) 4705 return false; 4706 4707 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not 4708 overflow. */ 4709 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step), 4710 cand->iv->step, 4711 fold_convert (TREE_TYPE (cand->iv->step), a)); 4712 if (!difference_cannot_overflow_p (data, cand->iv->base, offset)) 4713 return false; 4714 4715 /* Determine the new comparison operator. */ 4716 comp = step < 0 ? GT_EXPR : LT_EXPR; 4717 if (*comp_p == NE_EXPR) 4718 *comp_p = comp; 4719 else if (*comp_p == EQ_EXPR) 4720 *comp_p = invert_tree_comparison (comp, false); 4721 else 4722 gcc_unreachable (); 4723 4724 return true; 4725} 4726 4727/* Check whether it is possible to express the condition in USE by comparison 4728 of candidate CAND. If so, store the value compared with to BOUND, and the 4729 comparison operator to COMP. */ 4730 4731static bool 4732may_eliminate_iv (struct ivopts_data *data, 4733 struct iv_use *use, struct iv_cand *cand, tree *bound, 4734 enum tree_code *comp) 4735{ 4736 basic_block ex_bb; 4737 edge exit; 4738 tree period; 4739 struct loop *loop = data->current_loop; 4740 aff_tree bnd; 4741 struct tree_niter_desc *desc = NULL; 4742 4743 if (TREE_CODE (cand->iv->step) != INTEGER_CST) 4744 return false; 4745 4746 /* For now works only for exits that dominate the loop latch. 4747 TODO: extend to other conditions inside loop body. */ 4748 ex_bb = gimple_bb (use->stmt); 4749 if (use->stmt != last_stmt (ex_bb) 4750 || gimple_code (use->stmt) != GIMPLE_COND 4751 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb)) 4752 return false; 4753 4754 exit = EDGE_SUCC (ex_bb, 0); 4755 if (flow_bb_inside_loop_p (loop, exit->dest)) 4756 exit = EDGE_SUCC (ex_bb, 1); 4757 if (flow_bb_inside_loop_p (loop, exit->dest)) 4758 return false; 4759 4760 desc = niter_for_exit (data, exit); 4761 if (!desc) 4762 return false; 4763 4764 /* Determine whether we can use the variable to test the exit condition. 4765 This is the case iff the period of the induction variable is greater 4766 than the number of iterations for which the exit condition is true. */ 4767 period = iv_period (cand->iv); 4768 4769 /* If the number of iterations is constant, compare against it directly. */ 4770 if (TREE_CODE (desc->niter) == INTEGER_CST) 4771 { 4772 /* See cand_value_at. */ 4773 if (stmt_after_increment (loop, cand, use->stmt)) 4774 { 4775 if (!tree_int_cst_lt (desc->niter, period)) 4776 return false; 4777 } 4778 else 4779 { 4780 if (tree_int_cst_lt (period, desc->niter)) 4781 return false; 4782 } 4783 } 4784 4785 /* If not, and if this is the only possible exit of the loop, see whether 4786 we can get a conservative estimate on the number of iterations of the 4787 entire loop and compare against that instead. */ 4788 else 4789 { 4790 widest_int period_value, max_niter; 4791 4792 max_niter = desc->max; 4793 if (stmt_after_increment (loop, cand, use->stmt)) 4794 max_niter += 1; 4795 period_value = wi::to_widest (period); 4796 if (wi::gtu_p (max_niter, period_value)) 4797 { 4798 /* See if we can take advantage of inferred loop bound information. */ 4799 if (data->loop_single_exit_p) 4800 { 4801 if (!max_loop_iterations (loop, &max_niter)) 4802 return false; 4803 /* The loop bound is already adjusted by adding 1. */ 4804 if (wi::gtu_p (max_niter, period_value)) 4805 return false; 4806 } 4807 else 4808 return false; 4809 } 4810 } 4811 4812 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd); 4813 4814 *bound = fold_convert (TREE_TYPE (cand->iv->base), 4815 aff_combination_to_tree (&bnd)); 4816 *comp = iv_elimination_compare (data, use); 4817 4818 /* It is unlikely that computing the number of iterations using division 4819 would be more profitable than keeping the original induction variable. */ 4820 if (expression_expensive_p (*bound)) 4821 return false; 4822 4823 /* Sometimes, it is possible to handle the situation that the number of 4824 iterations may be zero unless additional assumtions by using < 4825 instead of != in the exit condition. 4826 4827 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and 4828 base the exit condition on it. However, that is often too 4829 expensive. */ 4830 if (!integer_zerop (desc->may_be_zero)) 4831 return iv_elimination_compare_lt (data, cand, comp, desc); 4832 4833 return true; 4834} 4835 4836 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must 4837 be copied, if is is used in the loop body and DATA->body_includes_call. */ 4838 4839static int 4840parm_decl_cost (struct ivopts_data *data, tree bound) 4841{ 4842 tree sbound = bound; 4843 STRIP_NOPS (sbound); 4844 4845 if (TREE_CODE (sbound) == SSA_NAME 4846 && SSA_NAME_IS_DEFAULT_DEF (sbound) 4847 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL 4848 && data->body_includes_call) 4849 return COSTS_N_INSNS (1); 4850 4851 return 0; 4852} 4853 4854/* Determines cost of basing replacement of USE on CAND in a condition. */ 4855 4856static bool 4857determine_use_iv_cost_condition (struct ivopts_data *data, 4858 struct iv_use *use, struct iv_cand *cand) 4859{ 4860 tree bound = NULL_TREE; 4861 struct iv *cmp_iv; 4862 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on; 4863 comp_cost elim_cost, express_cost, cost, bound_cost; 4864 bool ok; 4865 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id; 4866 tree *control_var, *bound_cst; 4867 enum tree_code comp = ERROR_MARK; 4868 4869 /* Only consider real candidates. */ 4870 if (!cand->iv) 4871 { 4872 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, 4873 ERROR_MARK, -1); 4874 return false; 4875 } 4876 4877 /* Try iv elimination. */ 4878 if (may_eliminate_iv (data, use, cand, &bound, &comp)) 4879 { 4880 elim_cost = force_var_cost (data, bound, &depends_on_elim); 4881 if (elim_cost.cost == 0) 4882 elim_cost.cost = parm_decl_cost (data, bound); 4883 else if (TREE_CODE (bound) == INTEGER_CST) 4884 elim_cost.cost = 0; 4885 /* If we replace a loop condition 'i < n' with 'p < base + n', 4886 depends_on_elim will have 'base' and 'n' set, which implies 4887 that both 'base' and 'n' will be live during the loop. More likely, 4888 'base + n' will be loop invariant, resulting in only one live value 4889 during the loop. So in that case we clear depends_on_elim and set 4890 elim_inv_expr_id instead. */ 4891 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1) 4892 { 4893 elim_inv_expr_id = get_expr_id (data, bound); 4894 bitmap_clear (depends_on_elim); 4895 } 4896 /* The bound is a loop invariant, so it will be only computed 4897 once. */ 4898 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost); 4899 } 4900 else 4901 elim_cost = infinite_cost; 4902 4903 /* Try expressing the original giv. If it is compared with an invariant, 4904 note that we cannot get rid of it. */ 4905 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst, 4906 NULL, &cmp_iv); 4907 gcc_assert (ok); 4908 4909 /* When the condition is a comparison of the candidate IV against 4910 zero, prefer this IV. 4911 4912 TODO: The constant that we're subtracting from the cost should 4913 be target-dependent. This information should be added to the 4914 target costs for each backend. */ 4915 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */ 4916 && integer_zerop (*bound_cst) 4917 && (operand_equal_p (*control_var, cand->var_after, 0) 4918 || operand_equal_p (*control_var, cand->var_before, 0))) 4919 elim_cost.cost -= 1; 4920 4921 express_cost = get_computation_cost (data, use, cand, false, 4922 &depends_on_express, NULL, 4923 &express_inv_expr_id); 4924 fd_ivopts_data = data; 4925 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL); 4926 4927 /* Count the cost of the original bound as well. */ 4928 bound_cost = force_var_cost (data, *bound_cst, NULL); 4929 if (bound_cost.cost == 0) 4930 bound_cost.cost = parm_decl_cost (data, *bound_cst); 4931 else if (TREE_CODE (*bound_cst) == INTEGER_CST) 4932 bound_cost.cost = 0; 4933 express_cost.cost += bound_cost.cost; 4934 4935 /* Choose the better approach, preferring the eliminated IV. */ 4936 if (compare_costs (elim_cost, express_cost) <= 0) 4937 { 4938 cost = elim_cost; 4939 depends_on = depends_on_elim; 4940 depends_on_elim = NULL; 4941 inv_expr_id = elim_inv_expr_id; 4942 } 4943 else 4944 { 4945 cost = express_cost; 4946 depends_on = depends_on_express; 4947 depends_on_express = NULL; 4948 bound = NULL_TREE; 4949 comp = ERROR_MARK; 4950 inv_expr_id = express_inv_expr_id; 4951 } 4952 4953 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id); 4954 4955 if (depends_on_elim) 4956 BITMAP_FREE (depends_on_elim); 4957 if (depends_on_express) 4958 BITMAP_FREE (depends_on_express); 4959 4960 return !infinite_cost_p (cost); 4961} 4962 4963/* Determines cost of basing replacement of USE on CAND. Returns false 4964 if USE cannot be based on CAND. */ 4965 4966static bool 4967determine_use_iv_cost (struct ivopts_data *data, 4968 struct iv_use *use, struct iv_cand *cand) 4969{ 4970 switch (use->type) 4971 { 4972 case USE_NONLINEAR_EXPR: 4973 return determine_use_iv_cost_generic (data, use, cand); 4974 4975 case USE_ADDRESS: 4976 return determine_use_iv_cost_address (data, use, cand); 4977 4978 case USE_COMPARE: 4979 return determine_use_iv_cost_condition (data, use, cand); 4980 4981 default: 4982 gcc_unreachable (); 4983 } 4984} 4985 4986/* Return true if get_computation_cost indicates that autoincrement is 4987 a possibility for the pair of USE and CAND, false otherwise. */ 4988 4989static bool 4990autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use, 4991 struct iv_cand *cand) 4992{ 4993 bitmap depends_on; 4994 bool can_autoinc; 4995 comp_cost cost; 4996 4997 if (use->type != USE_ADDRESS) 4998 return false; 4999 5000 cost = get_computation_cost (data, use, cand, true, &depends_on, 5001 &can_autoinc, NULL); 5002 5003 BITMAP_FREE (depends_on); 5004 5005 return !infinite_cost_p (cost) && can_autoinc; 5006} 5007 5008/* Examine IP_ORIGINAL candidates to see if they are incremented next to a 5009 use that allows autoincrement, and set their AINC_USE if possible. */ 5010 5011static void 5012set_autoinc_for_original_candidates (struct ivopts_data *data) 5013{ 5014 unsigned i, j; 5015 5016 for (i = 0; i < n_iv_cands (data); i++) 5017 { 5018 struct iv_cand *cand = iv_cand (data, i); 5019 struct iv_use *closest_before = NULL; 5020 struct iv_use *closest_after = NULL; 5021 if (cand->pos != IP_ORIGINAL) 5022 continue; 5023 5024 for (j = 0; j < n_iv_uses (data); j++) 5025 { 5026 struct iv_use *use = iv_use (data, j); 5027 unsigned uid = gimple_uid (use->stmt); 5028 5029 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)) 5030 continue; 5031 5032 if (uid < gimple_uid (cand->incremented_at) 5033 && (closest_before == NULL 5034 || uid > gimple_uid (closest_before->stmt))) 5035 closest_before = use; 5036 5037 if (uid > gimple_uid (cand->incremented_at) 5038 && (closest_after == NULL 5039 || uid < gimple_uid (closest_after->stmt))) 5040 closest_after = use; 5041 } 5042 5043 if (closest_before != NULL 5044 && autoinc_possible_for_pair (data, closest_before, cand)) 5045 cand->ainc_use = closest_before; 5046 else if (closest_after != NULL 5047 && autoinc_possible_for_pair (data, closest_after, cand)) 5048 cand->ainc_use = closest_after; 5049 } 5050} 5051 5052/* Finds the candidates for the induction variables. */ 5053 5054static void 5055find_iv_candidates (struct ivopts_data *data) 5056{ 5057 /* Add commonly used ivs. */ 5058 add_standard_iv_candidates (data); 5059 5060 /* Add old induction variables. */ 5061 add_old_ivs_candidates (data); 5062 5063 /* Add induction variables derived from uses. */ 5064 add_derived_ivs_candidates (data); 5065 5066 set_autoinc_for_original_candidates (data); 5067 5068 /* Record the important candidates. */ 5069 record_important_candidates (data); 5070} 5071 5072/* Determines costs of basing the use of the iv on an iv candidate. */ 5073 5074static void 5075determine_use_iv_costs (struct ivopts_data *data) 5076{ 5077 unsigned i, j; 5078 struct iv_use *use; 5079 struct iv_cand *cand; 5080 bitmap to_clear = BITMAP_ALLOC (NULL); 5081 5082 alloc_use_cost_map (data); 5083 5084 for (i = 0; i < n_iv_uses (data); i++) 5085 { 5086 use = iv_use (data, i); 5087 5088 if (data->consider_all_candidates) 5089 { 5090 for (j = 0; j < n_iv_cands (data); j++) 5091 { 5092 cand = iv_cand (data, j); 5093 determine_use_iv_cost (data, use, cand); 5094 } 5095 } 5096 else 5097 { 5098 bitmap_iterator bi; 5099 5100 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi) 5101 { 5102 cand = iv_cand (data, j); 5103 if (!determine_use_iv_cost (data, use, cand)) 5104 bitmap_set_bit (to_clear, j); 5105 } 5106 5107 /* Remove the candidates for that the cost is infinite from 5108 the list of related candidates. */ 5109 bitmap_and_compl_into (use->related_cands, to_clear); 5110 bitmap_clear (to_clear); 5111 } 5112 } 5113 5114 BITMAP_FREE (to_clear); 5115 5116 if (dump_file && (dump_flags & TDF_DETAILS)) 5117 { 5118 fprintf (dump_file, "Use-candidate costs:\n"); 5119 5120 for (i = 0; i < n_iv_uses (data); i++) 5121 { 5122 use = iv_use (data, i); 5123 5124 fprintf (dump_file, "Use %d:\n", i); 5125 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n"); 5126 for (j = 0; j < use->n_map_members; j++) 5127 { 5128 if (!use->cost_map[j].cand 5129 || infinite_cost_p (use->cost_map[j].cost)) 5130 continue; 5131 5132 fprintf (dump_file, " %d\t%d\t%d\t", 5133 use->cost_map[j].cand->id, 5134 use->cost_map[j].cost.cost, 5135 use->cost_map[j].cost.complexity); 5136 if (use->cost_map[j].depends_on) 5137 bitmap_print (dump_file, 5138 use->cost_map[j].depends_on, "",""); 5139 if (use->cost_map[j].inv_expr_id != -1) 5140 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id); 5141 fprintf (dump_file, "\n"); 5142 } 5143 5144 fprintf (dump_file, "\n"); 5145 } 5146 fprintf (dump_file, "\n"); 5147 } 5148} 5149 5150/* Determines cost of the candidate CAND. */ 5151 5152static void 5153determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand) 5154{ 5155 comp_cost cost_base; 5156 unsigned cost, cost_step; 5157 tree base; 5158 5159 if (!cand->iv) 5160 { 5161 cand->cost = 0; 5162 return; 5163 } 5164 5165 /* There are two costs associated with the candidate -- its increment 5166 and its initialization. The second is almost negligible for any loop 5167 that rolls enough, so we take it just very little into account. */ 5168 5169 base = cand->iv->base; 5170 cost_base = force_var_cost (data, base, NULL); 5171 /* It will be exceptional that the iv register happens to be initialized with 5172 the proper value at no cost. In general, there will at least be a regcopy 5173 or a const set. */ 5174 if (cost_base.cost == 0) 5175 cost_base.cost = COSTS_N_INSNS (1); 5176 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base))); 5177 5178 cost = cost_step + adjust_setup_cost (data, cost_base.cost); 5179 5180 /* Prefer the original ivs unless we may gain something by replacing it. 5181 The reason is to make debugging simpler; so this is not relevant for 5182 artificial ivs created by other optimization passes. */ 5183 if (cand->pos != IP_ORIGINAL 5184 || !SSA_NAME_VAR (cand->var_before) 5185 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before))) 5186 cost++; 5187 5188 /* Prefer not to insert statements into latch unless there are some 5189 already (so that we do not create unnecessary jumps). */ 5190 if (cand->pos == IP_END 5191 && empty_block_p (ip_end_pos (data->current_loop))) 5192 cost++; 5193 5194 cand->cost = cost; 5195 cand->cost_step = cost_step; 5196} 5197 5198/* Determines costs of computation of the candidates. */ 5199 5200static void 5201determine_iv_costs (struct ivopts_data *data) 5202{ 5203 unsigned i; 5204 5205 if (dump_file && (dump_flags & TDF_DETAILS)) 5206 { 5207 fprintf (dump_file, "Candidate costs:\n"); 5208 fprintf (dump_file, " cand\tcost\n"); 5209 } 5210 5211 for (i = 0; i < n_iv_cands (data); i++) 5212 { 5213 struct iv_cand *cand = iv_cand (data, i); 5214 5215 determine_iv_cost (data, cand); 5216 5217 if (dump_file && (dump_flags & TDF_DETAILS)) 5218 fprintf (dump_file, " %d\t%d\n", i, cand->cost); 5219 } 5220 5221 if (dump_file && (dump_flags & TDF_DETAILS)) 5222 fprintf (dump_file, "\n"); 5223} 5224 5225/* Calculates cost for having SIZE induction variables. */ 5226 5227static unsigned 5228ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size) 5229{ 5230 /* We add size to the cost, so that we prefer eliminating ivs 5231 if possible. */ 5232 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed, 5233 data->body_includes_call); 5234} 5235 5236/* For each size of the induction variable set determine the penalty. */ 5237 5238static void 5239determine_set_costs (struct ivopts_data *data) 5240{ 5241 unsigned j, n; 5242 gphi *phi; 5243 gphi_iterator psi; 5244 tree op; 5245 struct loop *loop = data->current_loop; 5246 bitmap_iterator bi; 5247 5248 if (dump_file && (dump_flags & TDF_DETAILS)) 5249 { 5250 fprintf (dump_file, "Global costs:\n"); 5251 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs); 5252 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs); 5253 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]); 5254 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]); 5255 } 5256 5257 n = 0; 5258 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 5259 { 5260 phi = psi.phi (); 5261 op = PHI_RESULT (phi); 5262 5263 if (virtual_operand_p (op)) 5264 continue; 5265 5266 if (get_iv (data, op)) 5267 continue; 5268 5269 n++; 5270 } 5271 5272 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 5273 { 5274 struct version_info *info = ver_info (data, j); 5275 5276 if (info->inv_id && info->has_nonlin_use) 5277 n++; 5278 } 5279 5280 data->regs_used = n; 5281 if (dump_file && (dump_flags & TDF_DETAILS)) 5282 fprintf (dump_file, " regs_used %d\n", n); 5283 5284 if (dump_file && (dump_flags & TDF_DETAILS)) 5285 { 5286 fprintf (dump_file, " cost for size:\n"); 5287 fprintf (dump_file, " ivs\tcost\n"); 5288 for (j = 0; j <= 2 * target_avail_regs; j++) 5289 fprintf (dump_file, " %d\t%d\n", j, 5290 ivopts_global_cost_for_size (data, j)); 5291 fprintf (dump_file, "\n"); 5292 } 5293} 5294 5295/* Returns true if A is a cheaper cost pair than B. */ 5296 5297static bool 5298cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b) 5299{ 5300 int cmp; 5301 5302 if (!a) 5303 return false; 5304 5305 if (!b) 5306 return true; 5307 5308 cmp = compare_costs (a->cost, b->cost); 5309 if (cmp < 0) 5310 return true; 5311 5312 if (cmp > 0) 5313 return false; 5314 5315 /* In case the costs are the same, prefer the cheaper candidate. */ 5316 if (a->cand->cost < b->cand->cost) 5317 return true; 5318 5319 return false; 5320} 5321 5322 5323/* Returns candidate by that USE is expressed in IVS. */ 5324 5325static struct cost_pair * 5326iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use) 5327{ 5328 return ivs->cand_for_use[use->id]; 5329} 5330 5331/* Computes the cost field of IVS structure. */ 5332 5333static void 5334iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs) 5335{ 5336 comp_cost cost = ivs->cand_use_cost; 5337 5338 cost.cost += ivs->cand_cost; 5339 5340 cost.cost += ivopts_global_cost_for_size (data, 5341 ivs->n_regs + ivs->num_used_inv_expr); 5342 5343 ivs->cost = cost; 5344} 5345 5346/* Remove invariants in set INVS to set IVS. */ 5347 5348static void 5349iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs) 5350{ 5351 bitmap_iterator bi; 5352 unsigned iid; 5353 5354 if (!invs) 5355 return; 5356 5357 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 5358 { 5359 ivs->n_invariant_uses[iid]--; 5360 if (ivs->n_invariant_uses[iid] == 0) 5361 ivs->n_regs--; 5362 } 5363} 5364 5365/* Set USE not to be expressed by any candidate in IVS. */ 5366 5367static void 5368iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs, 5369 struct iv_use *use) 5370{ 5371 unsigned uid = use->id, cid; 5372 struct cost_pair *cp; 5373 5374 cp = ivs->cand_for_use[uid]; 5375 if (!cp) 5376 return; 5377 cid = cp->cand->id; 5378 5379 ivs->bad_uses++; 5380 ivs->cand_for_use[uid] = NULL; 5381 ivs->n_cand_uses[cid]--; 5382 5383 if (ivs->n_cand_uses[cid] == 0) 5384 { 5385 bitmap_clear_bit (ivs->cands, cid); 5386 /* Do not count the pseudocandidates. */ 5387 if (cp->cand->iv) 5388 ivs->n_regs--; 5389 ivs->n_cands--; 5390 ivs->cand_cost -= cp->cand->cost; 5391 5392 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on); 5393 } 5394 5395 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost); 5396 5397 iv_ca_set_remove_invariants (ivs, cp->depends_on); 5398 5399 if (cp->inv_expr_id != -1) 5400 { 5401 ivs->used_inv_expr[cp->inv_expr_id]--; 5402 if (ivs->used_inv_expr[cp->inv_expr_id] == 0) 5403 ivs->num_used_inv_expr--; 5404 } 5405 iv_ca_recount_cost (data, ivs); 5406} 5407 5408/* Add invariants in set INVS to set IVS. */ 5409 5410static void 5411iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs) 5412{ 5413 bitmap_iterator bi; 5414 unsigned iid; 5415 5416 if (!invs) 5417 return; 5418 5419 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi) 5420 { 5421 ivs->n_invariant_uses[iid]++; 5422 if (ivs->n_invariant_uses[iid] == 1) 5423 ivs->n_regs++; 5424 } 5425} 5426 5427/* Set cost pair for USE in set IVS to CP. */ 5428 5429static void 5430iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs, 5431 struct iv_use *use, struct cost_pair *cp) 5432{ 5433 unsigned uid = use->id, cid; 5434 5435 if (ivs->cand_for_use[uid] == cp) 5436 return; 5437 5438 if (ivs->cand_for_use[uid]) 5439 iv_ca_set_no_cp (data, ivs, use); 5440 5441 if (cp) 5442 { 5443 cid = cp->cand->id; 5444 5445 ivs->bad_uses--; 5446 ivs->cand_for_use[uid] = cp; 5447 ivs->n_cand_uses[cid]++; 5448 if (ivs->n_cand_uses[cid] == 1) 5449 { 5450 bitmap_set_bit (ivs->cands, cid); 5451 /* Do not count the pseudocandidates. */ 5452 if (cp->cand->iv) 5453 ivs->n_regs++; 5454 ivs->n_cands++; 5455 ivs->cand_cost += cp->cand->cost; 5456 5457 iv_ca_set_add_invariants (ivs, cp->cand->depends_on); 5458 } 5459 5460 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost); 5461 iv_ca_set_add_invariants (ivs, cp->depends_on); 5462 5463 if (cp->inv_expr_id != -1) 5464 { 5465 ivs->used_inv_expr[cp->inv_expr_id]++; 5466 if (ivs->used_inv_expr[cp->inv_expr_id] == 1) 5467 ivs->num_used_inv_expr++; 5468 } 5469 iv_ca_recount_cost (data, ivs); 5470 } 5471} 5472 5473/* Extend set IVS by expressing USE by some of the candidates in it 5474 if possible. Consider all important candidates if candidates in 5475 set IVS don't give any result. */ 5476 5477static void 5478iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs, 5479 struct iv_use *use) 5480{ 5481 struct cost_pair *best_cp = NULL, *cp; 5482 bitmap_iterator bi; 5483 unsigned i; 5484 struct iv_cand *cand; 5485 5486 gcc_assert (ivs->upto >= use->id); 5487 ivs->upto++; 5488 ivs->bad_uses++; 5489 5490 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 5491 { 5492 cand = iv_cand (data, i); 5493 cp = get_use_iv_cost (data, use, cand); 5494 if (cheaper_cost_pair (cp, best_cp)) 5495 best_cp = cp; 5496 } 5497 5498 if (best_cp == NULL) 5499 { 5500 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi) 5501 { 5502 cand = iv_cand (data, i); 5503 cp = get_use_iv_cost (data, use, cand); 5504 if (cheaper_cost_pair (cp, best_cp)) 5505 best_cp = cp; 5506 } 5507 } 5508 5509 iv_ca_set_cp (data, ivs, use, best_cp); 5510} 5511 5512/* Get cost for assignment IVS. */ 5513 5514static comp_cost 5515iv_ca_cost (struct iv_ca *ivs) 5516{ 5517 /* This was a conditional expression but it triggered a bug in 5518 Sun C 5.5. */ 5519 if (ivs->bad_uses) 5520 return infinite_cost; 5521 else 5522 return ivs->cost; 5523} 5524 5525/* Returns true if all dependences of CP are among invariants in IVS. */ 5526 5527static bool 5528iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp) 5529{ 5530 unsigned i; 5531 bitmap_iterator bi; 5532 5533 if (!cp->depends_on) 5534 return true; 5535 5536 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi) 5537 { 5538 if (ivs->n_invariant_uses[i] == 0) 5539 return false; 5540 } 5541 5542 return true; 5543} 5544 5545/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains 5546 it before NEXT_CHANGE. */ 5547 5548static struct iv_ca_delta * 5549iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp, 5550 struct cost_pair *new_cp, struct iv_ca_delta *next_change) 5551{ 5552 struct iv_ca_delta *change = XNEW (struct iv_ca_delta); 5553 5554 change->use = use; 5555 change->old_cp = old_cp; 5556 change->new_cp = new_cp; 5557 change->next_change = next_change; 5558 5559 return change; 5560} 5561 5562/* Joins two lists of changes L1 and L2. Destructive -- old lists 5563 are rewritten. */ 5564 5565static struct iv_ca_delta * 5566iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2) 5567{ 5568 struct iv_ca_delta *last; 5569 5570 if (!l2) 5571 return l1; 5572 5573 if (!l1) 5574 return l2; 5575 5576 for (last = l1; last->next_change; last = last->next_change) 5577 continue; 5578 last->next_change = l2; 5579 5580 return l1; 5581} 5582 5583/* Reverse the list of changes DELTA, forming the inverse to it. */ 5584 5585static struct iv_ca_delta * 5586iv_ca_delta_reverse (struct iv_ca_delta *delta) 5587{ 5588 struct iv_ca_delta *act, *next, *prev = NULL; 5589 struct cost_pair *tmp; 5590 5591 for (act = delta; act; act = next) 5592 { 5593 next = act->next_change; 5594 act->next_change = prev; 5595 prev = act; 5596 5597 tmp = act->old_cp; 5598 act->old_cp = act->new_cp; 5599 act->new_cp = tmp; 5600 } 5601 5602 return prev; 5603} 5604 5605/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are 5606 reverted instead. */ 5607 5608static void 5609iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs, 5610 struct iv_ca_delta *delta, bool forward) 5611{ 5612 struct cost_pair *from, *to; 5613 struct iv_ca_delta *act; 5614 5615 if (!forward) 5616 delta = iv_ca_delta_reverse (delta); 5617 5618 for (act = delta; act; act = act->next_change) 5619 { 5620 from = act->old_cp; 5621 to = act->new_cp; 5622 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from); 5623 iv_ca_set_cp (data, ivs, act->use, to); 5624 } 5625 5626 if (!forward) 5627 iv_ca_delta_reverse (delta); 5628} 5629 5630/* Returns true if CAND is used in IVS. */ 5631 5632static bool 5633iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand) 5634{ 5635 return ivs->n_cand_uses[cand->id] > 0; 5636} 5637 5638/* Returns number of induction variable candidates in the set IVS. */ 5639 5640static unsigned 5641iv_ca_n_cands (struct iv_ca *ivs) 5642{ 5643 return ivs->n_cands; 5644} 5645 5646/* Free the list of changes DELTA. */ 5647 5648static void 5649iv_ca_delta_free (struct iv_ca_delta **delta) 5650{ 5651 struct iv_ca_delta *act, *next; 5652 5653 for (act = *delta; act; act = next) 5654 { 5655 next = act->next_change; 5656 free (act); 5657 } 5658 5659 *delta = NULL; 5660} 5661 5662/* Allocates new iv candidates assignment. */ 5663 5664static struct iv_ca * 5665iv_ca_new (struct ivopts_data *data) 5666{ 5667 struct iv_ca *nw = XNEW (struct iv_ca); 5668 5669 nw->upto = 0; 5670 nw->bad_uses = 0; 5671 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data)); 5672 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data)); 5673 nw->cands = BITMAP_ALLOC (NULL); 5674 nw->n_cands = 0; 5675 nw->n_regs = 0; 5676 nw->cand_use_cost = no_cost; 5677 nw->cand_cost = 0; 5678 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1); 5679 nw->cost = no_cost; 5680 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1); 5681 nw->num_used_inv_expr = 0; 5682 5683 return nw; 5684} 5685 5686/* Free memory occupied by the set IVS. */ 5687 5688static void 5689iv_ca_free (struct iv_ca **ivs) 5690{ 5691 free ((*ivs)->cand_for_use); 5692 free ((*ivs)->n_cand_uses); 5693 BITMAP_FREE ((*ivs)->cands); 5694 free ((*ivs)->n_invariant_uses); 5695 free ((*ivs)->used_inv_expr); 5696 free (*ivs); 5697 *ivs = NULL; 5698} 5699 5700/* Dumps IVS to FILE. */ 5701 5702static void 5703iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) 5704{ 5705 const char *pref = " invariants "; 5706 unsigned i; 5707 comp_cost cost = iv_ca_cost (ivs); 5708 5709 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity); 5710 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n", 5711 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity); 5712 bitmap_print (file, ivs->cands, " candidates: ","\n"); 5713 5714 for (i = 0; i < ivs->upto; i++) 5715 { 5716 struct iv_use *use = iv_use (data, i); 5717 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use); 5718 if (cp) 5719 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n", 5720 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity); 5721 else 5722 fprintf (file, " use:%d --> ??\n", use->id); 5723 } 5724 5725 for (i = 1; i <= data->max_inv_id; i++) 5726 if (ivs->n_invariant_uses[i]) 5727 { 5728 fprintf (file, "%s%d", pref, i); 5729 pref = ", "; 5730 } 5731 fprintf (file, "\n\n"); 5732} 5733 5734/* Try changing candidate in IVS to CAND for each use. Return cost of the 5735 new set, and store differences in DELTA. Number of induction variables 5736 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true 5737 the function will try to find a solution with mimimal iv candidates. */ 5738 5739static comp_cost 5740iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs, 5741 struct iv_cand *cand, struct iv_ca_delta **delta, 5742 unsigned *n_ivs, bool min_ncand) 5743{ 5744 unsigned i; 5745 comp_cost cost; 5746 struct iv_use *use; 5747 struct cost_pair *old_cp, *new_cp; 5748 5749 *delta = NULL; 5750 for (i = 0; i < ivs->upto; i++) 5751 { 5752 use = iv_use (data, i); 5753 old_cp = iv_ca_cand_for_use (ivs, use); 5754 5755 if (old_cp 5756 && old_cp->cand == cand) 5757 continue; 5758 5759 new_cp = get_use_iv_cost (data, use, cand); 5760 if (!new_cp) 5761 continue; 5762 5763 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp)) 5764 continue; 5765 5766 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp)) 5767 continue; 5768 5769 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta); 5770 } 5771 5772 iv_ca_delta_commit (data, ivs, *delta, true); 5773 cost = iv_ca_cost (ivs); 5774 if (n_ivs) 5775 *n_ivs = iv_ca_n_cands (ivs); 5776 iv_ca_delta_commit (data, ivs, *delta, false); 5777 5778 return cost; 5779} 5780 5781/* Try narrowing set IVS by removing CAND. Return the cost of 5782 the new set and store the differences in DELTA. START is 5783 the candidate with which we start narrowing. */ 5784 5785static comp_cost 5786iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs, 5787 struct iv_cand *cand, struct iv_cand *start, 5788 struct iv_ca_delta **delta) 5789{ 5790 unsigned i, ci; 5791 struct iv_use *use; 5792 struct cost_pair *old_cp, *new_cp, *cp; 5793 bitmap_iterator bi; 5794 struct iv_cand *cnd; 5795 comp_cost cost, best_cost, acost; 5796 5797 *delta = NULL; 5798 for (i = 0; i < n_iv_uses (data); i++) 5799 { 5800 use = iv_use (data, i); 5801 5802 old_cp = iv_ca_cand_for_use (ivs, use); 5803 if (old_cp->cand != cand) 5804 continue; 5805 5806 best_cost = iv_ca_cost (ivs); 5807 /* Start narrowing with START. */ 5808 new_cp = get_use_iv_cost (data, use, start); 5809 5810 if (data->consider_all_candidates) 5811 { 5812 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi) 5813 { 5814 if (ci == cand->id || (start && ci == start->id)) 5815 continue; 5816 5817 cnd = iv_cand (data, ci); 5818 5819 cp = get_use_iv_cost (data, use, cnd); 5820 if (!cp) 5821 continue; 5822 5823 iv_ca_set_cp (data, ivs, use, cp); 5824 acost = iv_ca_cost (ivs); 5825 5826 if (compare_costs (acost, best_cost) < 0) 5827 { 5828 best_cost = acost; 5829 new_cp = cp; 5830 } 5831 } 5832 } 5833 else 5834 { 5835 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi) 5836 { 5837 if (ci == cand->id || (start && ci == start->id)) 5838 continue; 5839 5840 cnd = iv_cand (data, ci); 5841 5842 cp = get_use_iv_cost (data, use, cnd); 5843 if (!cp) 5844 continue; 5845 5846 iv_ca_set_cp (data, ivs, use, cp); 5847 acost = iv_ca_cost (ivs); 5848 5849 if (compare_costs (acost, best_cost) < 0) 5850 { 5851 best_cost = acost; 5852 new_cp = cp; 5853 } 5854 } 5855 } 5856 /* Restore to old cp for use. */ 5857 iv_ca_set_cp (data, ivs, use, old_cp); 5858 5859 if (!new_cp) 5860 { 5861 iv_ca_delta_free (delta); 5862 return infinite_cost; 5863 } 5864 5865 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta); 5866 } 5867 5868 iv_ca_delta_commit (data, ivs, *delta, true); 5869 cost = iv_ca_cost (ivs); 5870 iv_ca_delta_commit (data, ivs, *delta, false); 5871 5872 return cost; 5873} 5874 5875/* Try optimizing the set of candidates IVS by removing candidates different 5876 from to EXCEPT_CAND from it. Return cost of the new set, and store 5877 differences in DELTA. */ 5878 5879static comp_cost 5880iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs, 5881 struct iv_cand *except_cand, struct iv_ca_delta **delta) 5882{ 5883 bitmap_iterator bi; 5884 struct iv_ca_delta *act_delta, *best_delta; 5885 unsigned i; 5886 comp_cost best_cost, acost; 5887 struct iv_cand *cand; 5888 5889 best_delta = NULL; 5890 best_cost = iv_ca_cost (ivs); 5891 5892 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 5893 { 5894 cand = iv_cand (data, i); 5895 5896 if (cand == except_cand) 5897 continue; 5898 5899 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta); 5900 5901 if (compare_costs (acost, best_cost) < 0) 5902 { 5903 best_cost = acost; 5904 iv_ca_delta_free (&best_delta); 5905 best_delta = act_delta; 5906 } 5907 else 5908 iv_ca_delta_free (&act_delta); 5909 } 5910 5911 if (!best_delta) 5912 { 5913 *delta = NULL; 5914 return best_cost; 5915 } 5916 5917 /* Recurse to possibly remove other unnecessary ivs. */ 5918 iv_ca_delta_commit (data, ivs, best_delta, true); 5919 best_cost = iv_ca_prune (data, ivs, except_cand, delta); 5920 iv_ca_delta_commit (data, ivs, best_delta, false); 5921 *delta = iv_ca_delta_join (best_delta, *delta); 5922 return best_cost; 5923} 5924 5925/* Check if CAND_IDX is a candidate other than OLD_CAND and has 5926 cheaper local cost for USE than BEST_CP. Return pointer to 5927 the corresponding cost_pair, otherwise just return BEST_CP. */ 5928 5929static struct cost_pair* 5930cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use, 5931 unsigned int cand_idx, struct iv_cand *old_cand, 5932 struct cost_pair *best_cp) 5933{ 5934 struct iv_cand *cand; 5935 struct cost_pair *cp; 5936 5937 gcc_assert (old_cand != NULL && best_cp != NULL); 5938 if (cand_idx == old_cand->id) 5939 return best_cp; 5940 5941 cand = iv_cand (data, cand_idx); 5942 cp = get_use_iv_cost (data, use, cand); 5943 if (cp != NULL && cheaper_cost_pair (cp, best_cp)) 5944 return cp; 5945 5946 return best_cp; 5947} 5948 5949/* Try breaking local optimal fixed-point for IVS by replacing candidates 5950 which are used by more than one iv uses. For each of those candidates, 5951 this function tries to represent iv uses under that candidate using 5952 other ones with lower local cost, then tries to prune the new set. 5953 If the new set has lower cost, It returns the new cost after recording 5954 candidate replacement in list DELTA. */ 5955 5956static comp_cost 5957iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs, 5958 struct iv_ca_delta **delta) 5959{ 5960 bitmap_iterator bi, bj; 5961 unsigned int i, j, k; 5962 struct iv_use *use; 5963 struct iv_cand *cand; 5964 comp_cost orig_cost, acost; 5965 struct iv_ca_delta *act_delta, *tmp_delta; 5966 struct cost_pair *old_cp, *best_cp = NULL; 5967 5968 *delta = NULL; 5969 orig_cost = iv_ca_cost (ivs); 5970 5971 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) 5972 { 5973 if (ivs->n_cand_uses[i] == 1 5974 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND) 5975 continue; 5976 5977 cand = iv_cand (data, i); 5978 5979 act_delta = NULL; 5980 /* Represent uses under current candidate using other ones with 5981 lower local cost. */ 5982 for (j = 0; j < ivs->upto; j++) 5983 { 5984 use = iv_use (data, j); 5985 old_cp = iv_ca_cand_for_use (ivs, use); 5986 5987 if (old_cp->cand != cand) 5988 continue; 5989 5990 best_cp = old_cp; 5991 if (data->consider_all_candidates) 5992 for (k = 0; k < n_iv_cands (data); k++) 5993 best_cp = cheaper_cost_with_cand (data, use, k, 5994 old_cp->cand, best_cp); 5995 else 5996 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj) 5997 best_cp = cheaper_cost_with_cand (data, use, k, 5998 old_cp->cand, best_cp); 5999 6000 if (best_cp == old_cp) 6001 continue; 6002 6003 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta); 6004 } 6005 /* No need for further prune. */ 6006 if (!act_delta) 6007 continue; 6008 6009 /* Prune the new candidate set. */ 6010 iv_ca_delta_commit (data, ivs, act_delta, true); 6011 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta); 6012 iv_ca_delta_commit (data, ivs, act_delta, false); 6013 act_delta = iv_ca_delta_join (act_delta, tmp_delta); 6014 6015 if (compare_costs (acost, orig_cost) < 0) 6016 { 6017 *delta = act_delta; 6018 return acost; 6019 } 6020 else 6021 iv_ca_delta_free (&act_delta); 6022 } 6023 6024 return orig_cost; 6025} 6026 6027/* Tries to extend the sets IVS in the best possible way in order 6028 to express the USE. If ORIGINALP is true, prefer candidates from 6029 the original set of IVs, otherwise favor important candidates not 6030 based on any memory object. */ 6031 6032static bool 6033try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, 6034 struct iv_use *use, bool originalp) 6035{ 6036 comp_cost best_cost, act_cost; 6037 unsigned i; 6038 bitmap_iterator bi; 6039 struct iv_cand *cand; 6040 struct iv_ca_delta *best_delta = NULL, *act_delta; 6041 struct cost_pair *cp; 6042 6043 iv_ca_add_use (data, ivs, use); 6044 best_cost = iv_ca_cost (ivs); 6045 cp = iv_ca_cand_for_use (ivs, use); 6046 if (cp) 6047 { 6048 best_delta = iv_ca_delta_add (use, NULL, cp, NULL); 6049 iv_ca_set_no_cp (data, ivs, use); 6050 } 6051 6052 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise 6053 first try important candidates not based on any memory object. Only if 6054 this fails, try the specific ones. Rationale -- in loops with many 6055 variables the best choice often is to use just one generic biv. If we 6056 added here many ivs specific to the uses, the optimization algorithm later 6057 would be likely to get stuck in a local minimum, thus causing us to create 6058 too many ivs. The approach from few ivs to more seems more likely to be 6059 successful -- starting from few ivs, replacing an expensive use by a 6060 specific iv should always be a win. */ 6061 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi) 6062 { 6063 cand = iv_cand (data, i); 6064 6065 if (originalp && cand->pos !=IP_ORIGINAL) 6066 continue; 6067 6068 if (!originalp && cand->iv->base_object != NULL_TREE) 6069 continue; 6070 6071 if (iv_ca_cand_used_p (ivs, cand)) 6072 continue; 6073 6074 cp = get_use_iv_cost (data, use, cand); 6075 if (!cp) 6076 continue; 6077 6078 iv_ca_set_cp (data, ivs, use, cp); 6079 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, 6080 true); 6081 iv_ca_set_no_cp (data, ivs, use); 6082 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta); 6083 6084 if (compare_costs (act_cost, best_cost) < 0) 6085 { 6086 best_cost = act_cost; 6087 6088 iv_ca_delta_free (&best_delta); 6089 best_delta = act_delta; 6090 } 6091 else 6092 iv_ca_delta_free (&act_delta); 6093 } 6094 6095 if (infinite_cost_p (best_cost)) 6096 { 6097 for (i = 0; i < use->n_map_members; i++) 6098 { 6099 cp = use->cost_map + i; 6100 cand = cp->cand; 6101 if (!cand) 6102 continue; 6103 6104 /* Already tried this. */ 6105 if (cand->important) 6106 { 6107 if (originalp && cand->pos == IP_ORIGINAL) 6108 continue; 6109 if (!originalp && cand->iv->base_object == NULL_TREE) 6110 continue; 6111 } 6112 6113 if (iv_ca_cand_used_p (ivs, cand)) 6114 continue; 6115 6116 act_delta = NULL; 6117 iv_ca_set_cp (data, ivs, use, cp); 6118 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true); 6119 iv_ca_set_no_cp (data, ivs, use); 6120 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use), 6121 cp, act_delta); 6122 6123 if (compare_costs (act_cost, best_cost) < 0) 6124 { 6125 best_cost = act_cost; 6126 6127 if (best_delta) 6128 iv_ca_delta_free (&best_delta); 6129 best_delta = act_delta; 6130 } 6131 else 6132 iv_ca_delta_free (&act_delta); 6133 } 6134 } 6135 6136 iv_ca_delta_commit (data, ivs, best_delta, true); 6137 iv_ca_delta_free (&best_delta); 6138 6139 return !infinite_cost_p (best_cost); 6140} 6141 6142/* Finds an initial assignment of candidates to uses. */ 6143 6144static struct iv_ca * 6145get_initial_solution (struct ivopts_data *data, bool originalp) 6146{ 6147 struct iv_ca *ivs = iv_ca_new (data); 6148 unsigned i; 6149 6150 for (i = 0; i < n_iv_uses (data); i++) 6151 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp)) 6152 { 6153 iv_ca_free (&ivs); 6154 return NULL; 6155 } 6156 6157 return ivs; 6158} 6159 6160/* Tries to improve set of induction variables IVS. TRY_REPLACE_P 6161 points to a bool variable, this function tries to break local 6162 optimal fixed-point by replacing candidates in IVS if it's true. */ 6163 6164static bool 6165try_improve_iv_set (struct ivopts_data *data, 6166 struct iv_ca *ivs, bool *try_replace_p) 6167{ 6168 unsigned i, n_ivs; 6169 comp_cost acost, best_cost = iv_ca_cost (ivs); 6170 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta; 6171 struct iv_cand *cand; 6172 6173 /* Try extending the set of induction variables by one. */ 6174 for (i = 0; i < n_iv_cands (data); i++) 6175 { 6176 cand = iv_cand (data, i); 6177 6178 if (iv_ca_cand_used_p (ivs, cand)) 6179 continue; 6180 6181 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false); 6182 if (!act_delta) 6183 continue; 6184 6185 /* If we successfully added the candidate and the set is small enough, 6186 try optimizing it by removing other candidates. */ 6187 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND) 6188 { 6189 iv_ca_delta_commit (data, ivs, act_delta, true); 6190 acost = iv_ca_prune (data, ivs, cand, &tmp_delta); 6191 iv_ca_delta_commit (data, ivs, act_delta, false); 6192 act_delta = iv_ca_delta_join (act_delta, tmp_delta); 6193 } 6194 6195 if (compare_costs (acost, best_cost) < 0) 6196 { 6197 best_cost = acost; 6198 iv_ca_delta_free (&best_delta); 6199 best_delta = act_delta; 6200 } 6201 else 6202 iv_ca_delta_free (&act_delta); 6203 } 6204 6205 if (!best_delta) 6206 { 6207 /* Try removing the candidates from the set instead. */ 6208 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta); 6209 6210 if (!best_delta && *try_replace_p) 6211 { 6212 *try_replace_p = false; 6213 /* So far candidate selecting algorithm tends to choose fewer IVs 6214 so that it can handle cases in which loops have many variables 6215 but the best choice is often to use only one general biv. One 6216 weakness is it can't handle opposite cases, in which different 6217 candidates should be chosen with respect to each use. To solve 6218 the problem, we replace candidates in a manner described by the 6219 comments of iv_ca_replace, thus give general algorithm a chance 6220 to break local optimal fixed-point in these cases. */ 6221 best_cost = iv_ca_replace (data, ivs, &best_delta); 6222 } 6223 6224 if (!best_delta) 6225 return false; 6226 } 6227 6228 iv_ca_delta_commit (data, ivs, best_delta, true); 6229 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0); 6230 iv_ca_delta_free (&best_delta); 6231 return true; 6232} 6233 6234/* Attempts to find the optimal set of induction variables. We do simple 6235 greedy heuristic -- we try to replace at most one candidate in the selected 6236 solution and remove the unused ivs while this improves the cost. */ 6237 6238static struct iv_ca * 6239find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp) 6240{ 6241 struct iv_ca *set; 6242 bool try_replace_p = true; 6243 6244 /* Get the initial solution. */ 6245 set = get_initial_solution (data, originalp); 6246 if (!set) 6247 { 6248 if (dump_file && (dump_flags & TDF_DETAILS)) 6249 fprintf (dump_file, "Unable to substitute for ivs, failed.\n"); 6250 return NULL; 6251 } 6252 6253 if (dump_file && (dump_flags & TDF_DETAILS)) 6254 { 6255 fprintf (dump_file, "Initial set of candidates:\n"); 6256 iv_ca_dump (data, dump_file, set); 6257 } 6258 6259 while (try_improve_iv_set (data, set, &try_replace_p)) 6260 { 6261 if (dump_file && (dump_flags & TDF_DETAILS)) 6262 { 6263 fprintf (dump_file, "Improved to:\n"); 6264 iv_ca_dump (data, dump_file, set); 6265 } 6266 } 6267 6268 return set; 6269} 6270 6271static struct iv_ca * 6272find_optimal_iv_set (struct ivopts_data *data) 6273{ 6274 unsigned i; 6275 struct iv_ca *set, *origset; 6276 struct iv_use *use; 6277 comp_cost cost, origcost; 6278 6279 /* Determine the cost based on a strategy that starts with original IVs, 6280 and try again using a strategy that prefers candidates not based 6281 on any IVs. */ 6282 origset = find_optimal_iv_set_1 (data, true); 6283 set = find_optimal_iv_set_1 (data, false); 6284 6285 if (!origset && !set) 6286 return NULL; 6287 6288 origcost = origset ? iv_ca_cost (origset) : infinite_cost; 6289 cost = set ? iv_ca_cost (set) : infinite_cost; 6290 6291 if (dump_file && (dump_flags & TDF_DETAILS)) 6292 { 6293 fprintf (dump_file, "Original cost %d (complexity %d)\n\n", 6294 origcost.cost, origcost.complexity); 6295 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", 6296 cost.cost, cost.complexity); 6297 } 6298 6299 /* Choose the one with the best cost. */ 6300 if (compare_costs (origcost, cost) <= 0) 6301 { 6302 if (set) 6303 iv_ca_free (&set); 6304 set = origset; 6305 } 6306 else if (origset) 6307 iv_ca_free (&origset); 6308 6309 for (i = 0; i < n_iv_uses (data); i++) 6310 { 6311 use = iv_use (data, i); 6312 use->selected = iv_ca_cand_for_use (set, use)->cand; 6313 } 6314 6315 return set; 6316} 6317 6318/* Creates a new induction variable corresponding to CAND. */ 6319 6320static void 6321create_new_iv (struct ivopts_data *data, struct iv_cand *cand) 6322{ 6323 gimple_stmt_iterator incr_pos; 6324 tree base; 6325 bool after = false; 6326 6327 if (!cand->iv) 6328 return; 6329 6330 switch (cand->pos) 6331 { 6332 case IP_NORMAL: 6333 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop)); 6334 break; 6335 6336 case IP_END: 6337 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop)); 6338 after = true; 6339 break; 6340 6341 case IP_AFTER_USE: 6342 after = true; 6343 /* fall through */ 6344 case IP_BEFORE_USE: 6345 incr_pos = gsi_for_stmt (cand->incremented_at); 6346 break; 6347 6348 case IP_ORIGINAL: 6349 /* Mark that the iv is preserved. */ 6350 name_info (data, cand->var_before)->preserve_biv = true; 6351 name_info (data, cand->var_after)->preserve_biv = true; 6352 6353 /* Rewrite the increment so that it uses var_before directly. */ 6354 find_interesting_uses_op (data, cand->var_after)->selected = cand; 6355 return; 6356 } 6357 6358 gimple_add_tmp_var (cand->var_before); 6359 6360 base = unshare_expr (cand->iv->base); 6361 6362 create_iv (base, unshare_expr (cand->iv->step), 6363 cand->var_before, data->current_loop, 6364 &incr_pos, after, &cand->var_before, &cand->var_after); 6365} 6366 6367/* Creates new induction variables described in SET. */ 6368 6369static void 6370create_new_ivs (struct ivopts_data *data, struct iv_ca *set) 6371{ 6372 unsigned i; 6373 struct iv_cand *cand; 6374 bitmap_iterator bi; 6375 6376 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) 6377 { 6378 cand = iv_cand (data, i); 6379 create_new_iv (data, cand); 6380 } 6381 6382 if (dump_file && (dump_flags & TDF_DETAILS)) 6383 { 6384 fprintf (dump_file, "Selected IV set for loop %d", 6385 data->current_loop->num); 6386 if (data->loop_loc != UNKNOWN_LOCATION) 6387 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc), 6388 LOCATION_LINE (data->loop_loc)); 6389 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands)); 6390 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) 6391 { 6392 cand = iv_cand (data, i); 6393 dump_cand (dump_file, cand); 6394 } 6395 fprintf (dump_file, "\n"); 6396 } 6397} 6398 6399/* Rewrites USE (definition of iv used in a nonlinear expression) 6400 using candidate CAND. */ 6401 6402static void 6403rewrite_use_nonlinear_expr (struct ivopts_data *data, 6404 struct iv_use *use, struct iv_cand *cand) 6405{ 6406 tree comp; 6407 tree op, tgt; 6408 gassign *ass; 6409 gimple_stmt_iterator bsi; 6410 6411 /* An important special case -- if we are asked to express value of 6412 the original iv by itself, just exit; there is no need to 6413 introduce a new computation (that might also need casting the 6414 variable to unsigned and back). */ 6415 if (cand->pos == IP_ORIGINAL 6416 && cand->incremented_at == use->stmt) 6417 { 6418 enum tree_code stmt_code; 6419 6420 gcc_assert (is_gimple_assign (use->stmt)); 6421 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); 6422 6423 /* Check whether we may leave the computation unchanged. 6424 This is the case only if it does not rely on other 6425 computations in the loop -- otherwise, the computation 6426 we rely upon may be removed in remove_unused_ivs, 6427 thus leading to ICE. */ 6428 stmt_code = gimple_assign_rhs_code (use->stmt); 6429 if (stmt_code == PLUS_EXPR 6430 || stmt_code == MINUS_EXPR 6431 || stmt_code == POINTER_PLUS_EXPR) 6432 { 6433 if (gimple_assign_rhs1 (use->stmt) == cand->var_before) 6434 op = gimple_assign_rhs2 (use->stmt); 6435 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before) 6436 op = gimple_assign_rhs1 (use->stmt); 6437 else 6438 op = NULL_TREE; 6439 } 6440 else 6441 op = NULL_TREE; 6442 6443 if (op && expr_invariant_in_loop_p (data->current_loop, op)) 6444 return; 6445 } 6446 6447 comp = get_computation (data->current_loop, use, cand); 6448 gcc_assert (comp != NULL_TREE); 6449 6450 switch (gimple_code (use->stmt)) 6451 { 6452 case GIMPLE_PHI: 6453 tgt = PHI_RESULT (use->stmt); 6454 6455 /* If we should keep the biv, do not replace it. */ 6456 if (name_info (data, tgt)->preserve_biv) 6457 return; 6458 6459 bsi = gsi_after_labels (gimple_bb (use->stmt)); 6460 break; 6461 6462 case GIMPLE_ASSIGN: 6463 tgt = gimple_assign_lhs (use->stmt); 6464 bsi = gsi_for_stmt (use->stmt); 6465 break; 6466 6467 default: 6468 gcc_unreachable (); 6469 } 6470 6471 if (!valid_gimple_rhs_p (comp) 6472 || (gimple_code (use->stmt) != GIMPLE_PHI 6473 /* We can't allow re-allocating the stmt as it might be pointed 6474 to still. */ 6475 && (get_gimple_rhs_num_ops (TREE_CODE (comp)) 6476 >= gimple_num_ops (gsi_stmt (bsi))))) 6477 { 6478 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE, 6479 true, GSI_SAME_STMT); 6480 if (POINTER_TYPE_P (TREE_TYPE (tgt))) 6481 { 6482 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt)); 6483 /* As this isn't a plain copy we have to reset alignment 6484 information. */ 6485 if (SSA_NAME_PTR_INFO (comp)) 6486 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp)); 6487 } 6488 } 6489 6490 if (gimple_code (use->stmt) == GIMPLE_PHI) 6491 { 6492 ass = gimple_build_assign (tgt, comp); 6493 gsi_insert_before (&bsi, ass, GSI_SAME_STMT); 6494 6495 bsi = gsi_for_stmt (use->stmt); 6496 remove_phi_node (&bsi, false); 6497 } 6498 else 6499 { 6500 gimple_assign_set_rhs_from_tree (&bsi, comp); 6501 use->stmt = gsi_stmt (bsi); 6502 } 6503} 6504 6505/* Performs a peephole optimization to reorder the iv update statement with 6506 a mem ref to enable instruction combining in later phases. The mem ref uses 6507 the iv value before the update, so the reordering transformation requires 6508 adjustment of the offset. CAND is the selected IV_CAND. 6509 6510 Example: 6511 6512 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset 6513 iv2 = iv1 + 1; 6514 6515 if (t < val) (1) 6516 goto L; 6517 goto Head; 6518 6519 6520 directly propagating t over to (1) will introduce overlapping live range 6521 thus increase register pressure. This peephole transform it into: 6522 6523 6524 iv2 = iv1 + 1; 6525 t = MEM_REF (base, iv2, 8, 8); 6526 if (t < val) 6527 goto L; 6528 goto Head; 6529*/ 6530 6531static void 6532adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use) 6533{ 6534 tree var_after; 6535 gimple iv_update, stmt; 6536 basic_block bb; 6537 gimple_stmt_iterator gsi, gsi_iv; 6538 6539 if (cand->pos != IP_NORMAL) 6540 return; 6541 6542 var_after = cand->var_after; 6543 iv_update = SSA_NAME_DEF_STMT (var_after); 6544 6545 bb = gimple_bb (iv_update); 6546 gsi = gsi_last_nondebug_bb (bb); 6547 stmt = gsi_stmt (gsi); 6548 6549 /* Only handle conditional statement for now. */ 6550 if (gimple_code (stmt) != GIMPLE_COND) 6551 return; 6552 6553 gsi_prev_nondebug (&gsi); 6554 stmt = gsi_stmt (gsi); 6555 if (stmt != iv_update) 6556 return; 6557 6558 gsi_prev_nondebug (&gsi); 6559 if (gsi_end_p (gsi)) 6560 return; 6561 6562 stmt = gsi_stmt (gsi); 6563 if (gimple_code (stmt) != GIMPLE_ASSIGN) 6564 return; 6565 6566 if (stmt != use->stmt) 6567 return; 6568 6569 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) 6570 return; 6571 6572 if (dump_file && (dump_flags & TDF_DETAILS)) 6573 { 6574 fprintf (dump_file, "Reordering \n"); 6575 print_gimple_stmt (dump_file, iv_update, 0, 0); 6576 print_gimple_stmt (dump_file, use->stmt, 0, 0); 6577 fprintf (dump_file, "\n"); 6578 } 6579 6580 gsi = gsi_for_stmt (use->stmt); 6581 gsi_iv = gsi_for_stmt (iv_update); 6582 gsi_move_before (&gsi_iv, &gsi); 6583 6584 cand->pos = IP_BEFORE_USE; 6585 cand->incremented_at = use->stmt; 6586} 6587 6588/* Rewrites USE (address that is an iv) using candidate CAND. */ 6589 6590static void 6591rewrite_use_address (struct ivopts_data *data, 6592 struct iv_use *use, struct iv_cand *cand) 6593{ 6594 aff_tree aff; 6595 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); 6596 tree base_hint = NULL_TREE; 6597 tree ref, iv; 6598 bool ok; 6599 6600 adjust_iv_update_pos (cand, use); 6601 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff); 6602 gcc_assert (ok); 6603 unshare_aff_combination (&aff); 6604 6605 /* To avoid undefined overflow problems, all IV candidates use unsigned 6606 integer types. The drawback is that this makes it impossible for 6607 create_mem_ref to distinguish an IV that is based on a memory object 6608 from one that represents simply an offset. 6609 6610 To work around this problem, we pass a hint to create_mem_ref that 6611 indicates which variable (if any) in aff is an IV based on a memory 6612 object. Note that we only consider the candidate. If this is not 6613 based on an object, the base of the reference is in some subexpression 6614 of the use -- but these will use pointer types, so they are recognized 6615 by the create_mem_ref heuristics anyway. */ 6616 if (cand->iv->base_object) 6617 base_hint = var_at_stmt (data->current_loop, cand, use->stmt); 6618 6619 iv = var_at_stmt (data->current_loop, cand, use->stmt); 6620 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, 6621 reference_alias_ptr_type (*use->op_p), 6622 iv, base_hint, data->speed); 6623 copy_ref_info (ref, *use->op_p); 6624 *use->op_p = ref; 6625} 6626 6627/* Rewrites USE (the condition such that one of the arguments is an iv) using 6628 candidate CAND. */ 6629 6630static void 6631rewrite_use_compare (struct ivopts_data *data, 6632 struct iv_use *use, struct iv_cand *cand) 6633{ 6634 tree comp, *var_p, op, bound; 6635 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); 6636 enum tree_code compare; 6637 struct cost_pair *cp = get_use_iv_cost (data, use, cand); 6638 bool ok; 6639 6640 bound = cp->value; 6641 if (bound) 6642 { 6643 tree var = var_at_stmt (data->current_loop, cand, use->stmt); 6644 tree var_type = TREE_TYPE (var); 6645 gimple_seq stmts; 6646 6647 if (dump_file && (dump_flags & TDF_DETAILS)) 6648 { 6649 fprintf (dump_file, "Replacing exit test: "); 6650 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM); 6651 } 6652 compare = cp->comp; 6653 bound = unshare_expr (fold_convert (var_type, bound)); 6654 op = force_gimple_operand (bound, &stmts, true, NULL_TREE); 6655 if (stmts) 6656 gsi_insert_seq_on_edge_immediate ( 6657 loop_preheader_edge (data->current_loop), 6658 stmts); 6659 6660 gcond *cond_stmt = as_a <gcond *> (use->stmt); 6661 gimple_cond_set_lhs (cond_stmt, var); 6662 gimple_cond_set_code (cond_stmt, compare); 6663 gimple_cond_set_rhs (cond_stmt, op); 6664 return; 6665 } 6666 6667 /* The induction variable elimination failed; just express the original 6668 giv. */ 6669 comp = get_computation (data->current_loop, use, cand); 6670 gcc_assert (comp != NULL_TREE); 6671 6672 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL); 6673 gcc_assert (ok); 6674 6675 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p), 6676 true, GSI_SAME_STMT); 6677} 6678 6679/* Rewrites USE using candidate CAND. */ 6680 6681static void 6682rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand) 6683{ 6684 switch (use->type) 6685 { 6686 case USE_NONLINEAR_EXPR: 6687 rewrite_use_nonlinear_expr (data, use, cand); 6688 break; 6689 6690 case USE_ADDRESS: 6691 rewrite_use_address (data, use, cand); 6692 break; 6693 6694 case USE_COMPARE: 6695 rewrite_use_compare (data, use, cand); 6696 break; 6697 6698 default: 6699 gcc_unreachable (); 6700 } 6701 6702 update_stmt (use->stmt); 6703} 6704 6705/* Rewrite the uses using the selected induction variables. */ 6706 6707static void 6708rewrite_uses (struct ivopts_data *data) 6709{ 6710 unsigned i; 6711 struct iv_cand *cand; 6712 struct iv_use *use; 6713 6714 for (i = 0; i < n_iv_uses (data); i++) 6715 { 6716 use = iv_use (data, i); 6717 cand = use->selected; 6718 gcc_assert (cand); 6719 6720 rewrite_use (data, use, cand); 6721 } 6722} 6723 6724/* Removes the ivs that are not used after rewriting. */ 6725 6726static void 6727remove_unused_ivs (struct ivopts_data *data) 6728{ 6729 unsigned j; 6730 bitmap_iterator bi; 6731 bitmap toremove = BITMAP_ALLOC (NULL); 6732 6733 /* Figure out an order in which to release SSA DEFs so that we don't 6734 release something that we'd have to propagate into a debug stmt 6735 afterwards. */ 6736 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 6737 { 6738 struct version_info *info; 6739 6740 info = ver_info (data, j); 6741 if (info->iv 6742 && !integer_zerop (info->iv->step) 6743 && !info->inv_id 6744 && !info->iv->have_use_for 6745 && !info->preserve_biv) 6746 { 6747 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name)); 6748 6749 tree def = info->iv->ssa_name; 6750 6751 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def)) 6752 { 6753 imm_use_iterator imm_iter; 6754 use_operand_p use_p; 6755 gimple stmt; 6756 int count = 0; 6757 6758 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def) 6759 { 6760 if (!gimple_debug_bind_p (stmt)) 6761 continue; 6762 6763 /* We just want to determine whether to do nothing 6764 (count == 0), to substitute the computed 6765 expression into a single use of the SSA DEF by 6766 itself (count == 1), or to use a debug temp 6767 because the SSA DEF is used multiple times or as 6768 part of a larger expression (count > 1). */ 6769 count++; 6770 if (gimple_debug_bind_get_value (stmt) != def) 6771 count++; 6772 6773 if (count > 1) 6774 BREAK_FROM_IMM_USE_STMT (imm_iter); 6775 } 6776 6777 if (!count) 6778 continue; 6779 6780 struct iv_use dummy_use; 6781 struct iv_cand *best_cand = NULL, *cand; 6782 unsigned i, best_pref = 0, cand_pref; 6783 6784 memset (&dummy_use, 0, sizeof (dummy_use)); 6785 dummy_use.iv = info->iv; 6786 for (i = 0; i < n_iv_uses (data) && i < 64; i++) 6787 { 6788 cand = iv_use (data, i)->selected; 6789 if (cand == best_cand) 6790 continue; 6791 cand_pref = operand_equal_p (cand->iv->step, 6792 info->iv->step, 0) 6793 ? 4 : 0; 6794 cand_pref 6795 += TYPE_MODE (TREE_TYPE (cand->iv->base)) 6796 == TYPE_MODE (TREE_TYPE (info->iv->base)) 6797 ? 2 : 0; 6798 cand_pref 6799 += TREE_CODE (cand->iv->base) == INTEGER_CST 6800 ? 1 : 0; 6801 if (best_cand == NULL || best_pref < cand_pref) 6802 { 6803 best_cand = cand; 6804 best_pref = cand_pref; 6805 } 6806 } 6807 6808 if (!best_cand) 6809 continue; 6810 6811 tree comp = get_computation_at (data->current_loop, 6812 &dummy_use, best_cand, 6813 SSA_NAME_DEF_STMT (def)); 6814 if (!comp) 6815 continue; 6816 6817 if (count > 1) 6818 { 6819 tree vexpr = make_node (DEBUG_EXPR_DECL); 6820 DECL_ARTIFICIAL (vexpr) = 1; 6821 TREE_TYPE (vexpr) = TREE_TYPE (comp); 6822 if (SSA_NAME_VAR (def)) 6823 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def)); 6824 else 6825 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr)); 6826 gdebug *def_temp 6827 = gimple_build_debug_bind (vexpr, comp, NULL); 6828 gimple_stmt_iterator gsi; 6829 6830 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI) 6831 gsi = gsi_after_labels (gimple_bb 6832 (SSA_NAME_DEF_STMT (def))); 6833 else 6834 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def)); 6835 6836 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT); 6837 comp = vexpr; 6838 } 6839 6840 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def) 6841 { 6842 if (!gimple_debug_bind_p (stmt)) 6843 continue; 6844 6845 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) 6846 SET_USE (use_p, comp); 6847 6848 update_stmt (stmt); 6849 } 6850 } 6851 } 6852 } 6853 6854 release_defs_bitset (toremove); 6855 6856 BITMAP_FREE (toremove); 6857} 6858 6859/* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback 6860 for hash_map::traverse. */ 6861 6862bool 6863free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *) 6864{ 6865 free (value); 6866 return true; 6867} 6868 6869/* Frees data allocated by the optimization of a single loop. */ 6870 6871static void 6872free_loop_data (struct ivopts_data *data) 6873{ 6874 unsigned i, j; 6875 bitmap_iterator bi; 6876 tree obj; 6877 6878 if (data->niters) 6879 { 6880 data->niters->traverse<void *, free_tree_niter_desc> (NULL); 6881 delete data->niters; 6882 data->niters = NULL; 6883 } 6884 6885 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 6886 { 6887 struct version_info *info; 6888 6889 info = ver_info (data, i); 6890 free (info->iv); 6891 info->iv = NULL; 6892 info->has_nonlin_use = false; 6893 info->preserve_biv = false; 6894 info->inv_id = 0; 6895 } 6896 bitmap_clear (data->relevant); 6897 bitmap_clear (data->important_candidates); 6898 6899 for (i = 0; i < n_iv_uses (data); i++) 6900 { 6901 struct iv_use *use = iv_use (data, i); 6902 6903 free (use->iv); 6904 BITMAP_FREE (use->related_cands); 6905 for (j = 0; j < use->n_map_members; j++) 6906 if (use->cost_map[j].depends_on) 6907 BITMAP_FREE (use->cost_map[j].depends_on); 6908 free (use->cost_map); 6909 free (use); 6910 } 6911 data->iv_uses.truncate (0); 6912 6913 for (i = 0; i < n_iv_cands (data); i++) 6914 { 6915 struct iv_cand *cand = iv_cand (data, i); 6916 6917 free (cand->iv); 6918 if (cand->depends_on) 6919 BITMAP_FREE (cand->depends_on); 6920 free (cand); 6921 } 6922 data->iv_candidates.truncate (0); 6923 6924 if (data->version_info_size < num_ssa_names) 6925 { 6926 data->version_info_size = 2 * num_ssa_names; 6927 free (data->version_info); 6928 data->version_info = XCNEWVEC (struct version_info, data->version_info_size); 6929 } 6930 6931 data->max_inv_id = 0; 6932 6933 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj) 6934 SET_DECL_RTL (obj, NULL_RTX); 6935 6936 decl_rtl_to_reset.truncate (0); 6937 6938 data->inv_expr_tab->empty (); 6939 data->inv_expr_id = 0; 6940} 6941 6942/* Finalizes data structures used by the iv optimization pass. LOOPS is the 6943 loop tree. */ 6944 6945static void 6946tree_ssa_iv_optimize_finalize (struct ivopts_data *data) 6947{ 6948 free_loop_data (data); 6949 free (data->version_info); 6950 BITMAP_FREE (data->relevant); 6951 BITMAP_FREE (data->important_candidates); 6952 6953 decl_rtl_to_reset.release (); 6954 data->iv_uses.release (); 6955 data->iv_candidates.release (); 6956 delete data->inv_expr_tab; 6957 data->inv_expr_tab = NULL; 6958 free_affine_expand_cache (&data->name_expansion_cache); 6959} 6960 6961/* Returns true if the loop body BODY includes any function calls. */ 6962 6963static bool 6964loop_body_includes_call (basic_block *body, unsigned num_nodes) 6965{ 6966 gimple_stmt_iterator gsi; 6967 unsigned i; 6968 6969 for (i = 0; i < num_nodes; i++) 6970 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) 6971 { 6972 gimple stmt = gsi_stmt (gsi); 6973 if (is_gimple_call (stmt) 6974 && !is_inexpensive_builtin (gimple_call_fndecl (stmt))) 6975 return true; 6976 } 6977 return false; 6978} 6979 6980/* Optimizes the LOOP. Returns true if anything changed. */ 6981 6982static bool 6983tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop) 6984{ 6985 bool changed = false; 6986 struct iv_ca *iv_ca; 6987 edge exit = single_dom_exit (loop); 6988 basic_block *body; 6989 6990 gcc_assert (!data->niters); 6991 data->current_loop = loop; 6992 data->loop_loc = find_loop_location (loop); 6993 data->speed = optimize_loop_for_speed_p (loop); 6994 6995 if (dump_file && (dump_flags & TDF_DETAILS)) 6996 { 6997 fprintf (dump_file, "Processing loop %d", loop->num); 6998 if (data->loop_loc != UNKNOWN_LOCATION) 6999 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc), 7000 LOCATION_LINE (data->loop_loc)); 7001 fprintf (dump_file, "\n"); 7002 7003 if (exit) 7004 { 7005 fprintf (dump_file, " single exit %d -> %d, exit condition ", 7006 exit->src->index, exit->dest->index); 7007 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM); 7008 fprintf (dump_file, "\n"); 7009 } 7010 7011 fprintf (dump_file, "\n"); 7012 } 7013 7014 body = get_loop_body (loop); 7015 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes); 7016 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes); 7017 free (body); 7018 7019 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit); 7020 7021 /* For each ssa name determines whether it behaves as an induction variable 7022 in some loop. */ 7023 if (!find_induction_variables (data)) 7024 goto finish; 7025 7026 /* Finds interesting uses (item 1). */ 7027 find_interesting_uses (data); 7028 if (n_iv_uses (data) > MAX_CONSIDERED_USES) 7029 goto finish; 7030 7031 /* Finds candidates for the induction variables (item 2). */ 7032 find_iv_candidates (data); 7033 7034 /* Calculates the costs (item 3, part 1). */ 7035 determine_iv_costs (data); 7036 determine_use_iv_costs (data); 7037 determine_set_costs (data); 7038 7039 /* Find the optimal set of induction variables (item 3, part 2). */ 7040 iv_ca = find_optimal_iv_set (data); 7041 if (!iv_ca) 7042 goto finish; 7043 changed = true; 7044 7045 /* Create the new induction variables (item 4, part 1). */ 7046 create_new_ivs (data, iv_ca); 7047 iv_ca_free (&iv_ca); 7048 7049 /* Rewrite the uses (item 4, part 2). */ 7050 rewrite_uses (data); 7051 7052 /* Remove the ivs that are unused after rewriting. */ 7053 remove_unused_ivs (data); 7054 7055 /* We have changed the structure of induction variables; it might happen 7056 that definitions in the scev database refer to some of them that were 7057 eliminated. */ 7058 scev_reset (); 7059 7060finish: 7061 free_loop_data (data); 7062 7063 return changed; 7064} 7065 7066/* Main entry point. Optimizes induction variables in loops. */ 7067 7068void 7069tree_ssa_iv_optimize (void) 7070{ 7071 struct loop *loop; 7072 struct ivopts_data data; 7073 7074 tree_ssa_iv_optimize_init (&data); 7075 7076 /* Optimize the loops starting with the innermost ones. */ 7077 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) 7078 { 7079 if (dump_file && (dump_flags & TDF_DETAILS)) 7080 flow_loop_dump (loop, dump_file, NULL, 1); 7081 7082 tree_ssa_iv_optimize_loop (&data, loop); 7083 } 7084 7085 tree_ssa_iv_optimize_finalize (&data); 7086} 7087