1169689Skan/* Loop Vectorization 2169689Skan Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. 3169689Skan Contributed by Dorit Naishlos <dorit@il.ibm.com> 4169689Skan 5169689SkanThis file is part of GCC. 6169689Skan 7169689SkanGCC is free software; you can redistribute it and/or modify it under 8169689Skanthe terms of the GNU General Public License as published by the Free 9169689SkanSoftware Foundation; either version 2, or (at your option) any later 10169689Skanversion. 11169689Skan 12169689SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY 13169689SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or 14169689SkanFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15169689Skanfor more details. 16169689Skan 17169689SkanYou should have received a copy of the GNU General Public License 18169689Skanalong with GCC; see the file COPYING. If not, write to the Free 19169689SkanSoftware Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 20169689Skan02110-1301, USA. */ 21169689Skan 22169689Skan/* Loop Vectorization Pass. 23169689Skan 24169689Skan This pass tries to vectorize loops. This first implementation focuses on 25169689Skan simple inner-most loops, with no conditional control flow, and a set of 26169689Skan simple operations which vector form can be expressed using existing 27169689Skan tree codes (PLUS, MULT etc). 28169689Skan 29169689Skan For example, the vectorizer transforms the following simple loop: 30169689Skan 31169689Skan short a[N]; short b[N]; short c[N]; int i; 32169689Skan 33169689Skan for (i=0; i<N; i++){ 34169689Skan a[i] = b[i] + c[i]; 35169689Skan } 36169689Skan 37169689Skan as if it was manually vectorized by rewriting the source code into: 38169689Skan 39169689Skan typedef int __attribute__((mode(V8HI))) v8hi; 40169689Skan short a[N]; short b[N]; short c[N]; int i; 41169689Skan v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c; 42169689Skan v8hi va, vb, vc; 43169689Skan 44169689Skan for (i=0; i<N/8; i++){ 45169689Skan vb = pb[i]; 46169689Skan vc = pc[i]; 47169689Skan va = vb + vc; 48169689Skan pa[i] = va; 49169689Skan } 50169689Skan 51169689Skan The main entry to this pass is vectorize_loops(), in which 52169689Skan the vectorizer applies a set of analyses on a given set of loops, 53169689Skan followed by the actual vectorization transformation for the loops that 54169689Skan had successfully passed the analysis phase. 55169689Skan 56169689Skan Throughout this pass we make a distinction between two types of 57169689Skan data: scalars (which are represented by SSA_NAMES), and memory references 58169689Skan ("data-refs"). These two types of data require different handling both 59169689Skan during analysis and transformation. The types of data-refs that the 60169689Skan vectorizer currently supports are ARRAY_REFS which base is an array DECL 61169689Skan (not a pointer), and INDIRECT_REFS through pointers; both array and pointer 62169689Skan accesses are required to have a simple (consecutive) access pattern. 63169689Skan 64169689Skan Analysis phase: 65169689Skan =============== 66169689Skan The driver for the analysis phase is vect_analyze_loop_nest(). 67169689Skan It applies a set of analyses, some of which rely on the scalar evolution 68169689Skan analyzer (scev) developed by Sebastian Pop. 69169689Skan 70169689Skan During the analysis phase the vectorizer records some information 71169689Skan per stmt in a "stmt_vec_info" struct which is attached to each stmt in the 72169689Skan loop, as well as general information about the loop as a whole, which is 73169689Skan recorded in a "loop_vec_info" struct attached to each loop. 74169689Skan 75169689Skan Transformation phase: 76169689Skan ===================== 77169689Skan The loop transformation phase scans all the stmts in the loop, and 78169689Skan creates a vector stmt (or a sequence of stmts) for each scalar stmt S in 79169689Skan the loop that needs to be vectorized. It insert the vector code sequence 80169689Skan just before the scalar stmt S, and records a pointer to the vector code 81169689Skan in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct 82169689Skan attached to S). This pointer will be used for the vectorization of following 83169689Skan stmts which use the def of stmt S. Stmt S is removed if it writes to memory; 84169689Skan otherwise, we rely on dead code elimination for removing it. 85169689Skan 86169689Skan For example, say stmt S1 was vectorized into stmt VS1: 87169689Skan 88169689Skan VS1: vb = px[i]; 89169689Skan S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 90169689Skan S2: a = b; 91169689Skan 92169689Skan To vectorize stmt S2, the vectorizer first finds the stmt that defines 93169689Skan the operand 'b' (S1), and gets the relevant vector def 'vb' from the 94169689Skan vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The 95169689Skan resulting sequence would be: 96169689Skan 97169689Skan VS1: vb = px[i]; 98169689Skan S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1 99169689Skan VS2: va = vb; 100169689Skan S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2 101169689Skan 102169689Skan Operands that are not SSA_NAMEs, are data-refs that appear in 103169689Skan load/store operations (like 'x[i]' in S1), and are handled differently. 104169689Skan 105169689Skan Target modeling: 106169689Skan ================= 107169689Skan Currently the only target specific information that is used is the 108169689Skan size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can 109169689Skan support different sizes of vectors, for now will need to specify one value 110169689Skan for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future. 111169689Skan 112169689Skan Since we only vectorize operations which vector form can be 113169689Skan expressed using existing tree codes, to verify that an operation is 114169689Skan supported, the vectorizer checks the relevant optab at the relevant 115169689Skan machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If 116169689Skan the value found is CODE_FOR_nothing, then there's no target support, and 117169689Skan we can't vectorize the stmt. 118169689Skan 119169689Skan For additional information on this project see: 120169689Skan http://gcc.gnu.org/projects/tree-ssa/vectorization.html 121169689Skan*/ 122169689Skan 123169689Skan#include "config.h" 124169689Skan#include "system.h" 125169689Skan#include "coretypes.h" 126169689Skan#include "tm.h" 127169689Skan#include "ggc.h" 128169689Skan#include "tree.h" 129169689Skan#include "target.h" 130169689Skan#include "rtl.h" 131169689Skan#include "basic-block.h" 132169689Skan#include "diagnostic.h" 133169689Skan#include "tree-flow.h" 134169689Skan#include "tree-dump.h" 135169689Skan#include "timevar.h" 136169689Skan#include "cfgloop.h" 137169689Skan#include "cfglayout.h" 138169689Skan#include "expr.h" 139169689Skan#include "optabs.h" 140169689Skan#include "params.h" 141169689Skan#include "toplev.h" 142169689Skan#include "tree-chrec.h" 143169689Skan#include "tree-data-ref.h" 144169689Skan#include "tree-scalar-evolution.h" 145169689Skan#include "input.h" 146169689Skan#include "tree-vectorizer.h" 147169689Skan#include "tree-pass.h" 148169689Skan 149169689Skan/************************************************************************* 150169689Skan Simple Loop Peeling Utilities 151169689Skan *************************************************************************/ 152169689Skanstatic struct loop *slpeel_tree_duplicate_loop_to_edge_cfg 153169689Skan (struct loop *, struct loops *, edge); 154169689Skanstatic void slpeel_update_phis_for_duplicate_loop 155169689Skan (struct loop *, struct loop *, bool after); 156169689Skanstatic void slpeel_update_phi_nodes_for_guard1 157169689Skan (edge, struct loop *, bool, basic_block *, bitmap *); 158169689Skanstatic void slpeel_update_phi_nodes_for_guard2 159169689Skan (edge, struct loop *, bool, basic_block *); 160169689Skanstatic edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block); 161169689Skan 162169689Skanstatic void rename_use_op (use_operand_p); 163169689Skanstatic void rename_variables_in_bb (basic_block); 164169689Skanstatic void rename_variables_in_loop (struct loop *); 165169689Skan 166169689Skan/************************************************************************* 167169689Skan General Vectorization Utilities 168169689Skan *************************************************************************/ 169169689Skanstatic void vect_set_dump_settings (void); 170169689Skan 171169689Skan/* vect_dump will be set to stderr or dump_file if exist. */ 172169689SkanFILE *vect_dump; 173169689Skan 174169689Skan/* vect_verbosity_level set to an invalid value 175169689Skan to mark that it's uninitialized. */ 176169689Skanenum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL; 177169689Skan 178169689Skan/* Number of loops, at the beginning of vectorization. */ 179169689Skanunsigned int vect_loops_num; 180169689Skan 181169689Skan/* Loop location. */ 182169689Skanstatic LOC vect_loop_location; 183169689Skan 184169689Skan/* Bitmap of virtual variables to be renamed. */ 185169689Skanbitmap vect_vnames_to_rename; 186169689Skan 187169689Skan/************************************************************************* 188169689Skan Simple Loop Peeling Utilities 189169689Skan 190169689Skan Utilities to support loop peeling for vectorization purposes. 191169689Skan *************************************************************************/ 192169689Skan 193169689Skan 194169689Skan/* Renames the use *OP_P. */ 195169689Skan 196169689Skanstatic void 197169689Skanrename_use_op (use_operand_p op_p) 198169689Skan{ 199169689Skan tree new_name; 200169689Skan 201169689Skan if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME) 202169689Skan return; 203169689Skan 204169689Skan new_name = get_current_def (USE_FROM_PTR (op_p)); 205169689Skan 206169689Skan /* Something defined outside of the loop. */ 207169689Skan if (!new_name) 208169689Skan return; 209169689Skan 210169689Skan /* An ordinary ssa name defined in the loop. */ 211169689Skan 212169689Skan SET_USE (op_p, new_name); 213169689Skan} 214169689Skan 215169689Skan 216169689Skan/* Renames the variables in basic block BB. */ 217169689Skan 218169689Skanstatic void 219169689Skanrename_variables_in_bb (basic_block bb) 220169689Skan{ 221169689Skan tree phi; 222169689Skan block_stmt_iterator bsi; 223169689Skan tree stmt; 224169689Skan use_operand_p use_p; 225169689Skan ssa_op_iter iter; 226169689Skan edge e; 227169689Skan edge_iterator ei; 228169689Skan struct loop *loop = bb->loop_father; 229169689Skan 230169689Skan for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) 231169689Skan { 232169689Skan stmt = bsi_stmt (bsi); 233169689Skan FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, 234169689Skan (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS)) 235169689Skan rename_use_op (use_p); 236169689Skan } 237169689Skan 238169689Skan FOR_EACH_EDGE (e, ei, bb->succs) 239169689Skan { 240169689Skan if (!flow_bb_inside_loop_p (loop, e->dest)) 241169689Skan continue; 242169689Skan for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi)) 243169689Skan rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e)); 244169689Skan } 245169689Skan} 246169689Skan 247169689Skan 248169689Skan/* Renames variables in new generated LOOP. */ 249169689Skan 250169689Skanstatic void 251169689Skanrename_variables_in_loop (struct loop *loop) 252169689Skan{ 253169689Skan unsigned i; 254169689Skan basic_block *bbs; 255169689Skan 256169689Skan bbs = get_loop_body (loop); 257169689Skan 258169689Skan for (i = 0; i < loop->num_nodes; i++) 259169689Skan rename_variables_in_bb (bbs[i]); 260169689Skan 261169689Skan free (bbs); 262169689Skan} 263169689Skan 264169689Skan 265169689Skan/* Update the PHI nodes of NEW_LOOP. 266169689Skan 267169689Skan NEW_LOOP is a duplicate of ORIG_LOOP. 268169689Skan AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP: 269169689Skan AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it 270169689Skan executes before it. */ 271169689Skan 272169689Skanstatic void 273169689Skanslpeel_update_phis_for_duplicate_loop (struct loop *orig_loop, 274169689Skan struct loop *new_loop, bool after) 275169689Skan{ 276169689Skan tree new_ssa_name; 277169689Skan tree phi_new, phi_orig; 278169689Skan tree def; 279169689Skan edge orig_loop_latch = loop_latch_edge (orig_loop); 280169689Skan edge orig_entry_e = loop_preheader_edge (orig_loop); 281169689Skan edge new_loop_exit_e = new_loop->single_exit; 282169689Skan edge new_loop_entry_e = loop_preheader_edge (new_loop); 283169689Skan edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e); 284169689Skan 285169689Skan /* 286169689Skan step 1. For each loop-header-phi: 287169689Skan Add the first phi argument for the phi in NEW_LOOP 288169689Skan (the one associated with the entry of NEW_LOOP) 289169689Skan 290169689Skan step 2. For each loop-header-phi: 291169689Skan Add the second phi argument for the phi in NEW_LOOP 292169689Skan (the one associated with the latch of NEW_LOOP) 293169689Skan 294169689Skan step 3. Update the phis in the successor block of NEW_LOOP. 295169689Skan 296169689Skan case 1: NEW_LOOP was placed before ORIG_LOOP: 297169689Skan The successor block of NEW_LOOP is the header of ORIG_LOOP. 298169689Skan Updating the phis in the successor block can therefore be done 299169689Skan along with the scanning of the loop header phis, because the 300169689Skan header blocks of ORIG_LOOP and NEW_LOOP have exactly the same 301169689Skan phi nodes, organized in the same order. 302169689Skan 303169689Skan case 2: NEW_LOOP was placed after ORIG_LOOP: 304169689Skan The successor block of NEW_LOOP is the original exit block of 305169689Skan ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis. 306169689Skan We postpone updating these phis to a later stage (when 307169689Skan loop guards are added). 308169689Skan */ 309169689Skan 310169689Skan 311169689Skan /* Scan the phis in the headers of the old and new loops 312169689Skan (they are organized in exactly the same order). */ 313169689Skan 314169689Skan for (phi_new = phi_nodes (new_loop->header), 315169689Skan phi_orig = phi_nodes (orig_loop->header); 316169689Skan phi_new && phi_orig; 317169689Skan phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig)) 318169689Skan { 319169689Skan /* step 1. */ 320169689Skan def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e); 321169689Skan add_phi_arg (phi_new, def, new_loop_entry_e); 322169689Skan 323169689Skan /* step 2. */ 324169689Skan def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch); 325169689Skan if (TREE_CODE (def) != SSA_NAME) 326169689Skan continue; 327169689Skan 328169689Skan new_ssa_name = get_current_def (def); 329169689Skan if (!new_ssa_name) 330169689Skan { 331169689Skan /* This only happens if there are no definitions 332169689Skan inside the loop. use the phi_result in this case. */ 333169689Skan new_ssa_name = PHI_RESULT (phi_new); 334169689Skan } 335169689Skan 336169689Skan /* An ordinary ssa name defined in the loop. */ 337169689Skan add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop)); 338169689Skan 339169689Skan /* step 3 (case 1). */ 340169689Skan if (!after) 341169689Skan { 342169689Skan gcc_assert (new_loop_exit_e == orig_entry_e); 343169689Skan SET_PHI_ARG_DEF (phi_orig, 344169689Skan new_loop_exit_e->dest_idx, 345169689Skan new_ssa_name); 346169689Skan } 347169689Skan } 348169689Skan} 349169689Skan 350169689Skan 351169689Skan/* Update PHI nodes for a guard of the LOOP. 352169689Skan 353169689Skan Input: 354169689Skan - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that 355169689Skan controls whether LOOP is to be executed. GUARD_EDGE is the edge that 356169689Skan originates from the guard-bb, skips LOOP and reaches the (unique) exit 357169689Skan bb of LOOP. This loop-exit-bb is an empty bb with one successor. 358169689Skan We denote this bb NEW_MERGE_BB because before the guard code was added 359169689Skan it had a single predecessor (the LOOP header), and now it became a merge 360169689Skan point of two paths - the path that ends with the LOOP exit-edge, and 361169689Skan the path that ends with GUARD_EDGE. 362169689Skan - NEW_EXIT_BB: New basic block that is added by this function between LOOP 363169689Skan and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis. 364169689Skan 365169689Skan ===> The CFG before the guard-code was added: 366169689Skan LOOP_header_bb: 367169689Skan loop_body 368169689Skan if (exit_loop) goto update_bb 369169689Skan else goto LOOP_header_bb 370169689Skan update_bb: 371169689Skan 372169689Skan ==> The CFG after the guard-code was added: 373169689Skan guard_bb: 374169689Skan if (LOOP_guard_condition) goto new_merge_bb 375169689Skan else goto LOOP_header_bb 376169689Skan LOOP_header_bb: 377169689Skan loop_body 378169689Skan if (exit_loop_condition) goto new_merge_bb 379169689Skan else goto LOOP_header_bb 380169689Skan new_merge_bb: 381169689Skan goto update_bb 382169689Skan update_bb: 383169689Skan 384169689Skan ==> The CFG after this function: 385169689Skan guard_bb: 386169689Skan if (LOOP_guard_condition) goto new_merge_bb 387169689Skan else goto LOOP_header_bb 388169689Skan LOOP_header_bb: 389169689Skan loop_body 390169689Skan if (exit_loop_condition) goto new_exit_bb 391169689Skan else goto LOOP_header_bb 392169689Skan new_exit_bb: 393169689Skan new_merge_bb: 394169689Skan goto update_bb 395169689Skan update_bb: 396169689Skan 397169689Skan This function: 398169689Skan 1. creates and updates the relevant phi nodes to account for the new 399169689Skan incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves: 400169689Skan 1.1. Create phi nodes at NEW_MERGE_BB. 401169689Skan 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted 402169689Skan UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB 403169689Skan 2. preserves loop-closed-ssa-form by creating the required phi nodes 404169689Skan at the exit of LOOP (i.e, in NEW_EXIT_BB). 405169689Skan 406169689Skan There are two flavors to this function: 407169689Skan 408169689Skan slpeel_update_phi_nodes_for_guard1: 409169689Skan Here the guard controls whether we enter or skip LOOP, where LOOP is a 410169689Skan prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are 411169689Skan for variables that have phis in the loop header. 412169689Skan 413169689Skan slpeel_update_phi_nodes_for_guard2: 414169689Skan Here the guard controls whether we enter or skip LOOP, where LOOP is an 415169689Skan epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are 416169689Skan for variables that have phis in the loop exit. 417169689Skan 418169689Skan I.E., the overall structure is: 419169689Skan 420169689Skan loop1_preheader_bb: 421169689Skan guard1 (goto loop1/merg1_bb) 422169689Skan loop1 423169689Skan loop1_exit_bb: 424169689Skan guard2 (goto merge1_bb/merge2_bb) 425169689Skan merge1_bb 426169689Skan loop2 427169689Skan loop2_exit_bb 428169689Skan merge2_bb 429169689Skan next_bb 430169689Skan 431169689Skan slpeel_update_phi_nodes_for_guard1 takes care of creating phis in 432169689Skan loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars 433169689Skan that have phis in loop1->header). 434169689Skan 435169689Skan slpeel_update_phi_nodes_for_guard2 takes care of creating phis in 436169689Skan loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars 437169689Skan that have phis in next_bb). It also adds some of these phis to 438169689Skan loop1_exit_bb. 439169689Skan 440169689Skan slpeel_update_phi_nodes_for_guard1 is always called before 441169689Skan slpeel_update_phi_nodes_for_guard2. They are both needed in order 442169689Skan to create correct data-flow and loop-closed-ssa-form. 443169689Skan 444169689Skan Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables 445169689Skan that change between iterations of a loop (and therefore have a phi-node 446169689Skan at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates 447169689Skan phis for variables that are used out of the loop (and therefore have 448169689Skan loop-closed exit phis). Some variables may be both updated between 449169689Skan iterations and used after the loop. This is why in loop1_exit_bb we 450169689Skan may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1) 451169689Skan and exit phis (created by slpeel_update_phi_nodes_for_guard2). 452169689Skan 453169689Skan - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of 454169689Skan an original loop. i.e., we have: 455169689Skan 456169689Skan orig_loop 457169689Skan guard_bb (goto LOOP/new_merge) 458169689Skan new_loop <-- LOOP 459169689Skan new_exit 460169689Skan new_merge 461169689Skan next_bb 462169689Skan 463169689Skan If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we 464169689Skan have: 465169689Skan 466169689Skan new_loop 467169689Skan guard_bb (goto LOOP/new_merge) 468169689Skan orig_loop <-- LOOP 469169689Skan new_exit 470169689Skan new_merge 471169689Skan next_bb 472169689Skan 473169689Skan The SSA names defined in the original loop have a current 474169689Skan reaching definition that that records the corresponding new 475169689Skan ssa-name used in the new duplicated loop copy. 476169689Skan */ 477169689Skan 478169689Skan/* Function slpeel_update_phi_nodes_for_guard1 479169689Skan 480169689Skan Input: 481169689Skan - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above. 482169689Skan - DEFS - a bitmap of ssa names to mark new names for which we recorded 483169689Skan information. 484169689Skan 485169689Skan In the context of the overall structure, we have: 486169689Skan 487169689Skan loop1_preheader_bb: 488169689Skan guard1 (goto loop1/merg1_bb) 489169689SkanLOOP-> loop1 490169689Skan loop1_exit_bb: 491169689Skan guard2 (goto merge1_bb/merge2_bb) 492169689Skan merge1_bb 493169689Skan loop2 494169689Skan loop2_exit_bb 495169689Skan merge2_bb 496169689Skan next_bb 497169689Skan 498169689Skan For each name updated between loop iterations (i.e - for each name that has 499169689Skan an entry (loop-header) phi in LOOP) we create a new phi in: 500169689Skan 1. merge1_bb (to account for the edge from guard1) 501169689Skan 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form) 502169689Skan*/ 503169689Skan 504169689Skanstatic void 505169689Skanslpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop, 506169689Skan bool is_new_loop, basic_block *new_exit_bb, 507169689Skan bitmap *defs) 508169689Skan{ 509169689Skan tree orig_phi, new_phi; 510169689Skan tree update_phi, update_phi2; 511169689Skan tree guard_arg, loop_arg; 512169689Skan basic_block new_merge_bb = guard_edge->dest; 513169689Skan edge e = EDGE_SUCC (new_merge_bb, 0); 514169689Skan basic_block update_bb = e->dest; 515169689Skan basic_block orig_bb = loop->header; 516169689Skan edge new_exit_e; 517169689Skan tree current_new_name; 518169689Skan tree name; 519169689Skan 520169689Skan /* Create new bb between loop and new_merge_bb. */ 521169689Skan *new_exit_bb = split_edge (loop->single_exit); 522169689Skan add_bb_to_loop (*new_exit_bb, loop->outer); 523169689Skan 524169689Skan new_exit_e = EDGE_SUCC (*new_exit_bb, 0); 525169689Skan 526169689Skan for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb); 527169689Skan orig_phi && update_phi; 528169689Skan orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi)) 529169689Skan { 530169689Skan /* Virtual phi; Mark it for renaming. We actually want to call 531169689Skan mar_sym_for_renaming, but since all ssa renaming datastructures 532169689Skan are going to be freed before we get to call ssa_upate, we just 533169689Skan record this name for now in a bitmap, and will mark it for 534169689Skan renaming later. */ 535169689Skan name = PHI_RESULT (orig_phi); 536169689Skan if (!is_gimple_reg (SSA_NAME_VAR (name))) 537169689Skan bitmap_set_bit (vect_vnames_to_rename, SSA_NAME_VERSION (name)); 538169689Skan 539169689Skan /** 1. Handle new-merge-point phis **/ 540169689Skan 541169689Skan /* 1.1. Generate new phi node in NEW_MERGE_BB: */ 542169689Skan new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), 543169689Skan new_merge_bb); 544169689Skan 545169689Skan /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge 546169689Skan of LOOP. Set the two phi args in NEW_PHI for these edges: */ 547169689Skan loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0)); 548169689Skan guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop)); 549169689Skan 550169689Skan add_phi_arg (new_phi, loop_arg, new_exit_e); 551169689Skan add_phi_arg (new_phi, guard_arg, guard_edge); 552169689Skan 553169689Skan /* 1.3. Update phi in successor block. */ 554169689Skan gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg 555169689Skan || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg); 556169689Skan SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi)); 557169689Skan update_phi2 = new_phi; 558169689Skan 559169689Skan 560169689Skan /** 2. Handle loop-closed-ssa-form phis **/ 561169689Skan 562169689Skan /* 2.1. Generate new phi node in NEW_EXIT_BB: */ 563169689Skan new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), 564169689Skan *new_exit_bb); 565169689Skan 566169689Skan /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */ 567169689Skan add_phi_arg (new_phi, loop_arg, loop->single_exit); 568169689Skan 569169689Skan /* 2.3. Update phi in successor of NEW_EXIT_BB: */ 570169689Skan gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg); 571169689Skan SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi)); 572169689Skan 573169689Skan /* 2.4. Record the newly created name with set_current_def. 574169689Skan We want to find a name such that 575169689Skan name = get_current_def (orig_loop_name) 576169689Skan and to set its current definition as follows: 577169689Skan set_current_def (name, new_phi_name) 578169689Skan 579169689Skan If LOOP is a new loop then loop_arg is already the name we're 580169689Skan looking for. If LOOP is the original loop, then loop_arg is 581169689Skan the orig_loop_name and the relevant name is recorded in its 582169689Skan current reaching definition. */ 583169689Skan if (is_new_loop) 584169689Skan current_new_name = loop_arg; 585169689Skan else 586169689Skan { 587169689Skan current_new_name = get_current_def (loop_arg); 588169689Skan /* current_def is not available only if the variable does not 589169689Skan change inside the loop, in which case we also don't care 590169689Skan about recording a current_def for it because we won't be 591169689Skan trying to create loop-exit-phis for it. */ 592169689Skan if (!current_new_name) 593169689Skan continue; 594169689Skan } 595169689Skan gcc_assert (get_current_def (current_new_name) == NULL_TREE); 596169689Skan 597169689Skan set_current_def (current_new_name, PHI_RESULT (new_phi)); 598169689Skan bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name)); 599169689Skan } 600169689Skan 601169689Skan set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb))); 602169689Skan} 603169689Skan 604169689Skan 605169689Skan/* Function slpeel_update_phi_nodes_for_guard2 606169689Skan 607169689Skan Input: 608169689Skan - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above. 609169689Skan 610169689Skan In the context of the overall structure, we have: 611169689Skan 612169689Skan loop1_preheader_bb: 613169689Skan guard1 (goto loop1/merg1_bb) 614169689Skan loop1 615169689Skan loop1_exit_bb: 616169689Skan guard2 (goto merge1_bb/merge2_bb) 617169689Skan merge1_bb 618169689SkanLOOP-> loop2 619169689Skan loop2_exit_bb 620169689Skan merge2_bb 621169689Skan next_bb 622169689Skan 623169689Skan For each name used out side the loop (i.e - for each name that has an exit 624169689Skan phi in next_bb) we create a new phi in: 625169689Skan 1. merge2_bb (to account for the edge from guard_bb) 626169689Skan 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form) 627169689Skan 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form), 628169689Skan if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1). 629169689Skan*/ 630169689Skan 631169689Skanstatic void 632169689Skanslpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop, 633169689Skan bool is_new_loop, basic_block *new_exit_bb) 634169689Skan{ 635169689Skan tree orig_phi, new_phi; 636169689Skan tree update_phi, update_phi2; 637169689Skan tree guard_arg, loop_arg; 638169689Skan basic_block new_merge_bb = guard_edge->dest; 639169689Skan edge e = EDGE_SUCC (new_merge_bb, 0); 640169689Skan basic_block update_bb = e->dest; 641169689Skan edge new_exit_e; 642169689Skan tree orig_def, orig_def_new_name; 643169689Skan tree new_name, new_name2; 644169689Skan tree arg; 645169689Skan 646169689Skan /* Create new bb between loop and new_merge_bb. */ 647169689Skan *new_exit_bb = split_edge (loop->single_exit); 648169689Skan add_bb_to_loop (*new_exit_bb, loop->outer); 649169689Skan 650169689Skan new_exit_e = EDGE_SUCC (*new_exit_bb, 0); 651169689Skan 652169689Skan for (update_phi = phi_nodes (update_bb); update_phi; 653169689Skan update_phi = PHI_CHAIN (update_phi)) 654169689Skan { 655169689Skan orig_phi = update_phi; 656169689Skan orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); 657169689Skan /* This loop-closed-phi actually doesn't represent a use 658169689Skan out of the loop - the phi arg is a constant. */ 659169689Skan if (TREE_CODE (orig_def) != SSA_NAME) 660169689Skan continue; 661169689Skan orig_def_new_name = get_current_def (orig_def); 662169689Skan arg = NULL_TREE; 663169689Skan 664169689Skan /** 1. Handle new-merge-point phis **/ 665169689Skan 666169689Skan /* 1.1. Generate new phi node in NEW_MERGE_BB: */ 667169689Skan new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), 668169689Skan new_merge_bb); 669169689Skan 670169689Skan /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge 671169689Skan of LOOP. Set the two PHI args in NEW_PHI for these edges: */ 672169689Skan new_name = orig_def; 673169689Skan new_name2 = NULL_TREE; 674169689Skan if (orig_def_new_name) 675169689Skan { 676169689Skan new_name = orig_def_new_name; 677169689Skan /* Some variables have both loop-entry-phis and loop-exit-phis. 678169689Skan Such variables were given yet newer names by phis placed in 679169689Skan guard_bb by slpeel_update_phi_nodes_for_guard1. I.e: 680169689Skan new_name2 = get_current_def (get_current_def (orig_name)). */ 681169689Skan new_name2 = get_current_def (new_name); 682169689Skan } 683169689Skan 684169689Skan if (is_new_loop) 685169689Skan { 686169689Skan guard_arg = orig_def; 687169689Skan loop_arg = new_name; 688169689Skan } 689169689Skan else 690169689Skan { 691169689Skan guard_arg = new_name; 692169689Skan loop_arg = orig_def; 693169689Skan } 694169689Skan if (new_name2) 695169689Skan guard_arg = new_name2; 696169689Skan 697169689Skan add_phi_arg (new_phi, loop_arg, new_exit_e); 698169689Skan add_phi_arg (new_phi, guard_arg, guard_edge); 699169689Skan 700169689Skan /* 1.3. Update phi in successor block. */ 701169689Skan gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def); 702169689Skan SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi)); 703169689Skan update_phi2 = new_phi; 704169689Skan 705169689Skan 706169689Skan /** 2. Handle loop-closed-ssa-form phis **/ 707169689Skan 708169689Skan /* 2.1. Generate new phi node in NEW_EXIT_BB: */ 709169689Skan new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), 710169689Skan *new_exit_bb); 711169689Skan 712169689Skan /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */ 713169689Skan add_phi_arg (new_phi, loop_arg, loop->single_exit); 714169689Skan 715169689Skan /* 2.3. Update phi in successor of NEW_EXIT_BB: */ 716169689Skan gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg); 717169689Skan SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi)); 718169689Skan 719169689Skan 720169689Skan /** 3. Handle loop-closed-ssa-form phis for first loop **/ 721169689Skan 722169689Skan /* 3.1. Find the relevant names that need an exit-phi in 723169689Skan GUARD_BB, i.e. names for which 724169689Skan slpeel_update_phi_nodes_for_guard1 had not already created a 725169689Skan phi node. This is the case for names that are used outside 726169689Skan the loop (and therefore need an exit phi) but are not updated 727169689Skan across loop iterations (and therefore don't have a 728169689Skan loop-header-phi). 729169689Skan 730169689Skan slpeel_update_phi_nodes_for_guard1 is responsible for 731169689Skan creating loop-exit phis in GUARD_BB for names that have a 732169689Skan loop-header-phi. When such a phi is created we also record 733169689Skan the new name in its current definition. If this new name 734169689Skan exists, then guard_arg was set to this new name (see 1.2 735169689Skan above). Therefore, if guard_arg is not this new name, this 736169689Skan is an indication that an exit-phi in GUARD_BB was not yet 737169689Skan created, so we take care of it here. */ 738169689Skan if (guard_arg == new_name2) 739169689Skan continue; 740169689Skan arg = guard_arg; 741169689Skan 742169689Skan /* 3.2. Generate new phi node in GUARD_BB: */ 743169689Skan new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), 744169689Skan guard_edge->src); 745169689Skan 746169689Skan /* 3.3. GUARD_BB has one incoming edge: */ 747169689Skan gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1); 748169689Skan add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0)); 749169689Skan 750169689Skan /* 3.4. Update phi in successor of GUARD_BB: */ 751169689Skan gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge) 752169689Skan == guard_arg); 753169689Skan SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi)); 754169689Skan } 755169689Skan 756169689Skan set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb))); 757169689Skan} 758169689Skan 759169689Skan 760169689Skan/* Make the LOOP iterate NITERS times. This is done by adding a new IV 761169689Skan that starts at zero, increases by one and its limit is NITERS. 762169689Skan 763169689Skan Assumption: the exit-condition of LOOP is the last stmt in the loop. */ 764169689Skan 765169689Skanvoid 766169689Skanslpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters) 767169689Skan{ 768169689Skan tree indx_before_incr, indx_after_incr, cond_stmt, cond; 769169689Skan tree orig_cond; 770169689Skan edge exit_edge = loop->single_exit; 771169689Skan block_stmt_iterator loop_cond_bsi; 772169689Skan block_stmt_iterator incr_bsi; 773169689Skan bool insert_after; 774169689Skan tree begin_label = tree_block_label (loop->latch); 775169689Skan tree exit_label = tree_block_label (loop->single_exit->dest); 776169689Skan tree init = build_int_cst (TREE_TYPE (niters), 0); 777169689Skan tree step = build_int_cst (TREE_TYPE (niters), 1); 778169689Skan tree then_label; 779169689Skan tree else_label; 780169689Skan LOC loop_loc; 781169689Skan 782169689Skan orig_cond = get_loop_exit_condition (loop); 783169689Skan gcc_assert (orig_cond); 784169689Skan loop_cond_bsi = bsi_for_stmt (orig_cond); 785169689Skan 786169689Skan standard_iv_increment_position (loop, &incr_bsi, &insert_after); 787169689Skan create_iv (init, step, NULL_TREE, loop, 788169689Skan &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr); 789169689Skan 790169689Skan if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */ 791169689Skan { 792169689Skan cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters); 793169689Skan then_label = build1 (GOTO_EXPR, void_type_node, exit_label); 794169689Skan else_label = build1 (GOTO_EXPR, void_type_node, begin_label); 795169689Skan } 796169689Skan else /* 'then' edge loops back. */ 797169689Skan { 798169689Skan cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters); 799169689Skan then_label = build1 (GOTO_EXPR, void_type_node, begin_label); 800169689Skan else_label = build1 (GOTO_EXPR, void_type_node, exit_label); 801169689Skan } 802169689Skan 803169689Skan cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond, 804169689Skan then_label, else_label); 805169689Skan bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT); 806169689Skan 807169689Skan /* Remove old loop exit test: */ 808169689Skan bsi_remove (&loop_cond_bsi, true); 809169689Skan 810169689Skan loop_loc = find_loop_location (loop); 811169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 812169689Skan { 813169689Skan if (loop_loc != UNKNOWN_LOC) 814169689Skan fprintf (dump_file, "\nloop at %s:%d: ", 815169689Skan LOC_FILE (loop_loc), LOC_LINE (loop_loc)); 816169689Skan print_generic_expr (dump_file, cond_stmt, TDF_SLIM); 817169689Skan } 818169689Skan 819169689Skan loop->nb_iterations = niters; 820169689Skan} 821169689Skan 822169689Skan 823169689Skan/* Given LOOP this function generates a new copy of it and puts it 824169689Skan on E which is either the entry or exit of LOOP. */ 825169689Skan 826169689Skanstatic struct loop * 827169689Skanslpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, 828169689Skan edge e) 829169689Skan{ 830169689Skan struct loop *new_loop; 831169689Skan basic_block *new_bbs, *bbs; 832169689Skan bool at_exit; 833169689Skan bool was_imm_dom; 834169689Skan basic_block exit_dest; 835169689Skan tree phi, phi_arg; 836169689Skan 837169689Skan at_exit = (e == loop->single_exit); 838169689Skan if (!at_exit && e != loop_preheader_edge (loop)) 839169689Skan return NULL; 840169689Skan 841169689Skan bbs = get_loop_body (loop); 842169689Skan 843169689Skan /* Check whether duplication is possible. */ 844169689Skan if (!can_copy_bbs_p (bbs, loop->num_nodes)) 845169689Skan { 846169689Skan free (bbs); 847169689Skan return NULL; 848169689Skan } 849169689Skan 850169689Skan /* Generate new loop structure. */ 851169689Skan new_loop = duplicate_loop (loops, loop, loop->outer); 852169689Skan if (!new_loop) 853169689Skan { 854169689Skan free (bbs); 855169689Skan return NULL; 856169689Skan } 857169689Skan 858169689Skan exit_dest = loop->single_exit->dest; 859169689Skan was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, 860169689Skan exit_dest) == loop->header ? 861169689Skan true : false); 862169689Skan 863169689Skan new_bbs = XNEWVEC (basic_block, loop->num_nodes); 864169689Skan 865169689Skan copy_bbs (bbs, loop->num_nodes, new_bbs, 866169689Skan &loop->single_exit, 1, &new_loop->single_exit, NULL, 867169689Skan e->src); 868169689Skan 869169689Skan /* Duplicating phi args at exit bbs as coming 870169689Skan also from exit of duplicated loop. */ 871169689Skan for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi)) 872169689Skan { 873169689Skan phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit); 874169689Skan if (phi_arg) 875169689Skan { 876169689Skan edge new_loop_exit_edge; 877169689Skan 878169689Skan if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch) 879169689Skan new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1); 880169689Skan else 881169689Skan new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0); 882169689Skan 883169689Skan add_phi_arg (phi, phi_arg, new_loop_exit_edge); 884169689Skan } 885169689Skan } 886169689Skan 887169689Skan if (at_exit) /* Add the loop copy at exit. */ 888169689Skan { 889169689Skan redirect_edge_and_branch_force (e, new_loop->header); 890169689Skan set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src); 891169689Skan if (was_imm_dom) 892169689Skan set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header); 893169689Skan } 894169689Skan else /* Add the copy at entry. */ 895169689Skan { 896169689Skan edge new_exit_e; 897169689Skan edge entry_e = loop_preheader_edge (loop); 898169689Skan basic_block preheader = entry_e->src; 899169689Skan 900169689Skan if (!flow_bb_inside_loop_p (new_loop, 901169689Skan EDGE_SUCC (new_loop->header, 0)->dest)) 902169689Skan new_exit_e = EDGE_SUCC (new_loop->header, 0); 903169689Skan else 904169689Skan new_exit_e = EDGE_SUCC (new_loop->header, 1); 905169689Skan 906169689Skan redirect_edge_and_branch_force (new_exit_e, loop->header); 907169689Skan set_immediate_dominator (CDI_DOMINATORS, loop->header, 908169689Skan new_exit_e->src); 909169689Skan 910169689Skan /* We have to add phi args to the loop->header here as coming 911169689Skan from new_exit_e edge. */ 912169689Skan for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi)) 913169689Skan { 914169689Skan phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e); 915169689Skan if (phi_arg) 916169689Skan add_phi_arg (phi, phi_arg, new_exit_e); 917169689Skan } 918169689Skan 919169689Skan redirect_edge_and_branch_force (entry_e, new_loop->header); 920169689Skan set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader); 921169689Skan } 922169689Skan 923169689Skan free (new_bbs); 924169689Skan free (bbs); 925169689Skan 926169689Skan return new_loop; 927169689Skan} 928169689Skan 929169689Skan 930169689Skan/* Given the condition statement COND, put it as the last statement 931169689Skan of GUARD_BB; EXIT_BB is the basic block to skip the loop; 932169689Skan Assumes that this is the single exit of the guarded loop. 933169689Skan Returns the skip edge. */ 934169689Skan 935169689Skanstatic edge 936169689Skanslpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb, 937169689Skan basic_block dom_bb) 938169689Skan{ 939169689Skan block_stmt_iterator bsi; 940169689Skan edge new_e, enter_e; 941169689Skan tree cond_stmt, then_label, else_label; 942169689Skan 943169689Skan enter_e = EDGE_SUCC (guard_bb, 0); 944169689Skan enter_e->flags &= ~EDGE_FALLTHRU; 945169689Skan enter_e->flags |= EDGE_FALSE_VALUE; 946169689Skan bsi = bsi_last (guard_bb); 947169689Skan 948169689Skan then_label = build1 (GOTO_EXPR, void_type_node, 949169689Skan tree_block_label (exit_bb)); 950169689Skan else_label = build1 (GOTO_EXPR, void_type_node, 951169689Skan tree_block_label (enter_e->dest)); 952169689Skan cond_stmt = build3 (COND_EXPR, void_type_node, cond, 953169689Skan then_label, else_label); 954169689Skan bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT); 955169689Skan /* Add new edge to connect guard block to the merge/loop-exit block. */ 956169689Skan new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE); 957169689Skan set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb); 958169689Skan return new_e; 959169689Skan} 960169689Skan 961169689Skan 962169689Skan/* This function verifies that the following restrictions apply to LOOP: 963169689Skan (1) it is innermost 964169689Skan (2) it consists of exactly 2 basic blocks - header, and an empty latch. 965169689Skan (3) it is single entry, single exit 966169689Skan (4) its exit condition is the last stmt in the header 967169689Skan (5) E is the entry/exit edge of LOOP. 968169689Skan */ 969169689Skan 970169689Skanbool 971169689Skanslpeel_can_duplicate_loop_p (struct loop *loop, edge e) 972169689Skan{ 973169689Skan edge exit_e = loop->single_exit; 974169689Skan edge entry_e = loop_preheader_edge (loop); 975169689Skan tree orig_cond = get_loop_exit_condition (loop); 976169689Skan block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src); 977169689Skan 978169689Skan if (need_ssa_update_p ()) 979169689Skan return false; 980169689Skan 981169689Skan if (loop->inner 982169689Skan /* All loops have an outer scope; the only case loop->outer is NULL is for 983169689Skan the function itself. */ 984169689Skan || !loop->outer 985169689Skan || loop->num_nodes != 2 986169689Skan || !empty_block_p (loop->latch) 987169689Skan || !loop->single_exit 988169689Skan /* Verify that new loop exit condition can be trivially modified. */ 989169689Skan || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi)) 990169689Skan || (e != exit_e && e != entry_e)) 991169689Skan return false; 992169689Skan 993169689Skan return true; 994169689Skan} 995169689Skan 996169689Skan#ifdef ENABLE_CHECKING 997169689Skanvoid 998169689Skanslpeel_verify_cfg_after_peeling (struct loop *first_loop, 999169689Skan struct loop *second_loop) 1000169689Skan{ 1001169689Skan basic_block loop1_exit_bb = first_loop->single_exit->dest; 1002169689Skan basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src; 1003169689Skan basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src; 1004169689Skan 1005169689Skan /* A guard that controls whether the second_loop is to be executed or skipped 1006169689Skan is placed in first_loop->exit. first_loopt->exit therefore has two 1007169689Skan successors - one is the preheader of second_loop, and the other is a bb 1008169689Skan after second_loop. 1009169689Skan */ 1010169689Skan gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2); 1011169689Skan 1012169689Skan /* 1. Verify that one of the successors of first_loopt->exit is the preheader 1013169689Skan of second_loop. */ 1014169689Skan 1015169689Skan /* The preheader of new_loop is expected to have two predecessors: 1016169689Skan first_loop->exit and the block that precedes first_loop. */ 1017169689Skan 1018169689Skan gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2 1019169689Skan && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb 1020169689Skan && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb) 1021169689Skan || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb 1022169689Skan && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb))); 1023169689Skan 1024169689Skan /* Verify that the other successor of first_loopt->exit is after the 1025169689Skan second_loop. */ 1026169689Skan /* TODO */ 1027169689Skan} 1028169689Skan#endif 1029169689Skan 1030169689Skan/* Function slpeel_tree_peel_loop_to_edge. 1031169689Skan 1032169689Skan Peel the first (last) iterations of LOOP into a new prolog (epilog) loop 1033169689Skan that is placed on the entry (exit) edge E of LOOP. After this transformation 1034169689Skan we have two loops one after the other - first-loop iterates FIRST_NITERS 1035169689Skan times, and second-loop iterates the remainder NITERS - FIRST_NITERS times. 1036169689Skan 1037169689Skan Input: 1038169689Skan - LOOP: the loop to be peeled. 1039169689Skan - E: the exit or entry edge of LOOP. 1040169689Skan If it is the entry edge, we peel the first iterations of LOOP. In this 1041169689Skan case first-loop is LOOP, and second-loop is the newly created loop. 1042169689Skan If it is the exit edge, we peel the last iterations of LOOP. In this 1043169689Skan case, first-loop is the newly created loop, and second-loop is LOOP. 1044169689Skan - NITERS: the number of iterations that LOOP iterates. 1045169689Skan - FIRST_NITERS: the number of iterations that the first-loop should iterate. 1046169689Skan - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible 1047169689Skan for updating the loop bound of the first-loop to FIRST_NITERS. If it 1048169689Skan is false, the caller of this function may want to take care of this 1049169689Skan (this can be useful if we don't want new stmts added to first-loop). 1050169689Skan 1051169689Skan Output: 1052169689Skan The function returns a pointer to the new loop-copy, or NULL if it failed 1053169689Skan to perform the transformation. 1054169689Skan 1055169689Skan The function generates two if-then-else guards: one before the first loop, 1056169689Skan and the other before the second loop: 1057169689Skan The first guard is: 1058169689Skan if (FIRST_NITERS == 0) then skip the first loop, 1059169689Skan and go directly to the second loop. 1060169689Skan The second guard is: 1061169689Skan if (FIRST_NITERS == NITERS) then skip the second loop. 1062169689Skan 1063169689Skan FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p). 1064169689Skan FORNOW the resulting code will not be in loop-closed-ssa form. 1065169689Skan*/ 1066169689Skan 1067169689Skanstruct loop* 1068169689Skanslpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, 1069169689Skan edge e, tree first_niters, 1070169689Skan tree niters, bool update_first_loop_count) 1071169689Skan{ 1072169689Skan struct loop *new_loop = NULL, *first_loop, *second_loop; 1073169689Skan edge skip_e; 1074169689Skan tree pre_condition; 1075169689Skan bitmap definitions; 1076169689Skan basic_block bb_before_second_loop, bb_after_second_loop; 1077169689Skan basic_block bb_before_first_loop; 1078169689Skan basic_block bb_between_loops; 1079169689Skan basic_block new_exit_bb; 1080169689Skan edge exit_e = loop->single_exit; 1081169689Skan LOC loop_loc; 1082169689Skan 1083169689Skan if (!slpeel_can_duplicate_loop_p (loop, e)) 1084169689Skan return NULL; 1085169689Skan 1086169689Skan /* We have to initialize cfg_hooks. Then, when calling 1087169689Skan cfg_hooks->split_edge, the function tree_split_edge 1088169689Skan is actually called and, when calling cfg_hooks->duplicate_block, 1089169689Skan the function tree_duplicate_bb is called. */ 1090169689Skan tree_register_cfg_hooks (); 1091169689Skan 1092169689Skan 1093169689Skan /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP). 1094169689Skan Resulting CFG would be: 1095169689Skan 1096169689Skan first_loop: 1097169689Skan do { 1098169689Skan } while ... 1099169689Skan 1100169689Skan second_loop: 1101169689Skan do { 1102169689Skan } while ... 1103169689Skan 1104169689Skan orig_exit_bb: 1105169689Skan */ 1106169689Skan 1107169689Skan if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e))) 1108169689Skan { 1109169689Skan loop_loc = find_loop_location (loop); 1110169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 1111169689Skan { 1112169689Skan if (loop_loc != UNKNOWN_LOC) 1113169689Skan fprintf (dump_file, "\n%s:%d: note: ", 1114169689Skan LOC_FILE (loop_loc), LOC_LINE (loop_loc)); 1115169689Skan fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n"); 1116169689Skan } 1117169689Skan return NULL; 1118169689Skan } 1119169689Skan 1120169689Skan if (e == exit_e) 1121169689Skan { 1122169689Skan /* NEW_LOOP was placed after LOOP. */ 1123169689Skan first_loop = loop; 1124169689Skan second_loop = new_loop; 1125169689Skan } 1126169689Skan else 1127169689Skan { 1128169689Skan /* NEW_LOOP was placed before LOOP. */ 1129169689Skan first_loop = new_loop; 1130169689Skan second_loop = loop; 1131169689Skan } 1132169689Skan 1133169689Skan definitions = ssa_names_to_replace (); 1134169689Skan slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e); 1135169689Skan rename_variables_in_loop (new_loop); 1136169689Skan 1137169689Skan 1138169689Skan /* 2. Add the guard that controls whether the first loop is executed. 1139169689Skan Resulting CFG would be: 1140169689Skan 1141169689Skan bb_before_first_loop: 1142169689Skan if (FIRST_NITERS == 0) GOTO bb_before_second_loop 1143169689Skan GOTO first-loop 1144169689Skan 1145169689Skan first_loop: 1146169689Skan do { 1147169689Skan } while ... 1148169689Skan 1149169689Skan bb_before_second_loop: 1150169689Skan 1151169689Skan second_loop: 1152169689Skan do { 1153169689Skan } while ... 1154169689Skan 1155169689Skan orig_exit_bb: 1156169689Skan */ 1157169689Skan 1158169689Skan bb_before_first_loop = split_edge (loop_preheader_edge (first_loop)); 1159169689Skan add_bb_to_loop (bb_before_first_loop, first_loop->outer); 1160169689Skan bb_before_second_loop = split_edge (first_loop->single_exit); 1161169689Skan add_bb_to_loop (bb_before_second_loop, first_loop->outer); 1162169689Skan 1163169689Skan pre_condition = 1164169689Skan fold_build2 (LE_EXPR, boolean_type_node, first_niters, 1165169689Skan build_int_cst (TREE_TYPE (first_niters), 0)); 1166169689Skan skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition, 1167169689Skan bb_before_second_loop, bb_before_first_loop); 1168169689Skan slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop, 1169169689Skan first_loop == new_loop, 1170169689Skan &new_exit_bb, &definitions); 1171169689Skan 1172169689Skan 1173169689Skan /* 3. Add the guard that controls whether the second loop is executed. 1174169689Skan Resulting CFG would be: 1175169689Skan 1176169689Skan bb_before_first_loop: 1177169689Skan if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop) 1178169689Skan GOTO first-loop 1179169689Skan 1180169689Skan first_loop: 1181169689Skan do { 1182169689Skan } while ... 1183169689Skan 1184169689Skan bb_between_loops: 1185169689Skan if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop) 1186169689Skan GOTO bb_before_second_loop 1187169689Skan 1188169689Skan bb_before_second_loop: 1189169689Skan 1190169689Skan second_loop: 1191169689Skan do { 1192169689Skan } while ... 1193169689Skan 1194169689Skan bb_after_second_loop: 1195169689Skan 1196169689Skan orig_exit_bb: 1197169689Skan */ 1198169689Skan 1199169689Skan bb_between_loops = new_exit_bb; 1200169689Skan bb_after_second_loop = split_edge (second_loop->single_exit); 1201169689Skan add_bb_to_loop (bb_after_second_loop, second_loop->outer); 1202169689Skan 1203169689Skan pre_condition = 1204169689Skan fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters); 1205169689Skan skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, 1206169689Skan bb_after_second_loop, bb_before_first_loop); 1207169689Skan slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop, 1208169689Skan second_loop == new_loop, &new_exit_bb); 1209169689Skan 1210169689Skan /* 4. Make first-loop iterate FIRST_NITERS times, if requested. 1211169689Skan */ 1212169689Skan if (update_first_loop_count) 1213169689Skan slpeel_make_loop_iterate_ntimes (first_loop, first_niters); 1214169689Skan 1215169689Skan BITMAP_FREE (definitions); 1216169689Skan delete_update_ssa (); 1217169689Skan 1218169689Skan return new_loop; 1219169689Skan} 1220169689Skan 1221169689Skan/* Function vect_get_loop_location. 1222169689Skan 1223169689Skan Extract the location of the loop in the source code. 1224169689Skan If the loop is not well formed for vectorization, an estimated 1225169689Skan location is calculated. 1226169689Skan Return the loop location if succeed and NULL if not. */ 1227169689Skan 1228169689SkanLOC 1229169689Skanfind_loop_location (struct loop *loop) 1230169689Skan{ 1231169689Skan tree node = NULL_TREE; 1232169689Skan basic_block bb; 1233169689Skan block_stmt_iterator si; 1234169689Skan 1235169689Skan if (!loop) 1236169689Skan return UNKNOWN_LOC; 1237169689Skan 1238169689Skan node = get_loop_exit_condition (loop); 1239169689Skan 1240169689Skan if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node) 1241169689Skan && EXPR_FILENAME (node) && EXPR_LINENO (node)) 1242169689Skan return EXPR_LOC (node); 1243169689Skan 1244169689Skan /* If we got here the loop is probably not "well formed", 1245169689Skan try to estimate the loop location */ 1246169689Skan 1247169689Skan if (!loop->header) 1248169689Skan return UNKNOWN_LOC; 1249169689Skan 1250169689Skan bb = loop->header; 1251169689Skan 1252169689Skan for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) 1253169689Skan { 1254169689Skan node = bsi_stmt (si); 1255169689Skan if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)) 1256169689Skan return EXPR_LOC (node); 1257169689Skan } 1258169689Skan 1259169689Skan return UNKNOWN_LOC; 1260169689Skan} 1261169689Skan 1262169689Skan 1263169689Skan/************************************************************************* 1264169689Skan Vectorization Debug Information. 1265169689Skan *************************************************************************/ 1266169689Skan 1267169689Skan/* Function vect_set_verbosity_level. 1268169689Skan 1269169689Skan Called from toplev.c upon detection of the 1270169689Skan -ftree-vectorizer-verbose=N option. */ 1271169689Skan 1272169689Skanvoid 1273169689Skanvect_set_verbosity_level (const char *val) 1274169689Skan{ 1275169689Skan unsigned int vl; 1276169689Skan 1277169689Skan vl = atoi (val); 1278169689Skan if (vl < MAX_VERBOSITY_LEVEL) 1279169689Skan vect_verbosity_level = vl; 1280169689Skan else 1281169689Skan vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1; 1282169689Skan} 1283169689Skan 1284169689Skan 1285169689Skan/* Function vect_set_dump_settings. 1286169689Skan 1287169689Skan Fix the verbosity level of the vectorizer if the 1288169689Skan requested level was not set explicitly using the flag 1289169689Skan -ftree-vectorizer-verbose=N. 1290169689Skan Decide where to print the debugging information (dump_file/stderr). 1291169689Skan If the user defined the verbosity level, but there is no dump file, 1292169689Skan print to stderr, otherwise print to the dump file. */ 1293169689Skan 1294169689Skanstatic void 1295169689Skanvect_set_dump_settings (void) 1296169689Skan{ 1297169689Skan vect_dump = dump_file; 1298169689Skan 1299169689Skan /* Check if the verbosity level was defined by the user: */ 1300169689Skan if (vect_verbosity_level != MAX_VERBOSITY_LEVEL) 1301169689Skan { 1302169689Skan /* If there is no dump file, print to stderr. */ 1303169689Skan if (!dump_file) 1304169689Skan vect_dump = stderr; 1305169689Skan return; 1306169689Skan } 1307169689Skan 1308169689Skan /* User didn't specify verbosity level: */ 1309169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 1310169689Skan vect_verbosity_level = REPORT_DETAILS; 1311169689Skan else if (dump_file && (dump_flags & TDF_STATS)) 1312169689Skan vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS; 1313169689Skan else 1314169689Skan vect_verbosity_level = REPORT_NONE; 1315169689Skan 1316169689Skan gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE); 1317169689Skan} 1318169689Skan 1319169689Skan 1320169689Skan/* Function debug_loop_details. 1321169689Skan 1322169689Skan For vectorization debug dumps. */ 1323169689Skan 1324169689Skanbool 1325169689Skanvect_print_dump_info (enum verbosity_levels vl) 1326169689Skan{ 1327169689Skan if (vl > vect_verbosity_level) 1328169689Skan return false; 1329169689Skan 1330169689Skan if (!current_function_decl || !vect_dump) 1331169689Skan return false; 1332169689Skan 1333169689Skan if (vect_loop_location == UNKNOWN_LOC) 1334169689Skan fprintf (vect_dump, "\n%s:%d: note: ", 1335169689Skan DECL_SOURCE_FILE (current_function_decl), 1336169689Skan DECL_SOURCE_LINE (current_function_decl)); 1337169689Skan else 1338169689Skan fprintf (vect_dump, "\n%s:%d: note: ", 1339169689Skan LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location)); 1340169689Skan 1341169689Skan return true; 1342169689Skan} 1343169689Skan 1344169689Skan 1345169689Skan/************************************************************************* 1346169689Skan Vectorization Utilities. 1347169689Skan *************************************************************************/ 1348169689Skan 1349169689Skan/* Function new_stmt_vec_info. 1350169689Skan 1351169689Skan Create and initialize a new stmt_vec_info struct for STMT. */ 1352169689Skan 1353169689Skanstmt_vec_info 1354169689Skannew_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo) 1355169689Skan{ 1356169689Skan stmt_vec_info res; 1357169689Skan res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info)); 1358169689Skan 1359169689Skan STMT_VINFO_TYPE (res) = undef_vec_info_type; 1360169689Skan STMT_VINFO_STMT (res) = stmt; 1361169689Skan STMT_VINFO_LOOP_VINFO (res) = loop_vinfo; 1362169689Skan STMT_VINFO_RELEVANT_P (res) = 0; 1363169689Skan STMT_VINFO_LIVE_P (res) = 0; 1364169689Skan STMT_VINFO_VECTYPE (res) = NULL; 1365169689Skan STMT_VINFO_VEC_STMT (res) = NULL; 1366169689Skan STMT_VINFO_IN_PATTERN_P (res) = false; 1367169689Skan STMT_VINFO_RELATED_STMT (res) = NULL; 1368169689Skan STMT_VINFO_DATA_REF (res) = NULL; 1369169689Skan if (TREE_CODE (stmt) == PHI_NODE) 1370169689Skan STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type; 1371169689Skan else 1372169689Skan STMT_VINFO_DEF_TYPE (res) = vect_loop_def; 1373169689Skan STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5); 1374169689Skan 1375169689Skan return res; 1376169689Skan} 1377169689Skan 1378169689Skan 1379169689Skan/* Function new_loop_vec_info. 1380169689Skan 1381169689Skan Create and initialize a new loop_vec_info struct for LOOP, as well as 1382169689Skan stmt_vec_info structs for all the stmts in LOOP. */ 1383169689Skan 1384169689Skanloop_vec_info 1385169689Skannew_loop_vec_info (struct loop *loop) 1386169689Skan{ 1387169689Skan loop_vec_info res; 1388169689Skan basic_block *bbs; 1389169689Skan block_stmt_iterator si; 1390169689Skan unsigned int i; 1391169689Skan 1392169689Skan res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info)); 1393169689Skan 1394169689Skan bbs = get_loop_body (loop); 1395169689Skan 1396169689Skan /* Create stmt_info for all stmts in the loop. */ 1397169689Skan for (i = 0; i < loop->num_nodes; i++) 1398169689Skan { 1399169689Skan basic_block bb = bbs[i]; 1400169689Skan tree phi; 1401169689Skan 1402169689Skan for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) 1403169689Skan { 1404169689Skan stmt_ann_t ann = get_stmt_ann (phi); 1405169689Skan set_stmt_info (ann, new_stmt_vec_info (phi, res)); 1406169689Skan } 1407169689Skan 1408169689Skan for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) 1409169689Skan { 1410169689Skan tree stmt = bsi_stmt (si); 1411169689Skan stmt_ann_t ann; 1412169689Skan 1413169689Skan ann = stmt_ann (stmt); 1414169689Skan set_stmt_info (ann, new_stmt_vec_info (stmt, res)); 1415169689Skan } 1416169689Skan } 1417169689Skan 1418169689Skan LOOP_VINFO_LOOP (res) = loop; 1419169689Skan LOOP_VINFO_BBS (res) = bbs; 1420169689Skan LOOP_VINFO_EXIT_COND (res) = NULL; 1421169689Skan LOOP_VINFO_NITERS (res) = NULL; 1422169689Skan LOOP_VINFO_VECTORIZABLE_P (res) = 0; 1423169689Skan LOOP_PEELING_FOR_ALIGNMENT (res) = 0; 1424169689Skan LOOP_VINFO_VECT_FACTOR (res) = 0; 1425169689Skan LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10); 1426169689Skan LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10); 1427169689Skan LOOP_VINFO_UNALIGNED_DR (res) = NULL; 1428169689Skan LOOP_VINFO_MAY_MISALIGN_STMTS (res) 1429169689Skan = VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS)); 1430169689Skan 1431169689Skan return res; 1432169689Skan} 1433169689Skan 1434169689Skan 1435169689Skan/* Function destroy_loop_vec_info. 1436169689Skan 1437169689Skan Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the 1438169689Skan stmts in the loop. */ 1439169689Skan 1440169689Skanvoid 1441169689Skandestroy_loop_vec_info (loop_vec_info loop_vinfo) 1442169689Skan{ 1443169689Skan struct loop *loop; 1444169689Skan basic_block *bbs; 1445169689Skan int nbbs; 1446169689Skan block_stmt_iterator si; 1447169689Skan int j; 1448169689Skan 1449169689Skan if (!loop_vinfo) 1450169689Skan return; 1451169689Skan 1452169689Skan loop = LOOP_VINFO_LOOP (loop_vinfo); 1453169689Skan 1454169689Skan bbs = LOOP_VINFO_BBS (loop_vinfo); 1455169689Skan nbbs = loop->num_nodes; 1456169689Skan 1457169689Skan for (j = 0; j < nbbs; j++) 1458169689Skan { 1459169689Skan basic_block bb = bbs[j]; 1460169689Skan tree phi; 1461169689Skan stmt_vec_info stmt_info; 1462169689Skan 1463169689Skan for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) 1464169689Skan { 1465169689Skan stmt_ann_t ann = stmt_ann (phi); 1466169689Skan 1467169689Skan stmt_info = vinfo_for_stmt (phi); 1468169689Skan free (stmt_info); 1469169689Skan set_stmt_info (ann, NULL); 1470169689Skan } 1471169689Skan 1472169689Skan for (si = bsi_start (bb); !bsi_end_p (si); ) 1473169689Skan { 1474169689Skan tree stmt = bsi_stmt (si); 1475169689Skan stmt_ann_t ann = stmt_ann (stmt); 1476169689Skan stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1477169689Skan 1478169689Skan if (stmt_info) 1479169689Skan { 1480169689Skan /* Check if this is a "pattern stmt" (introduced by the 1481169689Skan vectorizer during the pattern recognition pass). */ 1482169689Skan bool remove_stmt_p = false; 1483169689Skan tree orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 1484169689Skan if (orig_stmt) 1485169689Skan { 1486169689Skan stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); 1487169689Skan if (orig_stmt_info 1488169689Skan && STMT_VINFO_IN_PATTERN_P (orig_stmt_info)) 1489169689Skan remove_stmt_p = true; 1490169689Skan } 1491169689Skan 1492169689Skan /* Free stmt_vec_info. */ 1493169689Skan VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info)); 1494169689Skan free (stmt_info); 1495169689Skan set_stmt_info (ann, NULL); 1496169689Skan 1497169689Skan /* Remove dead "pattern stmts". */ 1498169689Skan if (remove_stmt_p) 1499169689Skan bsi_remove (&si, true); 1500169689Skan } 1501169689Skan bsi_next (&si); 1502169689Skan } 1503169689Skan } 1504169689Skan 1505169689Skan free (LOOP_VINFO_BBS (loop_vinfo)); 1506169689Skan free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo)); 1507169689Skan free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo)); 1508169689Skan VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)); 1509169689Skan 1510169689Skan free (loop_vinfo); 1511169689Skan} 1512169689Skan 1513169689Skan 1514169689Skan/* Function vect_force_dr_alignment_p. 1515169689Skan 1516169689Skan Returns whether the alignment of a DECL can be forced to be aligned 1517169689Skan on ALIGNMENT bit boundary. */ 1518169689Skan 1519169689Skanbool 1520169689Skanvect_can_force_dr_alignment_p (tree decl, unsigned int alignment) 1521169689Skan{ 1522169689Skan if (TREE_CODE (decl) != VAR_DECL) 1523169689Skan return false; 1524169689Skan 1525169689Skan if (DECL_EXTERNAL (decl)) 1526169689Skan return false; 1527169689Skan 1528169689Skan if (TREE_ASM_WRITTEN (decl)) 1529169689Skan return false; 1530169689Skan 1531169689Skan if (TREE_STATIC (decl)) 1532169689Skan return (alignment <= MAX_OFILE_ALIGNMENT); 1533169689Skan else 1534169689Skan /* This is not 100% correct. The absolute correct stack alignment 1535169689Skan is STACK_BOUNDARY. We're supposed to hope, but not assume, that 1536169689Skan PREFERRED_STACK_BOUNDARY is honored by all translation units. 1537169689Skan However, until someone implements forced stack alignment, SSE 1538169689Skan isn't really usable without this. */ 1539169689Skan return (alignment <= PREFERRED_STACK_BOUNDARY); 1540169689Skan} 1541169689Skan 1542169689Skan 1543169689Skan/* Function get_vectype_for_scalar_type. 1544169689Skan 1545169689Skan Returns the vector type corresponding to SCALAR_TYPE as supported 1546169689Skan by the target. */ 1547169689Skan 1548169689Skantree 1549169689Skanget_vectype_for_scalar_type (tree scalar_type) 1550169689Skan{ 1551169689Skan enum machine_mode inner_mode = TYPE_MODE (scalar_type); 1552169689Skan int nbytes = GET_MODE_SIZE (inner_mode); 1553169689Skan int nunits; 1554169689Skan tree vectype; 1555169689Skan 1556169689Skan if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD) 1557169689Skan return NULL_TREE; 1558169689Skan 1559169689Skan /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD) 1560169689Skan is expected. */ 1561169689Skan nunits = UNITS_PER_SIMD_WORD / nbytes; 1562169689Skan 1563169689Skan vectype = build_vector_type (scalar_type, nunits); 1564169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1565169689Skan { 1566169689Skan fprintf (vect_dump, "get vectype with %d units of type ", nunits); 1567169689Skan print_generic_expr (vect_dump, scalar_type, TDF_SLIM); 1568169689Skan } 1569169689Skan 1570169689Skan if (!vectype) 1571169689Skan return NULL_TREE; 1572169689Skan 1573169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1574169689Skan { 1575169689Skan fprintf (vect_dump, "vectype: "); 1576169689Skan print_generic_expr (vect_dump, vectype, TDF_SLIM); 1577169689Skan } 1578169689Skan 1579169689Skan if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 1580169689Skan && !INTEGRAL_MODE_P (TYPE_MODE (vectype))) 1581169689Skan { 1582169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1583169689Skan fprintf (vect_dump, "mode not supported by target."); 1584169689Skan return NULL_TREE; 1585169689Skan } 1586169689Skan 1587169689Skan return vectype; 1588169689Skan} 1589169689Skan 1590169689Skan 1591169689Skan/* Function vect_supportable_dr_alignment 1592169689Skan 1593169689Skan Return whether the data reference DR is supported with respect to its 1594169689Skan alignment. */ 1595169689Skan 1596169689Skanenum dr_alignment_support 1597169689Skanvect_supportable_dr_alignment (struct data_reference *dr) 1598169689Skan{ 1599169689Skan tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))); 1600169689Skan enum machine_mode mode = (int) TYPE_MODE (vectype); 1601169689Skan 1602169689Skan if (aligned_access_p (dr)) 1603169689Skan return dr_aligned; 1604169689Skan 1605169689Skan /* Possibly unaligned access. */ 1606169689Skan 1607169689Skan if (DR_IS_READ (dr)) 1608169689Skan { 1609169689Skan if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing 1610169689Skan && (!targetm.vectorize.builtin_mask_for_load 1611169689Skan || targetm.vectorize.builtin_mask_for_load ())) 1612169689Skan return dr_unaligned_software_pipeline; 1613169689Skan 1614169689Skan if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing) 1615169689Skan /* Can't software pipeline the loads, but can at least do them. */ 1616169689Skan return dr_unaligned_supported; 1617169689Skan } 1618169689Skan 1619169689Skan /* Unsupported. */ 1620169689Skan return dr_unaligned_unsupported; 1621169689Skan} 1622169689Skan 1623169689Skan 1624169689Skan/* Function vect_is_simple_use. 1625169689Skan 1626169689Skan Input: 1627169689Skan LOOP - the loop that is being vectorized. 1628169689Skan OPERAND - operand of a stmt in LOOP. 1629169689Skan DEF - the defining stmt in case OPERAND is an SSA_NAME. 1630169689Skan 1631169689Skan Returns whether a stmt with OPERAND can be vectorized. 1632169689Skan Supportable operands are constants, loop invariants, and operands that are 1633169689Skan defined by the current iteration of the loop. Unsupportable operands are 1634169689Skan those that are defined by a previous iteration of the loop (as is the case 1635169689Skan in reduction/induction computations). */ 1636169689Skan 1637169689Skanbool 1638169689Skanvect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt, 1639169689Skan tree *def, enum vect_def_type *dt) 1640169689Skan{ 1641169689Skan basic_block bb; 1642169689Skan stmt_vec_info stmt_vinfo; 1643169689Skan struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1644169689Skan 1645169689Skan *def_stmt = NULL_TREE; 1646169689Skan *def = NULL_TREE; 1647169689Skan 1648169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1649169689Skan { 1650169689Skan fprintf (vect_dump, "vect_is_simple_use: operand "); 1651169689Skan print_generic_expr (vect_dump, operand, TDF_SLIM); 1652169689Skan } 1653169689Skan 1654169689Skan if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST) 1655169689Skan { 1656169689Skan *dt = vect_constant_def; 1657169689Skan return true; 1658169689Skan } 1659169689Skan 1660169689Skan if (TREE_CODE (operand) != SSA_NAME) 1661169689Skan { 1662169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1663169689Skan fprintf (vect_dump, "not ssa-name."); 1664169689Skan return false; 1665169689Skan } 1666169689Skan 1667169689Skan *def_stmt = SSA_NAME_DEF_STMT (operand); 1668169689Skan if (*def_stmt == NULL_TREE ) 1669169689Skan { 1670169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1671169689Skan fprintf (vect_dump, "no def_stmt."); 1672169689Skan return false; 1673169689Skan } 1674169689Skan 1675169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1676169689Skan { 1677169689Skan fprintf (vect_dump, "def_stmt: "); 1678169689Skan print_generic_expr (vect_dump, *def_stmt, TDF_SLIM); 1679169689Skan } 1680169689Skan 1681169689Skan /* empty stmt is expected only in case of a function argument. 1682169689Skan (Otherwise - we expect a phi_node or a modify_expr). */ 1683169689Skan if (IS_EMPTY_STMT (*def_stmt)) 1684169689Skan { 1685169689Skan tree arg = TREE_OPERAND (*def_stmt, 0); 1686169689Skan if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST) 1687169689Skan { 1688169689Skan *def = operand; 1689169689Skan *dt = vect_invariant_def; 1690169689Skan return true; 1691169689Skan } 1692169689Skan 1693169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1694169689Skan fprintf (vect_dump, "Unexpected empty stmt."); 1695169689Skan return false; 1696169689Skan } 1697169689Skan 1698169689Skan bb = bb_for_stmt (*def_stmt); 1699169689Skan if (!flow_bb_inside_loop_p (loop, bb)) 1700169689Skan *dt = vect_invariant_def; 1701169689Skan else 1702169689Skan { 1703169689Skan stmt_vinfo = vinfo_for_stmt (*def_stmt); 1704169689Skan *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo); 1705169689Skan } 1706169689Skan 1707169689Skan if (*dt == vect_unknown_def_type) 1708169689Skan { 1709169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1710169689Skan fprintf (vect_dump, "Unsupported pattern."); 1711169689Skan return false; 1712169689Skan } 1713169689Skan 1714169689Skan /* stmts inside the loop that have been identified as performing 1715169689Skan a reduction operation cannot have uses in the loop. */ 1716169689Skan if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE) 1717169689Skan { 1718169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1719169689Skan fprintf (vect_dump, "reduction used in loop."); 1720169689Skan return false; 1721169689Skan } 1722169689Skan 1723169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1724169689Skan fprintf (vect_dump, "type of def: %d.",*dt); 1725169689Skan 1726169689Skan switch (TREE_CODE (*def_stmt)) 1727169689Skan { 1728169689Skan case PHI_NODE: 1729169689Skan *def = PHI_RESULT (*def_stmt); 1730169689Skan gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def 1731169689Skan || *dt == vect_invariant_def); 1732169689Skan break; 1733169689Skan 1734169689Skan case MODIFY_EXPR: 1735169689Skan *def = TREE_OPERAND (*def_stmt, 0); 1736169689Skan gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def); 1737169689Skan break; 1738169689Skan 1739169689Skan default: 1740169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1741169689Skan fprintf (vect_dump, "unsupported defining stmt: "); 1742169689Skan return false; 1743169689Skan } 1744169689Skan 1745169689Skan if (*dt == vect_induction_def) 1746169689Skan { 1747169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1748169689Skan fprintf (vect_dump, "induction not supported."); 1749169689Skan return false; 1750169689Skan } 1751169689Skan 1752169689Skan return true; 1753169689Skan} 1754169689Skan 1755169689Skan 1756169689Skan/* Function reduction_code_for_scalar_code 1757169689Skan 1758169689Skan Input: 1759169689Skan CODE - tree_code of a reduction operations. 1760169689Skan 1761169689Skan Output: 1762169689Skan REDUC_CODE - the corresponding tree-code to be used to reduce the 1763169689Skan vector of partial results into a single scalar result (which 1764169689Skan will also reside in a vector). 1765169689Skan 1766169689Skan Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise. */ 1767169689Skan 1768169689Skanbool 1769169689Skanreduction_code_for_scalar_code (enum tree_code code, 1770169689Skan enum tree_code *reduc_code) 1771169689Skan{ 1772169689Skan switch (code) 1773169689Skan { 1774169689Skan case MAX_EXPR: 1775169689Skan *reduc_code = REDUC_MAX_EXPR; 1776169689Skan return true; 1777169689Skan 1778169689Skan case MIN_EXPR: 1779169689Skan *reduc_code = REDUC_MIN_EXPR; 1780169689Skan return true; 1781169689Skan 1782169689Skan case PLUS_EXPR: 1783169689Skan *reduc_code = REDUC_PLUS_EXPR; 1784169689Skan return true; 1785169689Skan 1786169689Skan default: 1787169689Skan return false; 1788169689Skan } 1789169689Skan} 1790169689Skan 1791169689Skan 1792169689Skan/* Function vect_is_simple_reduction 1793169689Skan 1794169689Skan Detect a cross-iteration def-use cucle that represents a simple 1795169689Skan reduction computation. We look for the following pattern: 1796169689Skan 1797169689Skan loop_header: 1798169689Skan a1 = phi < a0, a2 > 1799169689Skan a3 = ... 1800169689Skan a2 = operation (a3, a1) 1801169689Skan 1802169689Skan such that: 1803169689Skan 1. operation is commutative and associative and it is safe to 1804169689Skan change the order of the computation. 1805169689Skan 2. no uses for a2 in the loop (a2 is used out of the loop) 1806169689Skan 3. no uses of a1 in the loop besides the reduction operation. 1807169689Skan 1808169689Skan Condition 1 is tested here. 1809169689Skan Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. */ 1810169689Skan 1811169689Skantree 1812169689Skanvect_is_simple_reduction (struct loop *loop, tree phi) 1813169689Skan{ 1814169689Skan edge latch_e = loop_latch_edge (loop); 1815169689Skan tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); 1816169689Skan tree def_stmt, def1, def2; 1817169689Skan enum tree_code code; 1818169689Skan int op_type; 1819169689Skan tree operation, op1, op2; 1820169689Skan tree type; 1821169689Skan 1822169689Skan if (TREE_CODE (loop_arg) != SSA_NAME) 1823169689Skan { 1824169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1825169689Skan { 1826169689Skan fprintf (vect_dump, "reduction: not ssa_name: "); 1827169689Skan print_generic_expr (vect_dump, loop_arg, TDF_SLIM); 1828169689Skan } 1829169689Skan return NULL_TREE; 1830169689Skan } 1831169689Skan 1832169689Skan def_stmt = SSA_NAME_DEF_STMT (loop_arg); 1833169689Skan if (!def_stmt) 1834169689Skan { 1835169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1836169689Skan fprintf (vect_dump, "reduction: no def_stmt."); 1837169689Skan return NULL_TREE; 1838169689Skan } 1839169689Skan 1840169689Skan if (TREE_CODE (def_stmt) != MODIFY_EXPR) 1841169689Skan { 1842169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1843169689Skan { 1844169689Skan print_generic_expr (vect_dump, def_stmt, TDF_SLIM); 1845169689Skan } 1846169689Skan return NULL_TREE; 1847169689Skan } 1848169689Skan 1849169689Skan operation = TREE_OPERAND (def_stmt, 1); 1850169689Skan code = TREE_CODE (operation); 1851169689Skan if (!commutative_tree_code (code) || !associative_tree_code (code)) 1852169689Skan { 1853169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1854169689Skan { 1855169689Skan fprintf (vect_dump, "reduction: not commutative/associative: "); 1856169689Skan print_generic_expr (vect_dump, operation, TDF_SLIM); 1857169689Skan } 1858169689Skan return NULL_TREE; 1859169689Skan } 1860169689Skan 1861169689Skan op_type = TREE_CODE_LENGTH (code); 1862169689Skan if (op_type != binary_op) 1863169689Skan { 1864169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1865169689Skan { 1866169689Skan fprintf (vect_dump, "reduction: not binary operation: "); 1867169689Skan print_generic_expr (vect_dump, operation, TDF_SLIM); 1868169689Skan } 1869169689Skan return NULL_TREE; 1870169689Skan } 1871169689Skan 1872169689Skan op1 = TREE_OPERAND (operation, 0); 1873169689Skan op2 = TREE_OPERAND (operation, 1); 1874169689Skan if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME) 1875169689Skan { 1876169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1877169689Skan { 1878169689Skan fprintf (vect_dump, "reduction: uses not ssa_names: "); 1879169689Skan print_generic_expr (vect_dump, operation, TDF_SLIM); 1880169689Skan } 1881169689Skan return NULL_TREE; 1882169689Skan } 1883169689Skan 1884169689Skan /* Check that it's ok to change the order of the computation. */ 1885169689Skan type = TREE_TYPE (operation); 1886169689Skan if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1)) 1887169689Skan || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2))) 1888169689Skan { 1889169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1890169689Skan { 1891169689Skan fprintf (vect_dump, "reduction: multiple types: operation type: "); 1892169689Skan print_generic_expr (vect_dump, type, TDF_SLIM); 1893169689Skan fprintf (vect_dump, ", operands types: "); 1894169689Skan print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM); 1895169689Skan fprintf (vect_dump, ","); 1896169689Skan print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM); 1897169689Skan } 1898169689Skan return NULL_TREE; 1899169689Skan } 1900169689Skan 1901169689Skan /* CHECKME: check for !flag_finite_math_only too? */ 1902169689Skan if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations) 1903169689Skan { 1904169689Skan /* Changing the order of operations changes the semantics. */ 1905169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1906169689Skan { 1907169689Skan fprintf (vect_dump, "reduction: unsafe fp math optimization: "); 1908169689Skan print_generic_expr (vect_dump, operation, TDF_SLIM); 1909169689Skan } 1910169689Skan return NULL_TREE; 1911169689Skan } 1912169689Skan else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)) 1913169689Skan { 1914169689Skan /* Changing the order of operations changes the semantics. */ 1915169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1916169689Skan { 1917169689Skan fprintf (vect_dump, "reduction: unsafe int math optimization: "); 1918169689Skan print_generic_expr (vect_dump, operation, TDF_SLIM); 1919169689Skan } 1920169689Skan return NULL_TREE; 1921169689Skan } 1922169689Skan 1923169689Skan /* reduction is safe. we're dealing with one of the following: 1924169689Skan 1) integer arithmetic and no trapv 1925169689Skan 2) floating point arithmetic, and special flags permit this optimization. 1926169689Skan */ 1927169689Skan def1 = SSA_NAME_DEF_STMT (op1); 1928169689Skan def2 = SSA_NAME_DEF_STMT (op2); 1929169689Skan if (!def1 || !def2) 1930169689Skan { 1931169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1932169689Skan { 1933169689Skan fprintf (vect_dump, "reduction: no defs for operands: "); 1934169689Skan print_generic_expr (vect_dump, operation, TDF_SLIM); 1935169689Skan } 1936169689Skan return NULL_TREE; 1937169689Skan } 1938169689Skan 1939169689Skan if (TREE_CODE (def1) == MODIFY_EXPR 1940169689Skan && flow_bb_inside_loop_p (loop, bb_for_stmt (def1)) 1941169689Skan && def2 == phi) 1942169689Skan { 1943169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1944169689Skan { 1945169689Skan fprintf (vect_dump, "detected reduction:"); 1946169689Skan print_generic_expr (vect_dump, operation, TDF_SLIM); 1947169689Skan } 1948169689Skan return def_stmt; 1949169689Skan } 1950169689Skan else if (TREE_CODE (def2) == MODIFY_EXPR 1951169689Skan && flow_bb_inside_loop_p (loop, bb_for_stmt (def2)) 1952169689Skan && def1 == phi) 1953169689Skan { 1954169689Skan /* Swap operands (just for simplicity - so that the rest of the code 1955169689Skan can assume that the reduction variable is always the last (second) 1956169689Skan argument). */ 1957169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1958169689Skan { 1959169689Skan fprintf (vect_dump, "detected reduction: need to swap operands:"); 1960169689Skan print_generic_expr (vect_dump, operation, TDF_SLIM); 1961169689Skan } 1962169689Skan swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0), 1963169689Skan &TREE_OPERAND (operation, 1)); 1964169689Skan return def_stmt; 1965169689Skan } 1966169689Skan else 1967169689Skan { 1968169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 1969169689Skan { 1970169689Skan fprintf (vect_dump, "reduction: unknown pattern."); 1971169689Skan print_generic_expr (vect_dump, operation, TDF_SLIM); 1972169689Skan } 1973169689Skan return NULL_TREE; 1974169689Skan } 1975169689Skan} 1976169689Skan 1977169689Skan 1978169689Skan/* Function vect_is_simple_iv_evolution. 1979169689Skan 1980169689Skan FORNOW: A simple evolution of an induction variables in the loop is 1981169689Skan considered a polynomial evolution with constant step. */ 1982169689Skan 1983169689Skanbool 1984169689Skanvect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, 1985169689Skan tree * step) 1986169689Skan{ 1987169689Skan tree init_expr; 1988169689Skan tree step_expr; 1989169689Skan 1990169689Skan tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); 1991169689Skan 1992169689Skan /* When there is no evolution in this loop, the evolution function 1993169689Skan is not "simple". */ 1994169689Skan if (evolution_part == NULL_TREE) 1995169689Skan return false; 1996169689Skan 1997169689Skan /* When the evolution is a polynomial of degree >= 2 1998169689Skan the evolution function is not "simple". */ 1999169689Skan if (tree_is_chrec (evolution_part)) 2000169689Skan return false; 2001169689Skan 2002169689Skan step_expr = evolution_part; 2003169689Skan init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, 2004169689Skan loop_nb)); 2005169689Skan 2006169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 2007169689Skan { 2008169689Skan fprintf (vect_dump, "step: "); 2009169689Skan print_generic_expr (vect_dump, step_expr, TDF_SLIM); 2010169689Skan fprintf (vect_dump, ", init: "); 2011169689Skan print_generic_expr (vect_dump, init_expr, TDF_SLIM); 2012169689Skan } 2013169689Skan 2014169689Skan *init = init_expr; 2015169689Skan *step = step_expr; 2016169689Skan 2017169689Skan if (TREE_CODE (step_expr) != INTEGER_CST) 2018169689Skan { 2019169689Skan if (vect_print_dump_info (REPORT_DETAILS)) 2020169689Skan fprintf (vect_dump, "step unknown."); 2021169689Skan return false; 2022169689Skan } 2023169689Skan 2024169689Skan return true; 2025169689Skan} 2026169689Skan 2027169689Skan 2028169689Skan/* Function vectorize_loops. 2029169689Skan 2030169689Skan Entry Point to loop vectorization phase. */ 2031169689Skan 2032169689Skanvoid 2033169689Skanvectorize_loops (struct loops *loops) 2034169689Skan{ 2035169689Skan unsigned int i; 2036169689Skan unsigned int num_vectorized_loops = 0; 2037169689Skan 2038169689Skan /* Fix the verbosity level if not defined explicitly by the user. */ 2039169689Skan vect_set_dump_settings (); 2040169689Skan 2041169689Skan /* Allocate the bitmap that records which virtual variables that 2042169689Skan need to be renamed. */ 2043169689Skan vect_vnames_to_rename = BITMAP_ALLOC (NULL); 2044169689Skan 2045169689Skan /* ----------- Analyze loops. ----------- */ 2046169689Skan 2047169689Skan /* If some loop was duplicated, it gets bigger number 2048169689Skan than all previously defined loops. This fact allows us to run 2049169689Skan only over initial loops skipping newly generated ones. */ 2050169689Skan vect_loops_num = loops->num; 2051169689Skan for (i = 1; i < vect_loops_num; i++) 2052169689Skan { 2053169689Skan loop_vec_info loop_vinfo; 2054169689Skan struct loop *loop = loops->parray[i]; 2055169689Skan 2056169689Skan if (!loop) 2057169689Skan continue; 2058169689Skan 2059169689Skan vect_loop_location = find_loop_location (loop); 2060169689Skan loop_vinfo = vect_analyze_loop (loop); 2061169689Skan loop->aux = loop_vinfo; 2062169689Skan 2063169689Skan if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) 2064169689Skan continue; 2065169689Skan 2066169689Skan vect_transform_loop (loop_vinfo, loops); 2067169689Skan num_vectorized_loops++; 2068169689Skan } 2069169689Skan vect_loop_location = UNKNOWN_LOC; 2070169689Skan 2071169689Skan if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)) 2072169689Skan fprintf (vect_dump, "vectorized %u loops in function.\n", 2073169689Skan num_vectorized_loops); 2074169689Skan 2075169689Skan /* ----------- Finalize. ----------- */ 2076169689Skan 2077169689Skan BITMAP_FREE (vect_vnames_to_rename); 2078169689Skan 2079169689Skan for (i = 1; i < vect_loops_num; i++) 2080169689Skan { 2081169689Skan struct loop *loop = loops->parray[i]; 2082169689Skan loop_vec_info loop_vinfo; 2083169689Skan 2084169689Skan if (!loop) 2085169689Skan continue; 2086169689Skan loop_vinfo = loop->aux; 2087169689Skan destroy_loop_vec_info (loop_vinfo); 2088169689Skan loop->aux = NULL; 2089169689Skan } 2090169689Skan} 2091