1/* Transformation Utilities for Loop Vectorization. 2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc. 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 2, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING. If not, write to the Free 19Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 2002110-1301, USA. */ 21 22#include "config.h" 23#include "system.h" 24#include "coretypes.h" 25#include "tm.h" 26#include "ggc.h" 27#include "tree.h" 28#include "target.h" 29#include "rtl.h" 30#include "basic-block.h" 31#include "diagnostic.h" 32#include "tree-flow.h" 33#include "tree-dump.h" 34#include "timevar.h" 35#include "cfgloop.h" 36#include "expr.h" 37#include "optabs.h" 38#include "recog.h" 39#include "tree-data-ref.h" 40#include "tree-chrec.h" 41#include "tree-scalar-evolution.h" 42#include "tree-vectorizer.h" 43#include "langhooks.h" 44#include "tree-pass.h" 45#include "toplev.h" 46#include "real.h" 47 48/* Utility functions for the code transformation. */ 49static bool vect_transform_stmt (tree, block_stmt_iterator *); 50static void vect_align_data_ref (tree); 51static tree vect_create_destination_var (tree, tree); 52static tree vect_create_data_ref_ptr 53 (tree, block_stmt_iterator *, tree, tree *, bool); 54static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree); 55static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); 56static tree vect_get_vec_def_for_operand (tree, tree, tree *); 57static tree vect_init_vector (tree, tree); 58static void vect_finish_stmt_generation 59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); 60static bool vect_is_simple_cond (tree, loop_vec_info); 61static void update_vuses_to_preheader (tree, struct loop*); 62static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree); 63static tree get_initial_def_for_reduction (tree, tree, tree *); 64 65/* Utility function dealing with loop peeling (not peeling itself). */ 66static void vect_generate_tmps_on_preheader 67 (loop_vec_info, tree *, tree *, tree *); 68static tree vect_build_loop_niters (loop_vec_info); 69static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge); 70static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree); 71static void vect_update_init_of_dr (struct data_reference *, tree niters); 72static void vect_update_inits_of_drs (loop_vec_info, tree); 73static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *); 74static void vect_do_peeling_for_loop_bound 75 (loop_vec_info, tree *, struct loops *); 76static int vect_min_worthwhile_factor (enum tree_code); 77 78 79/* Function vect_get_new_vect_var. 80 81 Returns a name for a new variable. The current naming scheme appends the 82 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to 83 the name of vectorizer generated variables, and appends that to NAME if 84 provided. */ 85 86static tree 87vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) 88{ 89 const char *prefix; 90 tree new_vect_var; 91 92 switch (var_kind) 93 { 94 case vect_simple_var: 95 prefix = "vect_"; 96 break; 97 case vect_scalar_var: 98 prefix = "stmp_"; 99 break; 100 case vect_pointer_var: 101 prefix = "vect_p"; 102 break; 103 default: 104 gcc_unreachable (); 105 } 106 107 if (name) 108 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL)); 109 else 110 new_vect_var = create_tmp_var (type, prefix); 111 112 return new_vect_var; 113} 114 115 116/* Function vect_create_addr_base_for_vector_ref. 117 118 Create an expression that computes the address of the first memory location 119 that will be accessed for a data reference. 120 121 Input: 122 STMT: The statement containing the data reference. 123 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list. 124 OFFSET: Optional. If supplied, it is be added to the initial address. 125 126 Output: 127 1. Return an SSA_NAME whose value is the address of the memory location of 128 the first vector of the data reference. 129 2. If new_stmt_list is not NULL_TREE after return then the caller must insert 130 these statement(s) which define the returned SSA_NAME. 131 132 FORNOW: We are only handling array accesses with step 1. */ 133 134static tree 135vect_create_addr_base_for_vector_ref (tree stmt, 136 tree *new_stmt_list, 137 tree offset) 138{ 139 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 140 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 141 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr)); 142 tree base_name = build_fold_indirect_ref (data_ref_base); 143 tree ref = DR_REF (dr); 144 tree scalar_type = TREE_TYPE (ref); 145 tree scalar_ptr_type = build_pointer_type (scalar_type); 146 tree vec_stmt; 147 tree new_temp; 148 tree addr_base, addr_expr; 149 tree dest, new_stmt; 150 tree base_offset = unshare_expr (DR_OFFSET (dr)); 151 tree init = unshare_expr (DR_INIT (dr)); 152 153 /* Create base_offset */ 154 base_offset = size_binop (PLUS_EXPR, base_offset, init); 155 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off"); 156 add_referenced_var (dest); 157 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest); 158 append_to_statement_list_force (new_stmt, new_stmt_list); 159 160 if (offset) 161 { 162 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset"); 163 add_referenced_var (tmp); 164 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, 165 DR_STEP (dr)); 166 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset), 167 base_offset, offset); 168 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp); 169 append_to_statement_list_force (new_stmt, new_stmt_list); 170 } 171 172 /* base + base_offset */ 173 addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base, 174 base_offset); 175 176 /* addr_expr = addr_base */ 177 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var, 178 get_name (base_name)); 179 add_referenced_var (addr_expr); 180 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base); 181 new_temp = make_ssa_name (addr_expr, vec_stmt); 182 TREE_OPERAND (vec_stmt, 0) = new_temp; 183 append_to_statement_list_force (vec_stmt, new_stmt_list); 184 185 if (vect_print_dump_info (REPORT_DETAILS)) 186 { 187 fprintf (vect_dump, "created "); 188 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); 189 } 190 return new_temp; 191} 192 193 194/* Function vect_align_data_ref. 195 196 Handle misalignment of a memory accesses. 197 198 FORNOW: Can't handle misaligned accesses. 199 Make sure that the dataref is aligned. */ 200 201static void 202vect_align_data_ref (tree stmt) 203{ 204 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 205 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 206 207 /* FORNOW: can't handle misaligned accesses; 208 all accesses expected to be aligned. */ 209 gcc_assert (aligned_access_p (dr)); 210} 211 212 213/* Function vect_create_data_ref_ptr. 214 215 Create a memory reference expression for vector access, to be used in a 216 vector load/store stmt. The reference is based on a new pointer to vector 217 type (vp). 218 219 Input: 220 1. STMT: a stmt that references memory. Expected to be of the form 221 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>. 222 2. BSI: block_stmt_iterator where new stmts can be added. 223 3. OFFSET (optional): an offset to be added to the initial address accessed 224 by the data-ref in STMT. 225 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain 226 pointing to the initial address. 227 228 Output: 229 1. Declare a new ptr to vector_type, and have it point to the base of the 230 data reference (initial addressed accessed by the data reference). 231 For example, for vector of type V8HI, the following code is generated: 232 233 v8hi *vp; 234 vp = (v8hi *)initial_address; 235 236 if OFFSET is not supplied: 237 initial_address = &a[init]; 238 if OFFSET is supplied: 239 initial_address = &a[init + OFFSET]; 240 241 Return the initial_address in INITIAL_ADDRESS. 242 243 2. If ONLY_INIT is true, return the initial pointer. Otherwise, create 244 a data-reference in the loop based on the new vector pointer vp. This 245 new data reference will by some means be updated each iteration of 246 the loop. Return the pointer vp'. 247 248 FORNOW: handle only aligned and consecutive accesses. */ 249 250static tree 251vect_create_data_ref_ptr (tree stmt, 252 block_stmt_iterator *bsi ATTRIBUTE_UNUSED, 253 tree offset, tree *initial_address, bool only_init) 254{ 255 tree base_name; 256 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 257 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 258 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 259 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 260 tree vect_ptr_type; 261 tree vect_ptr; 262 tree tag; 263 tree new_temp; 264 tree vec_stmt; 265 tree new_stmt_list = NULL_TREE; 266 edge pe = loop_preheader_edge (loop); 267 basic_block new_bb; 268 tree vect_ptr_init; 269 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 270 271 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr))); 272 273 if (vect_print_dump_info (REPORT_DETAILS)) 274 { 275 tree data_ref_base = base_name; 276 fprintf (vect_dump, "create vector-pointer variable to type: "); 277 print_generic_expr (vect_dump, vectype, TDF_SLIM); 278 if (TREE_CODE (data_ref_base) == VAR_DECL) 279 fprintf (vect_dump, " vectorizing a one dimensional array ref: "); 280 else if (TREE_CODE (data_ref_base) == ARRAY_REF) 281 fprintf (vect_dump, " vectorizing a multidimensional array ref: "); 282 else if (TREE_CODE (data_ref_base) == COMPONENT_REF) 283 fprintf (vect_dump, " vectorizing a record based array ref: "); 284 else if (TREE_CODE (data_ref_base) == SSA_NAME) 285 fprintf (vect_dump, " vectorizing a pointer ref: "); 286 print_generic_expr (vect_dump, base_name, TDF_SLIM); 287 } 288 289 /** (1) Create the new vector-pointer variable: **/ 290 291 vect_ptr_type = build_pointer_type (vectype); 292 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, 293 get_name (base_name)); 294 add_referenced_var (vect_ptr); 295 296 297 /** (2) Add aliasing information to the new vector-pointer: 298 (The points-to info (DR_PTR_INFO) may be defined later.) **/ 299 300 tag = DR_MEMTAG (dr); 301 gcc_assert (tag); 302 303 /* If tag is a variable (and NOT_A_TAG) than a new symbol memory 304 tag must be created with tag added to its may alias list. */ 305 if (!MTAG_P (tag)) 306 new_type_alias (vect_ptr, tag, DR_REF (dr)); 307 else 308 var_ann (vect_ptr)->symbol_mem_tag = tag; 309 310 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr); 311 312 /** (3) Calculate the initial address the vector-pointer, and set 313 the vector-pointer to point to it before the loop: **/ 314 315 /* Create: (&(base[init_val+offset]) in the loop preheader. */ 316 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list, 317 offset); 318 pe = loop_preheader_edge (loop); 319 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list); 320 gcc_assert (!new_bb); 321 *initial_address = new_temp; 322 323 /* Create: p = (vectype *) initial_base */ 324 vec_stmt = fold_convert (vect_ptr_type, new_temp); 325 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt); 326 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt); 327 TREE_OPERAND (vec_stmt, 0) = vect_ptr_init; 328 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt); 329 gcc_assert (!new_bb); 330 331 332 /** (4) Handle the updating of the vector-pointer inside the loop: **/ 333 334 if (only_init) /* No update in loop is required. */ 335 { 336 /* Copy the points-to information if it exists. */ 337 if (DR_PTR_INFO (dr)) 338 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr)); 339 return vect_ptr_init; 340 } 341 else 342 { 343 block_stmt_iterator incr_bsi; 344 bool insert_after; 345 tree indx_before_incr, indx_after_incr; 346 tree incr; 347 348 standard_iv_increment_position (loop, &incr_bsi, &insert_after); 349 create_iv (vect_ptr_init, 350 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)), 351 NULL_TREE, loop, &incr_bsi, insert_after, 352 &indx_before_incr, &indx_after_incr); 353 incr = bsi_stmt (incr_bsi); 354 set_stmt_info (stmt_ann (incr), 355 new_stmt_vec_info (incr, loop_vinfo)); 356 357 /* Copy the points-to information if it exists. */ 358 if (DR_PTR_INFO (dr)) 359 { 360 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr)); 361 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr)); 362 } 363 merge_alias_info (vect_ptr_init, indx_before_incr); 364 merge_alias_info (vect_ptr_init, indx_after_incr); 365 366 return indx_before_incr; 367 } 368} 369 370 371/* Function vect_create_destination_var. 372 373 Create a new temporary of type VECTYPE. */ 374 375static tree 376vect_create_destination_var (tree scalar_dest, tree vectype) 377{ 378 tree vec_dest; 379 const char *new_name; 380 tree type; 381 enum vect_var_kind kind; 382 383 kind = vectype ? vect_simple_var : vect_scalar_var; 384 type = vectype ? vectype : TREE_TYPE (scalar_dest); 385 386 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); 387 388 new_name = get_name (scalar_dest); 389 if (!new_name) 390 new_name = "var_"; 391 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name); 392 add_referenced_var (vec_dest); 393 394 return vec_dest; 395} 396 397 398/* Function vect_init_vector. 399 400 Insert a new stmt (INIT_STMT) that initializes a new vector variable with 401 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be 402 used in the vectorization of STMT. */ 403 404static tree 405vect_init_vector (tree stmt, tree vector_var) 406{ 407 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 408 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 409 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 410 tree new_var; 411 tree init_stmt; 412 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 413 tree vec_oprnd; 414 edge pe; 415 tree new_temp; 416 basic_block new_bb; 417 418 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_"); 419 add_referenced_var (new_var); 420 421 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var); 422 new_temp = make_ssa_name (new_var, init_stmt); 423 TREE_OPERAND (init_stmt, 0) = new_temp; 424 425 pe = loop_preheader_edge (loop); 426 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt); 427 gcc_assert (!new_bb); 428 429 if (vect_print_dump_info (REPORT_DETAILS)) 430 { 431 fprintf (vect_dump, "created new init_stmt: "); 432 print_generic_expr (vect_dump, init_stmt, TDF_SLIM); 433 } 434 435 vec_oprnd = TREE_OPERAND (init_stmt, 0); 436 return vec_oprnd; 437} 438 439 440/* Function vect_get_vec_def_for_operand. 441 442 OP is an operand in STMT. This function returns a (vector) def that will be 443 used in the vectorized stmt for STMT. 444 445 In the case that OP is an SSA_NAME which is defined in the loop, then 446 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def. 447 448 In case OP is an invariant or constant, a new stmt that creates a vector def 449 needs to be introduced. */ 450 451static tree 452vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def) 453{ 454 tree vec_oprnd; 455 tree vec_stmt; 456 tree def_stmt; 457 stmt_vec_info def_stmt_info = NULL; 458 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 459 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 460 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 461 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 462 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 463 tree vec_inv; 464 tree vec_cst; 465 tree t = NULL_TREE; 466 tree def; 467 int i; 468 enum vect_def_type dt; 469 bool is_simple_use; 470 471 if (vect_print_dump_info (REPORT_DETAILS)) 472 { 473 fprintf (vect_dump, "vect_get_vec_def_for_operand: "); 474 print_generic_expr (vect_dump, op, TDF_SLIM); 475 } 476 477 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt); 478 gcc_assert (is_simple_use); 479 if (vect_print_dump_info (REPORT_DETAILS)) 480 { 481 if (def) 482 { 483 fprintf (vect_dump, "def = "); 484 print_generic_expr (vect_dump, def, TDF_SLIM); 485 } 486 if (def_stmt) 487 { 488 fprintf (vect_dump, " def_stmt = "); 489 print_generic_expr (vect_dump, def_stmt, TDF_SLIM); 490 } 491 } 492 493 switch (dt) 494 { 495 /* Case 1: operand is a constant. */ 496 case vect_constant_def: 497 { 498 if (scalar_def) 499 *scalar_def = op; 500 501 /* Create 'vect_cst_ = {cst,cst,...,cst}' */ 502 if (vect_print_dump_info (REPORT_DETAILS)) 503 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits); 504 505 for (i = nunits - 1; i >= 0; --i) 506 { 507 t = tree_cons (NULL_TREE, op, t); 508 } 509 vec_cst = build_vector (vectype, t); 510 return vect_init_vector (stmt, vec_cst); 511 } 512 513 /* Case 2: operand is defined outside the loop - loop invariant. */ 514 case vect_invariant_def: 515 { 516 if (scalar_def) 517 *scalar_def = def; 518 519 /* Create 'vec_inv = {inv,inv,..,inv}' */ 520 if (vect_print_dump_info (REPORT_DETAILS)) 521 fprintf (vect_dump, "Create vector_inv."); 522 523 for (i = nunits - 1; i >= 0; --i) 524 { 525 t = tree_cons (NULL_TREE, def, t); 526 } 527 528 /* FIXME: use build_constructor directly. */ 529 vec_inv = build_constructor_from_list (vectype, t); 530 return vect_init_vector (stmt, vec_inv); 531 } 532 533 /* Case 3: operand is defined inside the loop. */ 534 case vect_loop_def: 535 { 536 if (scalar_def) 537 *scalar_def = def_stmt; 538 539 /* Get the def from the vectorized stmt. */ 540 def_stmt_info = vinfo_for_stmt (def_stmt); 541 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); 542 gcc_assert (vec_stmt); 543 vec_oprnd = TREE_OPERAND (vec_stmt, 0); 544 return vec_oprnd; 545 } 546 547 /* Case 4: operand is defined by a loop header phi - reduction */ 548 case vect_reduction_def: 549 { 550 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); 551 552 /* Get the def before the loop */ 553 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); 554 return get_initial_def_for_reduction (stmt, op, scalar_def); 555 } 556 557 /* Case 5: operand is defined by loop-header phi - induction. */ 558 case vect_induction_def: 559 { 560 if (vect_print_dump_info (REPORT_DETAILS)) 561 fprintf (vect_dump, "induction - unsupported."); 562 internal_error ("no support for induction"); /* FORNOW */ 563 } 564 565 default: 566 gcc_unreachable (); 567 } 568} 569 570 571/* Function vect_finish_stmt_generation. 572 573 Insert a new stmt. */ 574 575static void 576vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) 577{ 578 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); 579 580 if (vect_print_dump_info (REPORT_DETAILS)) 581 { 582 fprintf (vect_dump, "add new stmt: "); 583 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); 584 } 585 586 /* Make sure bsi points to the stmt that is being vectorized. */ 587 gcc_assert (stmt == bsi_stmt (*bsi)); 588 589#ifdef USE_MAPPED_LOCATION 590 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt)); 591#else 592 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt)); 593#endif 594} 595 596 597#define ADJUST_IN_EPILOG 1 598 599/* Function get_initial_def_for_reduction 600 601 Input: 602 STMT - a stmt that performs a reduction operation in the loop. 603 INIT_VAL - the initial value of the reduction variable 604 605 Output: 606 SCALAR_DEF - a tree that holds a value to be added to the final result 607 of the reduction (used for "ADJUST_IN_EPILOG" - see below). 608 Return a vector variable, initialized according to the operation that STMT 609 performs. This vector will be used as the initial value of the 610 vector of partial results. 611 612 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows: 613 add: [0,0,...,0,0] 614 mult: [1,1,...,1,1] 615 min/max: [init_val,init_val,..,init_val,init_val] 616 bit and/or: [init_val,init_val,..,init_val,init_val] 617 and when necessary (e.g. add/mult case) let the caller know 618 that it needs to adjust the result by init_val. 619 620 Option2: Initialize the vector as follows: 621 add: [0,0,...,0,init_val] 622 mult: [1,1,...,1,init_val] 623 min/max: [init_val,init_val,...,init_val] 624 bit and/or: [init_val,init_val,...,init_val] 625 and no adjustments are needed. 626 627 For example, for the following code: 628 629 s = init_val; 630 for (i=0;i<n;i++) 631 s = s + a[i]; 632 633 STMT is 's = s + a[i]', and the reduction variable is 's'. 634 For a vector of 4 units, we want to return either [0,0,0,init_val], 635 or [0,0,0,0] and let the caller know that it needs to adjust 636 the result at the end by 'init_val'. 637 638 FORNOW: We use the "ADJUST_IN_EPILOG" scheme. 639 TODO: Use some cost-model to estimate which scheme is more profitable. 640*/ 641 642static tree 643get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def) 644{ 645 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 646 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 647 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); 648 int nelements; 649 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); 650 tree type = TREE_TYPE (init_val); 651 tree def; 652 tree vec, t = NULL_TREE; 653 bool need_epilog_adjust; 654 int i; 655 656 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)); 657 658 switch (code) 659 { 660 case WIDEN_SUM_EXPR: 661 case DOT_PROD_EXPR: 662 case PLUS_EXPR: 663 if (INTEGRAL_TYPE_P (type)) 664 def = build_int_cst (type, 0); 665 else 666 def = build_real (type, dconst0); 667 668#ifdef ADJUST_IN_EPILOG 669 /* All the 'nunits' elements are set to 0. The final result will be 670 adjusted by 'init_val' at the loop epilog. */ 671 nelements = nunits; 672 need_epilog_adjust = true; 673#else 674 /* 'nunits - 1' elements are set to 0; The last element is set to 675 'init_val'. No further adjustments at the epilog are needed. */ 676 nelements = nunits - 1; 677 need_epilog_adjust = false; 678#endif 679 break; 680 681 case MIN_EXPR: 682 case MAX_EXPR: 683 def = init_val; 684 nelements = nunits; 685 need_epilog_adjust = false; 686 break; 687 688 default: 689 gcc_unreachable (); 690 } 691 692 for (i = nelements - 1; i >= 0; --i) 693 t = tree_cons (NULL_TREE, def, t); 694 695 if (nelements == nunits - 1) 696 { 697 /* Set the last element of the vector. */ 698 t = tree_cons (NULL_TREE, init_val, t); 699 nelements += 1; 700 } 701 gcc_assert (nelements == nunits); 702 703 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST) 704 vec = build_vector (vectype, t); 705 else 706 vec = build_constructor_from_list (vectype, t); 707 708 if (!need_epilog_adjust) 709 *scalar_def = NULL_TREE; 710 else 711 *scalar_def = init_val; 712 713 return vect_init_vector (stmt, vec); 714} 715 716 717/* Function vect_create_epilog_for_reduction 718 719 Create code at the loop-epilog to finalize the result of a reduction 720 computation. 721 722 VECT_DEF is a vector of partial results. 723 REDUC_CODE is the tree-code for the epilog reduction. 724 STMT is the scalar reduction stmt that is being vectorized. 725 REDUCTION_PHI is the phi-node that carries the reduction computation. 726 727 This function: 728 1. Creates the reduction def-use cycle: sets the the arguments for 729 REDUCTION_PHI: 730 The loop-entry argument is the vectorized initial-value of the reduction. 731 The loop-latch argument is VECT_DEF - the vector of partial sums. 732 2. "Reduces" the vector of partial results VECT_DEF into a single result, 733 by applying the operation specified by REDUC_CODE if available, or by 734 other means (whole-vector shifts or a scalar loop). 735 The function also creates a new phi node at the loop exit to preserve 736 loop-closed form, as illustrated below. 737 738 The flow at the entry to this function: 739 740 loop: 741 vec_def = phi <null, null> # REDUCTION_PHI 742 VECT_DEF = vector_stmt # vectorized form of STMT 743 s_loop = scalar_stmt # (scalar) STMT 744 loop_exit: 745 s_out0 = phi <s_loop> # (scalar) EXIT_PHI 746 use <s_out0> 747 use <s_out0> 748 749 The above is transformed by this function into: 750 751 loop: 752 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI 753 VECT_DEF = vector_stmt # vectorized form of STMT 754 s_loop = scalar_stmt # (scalar) STMT 755 loop_exit: 756 s_out0 = phi <s_loop> # (scalar) EXIT_PHI 757 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI 758 v_out2 = reduce <v_out1> 759 s_out3 = extract_field <v_out2, 0> 760 s_out4 = adjust_result <s_out3> 761 use <s_out4> 762 use <s_out4> 763*/ 764 765static void 766vect_create_epilog_for_reduction (tree vect_def, tree stmt, 767 enum tree_code reduc_code, tree reduction_phi) 768{ 769 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 770 tree vectype; 771 enum machine_mode mode; 772 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 773 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 774 basic_block exit_bb; 775 tree scalar_dest; 776 tree scalar_type; 777 tree new_phi; 778 block_stmt_iterator exit_bsi; 779 tree vec_dest; 780 tree new_temp; 781 tree new_name; 782 tree epilog_stmt; 783 tree new_scalar_dest, exit_phi; 784 tree bitsize, bitpos, bytesize; 785 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); 786 tree scalar_initial_def; 787 tree vec_initial_def; 788 tree orig_name; 789 imm_use_iterator imm_iter; 790 use_operand_p use_p; 791 bool extract_scalar_result; 792 tree reduction_op; 793 tree orig_stmt; 794 tree use_stmt; 795 tree operation = TREE_OPERAND (stmt, 1); 796 int op_type; 797 798 op_type = TREE_CODE_LENGTH (TREE_CODE (operation)); 799 reduction_op = TREE_OPERAND (operation, op_type-1); 800 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op)); 801 mode = TYPE_MODE (vectype); 802 803 /*** 1. Create the reduction def-use cycle ***/ 804 805 /* 1.1 set the loop-entry arg of the reduction-phi: */ 806 /* For the case of reduction, vect_get_vec_def_for_operand returns 807 the scalar def before the loop, that defines the initial value 808 of the reduction variable. */ 809 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt, 810 &scalar_initial_def); 811 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop)); 812 813 /* 1.2 set the loop-latch arg for the reduction-phi: */ 814 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop)); 815 816 if (vect_print_dump_info (REPORT_DETAILS)) 817 { 818 fprintf (vect_dump, "transform reduction: created def-use cycle:"); 819 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM); 820 fprintf (vect_dump, "\n"); 821 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM); 822 } 823 824 825 /*** 2. Create epilog code 826 The reduction epilog code operates across the elements of the vector 827 of partial results computed by the vectorized loop. 828 The reduction epilog code consists of: 829 step 1: compute the scalar result in a vector (v_out2) 830 step 2: extract the scalar result (s_out3) from the vector (v_out2) 831 step 3: adjust the scalar result (s_out3) if needed. 832 833 Step 1 can be accomplished using one the following three schemes: 834 (scheme 1) using reduc_code, if available. 835 (scheme 2) using whole-vector shifts, if available. 836 (scheme 3) using a scalar loop. In this case steps 1+2 above are 837 combined. 838 839 The overall epilog code looks like this: 840 841 s_out0 = phi <s_loop> # original EXIT_PHI 842 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI 843 v_out2 = reduce <v_out1> # step 1 844 s_out3 = extract_field <v_out2, 0> # step 2 845 s_out4 = adjust_result <s_out3> # step 3 846 847 (step 3 is optional, and step2 1 and 2 may be combined). 848 Lastly, the uses of s_out0 are replaced by s_out4. 849 850 ***/ 851 852 /* 2.1 Create new loop-exit-phi to preserve loop-closed form: 853 v_out1 = phi <v_loop> */ 854 855 exit_bb = loop->single_exit->dest; 856 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb); 857 SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def); 858 exit_bsi = bsi_start (exit_bb); 859 860 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 861 (i.e. when reduc_code is not available) and in the final adjustment code 862 (if needed). Also get the original scalar reduction variable as 863 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it 864 represents a reduction pattern), the tree-code and scalar-def are 865 taken from the original stmt that the pattern-stmt (STMT) replaces. 866 Otherwise (it is a regular reduction) - the tree-code and scalar-def 867 are taken from STMT. */ 868 869 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 870 if (!orig_stmt) 871 { 872 /* Regular reduction */ 873 orig_stmt = stmt; 874 } 875 else 876 { 877 /* Reduction pattern */ 878 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt); 879 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)); 880 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt); 881 } 882 code = TREE_CODE (TREE_OPERAND (orig_stmt, 1)); 883 scalar_dest = TREE_OPERAND (orig_stmt, 0); 884 scalar_type = TREE_TYPE (scalar_dest); 885 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); 886 bitsize = TYPE_SIZE (scalar_type); 887 bytesize = TYPE_SIZE_UNIT (scalar_type); 888 889 /* 2.3 Create the reduction code, using one of the three schemes described 890 above. */ 891 892 if (reduc_code < NUM_TREE_CODES) 893 { 894 /*** Case 1: Create: 895 v_out2 = reduc_expr <v_out1> */ 896 897 if (vect_print_dump_info (REPORT_DETAILS)) 898 fprintf (vect_dump, "Reduce using direct vector reduction."); 899 900 vec_dest = vect_create_destination_var (scalar_dest, vectype); 901 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 902 build1 (reduc_code, vectype, PHI_RESULT (new_phi))); 903 new_temp = make_ssa_name (vec_dest, epilog_stmt); 904 TREE_OPERAND (epilog_stmt, 0) = new_temp; 905 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 906 907 extract_scalar_result = true; 908 } 909 else 910 { 911 enum tree_code shift_code = 0; 912 bool have_whole_vector_shift = true; 913 int bit_offset; 914 int element_bitsize = tree_low_cst (bitsize, 1); 915 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); 916 tree vec_temp; 917 918 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing) 919 shift_code = VEC_RSHIFT_EXPR; 920 else 921 have_whole_vector_shift = false; 922 923 /* Regardless of whether we have a whole vector shift, if we're 924 emulating the operation via tree-vect-generic, we don't want 925 to use it. Only the first round of the reduction is likely 926 to still be profitable via emulation. */ 927 /* ??? It might be better to emit a reduction tree code here, so that 928 tree-vect-generic can expand the first round via bit tricks. */ 929 if (!VECTOR_MODE_P (mode)) 930 have_whole_vector_shift = false; 931 else 932 { 933 optab optab = optab_for_tree_code (code, vectype); 934 if (optab->handlers[mode].insn_code == CODE_FOR_nothing) 935 have_whole_vector_shift = false; 936 } 937 938 if (have_whole_vector_shift) 939 { 940 /*** Case 2: Create: 941 for (offset = VS/2; offset >= element_size; offset/=2) 942 { 943 Create: va' = vec_shift <va, offset> 944 Create: va = vop <va, va'> 945 } */ 946 947 if (vect_print_dump_info (REPORT_DETAILS)) 948 fprintf (vect_dump, "Reduce using vector shifts"); 949 950 vec_dest = vect_create_destination_var (scalar_dest, vectype); 951 new_temp = PHI_RESULT (new_phi); 952 953 for (bit_offset = vec_size_in_bits/2; 954 bit_offset >= element_bitsize; 955 bit_offset /= 2) 956 { 957 tree bitpos = size_int (bit_offset); 958 959 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 960 build2 (shift_code, vectype, new_temp, bitpos)); 961 new_name = make_ssa_name (vec_dest, epilog_stmt); 962 TREE_OPERAND (epilog_stmt, 0) = new_name; 963 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 964 965 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 966 build2 (code, vectype, new_name, new_temp)); 967 new_temp = make_ssa_name (vec_dest, epilog_stmt); 968 TREE_OPERAND (epilog_stmt, 0) = new_temp; 969 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 970 } 971 972 extract_scalar_result = true; 973 } 974 else 975 { 976 tree rhs; 977 978 /*** Case 3: Create: 979 s = extract_field <v_out2, 0> 980 for (offset = element_size; 981 offset < vector_size; 982 offset += element_size;) 983 { 984 Create: s' = extract_field <v_out2, offset> 985 Create: s = op <s, s'> 986 } */ 987 988 if (vect_print_dump_info (REPORT_DETAILS)) 989 fprintf (vect_dump, "Reduce using scalar code. "); 990 991 vec_temp = PHI_RESULT (new_phi); 992 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); 993 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, 994 bitsize_zero_node); 995 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type); 996 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs); 997 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 998 TREE_OPERAND (epilog_stmt, 0) = new_temp; 999 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 1000 1001 for (bit_offset = element_bitsize; 1002 bit_offset < vec_size_in_bits; 1003 bit_offset += element_bitsize) 1004 { 1005 tree bitpos = bitsize_int (bit_offset); 1006 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, 1007 bitpos); 1008 1009 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type); 1010 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, 1011 rhs); 1012 new_name = make_ssa_name (new_scalar_dest, epilog_stmt); 1013 TREE_OPERAND (epilog_stmt, 0) = new_name; 1014 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 1015 1016 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, 1017 build2 (code, scalar_type, new_name, new_temp)); 1018 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 1019 TREE_OPERAND (epilog_stmt, 0) = new_temp; 1020 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 1021 } 1022 1023 extract_scalar_result = false; 1024 } 1025 } 1026 1027 /* 2.4 Extract the final scalar result. Create: 1028 s_out3 = extract_field <v_out2, bitpos> */ 1029 1030 if (extract_scalar_result) 1031 { 1032 tree rhs; 1033 1034 if (vect_print_dump_info (REPORT_DETAILS)) 1035 fprintf (vect_dump, "extract scalar result"); 1036 1037 if (BYTES_BIG_ENDIAN) 1038 bitpos = size_binop (MULT_EXPR, 1039 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1), 1040 TYPE_SIZE (scalar_type)); 1041 else 1042 bitpos = bitsize_zero_node; 1043 1044 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos); 1045 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type); 1046 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs); 1047 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 1048 TREE_OPERAND (epilog_stmt, 0) = new_temp; 1049 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 1050 } 1051 1052 /* 2.4 Adjust the final result by the initial value of the reduction 1053 variable. (When such adjustment is not needed, then 1054 'scalar_initial_def' is zero). 1055 1056 Create: 1057 s_out4 = scalar_expr <s_out3, scalar_initial_def> */ 1058 1059 if (scalar_initial_def) 1060 { 1061 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, 1062 build2 (code, scalar_type, new_temp, scalar_initial_def)); 1063 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 1064 TREE_OPERAND (epilog_stmt, 0) = new_temp; 1065 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); 1066 } 1067 1068 /* 2.6 Replace uses of s_out0 with uses of s_out3 */ 1069 1070 /* Find the loop-closed-use at the loop exit of the original scalar result. 1071 (The reduction result is expected to have two immediate uses - one at the 1072 latch block, and one at the loop exit). */ 1073 exit_phi = NULL; 1074 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest) 1075 { 1076 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p)))) 1077 { 1078 exit_phi = USE_STMT (use_p); 1079 break; 1080 } 1081 } 1082 /* We expect to have found an exit_phi because of loop-closed-ssa form. */ 1083 gcc_assert (exit_phi); 1084 /* Replace the uses: */ 1085 orig_name = PHI_RESULT (exit_phi); 1086 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) 1087 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) 1088 SET_USE (use_p, new_temp); 1089} 1090 1091 1092/* Function vectorizable_reduction. 1093 1094 Check if STMT performs a reduction operation that can be vectorized. 1095 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1096 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1097 Return FALSE if not a vectorizable STMT, TRUE otherwise. 1098 1099 This function also handles reduction idioms (patterns) that have been 1100 recognized in advance during vect_pattern_recog. In this case, STMT may be 1101 of this form: 1102 X = pattern_expr (arg0, arg1, ..., X) 1103 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original 1104 sequence that had been detected and replaced by the pattern-stmt (STMT). 1105 1106 In some cases of reduction patterns, the type of the reduction variable X is 1107 different than the type of the other arguments of STMT. 1108 In such cases, the vectype that is used when transforming STMT into a vector 1109 stmt is different than the vectype that is used to determine the 1110 vectorization factor, because it consists of a different number of elements 1111 than the actual number of elements that are being operated upon in parallel. 1112 1113 For example, consider an accumulation of shorts into an int accumulator. 1114 On some targets it's possible to vectorize this pattern operating on 8 1115 shorts at a time (hence, the vectype for purposes of determining the 1116 vectorization factor should be V8HI); on the other hand, the vectype that 1117 is used to create the vector form is actually V4SI (the type of the result). 1118 1119 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that 1120 indicates what is the actual level of parallelism (V8HI in the example), so 1121 that the right vectorization factor would be derived. This vectype 1122 corresponds to the type of arguments to the reduction stmt, and should *NOT* 1123 be used to create the vectorized stmt. The right vectype for the vectorized 1124 stmt is obtained from the type of the result X: 1125 get_vectype_for_scalar_type (TREE_TYPE (X)) 1126 1127 This means that, contrary to "regular" reductions (or "regular" stmts in 1128 general), the following equation: 1129 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X)) 1130 does *NOT* necessarily hold for reduction patterns. */ 1131 1132bool 1133vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1134{ 1135 tree vec_dest; 1136 tree scalar_dest; 1137 tree op; 1138 tree loop_vec_def0, loop_vec_def1; 1139 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1140 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1141 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1142 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1143 tree operation; 1144 enum tree_code code, orig_code, epilog_reduc_code = 0; 1145 enum machine_mode vec_mode; 1146 int op_type; 1147 optab optab, reduc_optab; 1148 tree new_temp; 1149 tree def, def_stmt; 1150 enum vect_def_type dt; 1151 tree new_phi; 1152 tree scalar_type; 1153 bool is_simple_use; 1154 tree orig_stmt; 1155 stmt_vec_info orig_stmt_info; 1156 tree expr = NULL_TREE; 1157 int i; 1158 1159 /* 1. Is vectorizable reduction? */ 1160 1161 /* Not supportable if the reduction variable is used in the loop. */ 1162 if (STMT_VINFO_RELEVANT_P (stmt_info)) 1163 return false; 1164 1165 if (!STMT_VINFO_LIVE_P (stmt_info)) 1166 return false; 1167 1168 /* Make sure it was already recognized as a reduction computation. */ 1169 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def) 1170 return false; 1171 1172 /* 2. Has this been recognized as a reduction pattern? 1173 1174 Check if STMT represents a pattern that has been recognized 1175 in earlier analysis stages. For stmts that represent a pattern, 1176 the STMT_VINFO_RELATED_STMT field records the last stmt in 1177 the original sequence that constitutes the pattern. */ 1178 1179 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 1180 if (orig_stmt) 1181 { 1182 orig_stmt_info = vinfo_for_stmt (orig_stmt); 1183 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt); 1184 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); 1185 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info)); 1186 } 1187 1188 /* 3. Check the operands of the operation. The first operands are defined 1189 inside the loop body. The last operand is the reduction variable, 1190 which is defined by the loop-header-phi. */ 1191 1192 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR); 1193 1194 operation = TREE_OPERAND (stmt, 1); 1195 code = TREE_CODE (operation); 1196 op_type = TREE_CODE_LENGTH (code); 1197 1198 if (op_type != binary_op && op_type != ternary_op) 1199 return false; 1200 scalar_dest = TREE_OPERAND (stmt, 0); 1201 scalar_type = TREE_TYPE (scalar_dest); 1202 1203 /* All uses but the last are expected to be defined in the loop. 1204 The last use is the reduction variable. */ 1205 for (i = 0; i < op_type-1; i++) 1206 { 1207 op = TREE_OPERAND (operation, i); 1208 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt); 1209 gcc_assert (is_simple_use); 1210 gcc_assert (dt == vect_loop_def || dt == vect_invariant_def || 1211 dt == vect_constant_def); 1212 } 1213 1214 op = TREE_OPERAND (operation, i); 1215 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt); 1216 gcc_assert (is_simple_use); 1217 gcc_assert (dt == vect_reduction_def); 1218 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); 1219 if (orig_stmt) 1220 gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt)); 1221 else 1222 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt)); 1223 1224 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) 1225 return false; 1226 1227 /* 4. Supportable by target? */ 1228 1229 /* 4.1. check support for the operation in the loop */ 1230 optab = optab_for_tree_code (code, vectype); 1231 if (!optab) 1232 { 1233 if (vect_print_dump_info (REPORT_DETAILS)) 1234 fprintf (vect_dump, "no optab."); 1235 return false; 1236 } 1237 vec_mode = TYPE_MODE (vectype); 1238 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) 1239 { 1240 if (vect_print_dump_info (REPORT_DETAILS)) 1241 fprintf (vect_dump, "op not supported by target."); 1242 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD 1243 || LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1244 < vect_min_worthwhile_factor (code)) 1245 return false; 1246 if (vect_print_dump_info (REPORT_DETAILS)) 1247 fprintf (vect_dump, "proceeding using word mode."); 1248 } 1249 1250 /* Worthwhile without SIMD support? */ 1251 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 1252 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1253 < vect_min_worthwhile_factor (code)) 1254 { 1255 if (vect_print_dump_info (REPORT_DETAILS)) 1256 fprintf (vect_dump, "not worthwhile without SIMD support."); 1257 return false; 1258 } 1259 1260 /* 4.2. Check support for the epilog operation. 1261 1262 If STMT represents a reduction pattern, then the type of the 1263 reduction variable may be different than the type of the rest 1264 of the arguments. For example, consider the case of accumulation 1265 of shorts into an int accumulator; The original code: 1266 S1: int_a = (int) short_a; 1267 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>; 1268 1269 was replaced with: 1270 STMT: int_acc = widen_sum <short_a, int_acc> 1271 1272 This means that: 1273 1. The tree-code that is used to create the vector operation in the 1274 epilog code (that reduces the partial results) is not the 1275 tree-code of STMT, but is rather the tree-code of the original 1276 stmt from the pattern that STMT is replacing. I.e, in the example 1277 above we want to use 'widen_sum' in the loop, but 'plus' in the 1278 epilog. 1279 2. The type (mode) we use to check available target support 1280 for the vector operation to be created in the *epilog*, is 1281 determined by the type of the reduction variable (in the example 1282 above we'd check this: plus_optab[vect_int_mode]). 1283 However the type (mode) we use to check available target support 1284 for the vector operation to be created *inside the loop*, is 1285 determined by the type of the other arguments to STMT (in the 1286 example we'd check this: widen_sum_optab[vect_short_mode]). 1287 1288 This is contrary to "regular" reductions, in which the types of all 1289 the arguments are the same as the type of the reduction variable. 1290 For "regular" reductions we can therefore use the same vector type 1291 (and also the same tree-code) when generating the epilog code and 1292 when generating the code inside the loop. */ 1293 1294 if (orig_stmt) 1295 { 1296 /* This is a reduction pattern: get the vectype from the type of the 1297 reduction variable, and get the tree-code from orig_stmt. */ 1298 orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1)); 1299 vectype = get_vectype_for_scalar_type (TREE_TYPE (def)); 1300 vec_mode = TYPE_MODE (vectype); 1301 } 1302 else 1303 { 1304 /* Regular reduction: use the same vectype and tree-code as used for 1305 the vector code inside the loop can be used for the epilog code. */ 1306 orig_code = code; 1307 } 1308 1309 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code)) 1310 return false; 1311 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype); 1312 if (!reduc_optab) 1313 { 1314 if (vect_print_dump_info (REPORT_DETAILS)) 1315 fprintf (vect_dump, "no optab for reduction."); 1316 epilog_reduc_code = NUM_TREE_CODES; 1317 } 1318 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) 1319 { 1320 if (vect_print_dump_info (REPORT_DETAILS)) 1321 fprintf (vect_dump, "reduc op not supported by target."); 1322 epilog_reduc_code = NUM_TREE_CODES; 1323 } 1324 1325 if (!vec_stmt) /* transformation not required. */ 1326 { 1327 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; 1328 return true; 1329 } 1330 1331 /** Transform. **/ 1332 1333 if (vect_print_dump_info (REPORT_DETAILS)) 1334 fprintf (vect_dump, "transform reduction."); 1335 1336 /* Create the destination vector */ 1337 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1338 1339 /* Create the reduction-phi that defines the reduction-operand. */ 1340 new_phi = create_phi_node (vec_dest, loop->header); 1341 1342 /* Prepare the operand that is defined inside the loop body */ 1343 op = TREE_OPERAND (operation, 0); 1344 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL); 1345 if (op_type == binary_op) 1346 expr = build2 (code, vectype, loop_vec_def0, PHI_RESULT (new_phi)); 1347 else if (op_type == ternary_op) 1348 { 1349 op = TREE_OPERAND (operation, 1); 1350 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL); 1351 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1, 1352 PHI_RESULT (new_phi)); 1353 } 1354 1355 /* Create the vectorized operation that computes the partial results */ 1356 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, expr); 1357 new_temp = make_ssa_name (vec_dest, *vec_stmt); 1358 TREE_OPERAND (*vec_stmt, 0) = new_temp; 1359 vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 1360 1361 /* Finalize the reduction-phi (set it's arguments) and create the 1362 epilog reduction code. */ 1363 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi); 1364 return true; 1365} 1366 1367 1368/* Function vectorizable_assignment. 1369 1370 Check if STMT performs an assignment (copy) that can be vectorized. 1371 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1372 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1373 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1374 1375bool 1376vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1377{ 1378 tree vec_dest; 1379 tree scalar_dest; 1380 tree op; 1381 tree vec_oprnd; 1382 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1383 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1384 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1385 tree new_temp; 1386 tree def, def_stmt; 1387 enum vect_def_type dt; 1388 1389 /* Is vectorizable assignment? */ 1390 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 1391 return false; 1392 1393 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def); 1394 1395 if (TREE_CODE (stmt) != MODIFY_EXPR) 1396 return false; 1397 1398 scalar_dest = TREE_OPERAND (stmt, 0); 1399 if (TREE_CODE (scalar_dest) != SSA_NAME) 1400 return false; 1401 1402 op = TREE_OPERAND (stmt, 1); 1403 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) 1404 { 1405 if (vect_print_dump_info (REPORT_DETAILS)) 1406 fprintf (vect_dump, "use not simple."); 1407 return false; 1408 } 1409 1410 if (!vec_stmt) /* transformation not required. */ 1411 { 1412 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; 1413 return true; 1414 } 1415 1416 /** Transform. **/ 1417 if (vect_print_dump_info (REPORT_DETAILS)) 1418 fprintf (vect_dump, "transform assignment."); 1419 1420 /* Handle def. */ 1421 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1422 1423 /* Handle use. */ 1424 op = TREE_OPERAND (stmt, 1); 1425 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL); 1426 1427 /* Arguments are ready. create the new vector stmt. */ 1428 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd); 1429 new_temp = make_ssa_name (vec_dest, *vec_stmt); 1430 TREE_OPERAND (*vec_stmt, 0) = new_temp; 1431 vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 1432 1433 return true; 1434} 1435 1436 1437/* Function vect_min_worthwhile_factor. 1438 1439 For a loop where we could vectorize the operation indicated by CODE, 1440 return the minimum vectorization factor that makes it worthwhile 1441 to use generic vectors. */ 1442static int 1443vect_min_worthwhile_factor (enum tree_code code) 1444{ 1445 switch (code) 1446 { 1447 case PLUS_EXPR: 1448 case MINUS_EXPR: 1449 case NEGATE_EXPR: 1450 return 4; 1451 1452 case BIT_AND_EXPR: 1453 case BIT_IOR_EXPR: 1454 case BIT_XOR_EXPR: 1455 case BIT_NOT_EXPR: 1456 return 2; 1457 1458 default: 1459 return INT_MAX; 1460 } 1461} 1462 1463 1464/* Function vectorizable_operation. 1465 1466 Check if STMT performs a binary or unary operation that can be vectorized. 1467 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1468 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1469 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1470 1471bool 1472vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1473{ 1474 tree vec_dest; 1475 tree scalar_dest; 1476 tree operation; 1477 tree op0, op1 = NULL; 1478 tree vec_oprnd0, vec_oprnd1=NULL; 1479 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1480 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1481 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1482 int i; 1483 enum tree_code code; 1484 enum machine_mode vec_mode; 1485 tree new_temp; 1486 int op_type; 1487 tree op; 1488 optab optab; 1489 int icode; 1490 enum machine_mode optab_op2_mode; 1491 tree def, def_stmt; 1492 enum vect_def_type dt; 1493 1494 /* Is STMT a vectorizable binary/unary operation? */ 1495 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 1496 return false; 1497 1498 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def); 1499 1500 if (STMT_VINFO_LIVE_P (stmt_info)) 1501 { 1502 /* FORNOW: not yet supported. */ 1503 if (vect_print_dump_info (REPORT_DETAILS)) 1504 fprintf (vect_dump, "value used after loop."); 1505 return false; 1506 } 1507 1508 if (TREE_CODE (stmt) != MODIFY_EXPR) 1509 return false; 1510 1511 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME) 1512 return false; 1513 1514 operation = TREE_OPERAND (stmt, 1); 1515 code = TREE_CODE (operation); 1516 optab = optab_for_tree_code (code, vectype); 1517 1518 /* Support only unary or binary operations. */ 1519 op_type = TREE_CODE_LENGTH (code); 1520 if (op_type != unary_op && op_type != binary_op) 1521 { 1522 if (vect_print_dump_info (REPORT_DETAILS)) 1523 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type); 1524 return false; 1525 } 1526 1527 for (i = 0; i < op_type; i++) 1528 { 1529 op = TREE_OPERAND (operation, i); 1530 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) 1531 { 1532 if (vect_print_dump_info (REPORT_DETAILS)) 1533 fprintf (vect_dump, "use not simple."); 1534 return false; 1535 } 1536 } 1537 1538 /* Supportable by target? */ 1539 if (!optab) 1540 { 1541 if (vect_print_dump_info (REPORT_DETAILS)) 1542 fprintf (vect_dump, "no optab."); 1543 return false; 1544 } 1545 vec_mode = TYPE_MODE (vectype); 1546 icode = (int) optab->handlers[(int) vec_mode].insn_code; 1547 if (icode == CODE_FOR_nothing) 1548 { 1549 if (vect_print_dump_info (REPORT_DETAILS)) 1550 fprintf (vect_dump, "op not supported by target."); 1551 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD 1552 || LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1553 < vect_min_worthwhile_factor (code)) 1554 return false; 1555 if (vect_print_dump_info (REPORT_DETAILS)) 1556 fprintf (vect_dump, "proceeding using word mode."); 1557 } 1558 1559 /* Worthwhile without SIMD support? */ 1560 if (!VECTOR_MODE_P (TYPE_MODE (vectype)) 1561 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1562 < vect_min_worthwhile_factor (code)) 1563 { 1564 if (vect_print_dump_info (REPORT_DETAILS)) 1565 fprintf (vect_dump, "not worthwhile without SIMD support."); 1566 return false; 1567 } 1568 1569 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR) 1570 { 1571 /* FORNOW: not yet supported. */ 1572 if (!VECTOR_MODE_P (vec_mode)) 1573 return false; 1574 1575 /* Invariant argument is needed for a vector shift 1576 by a scalar shift operand. */ 1577 optab_op2_mode = insn_data[icode].operand[2].mode; 1578 if (! (VECTOR_MODE_P (optab_op2_mode) 1579 || dt == vect_constant_def 1580 || dt == vect_invariant_def)) 1581 { 1582 if (vect_print_dump_info (REPORT_DETAILS)) 1583 fprintf (vect_dump, "operand mode requires invariant argument."); 1584 return false; 1585 } 1586 } 1587 1588 if (!vec_stmt) /* transformation not required. */ 1589 { 1590 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; 1591 return true; 1592 } 1593 1594 /** Transform. **/ 1595 1596 if (vect_print_dump_info (REPORT_DETAILS)) 1597 fprintf (vect_dump, "transform binary/unary operation."); 1598 1599 /* Handle def. */ 1600 scalar_dest = TREE_OPERAND (stmt, 0); 1601 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1602 1603 /* Handle uses. */ 1604 op0 = TREE_OPERAND (operation, 0); 1605 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); 1606 1607 if (op_type == binary_op) 1608 { 1609 op1 = TREE_OPERAND (operation, 1); 1610 1611 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR) 1612 { 1613 /* Vector shl and shr insn patterns can be defined with 1614 scalar operand 2 (shift operand). In this case, use 1615 constant or loop invariant op1 directly, without 1616 extending it to vector mode first. */ 1617 1618 optab_op2_mode = insn_data[icode].operand[2].mode; 1619 if (!VECTOR_MODE_P (optab_op2_mode)) 1620 { 1621 if (vect_print_dump_info (REPORT_DETAILS)) 1622 fprintf (vect_dump, "operand 1 using scalar mode."); 1623 vec_oprnd1 = op1; 1624 } 1625 } 1626 1627 if (!vec_oprnd1) 1628 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); 1629 } 1630 1631 /* Arguments are ready. create the new vector stmt. */ 1632 1633 if (op_type == binary_op) 1634 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 1635 build2 (code, vectype, vec_oprnd0, vec_oprnd1)); 1636 else 1637 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, 1638 build1 (code, vectype, vec_oprnd0)); 1639 new_temp = make_ssa_name (vec_dest, *vec_stmt); 1640 TREE_OPERAND (*vec_stmt, 0) = new_temp; 1641 vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 1642 1643 return true; 1644} 1645 1646 1647/* Function vectorizable_store. 1648 1649 Check if STMT defines a non scalar data-ref (array/pointer/structure) that 1650 can be vectorized. 1651 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1652 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1653 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1654 1655bool 1656vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1657{ 1658 tree scalar_dest; 1659 tree data_ref; 1660 tree op; 1661 tree vec_oprnd1; 1662 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1663 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 1664 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1665 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1666 enum machine_mode vec_mode; 1667 tree dummy; 1668 enum dr_alignment_support alignment_support_cheme; 1669 ssa_op_iter iter; 1670 tree def, def_stmt; 1671 enum vect_def_type dt; 1672 1673 /* Is vectorizable store? */ 1674 1675 if (TREE_CODE (stmt) != MODIFY_EXPR) 1676 return false; 1677 1678 scalar_dest = TREE_OPERAND (stmt, 0); 1679 if (TREE_CODE (scalar_dest) != ARRAY_REF 1680 && TREE_CODE (scalar_dest) != INDIRECT_REF) 1681 return false; 1682 1683 op = TREE_OPERAND (stmt, 1); 1684 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) 1685 { 1686 if (vect_print_dump_info (REPORT_DETAILS)) 1687 fprintf (vect_dump, "use not simple."); 1688 return false; 1689 } 1690 1691 vec_mode = TYPE_MODE (vectype); 1692 /* FORNOW. In some cases can vectorize even if data-type not supported 1693 (e.g. - array initialization with 0). */ 1694 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing) 1695 return false; 1696 1697 if (!STMT_VINFO_DATA_REF (stmt_info)) 1698 return false; 1699 1700 1701 if (!vec_stmt) /* transformation not required. */ 1702 { 1703 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; 1704 return true; 1705 } 1706 1707 /** Transform. **/ 1708 1709 if (vect_print_dump_info (REPORT_DETAILS)) 1710 fprintf (vect_dump, "transform store"); 1711 1712 alignment_support_cheme = vect_supportable_dr_alignment (dr); 1713 gcc_assert (alignment_support_cheme); 1714 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */ 1715 1716 /* Handle use - get the vectorized def from the defining stmt. */ 1717 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL); 1718 1719 /* Handle def. */ 1720 /* FORNOW: make sure the data reference is aligned. */ 1721 vect_align_data_ref (stmt); 1722 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); 1723 data_ref = build_fold_indirect_ref (data_ref); 1724 1725 /* Arguments are ready. create the new vector stmt. */ 1726 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1); 1727 vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 1728 1729 /* Copy the V_MAY_DEFS representing the aliasing of the original array 1730 element's definition to the vector's definition then update the 1731 defining statement. The original is being deleted so the same 1732 SSA_NAMEs can be used. */ 1733 copy_virtual_operands (*vec_stmt, stmt); 1734 1735 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF) 1736 { 1737 SSA_NAME_DEF_STMT (def) = *vec_stmt; 1738 1739 /* If this virtual def has a use outside the loop and a loop peel is 1740 performed then the def may be renamed by the peel. Mark it for 1741 renaming so the later use will also be renamed. */ 1742 mark_sym_for_renaming (SSA_NAME_VAR (def)); 1743 } 1744 1745 return true; 1746} 1747 1748 1749/* vectorizable_load. 1750 1751 Check if STMT reads a non scalar data-ref (array/pointer/structure) that 1752 can be vectorized. 1753 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 1754 stmt to replace it, put it in VEC_STMT, and insert it at BSI. 1755 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 1756 1757bool 1758vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 1759{ 1760 tree scalar_dest; 1761 tree vec_dest = NULL; 1762 tree data_ref = NULL; 1763 tree op; 1764 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1765 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 1766 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1767 tree new_temp; 1768 int mode; 1769 tree init_addr; 1770 tree new_stmt; 1771 tree dummy; 1772 basic_block new_bb; 1773 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1774 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1775 edge pe = loop_preheader_edge (loop); 1776 enum dr_alignment_support alignment_support_cheme; 1777 1778 /* Is vectorizable load? */ 1779 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 1780 return false; 1781 1782 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def); 1783 1784 if (STMT_VINFO_LIVE_P (stmt_info)) 1785 { 1786 /* FORNOW: not yet supported. */ 1787 if (vect_print_dump_info (REPORT_DETAILS)) 1788 fprintf (vect_dump, "value used after loop."); 1789 return false; 1790 } 1791 1792 if (TREE_CODE (stmt) != MODIFY_EXPR) 1793 return false; 1794 1795 scalar_dest = TREE_OPERAND (stmt, 0); 1796 if (TREE_CODE (scalar_dest) != SSA_NAME) 1797 return false; 1798 1799 op = TREE_OPERAND (stmt, 1); 1800 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF) 1801 return false; 1802 1803 if (!STMT_VINFO_DATA_REF (stmt_info)) 1804 return false; 1805 1806 mode = (int) TYPE_MODE (vectype); 1807 1808 /* FORNOW. In some cases can vectorize even if data-type not supported 1809 (e.g. - data copies). */ 1810 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing) 1811 { 1812 if (vect_print_dump_info (REPORT_DETAILS)) 1813 fprintf (vect_dump, "Aligned load, but unsupported type."); 1814 return false; 1815 } 1816 1817 if (!vec_stmt) /* transformation not required. */ 1818 { 1819 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; 1820 return true; 1821 } 1822 1823 /** Transform. **/ 1824 1825 if (vect_print_dump_info (REPORT_DETAILS)) 1826 fprintf (vect_dump, "transform load."); 1827 1828 alignment_support_cheme = vect_supportable_dr_alignment (dr); 1829 gcc_assert (alignment_support_cheme); 1830 1831 if (alignment_support_cheme == dr_aligned 1832 || alignment_support_cheme == dr_unaligned_supported) 1833 { 1834 /* Create: 1835 p = initial_addr; 1836 indx = 0; 1837 loop { 1838 vec_dest = *(p); 1839 indx = indx + 1; 1840 } 1841 */ 1842 1843 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1844 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); 1845 if (aligned_access_p (dr)) 1846 data_ref = build_fold_indirect_ref (data_ref); 1847 else 1848 { 1849 int mis = DR_MISALIGNMENT (dr); 1850 tree tmis = (mis == -1 ? size_zero_node : size_int (mis)); 1851 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT)); 1852 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis); 1853 } 1854 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); 1855 new_temp = make_ssa_name (vec_dest, new_stmt); 1856 TREE_OPERAND (new_stmt, 0) = new_temp; 1857 vect_finish_stmt_generation (stmt, new_stmt, bsi); 1858 copy_virtual_operands (new_stmt, stmt); 1859 } 1860 else if (alignment_support_cheme == dr_unaligned_software_pipeline) 1861 { 1862 /* Create: 1863 p1 = initial_addr; 1864 msq_init = *(floor(p1)) 1865 p2 = initial_addr + VS - 1; 1866 magic = have_builtin ? builtin_result : initial_address; 1867 indx = 0; 1868 loop { 1869 p2' = p2 + indx * vectype_size 1870 lsq = *(floor(p2')) 1871 vec_dest = realign_load (msq, lsq, magic) 1872 indx = indx + 1; 1873 msq = lsq; 1874 } 1875 */ 1876 1877 tree offset; 1878 tree magic; 1879 tree phi_stmt; 1880 tree msq_init; 1881 tree msq, lsq; 1882 tree dataref_ptr; 1883 tree params; 1884 1885 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */ 1886 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1887 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, 1888 &init_addr, true); 1889 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref); 1890 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); 1891 new_temp = make_ssa_name (vec_dest, new_stmt); 1892 TREE_OPERAND (new_stmt, 0) = new_temp; 1893 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); 1894 gcc_assert (!new_bb); 1895 msq_init = TREE_OPERAND (new_stmt, 0); 1896 copy_virtual_operands (new_stmt, stmt); 1897 update_vuses_to_preheader (new_stmt, loop); 1898 1899 1900 /* <2> Create lsq = *(floor(p2')) in the loop */ 1901 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); 1902 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1903 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false); 1904 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr); 1905 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); 1906 new_temp = make_ssa_name (vec_dest, new_stmt); 1907 TREE_OPERAND (new_stmt, 0) = new_temp; 1908 vect_finish_stmt_generation (stmt, new_stmt, bsi); 1909 lsq = TREE_OPERAND (new_stmt, 0); 1910 copy_virtual_operands (new_stmt, stmt); 1911 1912 1913 /* <3> */ 1914 if (targetm.vectorize.builtin_mask_for_load) 1915 { 1916 /* Create permutation mask, if required, in loop preheader. */ 1917 tree builtin_decl; 1918 params = build_tree_list (NULL_TREE, init_addr); 1919 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1920 builtin_decl = targetm.vectorize.builtin_mask_for_load (); 1921 new_stmt = build_function_call_expr (builtin_decl, params); 1922 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); 1923 new_temp = make_ssa_name (vec_dest, new_stmt); 1924 TREE_OPERAND (new_stmt, 0) = new_temp; 1925 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); 1926 gcc_assert (!new_bb); 1927 magic = TREE_OPERAND (new_stmt, 0); 1928 1929 /* The result of the CALL_EXPR to this builtin is determined from 1930 the value of the parameter and no global variables are touched 1931 which makes the builtin a "const" function. Requiring the 1932 builtin to have the "const" attribute makes it unnecessary 1933 to call mark_call_clobbered. */ 1934 gcc_assert (TREE_READONLY (builtin_decl)); 1935 } 1936 else 1937 { 1938 /* Use current address instead of init_addr for reduced reg pressure. 1939 */ 1940 magic = dataref_ptr; 1941 } 1942 1943 1944 /* <4> Create msq = phi <msq_init, lsq> in loop */ 1945 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1946 msq = make_ssa_name (vec_dest, NULL_TREE); 1947 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */ 1948 SSA_NAME_DEF_STMT (msq) = phi_stmt; 1949 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop)); 1950 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop)); 1951 1952 1953 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */ 1954 vec_dest = vect_create_destination_var (scalar_dest, vectype); 1955 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic); 1956 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); 1957 new_temp = make_ssa_name (vec_dest, new_stmt); 1958 TREE_OPERAND (new_stmt, 0) = new_temp; 1959 vect_finish_stmt_generation (stmt, new_stmt, bsi); 1960 } 1961 else 1962 gcc_unreachable (); 1963 1964 *vec_stmt = new_stmt; 1965 return true; 1966} 1967 1968 1969/* Function vectorizable_live_operation. 1970 1971 STMT computes a value that is used outside the loop. Check if 1972 it can be supported. */ 1973 1974bool 1975vectorizable_live_operation (tree stmt, 1976 block_stmt_iterator *bsi ATTRIBUTE_UNUSED, 1977 tree *vec_stmt ATTRIBUTE_UNUSED) 1978{ 1979 tree operation; 1980 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1981 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1982 int i; 1983 enum tree_code code; 1984 int op_type; 1985 tree op; 1986 tree def, def_stmt; 1987 enum vect_def_type dt; 1988 1989 if (!STMT_VINFO_LIVE_P (stmt_info)) 1990 return false; 1991 1992 if (TREE_CODE (stmt) != MODIFY_EXPR) 1993 return false; 1994 1995 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME) 1996 return false; 1997 1998 operation = TREE_OPERAND (stmt, 1); 1999 code = TREE_CODE (operation); 2000 2001 op_type = TREE_CODE_LENGTH (code); 2002 2003 /* FORNOW: support only if all uses are invariant. This means 2004 that the scalar operations can remain in place, unvectorized. 2005 The original last scalar value that they compute will be used. */ 2006 2007 for (i = 0; i < op_type; i++) 2008 { 2009 op = TREE_OPERAND (operation, i); 2010 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) 2011 { 2012 if (vect_print_dump_info (REPORT_DETAILS)) 2013 fprintf (vect_dump, "use not simple."); 2014 return false; 2015 } 2016 2017 if (dt != vect_invariant_def && dt != vect_constant_def) 2018 return false; 2019 } 2020 2021 /* No transformation is required for the cases we currently support. */ 2022 return true; 2023} 2024 2025 2026/* Function vect_is_simple_cond. 2027 2028 Input: 2029 LOOP - the loop that is being vectorized. 2030 COND - Condition that is checked for simple use. 2031 2032 Returns whether a COND can be vectorized. Checks whether 2033 condition operands are supportable using vec_is_simple_use. */ 2034 2035static bool 2036vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo) 2037{ 2038 tree lhs, rhs; 2039 tree def; 2040 enum vect_def_type dt; 2041 2042 if (!COMPARISON_CLASS_P (cond)) 2043 return false; 2044 2045 lhs = TREE_OPERAND (cond, 0); 2046 rhs = TREE_OPERAND (cond, 1); 2047 2048 if (TREE_CODE (lhs) == SSA_NAME) 2049 { 2050 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs); 2051 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt)) 2052 return false; 2053 } 2054 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST) 2055 return false; 2056 2057 if (TREE_CODE (rhs) == SSA_NAME) 2058 { 2059 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs); 2060 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt)) 2061 return false; 2062 } 2063 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST) 2064 return false; 2065 2066 return true; 2067} 2068 2069/* vectorizable_condition. 2070 2071 Check if STMT is conditional modify expression that can be vectorized. 2072 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 2073 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it 2074 at BSI. 2075 2076 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 2077 2078bool 2079vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) 2080{ 2081 tree scalar_dest = NULL_TREE; 2082 tree vec_dest = NULL_TREE; 2083 tree op = NULL_TREE; 2084 tree cond_expr, then_clause, else_clause; 2085 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2086 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2087 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause; 2088 tree vec_compare, vec_cond_expr; 2089 tree new_temp; 2090 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2091 enum machine_mode vec_mode; 2092 tree def; 2093 enum vect_def_type dt; 2094 2095 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 2096 return false; 2097 2098 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def); 2099 2100 if (STMT_VINFO_LIVE_P (stmt_info)) 2101 { 2102 /* FORNOW: not yet supported. */ 2103 if (vect_print_dump_info (REPORT_DETAILS)) 2104 fprintf (vect_dump, "value used after loop."); 2105 return false; 2106 } 2107 2108 if (TREE_CODE (stmt) != MODIFY_EXPR) 2109 return false; 2110 2111 op = TREE_OPERAND (stmt, 1); 2112 2113 if (TREE_CODE (op) != COND_EXPR) 2114 return false; 2115 2116 cond_expr = TREE_OPERAND (op, 0); 2117 then_clause = TREE_OPERAND (op, 1); 2118 else_clause = TREE_OPERAND (op, 2); 2119 2120 if (!vect_is_simple_cond (cond_expr, loop_vinfo)) 2121 return false; 2122 2123 /* We do not handle two different vector types for the condition 2124 and the values. */ 2125 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype)) 2126 return false; 2127 2128 if (TREE_CODE (then_clause) == SSA_NAME) 2129 { 2130 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause); 2131 if (!vect_is_simple_use (then_clause, loop_vinfo, 2132 &then_def_stmt, &def, &dt)) 2133 return false; 2134 } 2135 else if (TREE_CODE (then_clause) != INTEGER_CST 2136 && TREE_CODE (then_clause) != REAL_CST) 2137 return false; 2138 2139 if (TREE_CODE (else_clause) == SSA_NAME) 2140 { 2141 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause); 2142 if (!vect_is_simple_use (else_clause, loop_vinfo, 2143 &else_def_stmt, &def, &dt)) 2144 return false; 2145 } 2146 else if (TREE_CODE (else_clause) != INTEGER_CST 2147 && TREE_CODE (else_clause) != REAL_CST) 2148 return false; 2149 2150 2151 vec_mode = TYPE_MODE (vectype); 2152 2153 if (!vec_stmt) 2154 { 2155 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; 2156 return expand_vec_cond_expr_p (op, vec_mode); 2157 } 2158 2159 /* Transform */ 2160 2161 /* Handle def. */ 2162 scalar_dest = TREE_OPERAND (stmt, 0); 2163 vec_dest = vect_create_destination_var (scalar_dest, vectype); 2164 2165 /* Handle cond expr. */ 2166 vec_cond_lhs = 2167 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL); 2168 vec_cond_rhs = 2169 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL); 2170 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL); 2171 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL); 2172 2173 /* Arguments are ready. create the new vector stmt. */ 2174 vec_compare = build2 (TREE_CODE (cond_expr), vectype, 2175 vec_cond_lhs, vec_cond_rhs); 2176 vec_cond_expr = build3 (VEC_COND_EXPR, vectype, 2177 vec_compare, vec_then_clause, vec_else_clause); 2178 2179 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr); 2180 new_temp = make_ssa_name (vec_dest, *vec_stmt); 2181 TREE_OPERAND (*vec_stmt, 0) = new_temp; 2182 vect_finish_stmt_generation (stmt, *vec_stmt, bsi); 2183 2184 return true; 2185} 2186 2187/* Function vect_transform_stmt. 2188 2189 Create a vectorized stmt to replace STMT, and insert it at BSI. */ 2190 2191bool 2192vect_transform_stmt (tree stmt, block_stmt_iterator *bsi) 2193{ 2194 bool is_store = false; 2195 tree vec_stmt = NULL_TREE; 2196 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2197 tree orig_stmt_in_pattern; 2198 bool done; 2199 2200 if (STMT_VINFO_RELEVANT_P (stmt_info)) 2201 { 2202 switch (STMT_VINFO_TYPE (stmt_info)) 2203 { 2204 case op_vec_info_type: 2205 done = vectorizable_operation (stmt, bsi, &vec_stmt); 2206 gcc_assert (done); 2207 break; 2208 2209 case assignment_vec_info_type: 2210 done = vectorizable_assignment (stmt, bsi, &vec_stmt); 2211 gcc_assert (done); 2212 break; 2213 2214 case load_vec_info_type: 2215 done = vectorizable_load (stmt, bsi, &vec_stmt); 2216 gcc_assert (done); 2217 break; 2218 2219 case store_vec_info_type: 2220 done = vectorizable_store (stmt, bsi, &vec_stmt); 2221 gcc_assert (done); 2222 is_store = true; 2223 break; 2224 2225 case condition_vec_info_type: 2226 done = vectorizable_condition (stmt, bsi, &vec_stmt); 2227 gcc_assert (done); 2228 break; 2229 2230 default: 2231 if (vect_print_dump_info (REPORT_DETAILS)) 2232 fprintf (vect_dump, "stmt not supported."); 2233 gcc_unreachable (); 2234 } 2235 2236 gcc_assert (vec_stmt); 2237 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; 2238 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info); 2239 if (orig_stmt_in_pattern) 2240 { 2241 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern); 2242 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 2243 { 2244 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt); 2245 2246 /* STMT was inserted by the vectorizer to replace a computation 2247 idiom. ORIG_STMT_IN_PATTERN is a stmt in the original 2248 sequence that computed this idiom. We need to record a pointer 2249 to VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN. See more 2250 detail in the documentation of vect_pattern_recog. */ 2251 2252 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt; 2253 } 2254 } 2255 } 2256 2257 if (STMT_VINFO_LIVE_P (stmt_info)) 2258 { 2259 switch (STMT_VINFO_TYPE (stmt_info)) 2260 { 2261 case reduc_vec_info_type: 2262 done = vectorizable_reduction (stmt, bsi, &vec_stmt); 2263 gcc_assert (done); 2264 break; 2265 2266 default: 2267 done = vectorizable_live_operation (stmt, bsi, &vec_stmt); 2268 gcc_assert (done); 2269 } 2270 2271 if (vec_stmt) 2272 { 2273 gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info)); 2274 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; 2275 } 2276 } 2277 2278 return is_store; 2279} 2280 2281 2282/* This function builds ni_name = number of iterations loop executes 2283 on the loop preheader. */ 2284 2285static tree 2286vect_build_loop_niters (loop_vec_info loop_vinfo) 2287{ 2288 tree ni_name, stmt, var; 2289 edge pe; 2290 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2291 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); 2292 2293 var = create_tmp_var (TREE_TYPE (ni), "niters"); 2294 add_referenced_var (var); 2295 ni_name = force_gimple_operand (ni, &stmt, false, var); 2296 2297 pe = loop_preheader_edge (loop); 2298 if (stmt) 2299 { 2300 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt); 2301 gcc_assert (!new_bb); 2302 } 2303 2304 return ni_name; 2305} 2306 2307 2308/* This function generates the following statements: 2309 2310 ni_name = number of iterations loop executes 2311 ratio = ni_name / vf 2312 ratio_mult_vf_name = ratio * vf 2313 2314 and places them at the loop preheader edge. */ 2315 2316static void 2317vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, 2318 tree *ni_name_ptr, 2319 tree *ratio_mult_vf_name_ptr, 2320 tree *ratio_name_ptr) 2321{ 2322 2323 edge pe; 2324 basic_block new_bb; 2325 tree stmt, ni_name; 2326 tree var; 2327 tree ratio_name; 2328 tree ratio_mult_vf_name; 2329 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2330 tree ni = LOOP_VINFO_NITERS (loop_vinfo); 2331 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 2332 tree log_vf; 2333 2334 pe = loop_preheader_edge (loop); 2335 2336 /* Generate temporary variable that contains 2337 number of iterations loop executes. */ 2338 2339 ni_name = vect_build_loop_niters (loop_vinfo); 2340 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); 2341 2342 /* Create: ratio = ni >> log2(vf) */ 2343 2344 var = create_tmp_var (TREE_TYPE (ni), "bnd"); 2345 add_referenced_var (var); 2346 ratio_name = make_ssa_name (var, NULL_TREE); 2347 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name, 2348 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf)); 2349 SSA_NAME_DEF_STMT (ratio_name) = stmt; 2350 2351 pe = loop_preheader_edge (loop); 2352 new_bb = bsi_insert_on_edge_immediate (pe, stmt); 2353 gcc_assert (!new_bb); 2354 2355 /* Create: ratio_mult_vf = ratio << log2 (vf). */ 2356 2357 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); 2358 add_referenced_var (var); 2359 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE); 2360 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name, 2361 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf)); 2362 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt; 2363 2364 pe = loop_preheader_edge (loop); 2365 new_bb = bsi_insert_on_edge_immediate (pe, stmt); 2366 gcc_assert (!new_bb); 2367 2368 *ni_name_ptr = ni_name; 2369 *ratio_mult_vf_name_ptr = ratio_mult_vf_name; 2370 *ratio_name_ptr = ratio_name; 2371 2372 return; 2373} 2374 2375 2376/* Function update_vuses_to_preheader. 2377 2378 Input: 2379 STMT - a statement with potential VUSEs. 2380 LOOP - the loop whose preheader will contain STMT. 2381 2382 It's possible to vectorize a loop even though an SSA_NAME from a VUSE 2383 appears to be defined in a V_MAY_DEF in another statement in a loop. 2384 One such case is when the VUSE is at the dereference of a __restricted__ 2385 pointer in a load and the V_MAY_DEF is at the dereference of a different 2386 __restricted__ pointer in a store. Vectorization may result in 2387 copy_virtual_uses being called to copy the problematic VUSE to a new 2388 statement that is being inserted in the loop preheader. This procedure 2389 is called to change the SSA_NAME in the new statement's VUSE from the 2390 SSA_NAME updated in the loop to the related SSA_NAME available on the 2391 path entering the loop. 2392 2393 When this function is called, we have the following situation: 2394 2395 # vuse <name1> 2396 S1: vload 2397 do { 2398 # name1 = phi < name0 , name2> 2399 2400 # vuse <name1> 2401 S2: vload 2402 2403 # name2 = vdef <name1> 2404 S3: vstore 2405 2406 }while... 2407 2408 Stmt S1 was created in the loop preheader block as part of misaligned-load 2409 handling. This function fixes the name of the vuse of S1 from 'name1' to 2410 'name0'. */ 2411 2412static void 2413update_vuses_to_preheader (tree stmt, struct loop *loop) 2414{ 2415 basic_block header_bb = loop->header; 2416 edge preheader_e = loop_preheader_edge (loop); 2417 ssa_op_iter iter; 2418 use_operand_p use_p; 2419 2420 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE) 2421 { 2422 tree ssa_name = USE_FROM_PTR (use_p); 2423 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name); 2424 tree name_var = SSA_NAME_VAR (ssa_name); 2425 basic_block bb = bb_for_stmt (def_stmt); 2426 2427 /* For a use before any definitions, def_stmt is a NOP_EXPR. */ 2428 if (!IS_EMPTY_STMT (def_stmt) 2429 && flow_bb_inside_loop_p (loop, bb)) 2430 { 2431 /* If the block containing the statement defining the SSA_NAME 2432 is in the loop then it's necessary to find the definition 2433 outside the loop using the PHI nodes of the header. */ 2434 tree phi; 2435 bool updated = false; 2436 2437 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi)) 2438 { 2439 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var) 2440 { 2441 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx)); 2442 updated = true; 2443 break; 2444 } 2445 } 2446 gcc_assert (updated); 2447 } 2448 } 2449} 2450 2451 2452/* Function vect_update_ivs_after_vectorizer. 2453 2454 "Advance" the induction variables of LOOP to the value they should take 2455 after the execution of LOOP. This is currently necessary because the 2456 vectorizer does not handle induction variables that are used after the 2457 loop. Such a situation occurs when the last iterations of LOOP are 2458 peeled, because: 2459 1. We introduced new uses after LOOP for IVs that were not originally used 2460 after LOOP: the IVs of LOOP are now used by an epilog loop. 2461 2. LOOP is going to be vectorized; this means that it will iterate N/VF 2462 times, whereas the loop IVs should be bumped N times. 2463 2464 Input: 2465 - LOOP - a loop that is going to be vectorized. The last few iterations 2466 of LOOP were peeled. 2467 - NITERS - the number of iterations that LOOP executes (before it is 2468 vectorized). i.e, the number of times the ivs should be bumped. 2469 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path 2470 coming out from LOOP on which there are uses of the LOOP ivs 2471 (this is the path from LOOP->exit to epilog_loop->preheader). 2472 2473 The new definitions of the ivs are placed in LOOP->exit. 2474 The phi args associated with the edge UPDATE_E in the bb 2475 UPDATE_E->dest are updated accordingly. 2476 2477 Assumption 1: Like the rest of the vectorizer, this function assumes 2478 a single loop exit that has a single predecessor. 2479 2480 Assumption 2: The phi nodes in the LOOP header and in update_bb are 2481 organized in the same order. 2482 2483 Assumption 3: The access function of the ivs is simple enough (see 2484 vect_can_advance_ivs_p). This assumption will be relaxed in the future. 2485 2486 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path 2487 coming out of LOOP on which the ivs of LOOP are used (this is the path 2488 that leads to the epilog loop; other paths skip the epilog loop). This 2489 path starts with the edge UPDATE_E, and its destination (denoted update_bb) 2490 needs to have its phis updated. 2491 */ 2492 2493static void 2494vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, 2495 edge update_e) 2496{ 2497 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2498 basic_block exit_bb = loop->single_exit->dest; 2499 tree phi, phi1; 2500 basic_block update_bb = update_e->dest; 2501 2502 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */ 2503 2504 /* Make sure there exists a single-predecessor exit bb: */ 2505 gcc_assert (single_pred_p (exit_bb)); 2506 2507 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb); 2508 phi && phi1; 2509 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1)) 2510 { 2511 tree access_fn = NULL; 2512 tree evolution_part; 2513 tree init_expr; 2514 tree step_expr; 2515 tree var, stmt, ni, ni_name; 2516 block_stmt_iterator last_bsi; 2517 2518 if (vect_print_dump_info (REPORT_DETAILS)) 2519 { 2520 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: "); 2521 print_generic_expr (vect_dump, phi, TDF_SLIM); 2522 } 2523 2524 /* Skip virtual phi's. */ 2525 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) 2526 { 2527 if (vect_print_dump_info (REPORT_DETAILS)) 2528 fprintf (vect_dump, "virtual phi. skip."); 2529 continue; 2530 } 2531 2532 /* Skip reduction phis. */ 2533 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) 2534 { 2535 if (vect_print_dump_info (REPORT_DETAILS)) 2536 fprintf (vect_dump, "reduc phi. skip."); 2537 continue; 2538 } 2539 2540 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi)); 2541 gcc_assert (access_fn); 2542 evolution_part = 2543 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num)); 2544 gcc_assert (evolution_part != NULL_TREE); 2545 2546 /* FORNOW: We do not support IVs whose evolution function is a polynomial 2547 of degree >= 2 or exponential. */ 2548 gcc_assert (!tree_is_chrec (evolution_part)); 2549 2550 step_expr = evolution_part; 2551 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, 2552 loop->num)); 2553 2554 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr), 2555 build2 (MULT_EXPR, TREE_TYPE (niters), 2556 niters, step_expr), init_expr); 2557 2558 var = create_tmp_var (TREE_TYPE (init_expr), "tmp"); 2559 add_referenced_var (var); 2560 2561 ni_name = force_gimple_operand (ni, &stmt, false, var); 2562 2563 /* Insert stmt into exit_bb. */ 2564 last_bsi = bsi_last (exit_bb); 2565 if (stmt) 2566 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT); 2567 2568 /* Fix phi expressions in the successor bb. */ 2569 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name); 2570 } 2571} 2572 2573 2574/* Function vect_do_peeling_for_loop_bound 2575 2576 Peel the last iterations of the loop represented by LOOP_VINFO. 2577 The peeled iterations form a new epilog loop. Given that the loop now 2578 iterates NITERS times, the new epilog loop iterates 2579 NITERS % VECTORIZATION_FACTOR times. 2580 2581 The original loop will later be made to iterate 2582 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */ 2583 2584static void 2585vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, 2586 struct loops *loops) 2587{ 2588 tree ni_name, ratio_mult_vf_name; 2589 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2590 struct loop *new_loop; 2591 edge update_e; 2592 basic_block preheader; 2593 int loop_num; 2594 2595 if (vect_print_dump_info (REPORT_DETAILS)) 2596 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ==="); 2597 2598 initialize_original_copy_tables (); 2599 2600 /* Generate the following variables on the preheader of original loop: 2601 2602 ni_name = number of iteration the original loop executes 2603 ratio = ni_name / vf 2604 ratio_mult_vf_name = ratio * vf */ 2605 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, 2606 &ratio_mult_vf_name, ratio); 2607 2608 loop_num = loop->num; 2609 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit, 2610 ratio_mult_vf_name, ni_name, false); 2611 gcc_assert (new_loop); 2612 gcc_assert (loop_num == loop->num); 2613#ifdef ENABLE_CHECKING 2614 slpeel_verify_cfg_after_peeling (loop, new_loop); 2615#endif 2616 2617 /* A guard that controls whether the new_loop is to be executed or skipped 2618 is placed in LOOP->exit. LOOP->exit therefore has two successors - one 2619 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other 2620 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that 2621 is on the path where the LOOP IVs are used and need to be updated. */ 2622 2623 preheader = loop_preheader_edge (new_loop)->src; 2624 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest) 2625 update_e = EDGE_PRED (preheader, 0); 2626 else 2627 update_e = EDGE_PRED (preheader, 1); 2628 2629 /* Update IVs of original loop as if they were advanced 2630 by ratio_mult_vf_name steps. */ 2631 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e); 2632 2633 /* After peeling we have to reset scalar evolution analyzer. */ 2634 scev_reset (); 2635 2636 free_original_copy_tables (); 2637} 2638 2639 2640/* Function vect_gen_niters_for_prolog_loop 2641 2642 Set the number of iterations for the loop represented by LOOP_VINFO 2643 to the minimum between LOOP_NITERS (the original iteration count of the loop) 2644 and the misalignment of DR - the data reference recorded in 2645 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of 2646 this loop, the data reference DR will refer to an aligned location. 2647 2648 The following computation is generated: 2649 2650 If the misalignment of DR is known at compile time: 2651 addr_mis = int mis = DR_MISALIGNMENT (dr); 2652 Else, compute address misalignment in bytes: 2653 addr_mis = addr & (vectype_size - 1) 2654 2655 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) ) 2656 2657 (elem_size = element type size; an element is the scalar element 2658 whose type is the inner type of the vectype) */ 2659 2660static tree 2661vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) 2662{ 2663 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); 2664 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 2665 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2666 tree var, stmt; 2667 tree iters, iters_name; 2668 edge pe; 2669 basic_block new_bb; 2670 tree dr_stmt = DR_STMT (dr); 2671 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt); 2672 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2673 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT; 2674 tree niters_type = TREE_TYPE (loop_niters); 2675 2676 pe = loop_preheader_edge (loop); 2677 2678 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) 2679 { 2680 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); 2681 int element_size = vectype_align/vf; 2682 int elem_misalign = byte_misalign / element_size; 2683 2684 if (vect_print_dump_info (REPORT_DETAILS)) 2685 fprintf (vect_dump, "known alignment = %d.", byte_misalign); 2686 iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1)); 2687 } 2688 else 2689 { 2690 tree new_stmts = NULL_TREE; 2691 tree start_addr = 2692 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE); 2693 tree ptr_type = TREE_TYPE (start_addr); 2694 tree size = TYPE_SIZE (ptr_type); 2695 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1); 2696 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1); 2697 tree elem_size_log = 2698 build_int_cst (type, exact_log2 (vectype_align/vf)); 2699 tree vf_minus_1 = build_int_cst (type, vf - 1); 2700 tree vf_tree = build_int_cst (type, vf); 2701 tree byte_misalign; 2702 tree elem_misalign; 2703 2704 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts); 2705 gcc_assert (!new_bb); 2706 2707 /* Create: byte_misalign = addr & (vectype_size - 1) */ 2708 byte_misalign = 2709 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1); 2710 2711 /* Create: elem_misalign = byte_misalign / element_size */ 2712 elem_misalign = 2713 build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log); 2714 2715 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */ 2716 iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign); 2717 iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1); 2718 iters = fold_convert (niters_type, iters); 2719 } 2720 2721 /* Create: prolog_loop_niters = min (iters, loop_niters) */ 2722 /* If the loop bound is known at compile time we already verified that it is 2723 greater than vf; since the misalignment ('iters') is at most vf, there's 2724 no need to generate the MIN_EXPR in this case. */ 2725 if (TREE_CODE (loop_niters) != INTEGER_CST) 2726 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters); 2727 2728 if (vect_print_dump_info (REPORT_DETAILS)) 2729 { 2730 fprintf (vect_dump, "niters for prolog loop: "); 2731 print_generic_expr (vect_dump, iters, TDF_SLIM); 2732 } 2733 2734 var = create_tmp_var (niters_type, "prolog_loop_niters"); 2735 add_referenced_var (var); 2736 iters_name = force_gimple_operand (iters, &stmt, false, var); 2737 2738 /* Insert stmt on loop preheader edge. */ 2739 if (stmt) 2740 { 2741 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt); 2742 gcc_assert (!new_bb); 2743 } 2744 2745 return iters_name; 2746} 2747 2748 2749/* Function vect_update_init_of_dr 2750 2751 NITERS iterations were peeled from LOOP. DR represents a data reference 2752 in LOOP. This function updates the information recorded in DR to 2753 account for the fact that the first NITERS iterations had already been 2754 executed. Specifically, it updates the OFFSET field of DR. */ 2755 2756static void 2757vect_update_init_of_dr (struct data_reference *dr, tree niters) 2758{ 2759 tree offset = DR_OFFSET (dr); 2760 2761 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr)); 2762 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters); 2763 DR_OFFSET (dr) = offset; 2764} 2765 2766 2767/* Function vect_update_inits_of_drs 2768 2769 NITERS iterations were peeled from the loop represented by LOOP_VINFO. 2770 This function updates the information recorded for the data references in 2771 the loop to account for the fact that the first NITERS iterations had 2772 already been executed. Specifically, it updates the initial_condition of the 2773 access_function of all the data_references in the loop. */ 2774 2775static void 2776vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) 2777{ 2778 unsigned int i; 2779 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 2780 struct data_reference *dr; 2781 2782 if (vect_dump && (dump_flags & TDF_DETAILS)) 2783 fprintf (vect_dump, "=== vect_update_inits_of_dr ==="); 2784 2785 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 2786 vect_update_init_of_dr (dr, niters); 2787} 2788 2789 2790/* Function vect_do_peeling_for_alignment 2791 2792 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO. 2793 'niters' is set to the misalignment of one of the data references in the 2794 loop, thereby forcing it to refer to an aligned location at the beginning 2795 of the execution of this loop. The data reference for which we are 2796 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */ 2797 2798static void 2799vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops) 2800{ 2801 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2802 tree niters_of_prolog_loop, ni_name; 2803 tree n_iters; 2804 struct loop *new_loop; 2805 2806 if (vect_print_dump_info (REPORT_DETAILS)) 2807 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ==="); 2808 2809 initialize_original_copy_tables (); 2810 2811 ni_name = vect_build_loop_niters (loop_vinfo); 2812 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name); 2813 2814 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */ 2815 new_loop = 2816 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop), 2817 niters_of_prolog_loop, ni_name, true); 2818 gcc_assert (new_loop); 2819#ifdef ENABLE_CHECKING 2820 slpeel_verify_cfg_after_peeling (new_loop, loop); 2821#endif 2822 2823 /* Update number of times loop executes. */ 2824 n_iters = LOOP_VINFO_NITERS (loop_vinfo); 2825 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR, 2826 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop); 2827 2828 /* Update the init conditions of the access functions of all data refs. */ 2829 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop); 2830 2831 /* After peeling we have to reset scalar evolution analyzer. */ 2832 scev_reset (); 2833 2834 free_original_copy_tables (); 2835} 2836 2837 2838/* Function vect_create_cond_for_align_checks. 2839 2840 Create a conditional expression that represents the alignment checks for 2841 all of data references (array element references) whose alignment must be 2842 checked at runtime. 2843 2844 Input: 2845 LOOP_VINFO - two fields of the loop information are used. 2846 LOOP_VINFO_PTR_MASK is the mask used to check the alignment. 2847 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked. 2848 2849 Output: 2850 COND_EXPR_STMT_LIST - statements needed to construct the conditional 2851 expression. 2852 The returned value is the conditional expression to be used in the if 2853 statement that controls which version of the loop gets executed at runtime. 2854 2855 The algorithm makes two assumptions: 2856 1) The number of bytes "n" in a vector is a power of 2. 2857 2) An address "a" is aligned if a%n is zero and that this 2858 test can be done as a&(n-1) == 0. For example, for 16 2859 byte vectors the test is a&0xf == 0. */ 2860 2861static tree 2862vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, 2863 tree *cond_expr_stmt_list) 2864{ 2865 VEC(tree,heap) *may_misalign_stmts 2866 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); 2867 tree ref_stmt; 2868 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo); 2869 tree mask_cst; 2870 unsigned int i; 2871 tree psize; 2872 tree int_ptrsize_type; 2873 char tmp_name[20]; 2874 tree or_tmp_name = NULL_TREE; 2875 tree and_tmp, and_tmp_name, and_stmt; 2876 tree ptrsize_zero; 2877 2878 /* Check that mask is one less than a power of 2, i.e., mask is 2879 all zeros followed by all ones. */ 2880 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0)); 2881 2882 /* CHECKME: what is the best integer or unsigned type to use to hold a 2883 cast from a pointer value? */ 2884 psize = TYPE_SIZE (ptr_type_node); 2885 int_ptrsize_type 2886 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0); 2887 2888 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address 2889 of the first vector of the i'th data reference. */ 2890 2891 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++) 2892 { 2893 tree new_stmt_list = NULL_TREE; 2894 tree addr_base; 2895 tree addr_tmp, addr_tmp_name, addr_stmt; 2896 tree or_tmp, new_or_tmp_name, or_stmt; 2897 2898 /* create: addr_tmp = (int)(address_of_first_vector) */ 2899 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt, 2900 &new_stmt_list, 2901 NULL_TREE); 2902 2903 if (new_stmt_list != NULL_TREE) 2904 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list); 2905 2906 sprintf (tmp_name, "%s%d", "addr2int", i); 2907 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name); 2908 add_referenced_var (addr_tmp); 2909 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE); 2910 addr_stmt = fold_convert (int_ptrsize_type, addr_base); 2911 addr_stmt = build2 (MODIFY_EXPR, void_type_node, 2912 addr_tmp_name, addr_stmt); 2913 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt; 2914 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list); 2915 2916 /* The addresses are OR together. */ 2917 2918 if (or_tmp_name != NULL_TREE) 2919 { 2920 /* create: or_tmp = or_tmp | addr_tmp */ 2921 sprintf (tmp_name, "%s%d", "orptrs", i); 2922 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name); 2923 add_referenced_var (or_tmp); 2924 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE); 2925 or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name, 2926 build2 (BIT_IOR_EXPR, int_ptrsize_type, 2927 or_tmp_name, 2928 addr_tmp_name)); 2929 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt; 2930 append_to_statement_list_force (or_stmt, cond_expr_stmt_list); 2931 or_tmp_name = new_or_tmp_name; 2932 } 2933 else 2934 or_tmp_name = addr_tmp_name; 2935 2936 } /* end for i */ 2937 2938 mask_cst = build_int_cst (int_ptrsize_type, mask); 2939 2940 /* create: and_tmp = or_tmp & mask */ 2941 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" ); 2942 add_referenced_var (and_tmp); 2943 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE); 2944 2945 and_stmt = build2 (MODIFY_EXPR, void_type_node, 2946 and_tmp_name, 2947 build2 (BIT_AND_EXPR, int_ptrsize_type, 2948 or_tmp_name, mask_cst)); 2949 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt; 2950 append_to_statement_list_force (and_stmt, cond_expr_stmt_list); 2951 2952 /* Make and_tmp the left operand of the conditional test against zero. 2953 if and_tmp has a nonzero bit then some address is unaligned. */ 2954 ptrsize_zero = build_int_cst (int_ptrsize_type, 0); 2955 return build2 (EQ_EXPR, boolean_type_node, 2956 and_tmp_name, ptrsize_zero); 2957} 2958 2959 2960/* Function vect_transform_loop. 2961 2962 The analysis phase has determined that the loop is vectorizable. 2963 Vectorize the loop - created vectorized stmts to replace the scalar 2964 stmts in the loop, and update the loop exit condition. */ 2965 2966void 2967vect_transform_loop (loop_vec_info loop_vinfo, 2968 struct loops *loops ATTRIBUTE_UNUSED) 2969{ 2970 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2971 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 2972 int nbbs = loop->num_nodes; 2973 block_stmt_iterator si; 2974 int i; 2975 tree ratio = NULL; 2976 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 2977 bitmap_iterator bi; 2978 unsigned int j; 2979 2980 if (vect_print_dump_info (REPORT_DETAILS)) 2981 fprintf (vect_dump, "=== vec_transform_loop ==="); 2982 2983 /* If the loop has data references that may or may not be aligned then 2984 two versions of the loop need to be generated, one which is vectorized 2985 and one which isn't. A test is then generated to control which of the 2986 loops is executed. The test checks for the alignment of all of the 2987 data references that may or may not be aligned. */ 2988 2989 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))) 2990 { 2991 struct loop *nloop; 2992 tree cond_expr; 2993 tree cond_expr_stmt_list = NULL_TREE; 2994 basic_block condition_bb; 2995 block_stmt_iterator cond_exp_bsi; 2996 basic_block merge_bb; 2997 basic_block new_exit_bb; 2998 edge new_exit_e, e; 2999 tree orig_phi, new_phi, arg; 3000 3001 cond_expr = vect_create_cond_for_align_checks (loop_vinfo, 3002 &cond_expr_stmt_list); 3003 initialize_original_copy_tables (); 3004 nloop = loop_version (loops, loop, cond_expr, &condition_bb, true); 3005 free_original_copy_tables(); 3006 3007 /** Loop versioning violates an assumption we try to maintain during 3008 vectorization - that the loop exit block has a single predecessor. 3009 After versioning, the exit block of both loop versions is the same 3010 basic block (i.e. it has two predecessors). Just in order to simplify 3011 following transformations in the vectorizer, we fix this situation 3012 here by adding a new (empty) block on the exit-edge of the loop, 3013 with the proper loop-exit phis to maintain loop-closed-form. **/ 3014 3015 merge_bb = loop->single_exit->dest; 3016 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2); 3017 new_exit_bb = split_edge (loop->single_exit); 3018 add_bb_to_loop (new_exit_bb, loop->outer); 3019 new_exit_e = loop->single_exit; 3020 e = EDGE_SUCC (new_exit_bb, 0); 3021 3022 for (orig_phi = phi_nodes (merge_bb); orig_phi; 3023 orig_phi = PHI_CHAIN (orig_phi)) 3024 { 3025 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), 3026 new_exit_bb); 3027 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); 3028 add_phi_arg (new_phi, arg, new_exit_e); 3029 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi)); 3030 } 3031 3032 /** end loop-exit-fixes after versioning **/ 3033 3034 update_ssa (TODO_update_ssa); 3035 cond_exp_bsi = bsi_last (condition_bb); 3036 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT); 3037 } 3038 3039 /* CHECKME: we wouldn't need this if we called update_ssa once 3040 for all loops. */ 3041 bitmap_zero (vect_vnames_to_rename); 3042 3043 /* Peel the loop if there are data refs with unknown alignment. 3044 Only one data ref with unknown store is allowed. */ 3045 3046 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) 3047 vect_do_peeling_for_alignment (loop_vinfo, loops); 3048 3049 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a 3050 compile time constant), or it is a constant that doesn't divide by the 3051 vectorization factor, then an epilog loop needs to be created. 3052 We therefore duplicate the loop: the original loop will be vectorized, 3053 and will compute the first (n/VF) iterations. The second copy of the loop 3054 will remain scalar and will compute the remaining (n%VF) iterations. 3055 (VF is the vectorization factor). */ 3056 3057 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 3058 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 3059 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)) 3060 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops); 3061 else 3062 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), 3063 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor); 3064 3065 /* 1) Make sure the loop header has exactly two entries 3066 2) Make sure we have a preheader basic block. */ 3067 3068 gcc_assert (EDGE_COUNT (loop->header->preds) == 2); 3069 3070 loop_split_edge_with (loop_preheader_edge (loop), NULL); 3071 3072 3073 /* FORNOW: the vectorizer supports only loops which body consist 3074 of one basic block (header + empty latch). When the vectorizer will 3075 support more involved loop forms, the order by which the BBs are 3076 traversed need to be reconsidered. */ 3077 3078 for (i = 0; i < nbbs; i++) 3079 { 3080 basic_block bb = bbs[i]; 3081 3082 for (si = bsi_start (bb); !bsi_end_p (si);) 3083 { 3084 tree stmt = bsi_stmt (si); 3085 stmt_vec_info stmt_info; 3086 bool is_store; 3087 3088 if (vect_print_dump_info (REPORT_DETAILS)) 3089 { 3090 fprintf (vect_dump, "------>vectorizing statement: "); 3091 print_generic_expr (vect_dump, stmt, TDF_SLIM); 3092 } 3093 stmt_info = vinfo_for_stmt (stmt); 3094 gcc_assert (stmt_info); 3095 if (!STMT_VINFO_RELEVANT_P (stmt_info) 3096 && !STMT_VINFO_LIVE_P (stmt_info)) 3097 { 3098 bsi_next (&si); 3099 continue; 3100 } 3101 /* FORNOW: Verify that all stmts operate on the same number of 3102 units and no inner unrolling is necessary. */ 3103 gcc_assert 3104 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)) 3105 == (unsigned HOST_WIDE_INT) vectorization_factor); 3106 3107 /* -------- vectorize statement ------------ */ 3108 if (vect_print_dump_info (REPORT_DETAILS)) 3109 fprintf (vect_dump, "transform statement."); 3110 3111 is_store = vect_transform_stmt (stmt, &si); 3112 if (is_store) 3113 { 3114 /* Free the attached stmt_vec_info and remove the stmt. */ 3115 stmt_ann_t ann = stmt_ann (stmt); 3116 free (stmt_info); 3117 set_stmt_info (ann, NULL); 3118 bsi_remove (&si, true); 3119 continue; 3120 } 3121 3122 bsi_next (&si); 3123 } /* stmts in BB */ 3124 } /* BBs in loop */ 3125 3126 slpeel_make_loop_iterate_ntimes (loop, ratio); 3127 3128 EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi) 3129 mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j))); 3130 3131 /* The memory tags and pointers in vectorized statements need to 3132 have their SSA forms updated. FIXME, why can't this be delayed 3133 until all the loops have been transformed? */ 3134 update_ssa (TODO_update_ssa); 3135 3136 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)) 3137 fprintf (vect_dump, "LOOP VECTORIZED."); 3138} 3139