1/* Analysis Utilities for Loop Vectorization. 2 Copyright (C) 2006-2015 Free Software Foundation, Inc. 3 Contributed by Dorit Nuzman <dorit@il.ibm.com> 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 3, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING3. If not see 19<http://www.gnu.org/licenses/>. */ 20 21#include "config.h" 22#include "system.h" 23#include "coretypes.h" 24#include "tm.h" 25#include "hash-set.h" 26#include "machmode.h" 27#include "vec.h" 28#include "double-int.h" 29#include "input.h" 30#include "alias.h" 31#include "symtab.h" 32#include "wide-int.h" 33#include "inchash.h" 34#include "tree.h" 35#include "fold-const.h" 36#include "stor-layout.h" 37#include "target.h" 38#include "predict.h" 39#include "hard-reg-set.h" 40#include "function.h" 41#include "dominance.h" 42#include "basic-block.h" 43#include "gimple-pretty-print.h" 44#include "tree-ssa-alias.h" 45#include "internal-fn.h" 46#include "tree-eh.h" 47#include "gimple-expr.h" 48#include "is-a.h" 49#include "gimple.h" 50#include "gimplify.h" 51#include "gimple-iterator.h" 52#include "gimple-ssa.h" 53#include "tree-phinodes.h" 54#include "ssa-iterators.h" 55#include "stringpool.h" 56#include "tree-ssanames.h" 57#include "cfgloop.h" 58#include "hashtab.h" 59#include "rtl.h" 60#include "flags.h" 61#include "statistics.h" 62#include "real.h" 63#include "fixed-value.h" 64#include "insn-config.h" 65#include "expmed.h" 66#include "dojump.h" 67#include "explow.h" 68#include "calls.h" 69#include "emit-rtl.h" 70#include "varasm.h" 71#include "stmt.h" 72#include "expr.h" 73#include "insn-codes.h" 74#include "optabs.h" 75#include "params.h" 76#include "tree-data-ref.h" 77#include "tree-vectorizer.h" 78#include "recog.h" /* FIXME: for insn_data */ 79#include "diagnostic-core.h" 80#include "dumpfile.h" 81#include "builtins.h" 82 83/* Pattern recognition functions */ 84static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *, 85 tree *); 86static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *, 87 tree *); 88static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *, 89 tree *); 90static gimple vect_recog_sad_pattern (vec<gimple> *, tree *, 91 tree *); 92static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *); 93static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *, 94 tree *); 95static gimple vect_recog_widen_shift_pattern (vec<gimple> *, 96 tree *, tree *); 97static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *); 98static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *, 99 tree *, tree *); 100static gimple vect_recog_divmod_pattern (vec<gimple> *, 101 tree *, tree *); 102static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *, 103 tree *, tree *); 104static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *); 105static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { 106 vect_recog_widen_mult_pattern, 107 vect_recog_widen_sum_pattern, 108 vect_recog_dot_prod_pattern, 109 vect_recog_sad_pattern, 110 vect_recog_pow_pattern, 111 vect_recog_widen_shift_pattern, 112 vect_recog_over_widening_pattern, 113 vect_recog_rotate_pattern, 114 vect_recog_vector_vector_shift_pattern, 115 vect_recog_divmod_pattern, 116 vect_recog_mixed_size_cond_pattern, 117 vect_recog_bool_pattern}; 118 119static inline void 120append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt) 121{ 122 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), 123 stmt); 124} 125 126static inline void 127new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt) 128{ 129 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL; 130 append_pattern_def_seq (stmt_info, stmt); 131} 132 133/* Check whether STMT2 is in the same loop or basic block as STMT1. 134 Which of the two applies depends on whether we're currently doing 135 loop-based or basic-block-based vectorization, as determined by 136 the vinfo_for_stmt for STMT1 (which must be defined). 137 138 If this returns true, vinfo_for_stmt for STMT2 is guaranteed 139 to be defined as well. */ 140 141static bool 142vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2) 143{ 144 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1); 145 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 146 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); 147 148 if (!gimple_bb (stmt2)) 149 return false; 150 151 if (loop_vinfo) 152 { 153 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 154 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2))) 155 return false; 156 } 157 else 158 { 159 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo) 160 || gimple_code (stmt2) == GIMPLE_PHI) 161 return false; 162 } 163 164 gcc_assert (vinfo_for_stmt (stmt2)); 165 return true; 166} 167 168/* If the LHS of DEF_STMT has a single use, and that statement is 169 in the same loop or basic block, return it. */ 170 171static gimple 172vect_single_imm_use (gimple def_stmt) 173{ 174 tree lhs = gimple_assign_lhs (def_stmt); 175 use_operand_p use_p; 176 gimple use_stmt; 177 178 if (!single_imm_use (lhs, &use_p, &use_stmt)) 179 return NULL; 180 181 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt)) 182 return NULL; 183 184 return use_stmt; 185} 186 187/* Check whether NAME, an ssa-name used in USE_STMT, 188 is a result of a type promotion, such that: 189 DEF_STMT: NAME = NOP (name0) 190 If CHECK_SIGN is TRUE, check that either both types are signed or both are 191 unsigned. */ 192 193static bool 194type_conversion_p (tree name, gimple use_stmt, bool check_sign, 195 tree *orig_type, gimple *def_stmt, bool *promotion) 196{ 197 tree dummy; 198 gimple dummy_gimple; 199 loop_vec_info loop_vinfo; 200 stmt_vec_info stmt_vinfo; 201 tree type = TREE_TYPE (name); 202 tree oprnd0; 203 enum vect_def_type dt; 204 tree def; 205 bb_vec_info bb_vinfo; 206 207 stmt_vinfo = vinfo_for_stmt (use_stmt); 208 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 209 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); 210 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt, 211 &def, &dt)) 212 return false; 213 214 if (dt != vect_internal_def 215 && dt != vect_external_def && dt != vect_constant_def) 216 return false; 217 218 if (!*def_stmt) 219 return false; 220 221 if (!is_gimple_assign (*def_stmt)) 222 return false; 223 224 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt))) 225 return false; 226 227 oprnd0 = gimple_assign_rhs1 (*def_stmt); 228 229 *orig_type = TREE_TYPE (oprnd0); 230 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type) 231 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign)) 232 return false; 233 234 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2)) 235 *promotion = true; 236 else 237 *promotion = false; 238 239 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo, 240 bb_vinfo, &dummy_gimple, &dummy, &dt)) 241 return false; 242 243 return true; 244} 245 246/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT 247 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */ 248 249static tree 250vect_recog_temp_ssa_var (tree type, gimple stmt) 251{ 252 return make_temp_ssa_name (type, stmt, "patt"); 253} 254 255/* Function vect_recog_dot_prod_pattern 256 257 Try to find the following pattern: 258 259 type x_t, y_t; 260 TYPE1 prod; 261 TYPE2 sum = init; 262 loop: 263 sum_0 = phi <init, sum_1> 264 S1 x_t = ... 265 S2 y_t = ... 266 S3 x_T = (TYPE1) x_t; 267 S4 y_T = (TYPE1) y_t; 268 S5 prod = x_T * y_T; 269 [S6 prod = (TYPE2) prod; #optional] 270 S7 sum_1 = prod + sum_0; 271 272 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the 273 same size of 'TYPE1' or bigger. This is a special case of a reduction 274 computation. 275 276 Input: 277 278 * STMTS: Contains a stmt from which the pattern search begins. In the 279 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7} 280 will be detected. 281 282 Output: 283 284 * TYPE_IN: The type of the input arguments to the pattern. 285 286 * TYPE_OUT: The type of the output of this pattern. 287 288 * Return value: A new stmt that will be used to replace the sequence of 289 stmts that constitute the pattern. In this case it will be: 290 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0> 291 292 Note: The dot-prod idiom is a widening reduction pattern that is 293 vectorized without preserving all the intermediate results. It 294 produces only N/2 (widened) results (by summing up pairs of 295 intermediate results) rather than all N results. Therefore, we 296 cannot allow this pattern when we want to get all the results and in 297 the correct order (as is the case when this computation is in an 298 inner-loop nested in an outer-loop that us being vectorized). */ 299 300static gimple 301vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in, 302 tree *type_out) 303{ 304 gimple stmt, last_stmt = (*stmts)[0]; 305 tree oprnd0, oprnd1; 306 tree oprnd00, oprnd01; 307 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 308 tree type, half_type; 309 gimple pattern_stmt; 310 tree prod_type; 311 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 312 struct loop *loop; 313 tree var; 314 bool promotion; 315 316 if (!loop_info) 317 return NULL; 318 319 loop = LOOP_VINFO_LOOP (loop_info); 320 321 if (!is_gimple_assign (last_stmt)) 322 return NULL; 323 324 type = gimple_expr_type (last_stmt); 325 326 /* Look for the following pattern 327 DX = (TYPE1) X; 328 DY = (TYPE1) Y; 329 DPROD = DX * DY; 330 DDPROD = (TYPE2) DPROD; 331 sum_1 = DDPROD + sum_0; 332 In which 333 - DX is double the size of X 334 - DY is double the size of Y 335 - DX, DY, DPROD all have the same type 336 - sum is the same size of DPROD or bigger 337 - sum has been recognized as a reduction variable. 338 339 This is equivalent to: 340 DPROD = X w* Y; #widen mult 341 sum_1 = DPROD w+ sum_0; #widen summation 342 or 343 DPROD = X w* Y; #widen mult 344 sum_1 = DPROD + sum_0; #summation 345 */ 346 347 /* Starting from LAST_STMT, follow the defs of its uses in search 348 of the above pattern. */ 349 350 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) 351 return NULL; 352 353 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 354 { 355 /* Has been detected as widening-summation? */ 356 357 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 358 type = gimple_expr_type (stmt); 359 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR) 360 return NULL; 361 oprnd0 = gimple_assign_rhs1 (stmt); 362 oprnd1 = gimple_assign_rhs2 (stmt); 363 half_type = TREE_TYPE (oprnd0); 364 } 365 else 366 { 367 gimple def_stmt; 368 369 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) 370 return NULL; 371 oprnd0 = gimple_assign_rhs1 (last_stmt); 372 oprnd1 = gimple_assign_rhs2 (last_stmt); 373 if (!types_compatible_p (TREE_TYPE (oprnd0), type) 374 || !types_compatible_p (TREE_TYPE (oprnd1), type)) 375 return NULL; 376 stmt = last_stmt; 377 378 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt, 379 &promotion) 380 && promotion) 381 { 382 stmt = def_stmt; 383 oprnd0 = gimple_assign_rhs1 (stmt); 384 } 385 else 386 half_type = type; 387 } 388 389 /* So far so good. Since last_stmt was detected as a (summation) reduction, 390 we know that oprnd1 is the reduction variable (defined by a loop-header 391 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. 392 Left to check that oprnd0 is defined by a (widen_)mult_expr */ 393 if (TREE_CODE (oprnd0) != SSA_NAME) 394 return NULL; 395 396 prod_type = half_type; 397 stmt = SSA_NAME_DEF_STMT (oprnd0); 398 399 /* It could not be the dot_prod pattern if the stmt is outside the loop. */ 400 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt))) 401 return NULL; 402 403 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi 404 inside the loop (in case we are analyzing an outer-loop). */ 405 if (!is_gimple_assign (stmt)) 406 return NULL; 407 stmt_vinfo = vinfo_for_stmt (stmt); 408 gcc_assert (stmt_vinfo); 409 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) 410 return NULL; 411 if (gimple_assign_rhs_code (stmt) != MULT_EXPR) 412 return NULL; 413 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 414 { 415 /* Has been detected as a widening multiplication? */ 416 417 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 418 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR) 419 return NULL; 420 stmt_vinfo = vinfo_for_stmt (stmt); 421 gcc_assert (stmt_vinfo); 422 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def); 423 oprnd00 = gimple_assign_rhs1 (stmt); 424 oprnd01 = gimple_assign_rhs2 (stmt); 425 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt)) 426 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo); 427 } 428 else 429 { 430 tree half_type0, half_type1; 431 gimple def_stmt; 432 tree oprnd0, oprnd1; 433 434 oprnd0 = gimple_assign_rhs1 (stmt); 435 oprnd1 = gimple_assign_rhs2 (stmt); 436 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) 437 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) 438 return NULL; 439 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt, 440 &promotion) 441 || !promotion) 442 return NULL; 443 oprnd00 = gimple_assign_rhs1 (def_stmt); 444 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt, 445 &promotion) 446 || !promotion) 447 return NULL; 448 oprnd01 = gimple_assign_rhs1 (def_stmt); 449 if (!types_compatible_p (half_type0, half_type1)) 450 return NULL; 451 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2) 452 return NULL; 453 } 454 455 half_type = TREE_TYPE (oprnd00); 456 *type_in = half_type; 457 *type_out = type; 458 459 /* Pattern detected. Create a stmt to be used to replace the pattern: */ 460 var = vect_recog_temp_ssa_var (type, NULL); 461 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR, 462 oprnd00, oprnd01, oprnd1); 463 464 if (dump_enabled_p ()) 465 { 466 dump_printf_loc (MSG_NOTE, vect_location, 467 "vect_recog_dot_prod_pattern: detected: "); 468 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 469 dump_printf (MSG_NOTE, "\n"); 470 } 471 472 /* We don't allow changing the order of the computation in the inner-loop 473 when doing outer-loop vectorization. */ 474 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); 475 476 return pattern_stmt; 477} 478 479 480/* Function vect_recog_sad_pattern 481 482 Try to find the following Sum of Absolute Difference (SAD) pattern: 483 484 type x_t, y_t; 485 signed TYPE1 diff, abs_diff; 486 TYPE2 sum = init; 487 loop: 488 sum_0 = phi <init, sum_1> 489 S1 x_t = ... 490 S2 y_t = ... 491 S3 x_T = (TYPE1) x_t; 492 S4 y_T = (TYPE1) y_t; 493 S5 diff = x_T - y_T; 494 S6 abs_diff = ABS_EXPR <diff>; 495 [S7 abs_diff = (TYPE2) abs_diff; #optional] 496 S8 sum_1 = abs_diff + sum_0; 497 498 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the 499 same size of 'TYPE1' or bigger. This is a special case of a reduction 500 computation. 501 502 Input: 503 504 * STMTS: Contains a stmt from which the pattern search begins. In the 505 example, when this function is called with S8, the pattern 506 {S3,S4,S5,S6,S7,S8} will be detected. 507 508 Output: 509 510 * TYPE_IN: The type of the input arguments to the pattern. 511 512 * TYPE_OUT: The type of the output of this pattern. 513 514 * Return value: A new stmt that will be used to replace the sequence of 515 stmts that constitute the pattern. In this case it will be: 516 SAD_EXPR <x_t, y_t, sum_0> 517 */ 518 519static gimple 520vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in, 521 tree *type_out) 522{ 523 gimple last_stmt = (*stmts)[0]; 524 tree sad_oprnd0, sad_oprnd1; 525 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 526 tree half_type; 527 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 528 struct loop *loop; 529 bool promotion; 530 531 if (!loop_info) 532 return NULL; 533 534 loop = LOOP_VINFO_LOOP (loop_info); 535 536 if (!is_gimple_assign (last_stmt)) 537 return NULL; 538 539 tree sum_type = gimple_expr_type (last_stmt); 540 541 /* Look for the following pattern 542 DX = (TYPE1) X; 543 DY = (TYPE1) Y; 544 DDIFF = DX - DY; 545 DAD = ABS_EXPR <DDIFF>; 546 DDPROD = (TYPE2) DPROD; 547 sum_1 = DAD + sum_0; 548 In which 549 - DX is at least double the size of X 550 - DY is at least double the size of Y 551 - DX, DY, DDIFF, DAD all have the same type 552 - sum is the same size of DAD or bigger 553 - sum has been recognized as a reduction variable. 554 555 This is equivalent to: 556 DDIFF = X w- Y; #widen sub 557 DAD = ABS_EXPR <DDIFF>; 558 sum_1 = DAD w+ sum_0; #widen summation 559 or 560 DDIFF = X w- Y; #widen sub 561 DAD = ABS_EXPR <DDIFF>; 562 sum_1 = DAD + sum_0; #summation 563 */ 564 565 /* Starting from LAST_STMT, follow the defs of its uses in search 566 of the above pattern. */ 567 568 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) 569 return NULL; 570 571 tree plus_oprnd0, plus_oprnd1; 572 573 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 574 { 575 /* Has been detected as widening-summation? */ 576 577 gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 578 sum_type = gimple_expr_type (stmt); 579 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR) 580 return NULL; 581 plus_oprnd0 = gimple_assign_rhs1 (stmt); 582 plus_oprnd1 = gimple_assign_rhs2 (stmt); 583 half_type = TREE_TYPE (plus_oprnd0); 584 } 585 else 586 { 587 gimple def_stmt; 588 589 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) 590 return NULL; 591 plus_oprnd0 = gimple_assign_rhs1 (last_stmt); 592 plus_oprnd1 = gimple_assign_rhs2 (last_stmt); 593 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type) 594 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type)) 595 return NULL; 596 597 /* The type conversion could be promotion, demotion, 598 or just signed -> unsigned. */ 599 if (type_conversion_p (plus_oprnd0, last_stmt, false, 600 &half_type, &def_stmt, &promotion)) 601 plus_oprnd0 = gimple_assign_rhs1 (def_stmt); 602 else 603 half_type = sum_type; 604 } 605 606 /* So far so good. Since last_stmt was detected as a (summation) reduction, 607 we know that plus_oprnd1 is the reduction variable (defined by a loop-header 608 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body. 609 Then check that plus_oprnd0 is defined by an abs_expr. */ 610 611 if (TREE_CODE (plus_oprnd0) != SSA_NAME) 612 return NULL; 613 614 tree abs_type = half_type; 615 gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0); 616 617 /* It could not be the sad pattern if the abs_stmt is outside the loop. */ 618 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt))) 619 return NULL; 620 621 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi 622 inside the loop (in case we are analyzing an outer-loop). */ 623 if (!is_gimple_assign (abs_stmt)) 624 return NULL; 625 626 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt); 627 gcc_assert (abs_stmt_vinfo); 628 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def) 629 return NULL; 630 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR) 631 return NULL; 632 633 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt); 634 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type)) 635 return NULL; 636 if (TYPE_UNSIGNED (abs_type)) 637 return NULL; 638 639 /* We then detect if the operand of abs_expr is defined by a minus_expr. */ 640 641 if (TREE_CODE (abs_oprnd) != SSA_NAME) 642 return NULL; 643 644 gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd); 645 646 /* It could not be the sad pattern if the diff_stmt is outside the loop. */ 647 if (!gimple_bb (diff_stmt) 648 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt))) 649 return NULL; 650 651 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi 652 inside the loop (in case we are analyzing an outer-loop). */ 653 if (!is_gimple_assign (diff_stmt)) 654 return NULL; 655 656 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt); 657 gcc_assert (diff_stmt_vinfo); 658 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def) 659 return NULL; 660 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR) 661 return NULL; 662 663 tree half_type0, half_type1; 664 gimple def_stmt; 665 666 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt); 667 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt); 668 669 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type) 670 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type)) 671 return NULL; 672 if (!type_conversion_p (minus_oprnd0, diff_stmt, false, 673 &half_type0, &def_stmt, &promotion) 674 || !promotion) 675 return NULL; 676 sad_oprnd0 = gimple_assign_rhs1 (def_stmt); 677 678 if (!type_conversion_p (minus_oprnd1, diff_stmt, false, 679 &half_type1, &def_stmt, &promotion) 680 || !promotion) 681 return NULL; 682 sad_oprnd1 = gimple_assign_rhs1 (def_stmt); 683 684 if (!types_compatible_p (half_type0, half_type1)) 685 return NULL; 686 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2 687 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2) 688 return NULL; 689 690 *type_in = TREE_TYPE (sad_oprnd0); 691 *type_out = sum_type; 692 693 /* Pattern detected. Create a stmt to be used to replace the pattern: */ 694 tree var = vect_recog_temp_ssa_var (sum_type, NULL); 695 gimple pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0, 696 sad_oprnd1, plus_oprnd1); 697 698 if (dump_enabled_p ()) 699 { 700 dump_printf_loc (MSG_NOTE, vect_location, 701 "vect_recog_sad_pattern: detected: "); 702 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 703 dump_printf (MSG_NOTE, "\n"); 704 } 705 706 /* We don't allow changing the order of the computation in the inner-loop 707 when doing outer-loop vectorization. */ 708 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); 709 710 return pattern_stmt; 711} 712 713 714/* Handle widening operation by a constant. At the moment we support MULT_EXPR 715 and LSHIFT_EXPR. 716 717 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR 718 we check that CONST_OPRND is less or equal to the size of HALF_TYPE. 719 720 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than 721 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE) 722 that satisfies the above restrictions, we can perform a widening opeartion 723 from the intermediate type to TYPE and replace a_T = (TYPE) a_t; 724 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */ 725 726static bool 727vect_handle_widen_op_by_const (gimple stmt, enum tree_code code, 728 tree const_oprnd, tree *oprnd, 729 gimple *wstmt, tree type, 730 tree *half_type, gimple def_stmt) 731{ 732 tree new_type, new_oprnd; 733 734 if (code != MULT_EXPR && code != LSHIFT_EXPR) 735 return false; 736 737 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type)) 738 || (code == LSHIFT_EXPR 739 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type)) 740 != 1)) 741 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2)) 742 { 743 /* CONST_OPRND is a constant of HALF_TYPE. */ 744 *oprnd = gimple_assign_rhs1 (def_stmt); 745 return true; 746 } 747 748 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)) 749 return false; 750 751 if (!vect_same_loop_or_bb_p (stmt, def_stmt)) 752 return false; 753 754 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for 755 a type 2 times bigger than HALF_TYPE. */ 756 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, 757 TYPE_UNSIGNED (type)); 758 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type)) 759 || (code == LSHIFT_EXPR 760 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1)) 761 return false; 762 763 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */ 764 *oprnd = gimple_assign_rhs1 (def_stmt); 765 new_oprnd = make_ssa_name (new_type); 766 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd); 767 *oprnd = new_oprnd; 768 769 *half_type = new_type; 770 return true; 771} 772 773 774/* Function vect_recog_widen_mult_pattern 775 776 Try to find the following pattern: 777 778 type1 a_t; 779 type2 b_t; 780 TYPE a_T, b_T, prod_T; 781 782 S1 a_t = ; 783 S2 b_t = ; 784 S3 a_T = (TYPE) a_t; 785 S4 b_T = (TYPE) b_t; 786 S5 prod_T = a_T * b_T; 787 788 where type 'TYPE' is at least double the size of type 'type1' and 'type2'. 789 790 Also detect unsigned cases: 791 792 unsigned type1 a_t; 793 unsigned type2 b_t; 794 unsigned TYPE u_prod_T; 795 TYPE a_T, b_T, prod_T; 796 797 S1 a_t = ; 798 S2 b_t = ; 799 S3 a_T = (TYPE) a_t; 800 S4 b_T = (TYPE) b_t; 801 S5 prod_T = a_T * b_T; 802 S6 u_prod_T = (unsigned TYPE) prod_T; 803 804 and multiplication by constants: 805 806 type a_t; 807 TYPE a_T, prod_T; 808 809 S1 a_t = ; 810 S3 a_T = (TYPE) a_t; 811 S5 prod_T = a_T * CONST; 812 813 A special case of multiplication by constants is when 'TYPE' is 4 times 814 bigger than 'type', but CONST fits an intermediate type 2 times smaller 815 than 'TYPE'. In that case we create an additional pattern stmt for S3 816 to create a variable of the intermediate type, and perform widen-mult 817 on the intermediate type as well: 818 819 type a_t; 820 interm_type a_it; 821 TYPE a_T, prod_T, prod_T'; 822 823 S1 a_t = ; 824 S3 a_T = (TYPE) a_t; 825 '--> a_it = (interm_type) a_t; 826 S5 prod_T = a_T * CONST; 827 '--> prod_T' = a_it w* CONST; 828 829 Input/Output: 830 831 * STMTS: Contains a stmt from which the pattern search begins. In the 832 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)} 833 is detected. In case of unsigned widen-mult, the original stmt (S5) is 834 replaced with S6 in STMTS. In case of multiplication by a constant 835 of an intermediate type (the last case above), STMTS also contains S3 836 (inserted before S5). 837 838 Output: 839 840 * TYPE_IN: The type of the input arguments to the pattern. 841 842 * TYPE_OUT: The type of the output of this pattern. 843 844 * Return value: A new stmt that will be used to replace the sequence of 845 stmts that constitute the pattern. In this case it will be: 846 WIDEN_MULT <a_t, b_t> 847 If the result of WIDEN_MULT needs to be converted to a larger type, the 848 returned stmt will be this type conversion stmt. 849*/ 850 851static gimple 852vect_recog_widen_mult_pattern (vec<gimple> *stmts, 853 tree *type_in, tree *type_out) 854{ 855 gimple last_stmt = stmts->pop (); 856 gimple def_stmt0, def_stmt1; 857 tree oprnd0, oprnd1; 858 tree type, half_type0, half_type1; 859 gimple new_stmt = NULL, pattern_stmt = NULL; 860 tree vectype, vecitype; 861 tree var; 862 enum tree_code dummy_code; 863 int dummy_int; 864 vec<tree> dummy_vec; 865 bool op1_ok; 866 bool promotion; 867 868 if (!is_gimple_assign (last_stmt)) 869 return NULL; 870 871 type = gimple_expr_type (last_stmt); 872 873 /* Starting from LAST_STMT, follow the defs of its uses in search 874 of the above pattern. */ 875 876 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) 877 return NULL; 878 879 oprnd0 = gimple_assign_rhs1 (last_stmt); 880 oprnd1 = gimple_assign_rhs2 (last_stmt); 881 if (!types_compatible_p (TREE_TYPE (oprnd0), type) 882 || !types_compatible_p (TREE_TYPE (oprnd1), type)) 883 return NULL; 884 885 /* Check argument 0. */ 886 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, 887 &promotion) 888 || !promotion) 889 return NULL; 890 /* Check argument 1. */ 891 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1, 892 &def_stmt1, &promotion); 893 894 if (op1_ok && promotion) 895 { 896 oprnd0 = gimple_assign_rhs1 (def_stmt0); 897 oprnd1 = gimple_assign_rhs1 (def_stmt1); 898 } 899 else 900 { 901 if (TREE_CODE (oprnd1) == INTEGER_CST 902 && TREE_CODE (half_type0) == INTEGER_TYPE 903 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1, 904 &oprnd0, &new_stmt, type, 905 &half_type0, def_stmt0)) 906 { 907 half_type1 = half_type0; 908 oprnd1 = fold_convert (half_type1, oprnd1); 909 } 910 else 911 return NULL; 912 } 913 914 /* If the two arguments have different sizes, convert the one with 915 the smaller type into the larger type. */ 916 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1)) 917 { 918 /* If we already used up the single-stmt slot give up. */ 919 if (new_stmt) 920 return NULL; 921 922 tree* oprnd = NULL; 923 gimple def_stmt = NULL; 924 925 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1)) 926 { 927 def_stmt = def_stmt0; 928 half_type0 = half_type1; 929 oprnd = &oprnd0; 930 } 931 else 932 { 933 def_stmt = def_stmt1; 934 half_type1 = half_type0; 935 oprnd = &oprnd1; 936 } 937 938 tree old_oprnd = gimple_assign_rhs1 (def_stmt); 939 tree new_oprnd = make_ssa_name (half_type0); 940 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd); 941 *oprnd = new_oprnd; 942 } 943 944 /* Handle unsigned case. Look for 945 S6 u_prod_T = (unsigned TYPE) prod_T; 946 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */ 947 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) 948 { 949 gimple use_stmt; 950 tree use_lhs; 951 tree use_type; 952 953 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1)) 954 return NULL; 955 956 use_stmt = vect_single_imm_use (last_stmt); 957 if (!use_stmt || !is_gimple_assign (use_stmt) 958 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) 959 return NULL; 960 961 use_lhs = gimple_assign_lhs (use_stmt); 962 use_type = TREE_TYPE (use_lhs); 963 if (!INTEGRAL_TYPE_P (use_type) 964 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) 965 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) 966 return NULL; 967 968 type = use_type; 969 last_stmt = use_stmt; 970 } 971 972 if (!types_compatible_p (half_type0, half_type1)) 973 return NULL; 974 975 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT 976 to get an intermediate result of type ITYPE. In this case we need 977 to build a statement to convert this intermediate result to type TYPE. */ 978 tree itype = type; 979 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2) 980 itype = build_nonstandard_integer_type 981 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2, 982 TYPE_UNSIGNED (type)); 983 984 /* Pattern detected. */ 985 if (dump_enabled_p ()) 986 dump_printf_loc (MSG_NOTE, vect_location, 987 "vect_recog_widen_mult_pattern: detected:\n"); 988 989 /* Check target support */ 990 vectype = get_vectype_for_scalar_type (half_type0); 991 vecitype = get_vectype_for_scalar_type (itype); 992 if (!vectype 993 || !vecitype 994 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, 995 vecitype, vectype, 996 &dummy_code, &dummy_code, 997 &dummy_int, &dummy_vec)) 998 return NULL; 999 1000 *type_in = vectype; 1001 *type_out = get_vectype_for_scalar_type (type); 1002 1003 /* Pattern supported. Create a stmt to be used to replace the pattern: */ 1004 var = vect_recog_temp_ssa_var (itype, NULL); 1005 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1); 1006 1007 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 1008 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 1009 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); 1010 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; 1011 1012 /* If the original two operands have different sizes, we may need to convert 1013 the smaller one into the larget type. If this is the case, at this point 1014 the new stmt is already built. */ 1015 if (new_stmt) 1016 { 1017 append_pattern_def_seq (stmt_vinfo, new_stmt); 1018 stmt_vec_info new_stmt_info 1019 = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo); 1020 set_vinfo_for_stmt (new_stmt, new_stmt_info); 1021 STMT_VINFO_VECTYPE (new_stmt_info) = vectype; 1022 } 1023 1024 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert 1025 the result of the widen-mult operation into type TYPE. */ 1026 if (itype != type) 1027 { 1028 append_pattern_def_seq (stmt_vinfo, pattern_stmt); 1029 stmt_vec_info pattern_stmt_info 1030 = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo); 1031 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); 1032 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype; 1033 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL), 1034 NOP_EXPR, 1035 gimple_assign_lhs (pattern_stmt)); 1036 } 1037 1038 if (dump_enabled_p ()) 1039 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0); 1040 1041 stmts->safe_push (last_stmt); 1042 return pattern_stmt; 1043} 1044 1045 1046/* Function vect_recog_pow_pattern 1047 1048 Try to find the following pattern: 1049 1050 x = POW (y, N); 1051 1052 with POW being one of pow, powf, powi, powif and N being 1053 either 2 or 0.5. 1054 1055 Input: 1056 1057 * LAST_STMT: A stmt from which the pattern search begins. 1058 1059 Output: 1060 1061 * TYPE_IN: The type of the input arguments to the pattern. 1062 1063 * TYPE_OUT: The type of the output of this pattern. 1064 1065 * Return value: A new stmt that will be used to replace the sequence of 1066 stmts that constitute the pattern. In this case it will be: 1067 x = x * x 1068 or 1069 x = sqrt (x) 1070*/ 1071 1072static gimple 1073vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in, 1074 tree *type_out) 1075{ 1076 gimple last_stmt = (*stmts)[0]; 1077 tree fn, base, exp = NULL; 1078 gimple stmt; 1079 tree var; 1080 1081 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) 1082 return NULL; 1083 1084 fn = gimple_call_fndecl (last_stmt); 1085 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL) 1086 return NULL; 1087 1088 switch (DECL_FUNCTION_CODE (fn)) 1089 { 1090 case BUILT_IN_POWIF: 1091 case BUILT_IN_POWI: 1092 case BUILT_IN_POWF: 1093 case BUILT_IN_POW: 1094 base = gimple_call_arg (last_stmt, 0); 1095 exp = gimple_call_arg (last_stmt, 1); 1096 if (TREE_CODE (exp) != REAL_CST 1097 && TREE_CODE (exp) != INTEGER_CST) 1098 return NULL; 1099 break; 1100 1101 default: 1102 return NULL; 1103 } 1104 1105 /* We now have a pow or powi builtin function call with a constant 1106 exponent. */ 1107 1108 *type_out = NULL_TREE; 1109 1110 /* Catch squaring. */ 1111 if ((tree_fits_shwi_p (exp) 1112 && tree_to_shwi (exp) == 2) 1113 || (TREE_CODE (exp) == REAL_CST 1114 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2))) 1115 { 1116 *type_in = TREE_TYPE (base); 1117 1118 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); 1119 stmt = gimple_build_assign (var, MULT_EXPR, base, base); 1120 return stmt; 1121 } 1122 1123 /* Catch square root. */ 1124 if (TREE_CODE (exp) == REAL_CST 1125 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf)) 1126 { 1127 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT); 1128 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base)); 1129 if (*type_in) 1130 { 1131 gcall *stmt = gimple_build_call (newfn, 1, base); 1132 if (vectorizable_function (stmt, *type_in, *type_in) 1133 != NULL_TREE) 1134 { 1135 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt); 1136 gimple_call_set_lhs (stmt, var); 1137 return stmt; 1138 } 1139 } 1140 } 1141 1142 return NULL; 1143} 1144 1145 1146/* Function vect_recog_widen_sum_pattern 1147 1148 Try to find the following pattern: 1149 1150 type x_t; 1151 TYPE x_T, sum = init; 1152 loop: 1153 sum_0 = phi <init, sum_1> 1154 S1 x_t = *p; 1155 S2 x_T = (TYPE) x_t; 1156 S3 sum_1 = x_T + sum_0; 1157 1158 where type 'TYPE' is at least double the size of type 'type', i.e - we're 1159 summing elements of type 'type' into an accumulator of type 'TYPE'. This is 1160 a special case of a reduction computation. 1161 1162 Input: 1163 1164 * LAST_STMT: A stmt from which the pattern search begins. In the example, 1165 when this function is called with S3, the pattern {S2,S3} will be detected. 1166 1167 Output: 1168 1169 * TYPE_IN: The type of the input arguments to the pattern. 1170 1171 * TYPE_OUT: The type of the output of this pattern. 1172 1173 * Return value: A new stmt that will be used to replace the sequence of 1174 stmts that constitute the pattern. In this case it will be: 1175 WIDEN_SUM <x_t, sum_0> 1176 1177 Note: The widening-sum idiom is a widening reduction pattern that is 1178 vectorized without preserving all the intermediate results. It 1179 produces only N/2 (widened) results (by summing up pairs of 1180 intermediate results) rather than all N results. Therefore, we 1181 cannot allow this pattern when we want to get all the results and in 1182 the correct order (as is the case when this computation is in an 1183 inner-loop nested in an outer-loop that us being vectorized). */ 1184 1185static gimple 1186vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in, 1187 tree *type_out) 1188{ 1189 gimple stmt, last_stmt = (*stmts)[0]; 1190 tree oprnd0, oprnd1; 1191 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 1192 tree type, half_type; 1193 gimple pattern_stmt; 1194 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 1195 struct loop *loop; 1196 tree var; 1197 bool promotion; 1198 1199 if (!loop_info) 1200 return NULL; 1201 1202 loop = LOOP_VINFO_LOOP (loop_info); 1203 1204 if (!is_gimple_assign (last_stmt)) 1205 return NULL; 1206 1207 type = gimple_expr_type (last_stmt); 1208 1209 /* Look for the following pattern 1210 DX = (TYPE) X; 1211 sum_1 = DX + sum_0; 1212 In which DX is at least double the size of X, and sum_1 has been 1213 recognized as a reduction variable. 1214 */ 1215 1216 /* Starting from LAST_STMT, follow the defs of its uses in search 1217 of the above pattern. */ 1218 1219 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) 1220 return NULL; 1221 1222 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) 1223 return NULL; 1224 1225 oprnd0 = gimple_assign_rhs1 (last_stmt); 1226 oprnd1 = gimple_assign_rhs2 (last_stmt); 1227 if (!types_compatible_p (TREE_TYPE (oprnd0), type) 1228 || !types_compatible_p (TREE_TYPE (oprnd1), type)) 1229 return NULL; 1230 1231 /* So far so good. Since last_stmt was detected as a (summation) reduction, 1232 we know that oprnd1 is the reduction variable (defined by a loop-header 1233 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. 1234 Left to check that oprnd0 is defined by a cast from type 'type' to type 1235 'TYPE'. */ 1236 1237 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt, 1238 &promotion) 1239 || !promotion) 1240 return NULL; 1241 1242 oprnd0 = gimple_assign_rhs1 (stmt); 1243 *type_in = half_type; 1244 *type_out = type; 1245 1246 /* Pattern detected. Create a stmt to be used to replace the pattern: */ 1247 var = vect_recog_temp_ssa_var (type, NULL); 1248 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1); 1249 1250 if (dump_enabled_p ()) 1251 { 1252 dump_printf_loc (MSG_NOTE, vect_location, 1253 "vect_recog_widen_sum_pattern: detected: "); 1254 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 1255 dump_printf (MSG_NOTE, "\n"); 1256 } 1257 1258 /* We don't allow changing the order of the computation in the inner-loop 1259 when doing outer-loop vectorization. */ 1260 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); 1261 1262 return pattern_stmt; 1263} 1264 1265 1266/* Return TRUE if the operation in STMT can be performed on a smaller type. 1267 1268 Input: 1269 STMT - a statement to check. 1270 DEF - we support operations with two operands, one of which is constant. 1271 The other operand can be defined by a demotion operation, or by a 1272 previous statement in a sequence of over-promoted operations. In the 1273 later case DEF is used to replace that operand. (It is defined by a 1274 pattern statement we created for the previous statement in the 1275 sequence). 1276 1277 Input/output: 1278 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not 1279 NULL, it's the type of DEF. 1280 STMTS - additional pattern statements. If a pattern statement (type 1281 conversion) is created in this function, its original statement is 1282 added to STMTS. 1283 1284 Output: 1285 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new 1286 operands to use in the new pattern statement for STMT (will be created 1287 in vect_recog_over_widening_pattern ()). 1288 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern 1289 statements for STMT: the first one is a type promotion and the second 1290 one is the operation itself. We return the type promotion statement 1291 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of 1292 the second pattern statement. */ 1293 1294static bool 1295vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type, 1296 tree *op0, tree *op1, gimple *new_def_stmt, 1297 vec<gimple> *stmts) 1298{ 1299 enum tree_code code; 1300 tree const_oprnd, oprnd; 1301 tree interm_type = NULL_TREE, half_type, new_oprnd, type; 1302 gimple def_stmt, new_stmt; 1303 bool first = false; 1304 bool promotion; 1305 1306 *op0 = NULL_TREE; 1307 *op1 = NULL_TREE; 1308 *new_def_stmt = NULL; 1309 1310 if (!is_gimple_assign (stmt)) 1311 return false; 1312 1313 code = gimple_assign_rhs_code (stmt); 1314 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR 1315 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR) 1316 return false; 1317 1318 oprnd = gimple_assign_rhs1 (stmt); 1319 const_oprnd = gimple_assign_rhs2 (stmt); 1320 type = gimple_expr_type (stmt); 1321 1322 if (TREE_CODE (oprnd) != SSA_NAME 1323 || TREE_CODE (const_oprnd) != INTEGER_CST) 1324 return false; 1325 1326 /* If oprnd has other uses besides that in stmt we cannot mark it 1327 as being part of a pattern only. */ 1328 if (!has_single_use (oprnd)) 1329 return false; 1330 1331 /* If we are in the middle of a sequence, we use DEF from a previous 1332 statement. Otherwise, OPRND has to be a result of type promotion. */ 1333 if (*new_type) 1334 { 1335 half_type = *new_type; 1336 oprnd = def; 1337 } 1338 else 1339 { 1340 first = true; 1341 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt, 1342 &promotion) 1343 || !promotion 1344 || !vect_same_loop_or_bb_p (stmt, def_stmt)) 1345 return false; 1346 } 1347 1348 /* Can we perform the operation on a smaller type? */ 1349 switch (code) 1350 { 1351 case BIT_IOR_EXPR: 1352 case BIT_XOR_EXPR: 1353 case BIT_AND_EXPR: 1354 if (!int_fits_type_p (const_oprnd, half_type)) 1355 { 1356 /* HALF_TYPE is not enough. Try a bigger type if possible. */ 1357 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) 1358 return false; 1359 1360 interm_type = build_nonstandard_integer_type ( 1361 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); 1362 if (!int_fits_type_p (const_oprnd, interm_type)) 1363 return false; 1364 } 1365 1366 break; 1367 1368 case LSHIFT_EXPR: 1369 /* Try intermediate type - HALF_TYPE is not enough for sure. */ 1370 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) 1371 return false; 1372 1373 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size. 1374 (e.g., if the original value was char, the shift amount is at most 8 1375 if we want to use short). */ 1376 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1) 1377 return false; 1378 1379 interm_type = build_nonstandard_integer_type ( 1380 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); 1381 1382 if (!vect_supportable_shift (code, interm_type)) 1383 return false; 1384 1385 break; 1386 1387 case RSHIFT_EXPR: 1388 if (vect_supportable_shift (code, half_type)) 1389 break; 1390 1391 /* Try intermediate type - HALF_TYPE is not supported. */ 1392 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) 1393 return false; 1394 1395 interm_type = build_nonstandard_integer_type ( 1396 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); 1397 1398 if (!vect_supportable_shift (code, interm_type)) 1399 return false; 1400 1401 break; 1402 1403 default: 1404 gcc_unreachable (); 1405 } 1406 1407 /* There are four possible cases: 1408 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's 1409 the first statement in the sequence) 1410 a. The original, HALF_TYPE, is not enough - we replace the promotion 1411 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE. 1412 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original 1413 promotion. 1414 2. OPRND is defined by a pattern statement we created. 1415 a. Its type is not sufficient for the operation, we create a new stmt: 1416 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store 1417 this statement in NEW_DEF_STMT, and it is later put in 1418 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT. 1419 b. OPRND is good to use in the new statement. */ 1420 if (first) 1421 { 1422 if (interm_type) 1423 { 1424 /* Replace the original type conversion HALF_TYPE->TYPE with 1425 HALF_TYPE->INTERM_TYPE. */ 1426 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) 1427 { 1428 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); 1429 /* Check if the already created pattern stmt is what we need. */ 1430 if (!is_gimple_assign (new_stmt) 1431 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt)) 1432 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type) 1433 return false; 1434 1435 stmts->safe_push (def_stmt); 1436 oprnd = gimple_assign_lhs (new_stmt); 1437 } 1438 else 1439 { 1440 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */ 1441 oprnd = gimple_assign_rhs1 (def_stmt); 1442 new_oprnd = make_ssa_name (interm_type); 1443 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd); 1444 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; 1445 stmts->safe_push (def_stmt); 1446 oprnd = new_oprnd; 1447 } 1448 } 1449 else 1450 { 1451 /* Retrieve the operand before the type promotion. */ 1452 oprnd = gimple_assign_rhs1 (def_stmt); 1453 } 1454 } 1455 else 1456 { 1457 if (interm_type) 1458 { 1459 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */ 1460 new_oprnd = make_ssa_name (interm_type); 1461 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd); 1462 oprnd = new_oprnd; 1463 *new_def_stmt = new_stmt; 1464 } 1465 1466 /* Otherwise, OPRND is already set. */ 1467 } 1468 1469 if (interm_type) 1470 *new_type = interm_type; 1471 else 1472 *new_type = half_type; 1473 1474 *op0 = oprnd; 1475 *op1 = fold_convert (*new_type, const_oprnd); 1476 1477 return true; 1478} 1479 1480 1481/* Try to find a statement or a sequence of statements that can be performed 1482 on a smaller type: 1483 1484 type x_t; 1485 TYPE x_T, res0_T, res1_T; 1486 loop: 1487 S1 x_t = *p; 1488 S2 x_T = (TYPE) x_t; 1489 S3 res0_T = op (x_T, C0); 1490 S4 res1_T = op (res0_T, C1); 1491 S5 ... = () res1_T; - type demotion 1492 1493 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are 1494 constants. 1495 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either 1496 be 'type' or some intermediate type. For now, we expect S5 to be a type 1497 demotion operation. We also check that S3 and S4 have only one use. */ 1498 1499static gimple 1500vect_recog_over_widening_pattern (vec<gimple> *stmts, 1501 tree *type_in, tree *type_out) 1502{ 1503 gimple stmt = stmts->pop (); 1504 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL; 1505 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type; 1506 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd; 1507 bool first; 1508 tree type = NULL; 1509 1510 first = true; 1511 while (1) 1512 { 1513 if (!vinfo_for_stmt (stmt) 1514 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt))) 1515 return NULL; 1516 1517 new_def_stmt = NULL; 1518 if (!vect_operation_fits_smaller_type (stmt, var, &new_type, 1519 &op0, &op1, &new_def_stmt, 1520 stmts)) 1521 { 1522 if (first) 1523 return NULL; 1524 else 1525 break; 1526 } 1527 1528 /* STMT can be performed on a smaller type. Check its uses. */ 1529 use_stmt = vect_single_imm_use (stmt); 1530 if (!use_stmt || !is_gimple_assign (use_stmt)) 1531 return NULL; 1532 1533 /* Create pattern statement for STMT. */ 1534 vectype = get_vectype_for_scalar_type (new_type); 1535 if (!vectype) 1536 return NULL; 1537 1538 /* We want to collect all the statements for which we create pattern 1539 statetments, except for the case when the last statement in the 1540 sequence doesn't have a corresponding pattern statement. In such 1541 case we associate the last pattern statement with the last statement 1542 in the sequence. Therefore, we only add the original statement to 1543 the list if we know that it is not the last. */ 1544 if (prev_stmt) 1545 stmts->safe_push (prev_stmt); 1546 1547 var = vect_recog_temp_ssa_var (new_type, NULL); 1548 pattern_stmt 1549 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1); 1550 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt; 1551 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt); 1552 1553 if (dump_enabled_p ()) 1554 { 1555 dump_printf_loc (MSG_NOTE, vect_location, 1556 "created pattern stmt: "); 1557 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 1558 dump_printf (MSG_NOTE, "\n"); 1559 } 1560 1561 type = gimple_expr_type (stmt); 1562 prev_stmt = stmt; 1563 stmt = use_stmt; 1564 1565 first = false; 1566 } 1567 1568 /* We got a sequence. We expect it to end with a type demotion operation. 1569 Otherwise, we quit (for now). There are three possible cases: the 1570 conversion is to NEW_TYPE (we don't do anything), the conversion is to 1571 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and 1572 NEW_TYPE differs (we create a new conversion statement). */ 1573 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) 1574 { 1575 use_lhs = gimple_assign_lhs (use_stmt); 1576 use_type = TREE_TYPE (use_lhs); 1577 /* Support only type demotion or signedess change. */ 1578 if (!INTEGRAL_TYPE_P (use_type) 1579 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type)) 1580 return NULL; 1581 1582 /* Check that NEW_TYPE is not bigger than the conversion result. */ 1583 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) 1584 return NULL; 1585 1586 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) 1587 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type)) 1588 { 1589 /* Create NEW_TYPE->USE_TYPE conversion. */ 1590 new_oprnd = make_ssa_name (use_type); 1591 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var); 1592 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt; 1593 1594 *type_in = get_vectype_for_scalar_type (new_type); 1595 *type_out = get_vectype_for_scalar_type (use_type); 1596 1597 /* We created a pattern statement for the last statement in the 1598 sequence, so we don't need to associate it with the pattern 1599 statement created for PREV_STMT. Therefore, we add PREV_STMT 1600 to the list in order to mark it later in vect_pattern_recog_1. */ 1601 if (prev_stmt) 1602 stmts->safe_push (prev_stmt); 1603 } 1604 else 1605 { 1606 if (prev_stmt) 1607 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt)) 1608 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt)); 1609 1610 *type_in = vectype; 1611 *type_out = NULL_TREE; 1612 } 1613 1614 stmts->safe_push (use_stmt); 1615 } 1616 else 1617 /* TODO: support general case, create a conversion to the correct type. */ 1618 return NULL; 1619 1620 /* Pattern detected. */ 1621 if (dump_enabled_p ()) 1622 { 1623 dump_printf_loc (MSG_NOTE, vect_location, 1624 "vect_recog_over_widening_pattern: detected: "); 1625 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 1626 dump_printf (MSG_NOTE, "\n"); 1627 } 1628 1629 return pattern_stmt; 1630} 1631 1632/* Detect widening shift pattern: 1633 1634 type a_t; 1635 TYPE a_T, res_T; 1636 1637 S1 a_t = ; 1638 S2 a_T = (TYPE) a_t; 1639 S3 res_T = a_T << CONST; 1640 1641 where type 'TYPE' is at least double the size of type 'type'. 1642 1643 Also detect cases where the shift result is immediately converted 1644 to another type 'result_type' that is no larger in size than 'TYPE'. 1645 In those cases we perform a widen-shift that directly results in 1646 'result_type', to avoid a possible over-widening situation: 1647 1648 type a_t; 1649 TYPE a_T, res_T; 1650 result_type res_result; 1651 1652 S1 a_t = ; 1653 S2 a_T = (TYPE) a_t; 1654 S3 res_T = a_T << CONST; 1655 S4 res_result = (result_type) res_T; 1656 '--> res_result' = a_t w<< CONST; 1657 1658 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we 1659 create an additional pattern stmt for S2 to create a variable of an 1660 intermediate type, and perform widen-shift on the intermediate type: 1661 1662 type a_t; 1663 interm_type a_it; 1664 TYPE a_T, res_T, res_T'; 1665 1666 S1 a_t = ; 1667 S2 a_T = (TYPE) a_t; 1668 '--> a_it = (interm_type) a_t; 1669 S3 res_T = a_T << CONST; 1670 '--> res_T' = a_it <<* CONST; 1671 1672 Input/Output: 1673 1674 * STMTS: Contains a stmt from which the pattern search begins. 1675 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4 1676 in STMTS. When an intermediate type is used and a pattern statement is 1677 created for S2, we also put S2 here (before S3). 1678 1679 Output: 1680 1681 * TYPE_IN: The type of the input arguments to the pattern. 1682 1683 * TYPE_OUT: The type of the output of this pattern. 1684 1685 * Return value: A new stmt that will be used to replace the sequence of 1686 stmts that constitute the pattern. In this case it will be: 1687 WIDEN_LSHIFT_EXPR <a_t, CONST>. */ 1688 1689static gimple 1690vect_recog_widen_shift_pattern (vec<gimple> *stmts, 1691 tree *type_in, tree *type_out) 1692{ 1693 gimple last_stmt = stmts->pop (); 1694 gimple def_stmt0; 1695 tree oprnd0, oprnd1; 1696 tree type, half_type0; 1697 gimple pattern_stmt; 1698 tree vectype, vectype_out = NULL_TREE; 1699 tree var; 1700 enum tree_code dummy_code; 1701 int dummy_int; 1702 vec<tree> dummy_vec; 1703 gimple use_stmt; 1704 bool promotion; 1705 1706 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt)) 1707 return NULL; 1708 1709 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt))) 1710 return NULL; 1711 1712 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR) 1713 return NULL; 1714 1715 oprnd0 = gimple_assign_rhs1 (last_stmt); 1716 oprnd1 = gimple_assign_rhs2 (last_stmt); 1717 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST) 1718 return NULL; 1719 1720 /* Check operand 0: it has to be defined by a type promotion. */ 1721 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0, 1722 &promotion) 1723 || !promotion) 1724 return NULL; 1725 1726 /* Check operand 1: has to be positive. We check that it fits the type 1727 in vect_handle_widen_op_by_const (). */ 1728 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0) 1729 return NULL; 1730 1731 oprnd0 = gimple_assign_rhs1 (def_stmt0); 1732 type = gimple_expr_type (last_stmt); 1733 1734 /* Check for subsequent conversion to another type. */ 1735 use_stmt = vect_single_imm_use (last_stmt); 1736 if (use_stmt && is_gimple_assign (use_stmt) 1737 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)) 1738 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) 1739 { 1740 tree use_lhs = gimple_assign_lhs (use_stmt); 1741 tree use_type = TREE_TYPE (use_lhs); 1742 1743 if (INTEGRAL_TYPE_P (use_type) 1744 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type)) 1745 { 1746 last_stmt = use_stmt; 1747 type = use_type; 1748 } 1749 } 1750 1751 /* Check if this a widening operation. */ 1752 gimple wstmt = NULL; 1753 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1, 1754 &oprnd0, &wstmt, 1755 type, &half_type0, def_stmt0)) 1756 return NULL; 1757 1758 /* Pattern detected. */ 1759 if (dump_enabled_p ()) 1760 dump_printf_loc (MSG_NOTE, vect_location, 1761 "vect_recog_widen_shift_pattern: detected:\n"); 1762 1763 /* Check target support. */ 1764 vectype = get_vectype_for_scalar_type (half_type0); 1765 vectype_out = get_vectype_for_scalar_type (type); 1766 1767 if (!vectype 1768 || !vectype_out 1769 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt, 1770 vectype_out, vectype, 1771 &dummy_code, &dummy_code, 1772 &dummy_int, &dummy_vec)) 1773 return NULL; 1774 1775 *type_in = vectype; 1776 *type_out = vectype_out; 1777 1778 /* Pattern supported. Create a stmt to be used to replace the pattern. */ 1779 var = vect_recog_temp_ssa_var (type, NULL); 1780 pattern_stmt = 1781 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1); 1782 if (wstmt) 1783 { 1784 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 1785 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 1786 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); 1787 new_pattern_def_seq (stmt_vinfo, wstmt); 1788 stmt_vec_info new_stmt_info 1789 = new_stmt_vec_info (wstmt, loop_vinfo, bb_vinfo); 1790 set_vinfo_for_stmt (wstmt, new_stmt_info); 1791 STMT_VINFO_VECTYPE (new_stmt_info) = vectype; 1792 } 1793 1794 if (dump_enabled_p ()) 1795 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0); 1796 1797 stmts->safe_push (last_stmt); 1798 return pattern_stmt; 1799} 1800 1801/* Detect a rotate pattern wouldn't be otherwise vectorized: 1802 1803 type a_t, b_t, c_t; 1804 1805 S0 a_t = b_t r<< c_t; 1806 1807 Input/Output: 1808 1809 * STMTS: Contains a stmt from which the pattern search begins, 1810 i.e. the shift/rotate stmt. The original stmt (S0) is replaced 1811 with a sequence: 1812 1813 S1 d_t = -c_t; 1814 S2 e_t = d_t & (B - 1); 1815 S3 f_t = b_t << c_t; 1816 S4 g_t = b_t >> e_t; 1817 S0 a_t = f_t | g_t; 1818 1819 where B is element bitsize of type. 1820 1821 Output: 1822 1823 * TYPE_IN: The type of the input arguments to the pattern. 1824 1825 * TYPE_OUT: The type of the output of this pattern. 1826 1827 * Return value: A new stmt that will be used to replace the rotate 1828 S0 stmt. */ 1829 1830static gimple 1831vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out) 1832{ 1833 gimple last_stmt = stmts->pop (); 1834 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2; 1835 gimple pattern_stmt, def_stmt; 1836 enum tree_code rhs_code; 1837 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 1838 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 1839 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); 1840 enum vect_def_type dt; 1841 optab optab1, optab2; 1842 edge ext_def = NULL; 1843 1844 if (!is_gimple_assign (last_stmt)) 1845 return NULL; 1846 1847 rhs_code = gimple_assign_rhs_code (last_stmt); 1848 switch (rhs_code) 1849 { 1850 case LROTATE_EXPR: 1851 case RROTATE_EXPR: 1852 break; 1853 default: 1854 return NULL; 1855 } 1856 1857 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 1858 return NULL; 1859 1860 lhs = gimple_assign_lhs (last_stmt); 1861 oprnd0 = gimple_assign_rhs1 (last_stmt); 1862 type = TREE_TYPE (oprnd0); 1863 oprnd1 = gimple_assign_rhs2 (last_stmt); 1864 if (TREE_CODE (oprnd0) != SSA_NAME 1865 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type) 1866 || !INTEGRAL_TYPE_P (type) 1867 || !TYPE_UNSIGNED (type)) 1868 return NULL; 1869 1870 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt, 1871 &def, &dt)) 1872 return NULL; 1873 1874 if (dt != vect_internal_def 1875 && dt != vect_constant_def 1876 && dt != vect_external_def) 1877 return NULL; 1878 1879 vectype = get_vectype_for_scalar_type (type); 1880 if (vectype == NULL_TREE) 1881 return NULL; 1882 1883 /* If vector/vector or vector/scalar rotate is supported by the target, 1884 don't do anything here. */ 1885 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector); 1886 if (optab1 1887 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing) 1888 return NULL; 1889 1890 if (bb_vinfo != NULL || dt != vect_internal_def) 1891 { 1892 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar); 1893 if (optab2 1894 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing) 1895 return NULL; 1896 } 1897 1898 /* If vector/vector or vector/scalar shifts aren't supported by the target, 1899 don't do anything here either. */ 1900 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector); 1901 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector); 1902 if (!optab1 1903 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing 1904 || !optab2 1905 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing) 1906 { 1907 if (bb_vinfo == NULL && dt == vect_internal_def) 1908 return NULL; 1909 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar); 1910 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar); 1911 if (!optab1 1912 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing 1913 || !optab2 1914 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing) 1915 return NULL; 1916 } 1917 1918 *type_in = vectype; 1919 *type_out = vectype; 1920 if (*type_in == NULL_TREE) 1921 return NULL; 1922 1923 if (dt == vect_external_def 1924 && TREE_CODE (oprnd1) == SSA_NAME 1925 && loop_vinfo) 1926 { 1927 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1928 ext_def = loop_preheader_edge (loop); 1929 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1)) 1930 { 1931 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1)); 1932 if (bb == NULL 1933 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb)) 1934 ext_def = NULL; 1935 } 1936 } 1937 1938 def = NULL_TREE; 1939 if (TREE_CODE (oprnd1) == INTEGER_CST 1940 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type)) 1941 def = oprnd1; 1942 else if (def_stmt && gimple_assign_cast_p (def_stmt)) 1943 { 1944 tree rhs1 = gimple_assign_rhs1 (def_stmt); 1945 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type) 1946 && TYPE_PRECISION (TREE_TYPE (rhs1)) 1947 == TYPE_PRECISION (type)) 1948 def = rhs1; 1949 } 1950 1951 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; 1952 if (def == NULL_TREE) 1953 { 1954 def = vect_recog_temp_ssa_var (type, NULL); 1955 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1); 1956 if (ext_def) 1957 { 1958 basic_block new_bb 1959 = gsi_insert_on_edge_immediate (ext_def, def_stmt); 1960 gcc_assert (!new_bb); 1961 } 1962 else 1963 append_pattern_def_seq (stmt_vinfo, def_stmt); 1964 } 1965 stype = TREE_TYPE (def); 1966 1967 if (TREE_CODE (def) == INTEGER_CST) 1968 { 1969 if (!tree_fits_uhwi_p (def) 1970 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type)) 1971 || integer_zerop (def)) 1972 return NULL; 1973 def2 = build_int_cst (stype, 1974 GET_MODE_PRECISION (TYPE_MODE (type)) 1975 - tree_to_uhwi (def)); 1976 } 1977 else 1978 { 1979 tree vecstype = get_vectype_for_scalar_type (stype); 1980 stmt_vec_info def_stmt_vinfo; 1981 1982 if (vecstype == NULL_TREE) 1983 return NULL; 1984 def2 = vect_recog_temp_ssa_var (stype, NULL); 1985 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def); 1986 if (ext_def) 1987 { 1988 basic_block new_bb 1989 = gsi_insert_on_edge_immediate (ext_def, def_stmt); 1990 gcc_assert (!new_bb); 1991 } 1992 else 1993 { 1994 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); 1995 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); 1996 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype; 1997 append_pattern_def_seq (stmt_vinfo, def_stmt); 1998 } 1999 2000 def2 = vect_recog_temp_ssa_var (stype, NULL); 2001 tree mask 2002 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1); 2003 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR, 2004 gimple_assign_lhs (def_stmt), mask); 2005 if (ext_def) 2006 { 2007 basic_block new_bb 2008 = gsi_insert_on_edge_immediate (ext_def, def_stmt); 2009 gcc_assert (!new_bb); 2010 } 2011 else 2012 { 2013 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); 2014 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); 2015 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype; 2016 append_pattern_def_seq (stmt_vinfo, def_stmt); 2017 } 2018 } 2019 2020 var1 = vect_recog_temp_ssa_var (type, NULL); 2021 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR 2022 ? LSHIFT_EXPR : RSHIFT_EXPR, 2023 oprnd0, def); 2024 append_pattern_def_seq (stmt_vinfo, def_stmt); 2025 2026 var2 = vect_recog_temp_ssa_var (type, NULL); 2027 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR 2028 ? RSHIFT_EXPR : LSHIFT_EXPR, 2029 oprnd0, def2); 2030 append_pattern_def_seq (stmt_vinfo, def_stmt); 2031 2032 /* Pattern detected. */ 2033 if (dump_enabled_p ()) 2034 dump_printf_loc (MSG_NOTE, vect_location, 2035 "vect_recog_rotate_pattern: detected:\n"); 2036 2037 /* Pattern supported. Create a stmt to be used to replace the pattern. */ 2038 var = vect_recog_temp_ssa_var (type, NULL); 2039 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2); 2040 2041 if (dump_enabled_p ()) 2042 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0); 2043 2044 stmts->safe_push (last_stmt); 2045 return pattern_stmt; 2046} 2047 2048/* Detect a vector by vector shift pattern that wouldn't be otherwise 2049 vectorized: 2050 2051 type a_t; 2052 TYPE b_T, res_T; 2053 2054 S1 a_t = ; 2055 S2 b_T = ; 2056 S3 res_T = b_T op a_t; 2057 2058 where type 'TYPE' is a type with different size than 'type', 2059 and op is <<, >> or rotate. 2060 2061 Also detect cases: 2062 2063 type a_t; 2064 TYPE b_T, c_T, res_T; 2065 2066 S0 c_T = ; 2067 S1 a_t = (type) c_T; 2068 S2 b_T = ; 2069 S3 res_T = b_T op a_t; 2070 2071 Input/Output: 2072 2073 * STMTS: Contains a stmt from which the pattern search begins, 2074 i.e. the shift/rotate stmt. The original stmt (S3) is replaced 2075 with a shift/rotate which has same type on both operands, in the 2076 second case just b_T op c_T, in the first case with added cast 2077 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ. 2078 2079 Output: 2080 2081 * TYPE_IN: The type of the input arguments to the pattern. 2082 2083 * TYPE_OUT: The type of the output of this pattern. 2084 2085 * Return value: A new stmt that will be used to replace the shift/rotate 2086 S3 stmt. */ 2087 2088static gimple 2089vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts, 2090 tree *type_in, tree *type_out) 2091{ 2092 gimple last_stmt = stmts->pop (); 2093 tree oprnd0, oprnd1, lhs, var; 2094 gimple pattern_stmt, def_stmt; 2095 enum tree_code rhs_code; 2096 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 2097 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 2098 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); 2099 enum vect_def_type dt; 2100 tree def; 2101 2102 if (!is_gimple_assign (last_stmt)) 2103 return NULL; 2104 2105 rhs_code = gimple_assign_rhs_code (last_stmt); 2106 switch (rhs_code) 2107 { 2108 case LSHIFT_EXPR: 2109 case RSHIFT_EXPR: 2110 case LROTATE_EXPR: 2111 case RROTATE_EXPR: 2112 break; 2113 default: 2114 return NULL; 2115 } 2116 2117 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 2118 return NULL; 2119 2120 lhs = gimple_assign_lhs (last_stmt); 2121 oprnd0 = gimple_assign_rhs1 (last_stmt); 2122 oprnd1 = gimple_assign_rhs2 (last_stmt); 2123 if (TREE_CODE (oprnd0) != SSA_NAME 2124 || TREE_CODE (oprnd1) != SSA_NAME 2125 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1)) 2126 || TYPE_PRECISION (TREE_TYPE (oprnd1)) 2127 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1))) 2128 || TYPE_PRECISION (TREE_TYPE (lhs)) 2129 != TYPE_PRECISION (TREE_TYPE (oprnd0))) 2130 return NULL; 2131 2132 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt, 2133 &def, &dt)) 2134 return NULL; 2135 2136 if (dt != vect_internal_def) 2137 return NULL; 2138 2139 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0)); 2140 *type_out = *type_in; 2141 if (*type_in == NULL_TREE) 2142 return NULL; 2143 2144 def = NULL_TREE; 2145 if (gimple_assign_cast_p (def_stmt)) 2146 { 2147 tree rhs1 = gimple_assign_rhs1 (def_stmt); 2148 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0)) 2149 && TYPE_PRECISION (TREE_TYPE (rhs1)) 2150 == TYPE_PRECISION (TREE_TYPE (oprnd0))) 2151 def = rhs1; 2152 } 2153 2154 if (def == NULL_TREE) 2155 { 2156 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL); 2157 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1); 2158 new_pattern_def_seq (stmt_vinfo, def_stmt); 2159 } 2160 2161 /* Pattern detected. */ 2162 if (dump_enabled_p ()) 2163 dump_printf_loc (MSG_NOTE, vect_location, 2164 "vect_recog_vector_vector_shift_pattern: detected:\n"); 2165 2166 /* Pattern supported. Create a stmt to be used to replace the pattern. */ 2167 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL); 2168 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def); 2169 2170 if (dump_enabled_p ()) 2171 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0); 2172 2173 stmts->safe_push (last_stmt); 2174 return pattern_stmt; 2175} 2176 2177/* Detect a signed division by a constant that wouldn't be 2178 otherwise vectorized: 2179 2180 type a_t, b_t; 2181 2182 S1 a_t = b_t / N; 2183 2184 where type 'type' is an integral type and N is a constant. 2185 2186 Similarly handle modulo by a constant: 2187 2188 S4 a_t = b_t % N; 2189 2190 Input/Output: 2191 2192 * STMTS: Contains a stmt from which the pattern search begins, 2193 i.e. the division stmt. S1 is replaced by if N is a power 2194 of two constant and type is signed: 2195 S3 y_t = b_t < 0 ? N - 1 : 0; 2196 S2 x_t = b_t + y_t; 2197 S1' a_t = x_t >> log2 (N); 2198 2199 S4 is replaced if N is a power of two constant and 2200 type is signed by (where *_T temporaries have unsigned type): 2201 S9 y_T = b_t < 0 ? -1U : 0U; 2202 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N)); 2203 S7 z_t = (type) z_T; 2204 S6 w_t = b_t + z_t; 2205 S5 x_t = w_t & (N - 1); 2206 S4' a_t = x_t - z_t; 2207 2208 Output: 2209 2210 * TYPE_IN: The type of the input arguments to the pattern. 2211 2212 * TYPE_OUT: The type of the output of this pattern. 2213 2214 * Return value: A new stmt that will be used to replace the division 2215 S1 or modulo S4 stmt. */ 2216 2217static gimple 2218vect_recog_divmod_pattern (vec<gimple> *stmts, 2219 tree *type_in, tree *type_out) 2220{ 2221 gimple last_stmt = stmts->pop (); 2222 tree oprnd0, oprnd1, vectype, itype, cond; 2223 gimple pattern_stmt, def_stmt; 2224 enum tree_code rhs_code; 2225 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 2226 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 2227 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); 2228 optab optab; 2229 tree q; 2230 int dummy_int, prec; 2231 stmt_vec_info def_stmt_vinfo; 2232 2233 if (!is_gimple_assign (last_stmt)) 2234 return NULL; 2235 2236 rhs_code = gimple_assign_rhs_code (last_stmt); 2237 switch (rhs_code) 2238 { 2239 case TRUNC_DIV_EXPR: 2240 case TRUNC_MOD_EXPR: 2241 break; 2242 default: 2243 return NULL; 2244 } 2245 2246 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 2247 return NULL; 2248 2249 oprnd0 = gimple_assign_rhs1 (last_stmt); 2250 oprnd1 = gimple_assign_rhs2 (last_stmt); 2251 itype = TREE_TYPE (oprnd0); 2252 if (TREE_CODE (oprnd0) != SSA_NAME 2253 || TREE_CODE (oprnd1) != INTEGER_CST 2254 || TREE_CODE (itype) != INTEGER_TYPE 2255 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))) 2256 return NULL; 2257 2258 vectype = get_vectype_for_scalar_type (itype); 2259 if (vectype == NULL_TREE) 2260 return NULL; 2261 2262 /* If the target can handle vectorized division or modulo natively, 2263 don't attempt to optimize this. */ 2264 optab = optab_for_tree_code (rhs_code, vectype, optab_default); 2265 if (optab != unknown_optab) 2266 { 2267 machine_mode vec_mode = TYPE_MODE (vectype); 2268 int icode = (int) optab_handler (optab, vec_mode); 2269 if (icode != CODE_FOR_nothing) 2270 return NULL; 2271 } 2272 2273 prec = TYPE_PRECISION (itype); 2274 if (integer_pow2p (oprnd1)) 2275 { 2276 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1) 2277 return NULL; 2278 2279 /* Pattern detected. */ 2280 if (dump_enabled_p ()) 2281 dump_printf_loc (MSG_NOTE, vect_location, 2282 "vect_recog_divmod_pattern: detected:\n"); 2283 2284 cond = build2 (LT_EXPR, boolean_type_node, oprnd0, 2285 build_int_cst (itype, 0)); 2286 if (rhs_code == TRUNC_DIV_EXPR) 2287 { 2288 tree var = vect_recog_temp_ssa_var (itype, NULL); 2289 tree shift; 2290 def_stmt 2291 = gimple_build_assign (var, COND_EXPR, cond, 2292 fold_build2 (MINUS_EXPR, itype, oprnd1, 2293 build_int_cst (itype, 1)), 2294 build_int_cst (itype, 0)); 2295 new_pattern_def_seq (stmt_vinfo, def_stmt); 2296 var = vect_recog_temp_ssa_var (itype, NULL); 2297 def_stmt 2298 = gimple_build_assign (var, PLUS_EXPR, oprnd0, 2299 gimple_assign_lhs (def_stmt)); 2300 append_pattern_def_seq (stmt_vinfo, def_stmt); 2301 2302 shift = build_int_cst (itype, tree_log2 (oprnd1)); 2303 pattern_stmt 2304 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 2305 RSHIFT_EXPR, var, shift); 2306 } 2307 else 2308 { 2309 tree signmask; 2310 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; 2311 if (compare_tree_int (oprnd1, 2) == 0) 2312 { 2313 signmask = vect_recog_temp_ssa_var (itype, NULL); 2314 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond, 2315 build_int_cst (itype, 1), 2316 build_int_cst (itype, 0)); 2317 append_pattern_def_seq (stmt_vinfo, def_stmt); 2318 } 2319 else 2320 { 2321 tree utype 2322 = build_nonstandard_integer_type (prec, 1); 2323 tree vecutype = get_vectype_for_scalar_type (utype); 2324 tree shift 2325 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype)) 2326 - tree_log2 (oprnd1)); 2327 tree var = vect_recog_temp_ssa_var (utype, NULL); 2328 2329 def_stmt = gimple_build_assign (var, COND_EXPR, cond, 2330 build_int_cst (utype, -1), 2331 build_int_cst (utype, 0)); 2332 def_stmt_vinfo 2333 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); 2334 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); 2335 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype; 2336 append_pattern_def_seq (stmt_vinfo, def_stmt); 2337 var = vect_recog_temp_ssa_var (utype, NULL); 2338 def_stmt = gimple_build_assign (var, RSHIFT_EXPR, 2339 gimple_assign_lhs (def_stmt), 2340 shift); 2341 def_stmt_vinfo 2342 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); 2343 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); 2344 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype; 2345 append_pattern_def_seq (stmt_vinfo, def_stmt); 2346 signmask = vect_recog_temp_ssa_var (itype, NULL); 2347 def_stmt 2348 = gimple_build_assign (signmask, NOP_EXPR, var); 2349 append_pattern_def_seq (stmt_vinfo, def_stmt); 2350 } 2351 def_stmt 2352 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 2353 PLUS_EXPR, oprnd0, signmask); 2354 append_pattern_def_seq (stmt_vinfo, def_stmt); 2355 def_stmt 2356 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 2357 BIT_AND_EXPR, gimple_assign_lhs (def_stmt), 2358 fold_build2 (MINUS_EXPR, itype, oprnd1, 2359 build_int_cst (itype, 1))); 2360 append_pattern_def_seq (stmt_vinfo, def_stmt); 2361 2362 pattern_stmt 2363 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 2364 MINUS_EXPR, gimple_assign_lhs (def_stmt), 2365 signmask); 2366 } 2367 2368 if (dump_enabled_p ()) 2369 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 2370 0); 2371 2372 stmts->safe_push (last_stmt); 2373 2374 *type_in = vectype; 2375 *type_out = vectype; 2376 return pattern_stmt; 2377 } 2378 2379 if (prec > HOST_BITS_PER_WIDE_INT 2380 || integer_zerop (oprnd1)) 2381 return NULL; 2382 2383 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype))) 2384 return NULL; 2385 2386 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; 2387 2388 if (TYPE_UNSIGNED (itype)) 2389 { 2390 unsigned HOST_WIDE_INT mh, ml; 2391 int pre_shift, post_shift; 2392 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1) 2393 & GET_MODE_MASK (TYPE_MODE (itype))); 2394 tree t1, t2, t3, t4; 2395 2396 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1))) 2397 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */ 2398 return NULL; 2399 2400 /* Find a suitable multiplier and right shift count 2401 instead of multiplying with D. */ 2402 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int); 2403 2404 /* If the suggested multiplier is more than SIZE bits, we can do better 2405 for even divisors, using an initial right shift. */ 2406 if (mh != 0 && (d & 1) == 0) 2407 { 2408 pre_shift = floor_log2 (d & -d); 2409 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift, 2410 &ml, &post_shift, &dummy_int); 2411 gcc_assert (!mh); 2412 } 2413 else 2414 pre_shift = 0; 2415 2416 if (mh != 0) 2417 { 2418 if (post_shift - 1 >= prec) 2419 return NULL; 2420 2421 /* t1 = oprnd0 h* ml; 2422 t2 = oprnd0 - t1; 2423 t3 = t2 >> 1; 2424 t4 = t1 + t3; 2425 q = t4 >> (post_shift - 1); */ 2426 t1 = vect_recog_temp_ssa_var (itype, NULL); 2427 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0, 2428 build_int_cst (itype, ml)); 2429 append_pattern_def_seq (stmt_vinfo, def_stmt); 2430 2431 t2 = vect_recog_temp_ssa_var (itype, NULL); 2432 def_stmt 2433 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1); 2434 append_pattern_def_seq (stmt_vinfo, def_stmt); 2435 2436 t3 = vect_recog_temp_ssa_var (itype, NULL); 2437 def_stmt 2438 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node); 2439 append_pattern_def_seq (stmt_vinfo, def_stmt); 2440 2441 t4 = vect_recog_temp_ssa_var (itype, NULL); 2442 def_stmt 2443 = gimple_build_assign (t4, PLUS_EXPR, t1, t3); 2444 2445 if (post_shift != 1) 2446 { 2447 append_pattern_def_seq (stmt_vinfo, def_stmt); 2448 2449 q = vect_recog_temp_ssa_var (itype, NULL); 2450 pattern_stmt 2451 = gimple_build_assign (q, RSHIFT_EXPR, t4, 2452 build_int_cst (itype, post_shift - 1)); 2453 } 2454 else 2455 { 2456 q = t4; 2457 pattern_stmt = def_stmt; 2458 } 2459 } 2460 else 2461 { 2462 if (pre_shift >= prec || post_shift >= prec) 2463 return NULL; 2464 2465 /* t1 = oprnd0 >> pre_shift; 2466 t2 = t1 h* ml; 2467 q = t2 >> post_shift; */ 2468 if (pre_shift) 2469 { 2470 t1 = vect_recog_temp_ssa_var (itype, NULL); 2471 def_stmt 2472 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0, 2473 build_int_cst (NULL, pre_shift)); 2474 append_pattern_def_seq (stmt_vinfo, def_stmt); 2475 } 2476 else 2477 t1 = oprnd0; 2478 2479 t2 = vect_recog_temp_ssa_var (itype, NULL); 2480 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1, 2481 build_int_cst (itype, ml)); 2482 2483 if (post_shift) 2484 { 2485 append_pattern_def_seq (stmt_vinfo, def_stmt); 2486 2487 q = vect_recog_temp_ssa_var (itype, NULL); 2488 def_stmt 2489 = gimple_build_assign (q, RSHIFT_EXPR, t2, 2490 build_int_cst (itype, post_shift)); 2491 } 2492 else 2493 q = t2; 2494 2495 pattern_stmt = def_stmt; 2496 } 2497 } 2498 else 2499 { 2500 unsigned HOST_WIDE_INT ml; 2501 int post_shift; 2502 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1); 2503 unsigned HOST_WIDE_INT abs_d; 2504 bool add = false; 2505 tree t1, t2, t3, t4; 2506 2507 /* Give up for -1. */ 2508 if (d == -1) 2509 return NULL; 2510 2511 /* Since d might be INT_MIN, we have to cast to 2512 unsigned HOST_WIDE_INT before negating to avoid 2513 undefined signed overflow. */ 2514 abs_d = (d >= 0 2515 ? (unsigned HOST_WIDE_INT) d 2516 : - (unsigned HOST_WIDE_INT) d); 2517 2518 /* n rem d = n rem -d */ 2519 if (rhs_code == TRUNC_MOD_EXPR && d < 0) 2520 { 2521 d = abs_d; 2522 oprnd1 = build_int_cst (itype, abs_d); 2523 } 2524 else if (HOST_BITS_PER_WIDE_INT >= prec 2525 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1)) 2526 /* This case is not handled correctly below. */ 2527 return NULL; 2528 2529 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int); 2530 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1)) 2531 { 2532 add = true; 2533 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1); 2534 } 2535 if (post_shift >= prec) 2536 return NULL; 2537 2538 /* t1 = oprnd0 h* ml; */ 2539 t1 = vect_recog_temp_ssa_var (itype, NULL); 2540 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0, 2541 build_int_cst (itype, ml)); 2542 2543 if (add) 2544 { 2545 /* t2 = t1 + oprnd0; */ 2546 append_pattern_def_seq (stmt_vinfo, def_stmt); 2547 t2 = vect_recog_temp_ssa_var (itype, NULL); 2548 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0); 2549 } 2550 else 2551 t2 = t1; 2552 2553 if (post_shift) 2554 { 2555 /* t3 = t2 >> post_shift; */ 2556 append_pattern_def_seq (stmt_vinfo, def_stmt); 2557 t3 = vect_recog_temp_ssa_var (itype, NULL); 2558 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2, 2559 build_int_cst (itype, post_shift)); 2560 } 2561 else 2562 t3 = t2; 2563 2564 wide_int oprnd0_min, oprnd0_max; 2565 int msb = 1; 2566 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE) 2567 { 2568 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype))) 2569 msb = 0; 2570 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype))) 2571 msb = -1; 2572 } 2573 2574 if (msb == 0 && d >= 0) 2575 { 2576 /* q = t3; */ 2577 q = t3; 2578 pattern_stmt = def_stmt; 2579 } 2580 else 2581 { 2582 /* t4 = oprnd0 >> (prec - 1); 2583 or if we know from VRP that oprnd0 >= 0 2584 t4 = 0; 2585 or if we know from VRP that oprnd0 < 0 2586 t4 = -1; */ 2587 append_pattern_def_seq (stmt_vinfo, def_stmt); 2588 t4 = vect_recog_temp_ssa_var (itype, NULL); 2589 if (msb != 1) 2590 def_stmt = gimple_build_assign (t4, INTEGER_CST, 2591 build_int_cst (itype, msb)); 2592 else 2593 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0, 2594 build_int_cst (itype, prec - 1)); 2595 append_pattern_def_seq (stmt_vinfo, def_stmt); 2596 2597 /* q = t3 - t4; or q = t4 - t3; */ 2598 q = vect_recog_temp_ssa_var (itype, NULL); 2599 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3, 2600 d < 0 ? t3 : t4); 2601 } 2602 } 2603 2604 if (rhs_code == TRUNC_MOD_EXPR) 2605 { 2606 tree r, t1; 2607 2608 /* We divided. Now finish by: 2609 t1 = q * oprnd1; 2610 r = oprnd0 - t1; */ 2611 append_pattern_def_seq (stmt_vinfo, pattern_stmt); 2612 2613 t1 = vect_recog_temp_ssa_var (itype, NULL); 2614 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1); 2615 append_pattern_def_seq (stmt_vinfo, def_stmt); 2616 2617 r = vect_recog_temp_ssa_var (itype, NULL); 2618 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1); 2619 } 2620 2621 /* Pattern detected. */ 2622 if (dump_enabled_p ()) 2623 { 2624 dump_printf_loc (MSG_NOTE, vect_location, 2625 "vect_recog_divmod_pattern: detected: "); 2626 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 2627 dump_printf (MSG_NOTE, "\n"); 2628 } 2629 2630 stmts->safe_push (last_stmt); 2631 2632 *type_in = vectype; 2633 *type_out = vectype; 2634 return pattern_stmt; 2635} 2636 2637/* Function vect_recog_mixed_size_cond_pattern 2638 2639 Try to find the following pattern: 2640 2641 type x_t, y_t; 2642 TYPE a_T, b_T, c_T; 2643 loop: 2644 S1 a_T = x_t CMP y_t ? b_T : c_T; 2645 2646 where type 'TYPE' is an integral type which has different size 2647 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider 2648 than 'type', the constants need to fit into an integer type 2649 with the same width as 'type') or results of conversion from 'type'. 2650 2651 Input: 2652 2653 * LAST_STMT: A stmt from which the pattern search begins. 2654 2655 Output: 2656 2657 * TYPE_IN: The type of the input arguments to the pattern. 2658 2659 * TYPE_OUT: The type of the output of this pattern. 2660 2661 * Return value: A new stmt that will be used to replace the pattern. 2662 Additionally a def_stmt is added. 2663 2664 a_it = x_t CMP y_t ? b_it : c_it; 2665 a_T = (TYPE) a_it; */ 2666 2667static gimple 2668vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in, 2669 tree *type_out) 2670{ 2671 gimple last_stmt = (*stmts)[0]; 2672 tree cond_expr, then_clause, else_clause; 2673 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info; 2674 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype; 2675 machine_mode cmpmode; 2676 gimple pattern_stmt, def_stmt; 2677 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 2678 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); 2679 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE; 2680 gimple def_stmt0 = NULL, def_stmt1 = NULL; 2681 bool promotion; 2682 tree comp_scalar_type; 2683 2684 if (!is_gimple_assign (last_stmt) 2685 || gimple_assign_rhs_code (last_stmt) != COND_EXPR 2686 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) 2687 return NULL; 2688 2689 cond_expr = gimple_assign_rhs1 (last_stmt); 2690 then_clause = gimple_assign_rhs2 (last_stmt); 2691 else_clause = gimple_assign_rhs3 (last_stmt); 2692 2693 if (!COMPARISON_CLASS_P (cond_expr)) 2694 return NULL; 2695 2696 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0)); 2697 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type); 2698 if (comp_vectype == NULL_TREE) 2699 return NULL; 2700 2701 type = gimple_expr_type (last_stmt); 2702 if (types_compatible_p (type, comp_scalar_type) 2703 || ((TREE_CODE (then_clause) != INTEGER_CST 2704 || TREE_CODE (else_clause) != INTEGER_CST) 2705 && !INTEGRAL_TYPE_P (comp_scalar_type)) 2706 || !INTEGRAL_TYPE_P (type)) 2707 return NULL; 2708 2709 if ((TREE_CODE (then_clause) != INTEGER_CST 2710 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0, 2711 &def_stmt0, &promotion)) 2712 || (TREE_CODE (else_clause) != INTEGER_CST 2713 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1, 2714 &def_stmt1, &promotion))) 2715 return NULL; 2716 2717 if (orig_type0 && orig_type1 2718 && !types_compatible_p (orig_type0, orig_type1)) 2719 return NULL; 2720 2721 if (orig_type0) 2722 { 2723 if (!types_compatible_p (orig_type0, comp_scalar_type)) 2724 return NULL; 2725 then_clause = gimple_assign_rhs1 (def_stmt0); 2726 itype = orig_type0; 2727 } 2728 2729 if (orig_type1) 2730 { 2731 if (!types_compatible_p (orig_type1, comp_scalar_type)) 2732 return NULL; 2733 else_clause = gimple_assign_rhs1 (def_stmt1); 2734 itype = orig_type1; 2735 } 2736 2737 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype)); 2738 2739 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode)) 2740 return NULL; 2741 2742 vectype = get_vectype_for_scalar_type (type); 2743 if (vectype == NULL_TREE) 2744 return NULL; 2745 2746 if (expand_vec_cond_expr_p (vectype, comp_vectype)) 2747 return NULL; 2748 2749 if (itype == NULL_TREE) 2750 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode), 2751 TYPE_UNSIGNED (type)); 2752 2753 if (itype == NULL_TREE 2754 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode)) 2755 return NULL; 2756 2757 vecitype = get_vectype_for_scalar_type (itype); 2758 if (vecitype == NULL_TREE) 2759 return NULL; 2760 2761 if (!expand_vec_cond_expr_p (vecitype, comp_vectype)) 2762 return NULL; 2763 2764 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode)) 2765 { 2766 if ((TREE_CODE (then_clause) == INTEGER_CST 2767 && !int_fits_type_p (then_clause, itype)) 2768 || (TREE_CODE (else_clause) == INTEGER_CST 2769 && !int_fits_type_p (else_clause, itype))) 2770 return NULL; 2771 } 2772 2773 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 2774 COND_EXPR, unshare_expr (cond_expr), 2775 fold_convert (itype, then_clause), 2776 fold_convert (itype, else_clause)); 2777 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL), 2778 NOP_EXPR, gimple_assign_lhs (def_stmt)); 2779 2780 new_pattern_def_seq (stmt_vinfo, def_stmt); 2781 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); 2782 set_vinfo_for_stmt (def_stmt, def_stmt_info); 2783 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype; 2784 *type_in = vecitype; 2785 *type_out = vectype; 2786 2787 if (dump_enabled_p ()) 2788 dump_printf_loc (MSG_NOTE, vect_location, 2789 "vect_recog_mixed_size_cond_pattern: detected:\n"); 2790 2791 return pattern_stmt; 2792} 2793 2794 2795/* Helper function of vect_recog_bool_pattern. Called recursively, return 2796 true if bool VAR can be optimized that way. */ 2797 2798static bool 2799check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) 2800{ 2801 gimple def_stmt; 2802 enum vect_def_type dt; 2803 tree def, rhs1; 2804 enum tree_code rhs_code; 2805 2806 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def, 2807 &dt)) 2808 return false; 2809 2810 if (dt != vect_internal_def) 2811 return false; 2812 2813 if (!is_gimple_assign (def_stmt)) 2814 return false; 2815 2816 if (!has_single_use (def)) 2817 return false; 2818 2819 rhs1 = gimple_assign_rhs1 (def_stmt); 2820 rhs_code = gimple_assign_rhs_code (def_stmt); 2821 switch (rhs_code) 2822 { 2823 case SSA_NAME: 2824 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo); 2825 2826 CASE_CONVERT: 2827 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1 2828 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))) 2829 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE) 2830 return false; 2831 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo); 2832 2833 case BIT_NOT_EXPR: 2834 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo); 2835 2836 case BIT_AND_EXPR: 2837 case BIT_IOR_EXPR: 2838 case BIT_XOR_EXPR: 2839 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo)) 2840 return false; 2841 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo, 2842 bb_vinfo); 2843 2844 default: 2845 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison) 2846 { 2847 tree vecitype, comp_vectype; 2848 2849 /* If the comparison can throw, then is_gimple_condexpr will be 2850 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */ 2851 if (stmt_could_throw_p (def_stmt)) 2852 return false; 2853 2854 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1)); 2855 if (comp_vectype == NULL_TREE) 2856 return false; 2857 2858 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE) 2859 { 2860 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); 2861 tree itype 2862 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); 2863 vecitype = get_vectype_for_scalar_type (itype); 2864 if (vecitype == NULL_TREE) 2865 return false; 2866 } 2867 else 2868 vecitype = comp_vectype; 2869 return expand_vec_cond_expr_p (vecitype, comp_vectype); 2870 } 2871 return false; 2872 } 2873} 2874 2875 2876/* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous 2877 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT 2878 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */ 2879 2880static tree 2881adjust_bool_pattern_cast (tree type, tree var) 2882{ 2883 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var)); 2884 gimple cast_stmt, pattern_stmt; 2885 2886 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo)); 2887 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 2888 new_pattern_def_seq (stmt_vinfo, pattern_stmt); 2889 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL), 2890 NOP_EXPR, gimple_assign_lhs (pattern_stmt)); 2891 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt; 2892 return gimple_assign_lhs (cast_stmt); 2893} 2894 2895 2896/* Helper function of vect_recog_bool_pattern. Do the actual transformations, 2897 recursively. VAR is an SSA_NAME that should be transformed from bool 2898 to a wider integer type, OUT_TYPE is the desired final integer type of 2899 the whole pattern, TRUEVAL should be NULL unless optimizing 2900 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands 2901 in the then_clause, STMTS is where statements with added pattern stmts 2902 should be pushed to. */ 2903 2904static tree 2905adjust_bool_pattern (tree var, tree out_type, tree trueval, 2906 vec<gimple> *stmts) 2907{ 2908 gimple stmt = SSA_NAME_DEF_STMT (var); 2909 enum tree_code rhs_code, def_rhs_code; 2910 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2; 2911 location_t loc; 2912 gimple pattern_stmt, def_stmt; 2913 2914 rhs1 = gimple_assign_rhs1 (stmt); 2915 rhs2 = gimple_assign_rhs2 (stmt); 2916 rhs_code = gimple_assign_rhs_code (stmt); 2917 loc = gimple_location (stmt); 2918 switch (rhs_code) 2919 { 2920 case SSA_NAME: 2921 CASE_CONVERT: 2922 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 2923 itype = TREE_TYPE (irhs1); 2924 pattern_stmt 2925 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 2926 SSA_NAME, irhs1); 2927 break; 2928 2929 case BIT_NOT_EXPR: 2930 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 2931 itype = TREE_TYPE (irhs1); 2932 pattern_stmt 2933 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 2934 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1)); 2935 break; 2936 2937 case BIT_AND_EXPR: 2938 /* Try to optimize x = y & (a < b ? 1 : 0); into 2939 x = (a < b ? y : 0); 2940 2941 E.g. for: 2942 bool a_b, b_b, c_b; 2943 TYPE d_T; 2944 2945 S1 a_b = x1 CMP1 y1; 2946 S2 b_b = x2 CMP2 y2; 2947 S3 c_b = a_b & b_b; 2948 S4 d_T = (TYPE) c_b; 2949 2950 we would normally emit: 2951 2952 S1' a_T = x1 CMP1 y1 ? 1 : 0; 2953 S2' b_T = x2 CMP2 y2 ? 1 : 0; 2954 S3' c_T = a_T & b_T; 2955 S4' d_T = c_T; 2956 2957 but we can save one stmt by using the 2958 result of one of the COND_EXPRs in the other COND_EXPR and leave 2959 BIT_AND_EXPR stmt out: 2960 2961 S1' a_T = x1 CMP1 y1 ? 1 : 0; 2962 S3' c_T = x2 CMP2 y2 ? a_T : 0; 2963 S4' f_T = c_T; 2964 2965 At least when VEC_COND_EXPR is implemented using masks 2966 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it 2967 computes the comparison masks and ands it, in one case with 2968 all ones vector, in the other case with a vector register. 2969 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is 2970 often more expensive. */ 2971 def_stmt = SSA_NAME_DEF_STMT (rhs2); 2972 def_rhs_code = gimple_assign_rhs_code (def_stmt); 2973 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison) 2974 { 2975 tree def_rhs1 = gimple_assign_rhs1 (def_stmt); 2976 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 2977 if (TYPE_PRECISION (TREE_TYPE (irhs1)) 2978 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1)))) 2979 { 2980 gimple tstmt; 2981 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt); 2982 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts); 2983 tstmt = stmts->pop (); 2984 gcc_assert (tstmt == def_stmt); 2985 stmts->quick_push (stmt); 2986 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) 2987 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo); 2988 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo)); 2989 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL; 2990 return irhs2; 2991 } 2992 else 2993 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts); 2994 goto and_ior_xor; 2995 } 2996 def_stmt = SSA_NAME_DEF_STMT (rhs1); 2997 def_rhs_code = gimple_assign_rhs_code (def_stmt); 2998 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison) 2999 { 3000 tree def_rhs1 = gimple_assign_rhs1 (def_stmt); 3001 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts); 3002 if (TYPE_PRECISION (TREE_TYPE (irhs2)) 3003 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1)))) 3004 { 3005 gimple tstmt; 3006 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt); 3007 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts); 3008 tstmt = stmts->pop (); 3009 gcc_assert (tstmt == def_stmt); 3010 stmts->quick_push (stmt); 3011 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) 3012 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo); 3013 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo)); 3014 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL; 3015 return irhs1; 3016 } 3017 else 3018 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 3019 goto and_ior_xor; 3020 } 3021 /* FALLTHRU */ 3022 case BIT_IOR_EXPR: 3023 case BIT_XOR_EXPR: 3024 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 3025 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts); 3026 and_ior_xor: 3027 if (TYPE_PRECISION (TREE_TYPE (irhs1)) 3028 != TYPE_PRECISION (TREE_TYPE (irhs2))) 3029 { 3030 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1)); 3031 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2)); 3032 int out_prec = TYPE_PRECISION (out_type); 3033 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2)) 3034 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2); 3035 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2)) 3036 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1); 3037 else 3038 { 3039 irhs1 = adjust_bool_pattern_cast (out_type, rhs1); 3040 irhs2 = adjust_bool_pattern_cast (out_type, rhs2); 3041 } 3042 } 3043 itype = TREE_TYPE (irhs1); 3044 pattern_stmt 3045 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 3046 rhs_code, irhs1, irhs2); 3047 break; 3048 3049 default: 3050 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison); 3051 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE 3052 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)) 3053 || (TYPE_PRECISION (TREE_TYPE (rhs1)) 3054 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1))))) 3055 { 3056 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); 3057 itype 3058 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); 3059 } 3060 else 3061 itype = TREE_TYPE (rhs1); 3062 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2); 3063 if (trueval == NULL_TREE) 3064 trueval = build_int_cst (itype, 1); 3065 else 3066 gcc_checking_assert (useless_type_conversion_p (itype, 3067 TREE_TYPE (trueval))); 3068 pattern_stmt 3069 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), 3070 COND_EXPR, cond_expr, trueval, 3071 build_int_cst (itype, 0)); 3072 break; 3073 } 3074 3075 stmts->safe_push (stmt); 3076 gimple_set_location (pattern_stmt, loc); 3077 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt; 3078 return gimple_assign_lhs (pattern_stmt); 3079} 3080 3081 3082/* Function vect_recog_bool_pattern 3083 3084 Try to find pattern like following: 3085 3086 bool a_b, b_b, c_b, d_b, e_b; 3087 TYPE f_T; 3088 loop: 3089 S1 a_b = x1 CMP1 y1; 3090 S2 b_b = x2 CMP2 y2; 3091 S3 c_b = a_b & b_b; 3092 S4 d_b = x3 CMP3 y3; 3093 S5 e_b = c_b | d_b; 3094 S6 f_T = (TYPE) e_b; 3095 3096 where type 'TYPE' is an integral type. Or a similar pattern 3097 ending in 3098 3099 S6 f_Y = e_b ? r_Y : s_Y; 3100 3101 as results from if-conversion of a complex condition. 3102 3103 Input: 3104 3105 * LAST_STMT: A stmt at the end from which the pattern 3106 search begins, i.e. cast of a bool to 3107 an integer type. 3108 3109 Output: 3110 3111 * TYPE_IN: The type of the input arguments to the pattern. 3112 3113 * TYPE_OUT: The type of the output of this pattern. 3114 3115 * Return value: A new stmt that will be used to replace the pattern. 3116 3117 Assuming size of TYPE is the same as size of all comparisons 3118 (otherwise some casts would be added where needed), the above 3119 sequence we create related pattern stmts: 3120 S1' a_T = x1 CMP1 y1 ? 1 : 0; 3121 S3' c_T = x2 CMP2 y2 ? a_T : 0; 3122 S4' d_T = x3 CMP3 y3 ? 1 : 0; 3123 S5' e_T = c_T | d_T; 3124 S6' f_T = e_T; 3125 3126 Instead of the above S3' we could emit: 3127 S2' b_T = x2 CMP2 y2 ? 1 : 0; 3128 S3' c_T = a_T | b_T; 3129 but the above is more efficient. */ 3130 3131static gimple 3132vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in, 3133 tree *type_out) 3134{ 3135 gimple last_stmt = stmts->pop (); 3136 enum tree_code rhs_code; 3137 tree var, lhs, rhs, vectype; 3138 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 3139 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 3140 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); 3141 gimple pattern_stmt; 3142 3143 if (!is_gimple_assign (last_stmt)) 3144 return NULL; 3145 3146 var = gimple_assign_rhs1 (last_stmt); 3147 lhs = gimple_assign_lhs (last_stmt); 3148 3149 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1 3150 || !TYPE_UNSIGNED (TREE_TYPE (var))) 3151 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE) 3152 return NULL; 3153 3154 rhs_code = gimple_assign_rhs_code (last_stmt); 3155 if (CONVERT_EXPR_CODE_P (rhs_code)) 3156 { 3157 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE 3158 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1) 3159 return NULL; 3160 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs)); 3161 if (vectype == NULL_TREE) 3162 return NULL; 3163 3164 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo)) 3165 return NULL; 3166 3167 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts); 3168 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 3169 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) 3170 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs); 3171 else 3172 pattern_stmt 3173 = gimple_build_assign (lhs, NOP_EXPR, rhs); 3174 *type_out = vectype; 3175 *type_in = vectype; 3176 stmts->safe_push (last_stmt); 3177 if (dump_enabled_p ()) 3178 dump_printf_loc (MSG_NOTE, vect_location, 3179 "vect_recog_bool_pattern: detected:\n"); 3180 3181 return pattern_stmt; 3182 } 3183 else if (rhs_code == COND_EXPR 3184 && TREE_CODE (var) == SSA_NAME) 3185 { 3186 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs)); 3187 if (vectype == NULL_TREE) 3188 return NULL; 3189 3190 /* Build a scalar type for the boolean result that when 3191 vectorized matches the vector type of the result in 3192 size and number of elements. */ 3193 unsigned prec 3194 = wi::udiv_trunc (TYPE_SIZE (vectype), 3195 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi (); 3196 tree type 3197 = build_nonstandard_integer_type (prec, 3198 TYPE_UNSIGNED (TREE_TYPE (var))); 3199 if (get_vectype_for_scalar_type (type) == NULL_TREE) 3200 return NULL; 3201 3202 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo)) 3203 return NULL; 3204 3205 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts); 3206 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 3207 pattern_stmt 3208 = gimple_build_assign (lhs, COND_EXPR, 3209 build2 (NE_EXPR, boolean_type_node, 3210 rhs, build_int_cst (type, 0)), 3211 gimple_assign_rhs2 (last_stmt), 3212 gimple_assign_rhs3 (last_stmt)); 3213 *type_out = vectype; 3214 *type_in = vectype; 3215 stmts->safe_push (last_stmt); 3216 if (dump_enabled_p ()) 3217 dump_printf_loc (MSG_NOTE, vect_location, 3218 "vect_recog_bool_pattern: detected:\n"); 3219 3220 return pattern_stmt; 3221 } 3222 else if (rhs_code == SSA_NAME 3223 && STMT_VINFO_DATA_REF (stmt_vinfo)) 3224 { 3225 stmt_vec_info pattern_stmt_info; 3226 vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 3227 gcc_assert (vectype != NULL_TREE); 3228 if (!VECTOR_MODE_P (TYPE_MODE (vectype))) 3229 return NULL; 3230 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo)) 3231 return NULL; 3232 3233 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts); 3234 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); 3235 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) 3236 { 3237 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 3238 gimple cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs); 3239 new_pattern_def_seq (stmt_vinfo, cast_stmt); 3240 rhs = rhs2; 3241 } 3242 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs); 3243 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, 3244 bb_vinfo); 3245 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); 3246 STMT_VINFO_DATA_REF (pattern_stmt_info) 3247 = STMT_VINFO_DATA_REF (stmt_vinfo); 3248 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info) 3249 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo); 3250 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo); 3251 STMT_VINFO_DR_OFFSET (pattern_stmt_info) 3252 = STMT_VINFO_DR_OFFSET (stmt_vinfo); 3253 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo); 3254 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info) 3255 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo); 3256 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt; 3257 *type_out = vectype; 3258 *type_in = vectype; 3259 stmts->safe_push (last_stmt); 3260 if (dump_enabled_p ()) 3261 dump_printf_loc (MSG_NOTE, vect_location, 3262 "vect_recog_bool_pattern: detected:\n"); 3263 return pattern_stmt; 3264 } 3265 else 3266 return NULL; 3267} 3268 3269 3270/* Mark statements that are involved in a pattern. */ 3271 3272static inline void 3273vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt, 3274 tree pattern_vectype) 3275{ 3276 stmt_vec_info pattern_stmt_info, def_stmt_info; 3277 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); 3278 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); 3279 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info); 3280 gimple def_stmt; 3281 3282 pattern_stmt_info = vinfo_for_stmt (pattern_stmt); 3283 if (pattern_stmt_info == NULL) 3284 { 3285 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, 3286 bb_vinfo); 3287 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); 3288 } 3289 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); 3290 3291 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt; 3292 STMT_VINFO_DEF_TYPE (pattern_stmt_info) 3293 = STMT_VINFO_DEF_TYPE (orig_stmt_info); 3294 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; 3295 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true; 3296 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt; 3297 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info) 3298 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info); 3299 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)) 3300 { 3301 gimple_stmt_iterator si; 3302 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)); 3303 !gsi_end_p (si); gsi_next (&si)) 3304 { 3305 def_stmt = gsi_stmt (si); 3306 def_stmt_info = vinfo_for_stmt (def_stmt); 3307 if (def_stmt_info == NULL) 3308 { 3309 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, 3310 bb_vinfo); 3311 set_vinfo_for_stmt (def_stmt, def_stmt_info); 3312 } 3313 gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); 3314 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt; 3315 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def; 3316 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE) 3317 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; 3318 } 3319 } 3320} 3321 3322/* Function vect_pattern_recog_1 3323 3324 Input: 3325 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain 3326 computation pattern. 3327 STMT: A stmt from which the pattern search should start. 3328 3329 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an 3330 expression that computes the same functionality and can be used to 3331 replace the sequence of stmts that are involved in the pattern. 3332 3333 Output: 3334 This function checks if the expression returned by PATTERN_RECOG_FUNC is 3335 supported in vector form by the target. We use 'TYPE_IN' to obtain the 3336 relevant vector type. If 'TYPE_IN' is already a vector type, then this 3337 indicates that target support had already been checked by PATTERN_RECOG_FUNC. 3338 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits 3339 to the available target pattern. 3340 3341 This function also does some bookkeeping, as explained in the documentation 3342 for vect_recog_pattern. */ 3343 3344static void 3345vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func, 3346 gimple_stmt_iterator si, 3347 vec<gimple> *stmts_to_replace) 3348{ 3349 gimple stmt = gsi_stmt (si), pattern_stmt; 3350 stmt_vec_info stmt_info; 3351 loop_vec_info loop_vinfo; 3352 tree pattern_vectype; 3353 tree type_in, type_out; 3354 enum tree_code code; 3355 int i; 3356 gimple next; 3357 3358 stmts_to_replace->truncate (0); 3359 stmts_to_replace->quick_push (stmt); 3360 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out); 3361 if (!pattern_stmt) 3362 return; 3363 3364 stmt = stmts_to_replace->last (); 3365 stmt_info = vinfo_for_stmt (stmt); 3366 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3367 3368 if (VECTOR_MODE_P (TYPE_MODE (type_in))) 3369 { 3370 /* No need to check target support (already checked by the pattern 3371 recognition function). */ 3372 pattern_vectype = type_out ? type_out : type_in; 3373 } 3374 else 3375 { 3376 machine_mode vec_mode; 3377 enum insn_code icode; 3378 optab optab; 3379 3380 /* Check target support */ 3381 type_in = get_vectype_for_scalar_type (type_in); 3382 if (!type_in) 3383 return; 3384 if (type_out) 3385 type_out = get_vectype_for_scalar_type (type_out); 3386 else 3387 type_out = type_in; 3388 if (!type_out) 3389 return; 3390 pattern_vectype = type_out; 3391 3392 if (is_gimple_assign (pattern_stmt)) 3393 code = gimple_assign_rhs_code (pattern_stmt); 3394 else 3395 { 3396 gcc_assert (is_gimple_call (pattern_stmt)); 3397 code = CALL_EXPR; 3398 } 3399 3400 optab = optab_for_tree_code (code, type_in, optab_default); 3401 vec_mode = TYPE_MODE (type_in); 3402 if (!optab 3403 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing 3404 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out))) 3405 return; 3406 } 3407 3408 /* Found a vectorizable pattern. */ 3409 if (dump_enabled_p ()) 3410 { 3411 dump_printf_loc (MSG_NOTE, vect_location, 3412 "pattern recognized: "); 3413 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 3414 } 3415 3416 /* Mark the stmts that are involved in the pattern. */ 3417 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype); 3418 3419 /* Patterns cannot be vectorized using SLP, because they change the order of 3420 computation. */ 3421 if (loop_vinfo) 3422 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) 3423 if (next == stmt) 3424 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i); 3425 3426 /* It is possible that additional pattern stmts are created and inserted in 3427 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the 3428 relevant statements. */ 3429 for (i = 0; stmts_to_replace->iterate (i, &stmt) 3430 && (unsigned) i < (stmts_to_replace->length () - 1); 3431 i++) 3432 { 3433 stmt_info = vinfo_for_stmt (stmt); 3434 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 3435 if (dump_enabled_p ()) 3436 { 3437 dump_printf_loc (MSG_NOTE, vect_location, 3438 "additional pattern stmt: "); 3439 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0); 3440 } 3441 3442 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE); 3443 } 3444} 3445 3446 3447/* Function vect_pattern_recog 3448 3449 Input: 3450 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for 3451 computation idioms. 3452 3453 Output - for each computation idiom that is detected we create a new stmt 3454 that provides the same functionality and that can be vectorized. We 3455 also record some information in the struct_stmt_info of the relevant 3456 stmts, as explained below: 3457 3458 At the entry to this function we have the following stmts, with the 3459 following initial value in the STMT_VINFO fields: 3460 3461 stmt in_pattern_p related_stmt vec_stmt 3462 S1: a_i = .... - - - 3463 S2: a_2 = ..use(a_i).. - - - 3464 S3: a_1 = ..use(a_2).. - - - 3465 S4: a_0 = ..use(a_1).. - - - 3466 S5: ... = ..use(a_0).. - - - 3467 3468 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be 3469 represented by a single stmt. We then: 3470 - create a new stmt S6 equivalent to the pattern (the stmt is not 3471 inserted into the code) 3472 - fill in the STMT_VINFO fields as follows: 3473 3474 in_pattern_p related_stmt vec_stmt 3475 S1: a_i = .... - - - 3476 S2: a_2 = ..use(a_i).. - - - 3477 S3: a_1 = ..use(a_2).. - - - 3478 S4: a_0 = ..use(a_1).. true S6 - 3479 '---> S6: a_new = .... - S4 - 3480 S5: ... = ..use(a_0).. - - - 3481 3482 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point 3483 to each other through the RELATED_STMT field). 3484 3485 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead 3486 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will 3487 remain irrelevant unless used by stmts other than S4. 3488 3489 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} 3490 (because they are marked as irrelevant). It will vectorize S6, and record 3491 a pointer to the new vector stmt VS6 from S6 (as usual). 3492 S4 will be skipped, and S5 will be vectorized as usual: 3493 3494 in_pattern_p related_stmt vec_stmt 3495 S1: a_i = .... - - - 3496 S2: a_2 = ..use(a_i).. - - - 3497 S3: a_1 = ..use(a_2).. - - - 3498 > VS6: va_new = .... - - - 3499 S4: a_0 = ..use(a_1).. true S6 VS6 3500 '---> S6: a_new = .... - S4 VS6 3501 > VS5: ... = ..vuse(va_new).. - - - 3502 S5: ... = ..use(a_0).. - - - 3503 3504 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used 3505 elsewhere), and we'll end up with: 3506 3507 VS6: va_new = .... 3508 VS5: ... = ..vuse(va_new).. 3509 3510 In case of more than one pattern statements, e.g., widen-mult with 3511 intermediate type: 3512 3513 S1 a_t = ; 3514 S2 a_T = (TYPE) a_t; 3515 '--> S3: a_it = (interm_type) a_t; 3516 S4 prod_T = a_T * CONST; 3517 '--> S5: prod_T' = a_it w* CONST; 3518 3519 there may be other users of a_T outside the pattern. In that case S2 will 3520 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed 3521 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will 3522 be recorded in S3. */ 3523 3524void 3525vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) 3526{ 3527 struct loop *loop; 3528 basic_block *bbs; 3529 unsigned int nbbs; 3530 gimple_stmt_iterator si; 3531 unsigned int i, j; 3532 vect_recog_func_ptr vect_recog_func; 3533 auto_vec<gimple, 1> stmts_to_replace; 3534 gimple stmt; 3535 3536 if (dump_enabled_p ()) 3537 dump_printf_loc (MSG_NOTE, vect_location, 3538 "=== vect_pattern_recog ===\n"); 3539 3540 if (loop_vinfo) 3541 { 3542 loop = LOOP_VINFO_LOOP (loop_vinfo); 3543 bbs = LOOP_VINFO_BBS (loop_vinfo); 3544 nbbs = loop->num_nodes; 3545 } 3546 else 3547 { 3548 bbs = &BB_VINFO_BB (bb_vinfo); 3549 nbbs = 1; 3550 } 3551 3552 /* Scan through the loop stmts, applying the pattern recognition 3553 functions starting at each stmt visited: */ 3554 for (i = 0; i < nbbs; i++) 3555 { 3556 basic_block bb = bbs[i]; 3557 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) 3558 { 3559 if (bb_vinfo && (stmt = gsi_stmt (si)) 3560 && vinfo_for_stmt (stmt) 3561 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) 3562 continue; 3563 3564 /* Scan over all generic vect_recog_xxx_pattern functions. */ 3565 for (j = 0; j < NUM_PATTERNS; j++) 3566 { 3567 vect_recog_func = vect_vect_recog_func_ptrs[j]; 3568 vect_pattern_recog_1 (vect_recog_func, si, 3569 &stmts_to_replace); 3570 } 3571 } 3572 } 3573} 3574