1/* Loop autoparallelization. 2 Copyright (C) 2006-2015 Free Software Foundation, Inc. 3 Contributed by Sebastian Pop <pop@cri.ensmp.fr> 4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>. 5 6This file is part of GCC. 7 8GCC is free software; you can redistribute it and/or modify it under 9the terms of the GNU General Public License as published by the Free 10Software Foundation; either version 3, or (at your option) any later 11version. 12 13GCC is distributed in the hope that it will be useful, but WITHOUT ANY 14WARRANTY; without even the implied warranty of MERCHANTABILITY or 15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16for more details. 17 18You should have received a copy of the GNU General Public License 19along with GCC; see the file COPYING3. If not see 20<http://www.gnu.org/licenses/>. */ 21 22#include "config.h" 23#include "system.h" 24#include "coretypes.h" 25#include "hash-set.h" 26#include "machmode.h" 27#include "vec.h" 28#include "double-int.h" 29#include "input.h" 30#include "alias.h" 31#include "symtab.h" 32#include "options.h" 33#include "wide-int.h" 34#include "inchash.h" 35#include "tree.h" 36#include "fold-const.h" 37#include "predict.h" 38#include "tm.h" 39#include "hard-reg-set.h" 40#include "input.h" 41#include "function.h" 42#include "dominance.h" 43#include "cfg.h" 44#include "basic-block.h" 45#include "tree-ssa-alias.h" 46#include "internal-fn.h" 47#include "gimple-expr.h" 48#include "is-a.h" 49#include "gimple.h" 50#include "gimplify.h" 51#include "gimple-iterator.h" 52#include "gimplify-me.h" 53#include "gimple-walk.h" 54#include "stor-layout.h" 55#include "tree-nested.h" 56#include "gimple-ssa.h" 57#include "tree-cfg.h" 58#include "tree-phinodes.h" 59#include "ssa-iterators.h" 60#include "stringpool.h" 61#include "tree-ssanames.h" 62#include "tree-ssa-loop-ivopts.h" 63#include "tree-ssa-loop-manip.h" 64#include "tree-ssa-loop-niter.h" 65#include "tree-ssa-loop.h" 66#include 
"tree-into-ssa.h" 67#include "cfgloop.h" 68#include "tree-data-ref.h" 69#include "tree-scalar-evolution.h" 70#include "gimple-pretty-print.h" 71#include "tree-pass.h" 72#include "langhooks.h" 73#include "tree-vectorizer.h" 74#include "tree-hasher.h" 75#include "tree-parloops.h" 76#include "omp-low.h" 77#include "tree-nested.h" 78#include "plugin-api.h" 79#include "ipa-ref.h" 80#include "cgraph.h" 81 82/* This pass tries to distribute iterations of loops into several threads. 83 The implementation is straightforward -- for each loop we test whether its 84 iterations are independent, and if it is the case (and some additional 85 conditions regarding profitability and correctness are satisfied), we 86 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion 87 machinery do its job. 88 89 The most of the complexity is in bringing the code into shape expected 90 by the omp expanders: 91 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction 92 variable and that the exit test is at the start of the loop body 93 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable 94 variables by accesses through pointers, and breaking up ssa chains 95 by storing the values incoming to the parallelized loop to a structure 96 passed to the new function as an argument (something similar is done 97 in omp gimplification, unfortunately only a small part of the code 98 can be shared). 99 100 TODO: 101 -- if there are several parallelizable loops in a function, it may be 102 possible to generate the threads just once (using synchronization to 103 ensure that cross-loop dependences are obeyed). 104 -- handling of common reduction patterns for outer loops. 105 106 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */ 107/* 108 Reduction handling: 109 currently we use vect_force_simple_reduction() to detect reduction patterns. 110 The code transformation will be introduced by an example. 
111 112 113parloop 114{ 115 int sum=1; 116 117 for (i = 0; i < N; i++) 118 { 119 x[i] = i + 3; 120 sum+=x[i]; 121 } 122} 123 124gimple-like code: 125header_bb: 126 127 # sum_29 = PHI <sum_11(5), 1(3)> 128 # i_28 = PHI <i_12(5), 0(3)> 129 D.1795_8 = i_28 + 3; 130 x[i_28] = D.1795_8; 131 sum_11 = D.1795_8 + sum_29; 132 i_12 = i_28 + 1; 133 if (N_6(D) > i_12) 134 goto header_bb; 135 136 137exit_bb: 138 139 # sum_21 = PHI <sum_11(4)> 140 printf (&"%d"[0], sum_21); 141 142 143after reduction transformation (only relevant parts): 144 145parloop 146{ 147 148.... 149 150 151 # Storing the initial value given by the user. # 152 153 .paral_data_store.32.sum.27 = 1; 154 155 #pragma omp parallel num_threads(4) 156 157 #pragma omp for schedule(static) 158 159 # The neutral element corresponding to the particular 160 reduction's operation, e.g. 0 for PLUS_EXPR, 161 1 for MULT_EXPR, etc. replaces the user's initial value. # 162 163 # sum.27_29 = PHI <sum.27_11, 0> 164 165 sum.27_11 = D.1827_8 + sum.27_29; 166 167 GIMPLE_OMP_CONTINUE 168 169 # Adding this reduction phi is done at create_phi_for_local_result() # 170 # sum.27_56 = PHI <sum.27_11, 0> 171 GIMPLE_OMP_RETURN 172 173 # Creating the atomic operation is done at 174 create_call_for_reduction_1() # 175 176 #pragma omp atomic_load 177 D.1839_59 = *&.paral_data_load.33_51->reduction.23; 178 D.1840_60 = sum.27_56 + D.1839_59; 179 #pragma omp atomic_store (D.1840_60); 180 181 GIMPLE_OMP_RETURN 182 183 # collecting the result after the join of the threads is done at 184 create_loads_for_reductions(). 185 The value computed by the threads is loaded from the 186 shared struct. # 187 188 189 .paral_data_load.33_52 = &.paral_data_store.32; 190 sum_37 = .paral_data_load.33_52->sum.27; 191 sum_43 = D.1795_41 + sum_37; 192 193 exit bb: 194 # sum_21 = PHI <sum_43, sum_26> 195 printf (&"%d"[0], sum_21); 196 197... 198 199} 200 201*/ 202 203/* Minimal number of iterations of a loop that should be executed in each 204 thread. 
 */
#define MIN_PER_THREAD 100

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value.  */
  gphi *new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};

/* Reduction info hashtable helpers.  */

struct reduction_hasher : typed_free_remove <reduction_info>
{
  typedef reduction_info value_type;
  typedef reduction_info compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Equality and hash functions for hashtab code.  */

/* Two entries describe the same reduction iff they refer to the same
   reduction phi node.  */

inline bool
reduction_hasher::equal (const value_type *a, const compare_type *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

/* Hash on the recorded version number.  Lookups must fill in
   reduc_version consistently with insertions; see reduction_phi, which
   uses the uid stored on the phi.  */

inline hashval_t
reduction_hasher::hash (const value_type *a)
{
  return a->reduc_version;
}

typedef hash_table<reduction_hasher> reduction_info_table_type;


/* Return the reduction_info entry of REDUCTION_LIST describing the
   reduction phi node PHI, or NULL if PHI is not a recorded reduction.  */

static struct reduction_info *
reduction_phi (reduction_info_table_type *reduction_list, gimple phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list->elements () == 0 || phi == NULL)
    return NULL;

  /* Key the lookup on the phi itself (equality) and on the uid stored on
     the phi (hash); see reduction_hasher above.  */
  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list->find (&tmpred);

  return red;
}

/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};

/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
{
  typedef name_to_copy_elt value_type;
  typedef name_to_copy_elt compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Equality and hash functions for hashtab code.  */

/* Entries are identified solely by the SSA name version they copy.  */

inline bool
name_to_copy_hasher::equal (const value_type *a, const compare_type *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const value_type *a)
{
  return (hashval_t) a->version;
}

typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;

/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.
*/ 308typedef struct lambda_trans_matrix_s 309{ 310 lambda_matrix matrix; 311 int rowsize; 312 int colsize; 313 int denominator; 314} *lambda_trans_matrix; 315#define LTM_MATRIX(T) ((T)->matrix) 316#define LTM_ROWSIZE(T) ((T)->rowsize) 317#define LTM_COLSIZE(T) ((T)->colsize) 318#define LTM_DENOMINATOR(T) ((T)->denominator) 319 320/* Allocate a new transformation matrix. */ 321 322static lambda_trans_matrix 323lambda_trans_matrix_new (int colsize, int rowsize, 324 struct obstack * lambda_obstack) 325{ 326 lambda_trans_matrix ret; 327 328 ret = (lambda_trans_matrix) 329 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s)); 330 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack); 331 LTM_ROWSIZE (ret) = rowsize; 332 LTM_COLSIZE (ret) = colsize; 333 LTM_DENOMINATOR (ret) = 1; 334 return ret; 335} 336 337/* Multiply a vector VEC by a matrix MAT. 338 MAT is an M*N matrix, and VEC is a vector with length N. The result 339 is stored in DEST which must be a vector of length M. */ 340 341static void 342lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n, 343 lambda_vector vec, lambda_vector dest) 344{ 345 int i, j; 346 347 lambda_vector_clear (dest, m); 348 for (i = 0; i < m; i++) 349 for (j = 0; j < n; j++) 350 dest[i] += matrix[i][j] * vec[j]; 351} 352 353/* Return true if TRANS is a legal transformation matrix that respects 354 the dependence vectors in DISTS and DIRS. The conservative answer 355 is false. 356 357 "Wolfe proves that a unimodular transformation represented by the 358 matrix T is legal when applied to a loop nest with a set of 359 lexicographically non-negative distance vectors RDG if and only if 360 for each vector d in RDG, (T.d >= 0) is lexicographically positive. 361 i.e.: if and only if it transforms the lexicographically positive 362 distance vectors to lexicographically positive vectors. Note that 363 a unimodular matrix must transform the zero vector (and only it) to 364 the zero vector." S.Muchnick. 
 */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is no worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (aka. output-dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect.  The transform is legal only when every
	 transformed distance vector remains lexicographically positive.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}

/* Data dependency analysis.
Returns true if the iterations of LOOP
   are independent on each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
  {
    fprintf (dump_file, "Considering loop %d\n", loop->num);
    if (!loop->inner)
      fprintf (dump_file, "loop is innermost\n");
    else
      fprintf (dump_file, "loop NOT innermost\n");
   }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  /* A 1x1 matrix containing -1 represents reversal of the (single) loop;
     if that transform is legal, the iterations do not depend on each
     other.  */
  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  /* The dependence data is heap-allocated above; release it on every
     path out of the function.  */
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}

/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.
*/ 484 485static inline bool 486loop_has_blocks_with_irreducible_flag (struct loop *loop) 487{ 488 unsigned i; 489 basic_block *bbs = get_loop_body_in_dom_order (loop); 490 bool res = true; 491 492 for (i = 0; i < loop->num_nodes; i++) 493 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP) 494 goto end; 495 496 res = false; 497 end: 498 free (bbs); 499 return res; 500} 501 502/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name. 503 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls 504 to their addresses that can be reused. The address of OBJ is known to 505 be invariant in the whole function. Other needed statements are placed 506 right before GSI. */ 507 508static tree 509take_address_of (tree obj, tree type, edge entry, 510 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi) 511{ 512 int uid; 513 tree *var_p, name, addr; 514 gassign *stmt; 515 gimple_seq stmts; 516 517 /* Since the address of OBJ is invariant, the trees may be shared. 518 Avoid rewriting unrelated parts of the code. */ 519 obj = unshare_expr (obj); 520 for (var_p = &obj; 521 handled_component_p (*var_p); 522 var_p = &TREE_OPERAND (*var_p, 0)) 523 continue; 524 525 /* Canonicalize the access to base on a MEM_REF. */ 526 if (DECL_P (*var_p)) 527 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p)); 528 529 /* Assign a canonical SSA name to the address of the base decl used 530 in the address and share it for all accesses and addresses based 531 on it. 
*/ 532 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0)); 533 int_tree_map elt; 534 elt.uid = uid; 535 int_tree_map *slot = decl_address->find_slot (elt, INSERT); 536 if (!slot->to) 537 { 538 if (gsi == NULL) 539 return NULL; 540 addr = TREE_OPERAND (*var_p, 0); 541 const char *obj_name 542 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0)); 543 if (obj_name) 544 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name); 545 else 546 name = make_ssa_name (TREE_TYPE (addr)); 547 stmt = gimple_build_assign (name, addr); 548 gsi_insert_on_edge_immediate (entry, stmt); 549 550 slot->uid = uid; 551 slot->to = name; 552 } 553 else 554 name = slot->to; 555 556 /* Express the address in terms of the canonical SSA name. */ 557 TREE_OPERAND (*var_p, 0) = name; 558 if (gsi == NULL) 559 return build_fold_addr_expr_with_type (obj, type); 560 561 name = force_gimple_operand (build_addr (obj, current_function_decl), 562 &stmts, true, NULL_TREE); 563 if (!gimple_seq_empty_p (stmts)) 564 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); 565 566 if (!useless_type_conversion_p (type, TREE_TYPE (name))) 567 { 568 name = force_gimple_operand (fold_convert (type, name), &stmts, true, 569 NULL_TREE); 570 if (!gimple_seq_empty_p (stmts)) 571 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); 572 } 573 574 return name; 575} 576 577/* Callback for htab_traverse. Create the initialization statement 578 for reduction described in SLOT, and place it at the preheader of 579 the loop described in DATA. */ 580 581int 582initialize_reductions (reduction_info **slot, struct loop *loop) 583{ 584 tree init, c; 585 tree bvar, type, arg; 586 edge e; 587 588 struct reduction_info *const reduc = *slot; 589 590 /* Create initialization in preheader: 591 reduction_variable = initialization value of reduction. */ 592 593 /* In the phi node at the header, replace the argument coming 594 from the preheader with the reduction initialization value. 
*/ 595 596 /* Create a new variable to initialize the reduction. */ 597 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi)); 598 bvar = create_tmp_var (type, "reduction"); 599 600 c = build_omp_clause (gimple_location (reduc->reduc_stmt), 601 OMP_CLAUSE_REDUCTION); 602 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code; 603 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)); 604 605 init = omp_reduction_init (c, TREE_TYPE (bvar)); 606 reduc->init = init; 607 608 /* Replace the argument representing the initialization value 609 with the initialization value for the reduction (neutral 610 element for the particular operation, e.g. 0 for PLUS_EXPR, 611 1 for MULT_EXPR, etc). 612 Keep the old value in a new variable "reduction_initial", 613 that will be taken in consideration after the parallel 614 computing is done. */ 615 616 e = loop_preheader_edge (loop); 617 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e); 618 /* Create new variable to hold the initial value. */ 619 620 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE 621 (reduc->reduc_phi, loop_preheader_edge (loop)), init); 622 reduc->initial_value = arg; 623 return 1; 624} 625 626struct elv_data 627{ 628 struct walk_stmt_info info; 629 edge entry; 630 int_tree_htab_type *decl_address; 631 gimple_stmt_iterator *gsi; 632 bool changed; 633 bool reset; 634}; 635 636/* Eliminates references to local variables in *TP out of the single 637 entry single exit region starting at DTA->ENTRY. 638 DECL_ADDRESS contains addresses of the references that had their 639 address taken already. If the expression is changed, CHANGED is 640 set to true. Callback for walk_tree. 
 */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  /* With no insertion point (debug bind value) the address could
	     not be materialized; ask the caller to reset the bind.  */
	  dta->reset = true;
	  return NULL_TREE;
	}

      /* Replace the direct decl reference with an indirect one.  */
      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address in not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  /* Do not descend into non-expression trees.  */
  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}

/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      /* No statements may be inserted for a debug bind value; signal this
	 to the callback with a NULL iterator.  */
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      /* Drop clobber statements: detach their virtual def and replace
	 them with a no-op.  */
      unlink_stmt_vdef (stmt);
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}

/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.
761 762 This includes: 763 1) Taking address of a local variable -- these are moved out of the 764 region (and temporary variable is created to hold the address if 765 necessary). 766 767 2) Dereferencing a local variable -- these are replaced with indirect 768 references. */ 769 770static void 771eliminate_local_variables (edge entry, edge exit) 772{ 773 basic_block bb; 774 auto_vec<basic_block, 3> body; 775 unsigned i; 776 gimple_stmt_iterator gsi; 777 bool has_debug_stmt = false; 778 int_tree_htab_type decl_address (10); 779 basic_block entry_bb = entry->src; 780 basic_block exit_bb = exit->dest; 781 782 gather_blocks_in_sese_region (entry_bb, exit_bb, &body); 783 784 FOR_EACH_VEC_ELT (body, i, bb) 785 if (bb != entry_bb && bb != exit_bb) 786 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 787 if (is_gimple_debug (gsi_stmt (gsi))) 788 { 789 if (gimple_debug_bind_p (gsi_stmt (gsi))) 790 has_debug_stmt = true; 791 } 792 else 793 eliminate_local_variables_stmt (entry, &gsi, &decl_address); 794 795 if (has_debug_stmt) 796 FOR_EACH_VEC_ELT (body, i, bb) 797 if (bb != entry_bb && bb != exit_bb) 798 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 799 if (gimple_debug_bind_p (gsi_stmt (gsi))) 800 eliminate_local_variables_stmt (entry, &gsi, &decl_address); 801} 802 803/* Returns true if expression EXPR is not defined between ENTRY and 804 EXIT, i.e. if all its operands are defined outside of the region. 
 */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      /* The definition lies inside the region iff it is dominated by the
	 region entry but not by the region exit.  */
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  /* Anything else is conservatively treated as not invariant.  */
  return false;
}

/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  /* Only allocate a table slot when we are allowed to create a copy.  */
  slot = name_copies->find_slot_with_hash (&elt, idx,
					   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      /* NO_INSERT lookup found nothing: keep using the name itself.  */
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  /* Rebase the (possibly copied) name on the duplicated decl.  */
  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}

/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.
*/ 910 911static void 912separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt, 913 name_to_copy_table_type *name_copies, 914 int_tree_htab_type *decl_copies) 915{ 916 use_operand_p use; 917 def_operand_p def; 918 ssa_op_iter oi; 919 tree name, copy; 920 bool copy_name_p; 921 922 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF) 923 { 924 name = DEF_FROM_PTR (def); 925 gcc_assert (TREE_CODE (name) == SSA_NAME); 926 copy = separate_decls_in_region_name (name, name_copies, decl_copies, 927 false); 928 gcc_assert (copy == name); 929 } 930 931 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE) 932 { 933 name = USE_FROM_PTR (use); 934 if (TREE_CODE (name) != SSA_NAME) 935 continue; 936 937 copy_name_p = expr_invariant_in_region_p (entry, exit, name); 938 copy = separate_decls_in_region_name (name, name_copies, decl_copies, 939 copy_name_p); 940 SET_USE (use, copy); 941 } 942} 943 944/* Finds the ssa names used in STMT that are defined outside the 945 region between ENTRY and EXIT and replaces such ssa names with 946 their duplicates. The duplicates are stored to NAME_COPIES. Base 947 decls of all ssa names used in STMT (including those defined in 948 LOOP) are replaced with the new temporary variables; the 949 replacement decls are stored in DECL_COPIES. 
 */

/* NOTE(review): the comment above is a copy of the one for
   separate_decls_in_region_stmt.  This variant handles only debug bind /
   debug source bind statements: it remaps the bound variable and the used
   SSA names to their copies.  The return value true appears to mean the
   statement could not be remapped and should be discarded by the caller --
   confirm against the caller.  */

static bool
separate_decls_in_region_debug (gimple stmt,
				name_to_copy_table_type *name_copies,
				int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  /* Rebind the debug statement to the duplicated decl.  */
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  /* A used name has no copy: the value cannot be expressed in the
	     outlined function, so drop it from the bind.  */
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}

/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.
*/

int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  /* The field is named and typed after the reduction statement's result,
     so it can carry the per-thread partial result.  */
  tree var = gimple_assign_lhs (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  /* Nonzero return continues the traversal.  */
  return 1;
}

/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
			   TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}

/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt));
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  /* The phi merges the neutral initial value (no-iteration path) with
     the value computed inside the loop.  */
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}

/* Describes the shared data structure used to communicate between the
   threads, and the blocks where it is stored to and loaded from.  */

struct clsn_data
{
  tree store;		/* The structure variable itself (store target).  */
  tree load;		/* Pointer through which the structure is read.  */

  basic_block store_bb;	/* Block where the stores are emitted.  */
  basic_block load_bb;	/* Block where the loads are emitted.  */
};

/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  /* Address of this reduction's field within the shared structure;
     this is the memory the atomic update operates on.  */
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Split LOAD_BB after its last stmt so the atomic sequence gets a
     block of its own.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  /* Emit the GIMPLE_OMP_ATOMIC_LOAD of the shared field ...  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  /* ... combine it with this thread's local result ...  */
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  /* ... and store the combined value back atomically.  */
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}

/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type *reduction_list,
			   struct clsn_data *ld_st_data)
{
  reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}

/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.
*/

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* Build a load of this reduction's field from the shared structure.  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  /* Assign the loaded value to the SSA name that used to be defined by
     the KEEP_RES exit phi, so existing uses pick up the final value.  */
  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The phi is now redundant -- its result is defined by the assignment
     above; find it in its block and remove it.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}

/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  /* Make LD_ST_DATA->LOAD point at the structure the stores went to,
     so the per-reduction loads below read the right memory.  */
  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);

}

/* Callback for htab_traverse.  Store the neutral value for the
   particular reduction's operation, e.g. 0 for PLUS_EXPR,
   1 for MULT_EXPR, etc. into the reduction field.
   The reduction is specified in SLOT.  The store information is
   passed in DATA.
*/

int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  /* Emit "store.field = initial_value" at the end of STORE_BB.  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}

/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  /* In STORE_BB: store.field = original ssa name.  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* In LOAD_BB: duplicate = load->field.  */
  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}

/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: separate decls in every non-debug stmt and phi strictly
     inside the region (entry and exit blocks excluded).  */
  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  /* A true return means the debug stmt cannot be remapped
		     and must be removed.  */
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list->elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && reduction_list->elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	(ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && reduction_list->elements () > 0)
	{
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  /* A separate clsn_data: the final reduction values are loaded
	     after the threads join, i.e. in the exit destination block.  */
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}

/* Returns true if FNDECL was created to run in parallel.  */

bool
parallelized_function_p (tree fndecl)
{
  cgraph_node *node = cgraph_node::get (fndecl);
  gcc_assert (node != NULL);
  return node->parallelized_function;
}

/* Creates and returns an empty function that will receive the body of
   a parallelized loop.
*/

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  /* Derive a unique, assembler-safe private name from the current
     function's name.  */
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  /* void loopfn (void *) -- the single pointer argument carries the
     shared data structure.  */
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  /* void result.  */
  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* The .paral_data_param argument.  */
  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}

/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type *reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gphi *phi, *nphi;
  gassign *stmt;
  gcond *cond_stmt, *cond_nit;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gphi_iterator gsi = gsi_start_phis (loop->header);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      /* Give the header phi a fresh result T and let a new single-argument
	 phi in ORIG_HEADER define the old result RES from T.  */
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  /* Keep the exit condition testing the header phi's result.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  /* Find the position of the exit source block in the dominance-ordered
     loop body.  */
  for (n = 0; bbs[n] != exit->src; n++)
   continue;
  nbbs = XNEWVEC (basic_block, n);
  /* Duplicate the blocks between the header and the exit test to the
     loop exit, so the loop becomes exit-first.  */
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gphi_iterator gsi = gsi_start_phis (ex_bb);
       !gsi_end_p (gsi); )
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (reduction_list->elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      /* Anything else must be the control variable's exit phi.  */
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
  cond_nit = as_a <gcond *> (last_stmt (exit->src));
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name), nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}

/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.
*/

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gomp_parallel *omp_par_stmt;
  gimple omp_return_stmt1, omp_return_stmt2;
  gimple phi;
  gcond *cond_stmt;
  gomp_for *for_stmt;
  gomp_continue *omp_cont_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (omp_par_stmt, loc);

  gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gassign *assign_stmt;

      gsi = gsi_after_labels (bb);

      /* NEW_DATA = (type of NEW_DATA) &DATA, via the loop function's
	 single pointer argument.  */
      param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
      assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

      assign_stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  omp_return_stmt1 = gimple_build_omp_return (false);
  gimple_set_location (omp_return_stmt1, loc);
  gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  The loop is exit-first, so the
     control variable, its initial value and its increment can be read
     off the header condition and phi.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = as_a <gcond *> (last_stmt (loop->header));

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = copy_ssa_name (cvar);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* The increment of the control variable moves into the
     GIMPLE_OMP_CONTINUE emitted below; remove the original stmt.  */
  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  /* Route the preheader values to the exit phis via GUARD and the
     loop-carried values via END.  */
  for (gphi_iterator gpi = gsi_start_phis (ex_bb);
       !gsi_end_p (gpi); gsi_next (&gpi))
    {
      source_location locus;
      tree def;
      gphi *phi = gpi.phi ();
      gphi *stmt;

      stmt = as_a <gphi *> (
	       SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit)));

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (omp_cont_stmt, loc);
  gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  omp_return_stmt2 = gimple_build_omp_return (true);
  gimple_set_location (omp_return_stmt2, loc);
  gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);

  /* After the above dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}

/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.
*/

static void
gen_parallel_loop (struct loop *loop,
		   reduction_info_table_type *reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;
  unsigned int m_p_thread=2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
	 || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

     for (i = 0; i < n; ++i)
       for (j = 0; j < m; ++j)
         ...

     we should compute nit = n * m, not nit = n.
     Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* Use a lower per-thread iteration bound for outer loops.  */
  if (loop->inner)
    m_p_thread=2;
  else
    m_p_thread=MIN_PER_THREAD;

   many_iterations_cond =
     fold_build2 (GE_EXPR, boolean_type_node,
		nit, build_int_cst (type, m_p_thread * n_threads));

  /* The loop is parallelized only if it is known not to roll zero times
     and to have enough iterations to feed the threads.  */
  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (reduction_list->elements () > 0)
    reduction_list->traverse <struct loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
			new_arg_struct, n_threads, loc);
  if (reduction_list->elements () > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);
}

/* Returns true when LOOP contains vector phi nodes.
*/

static bool
loop_has_vector_phi_nodes (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  gphi_iterator gsi;
  bool res = true;

  /* LOOP was previously marked ATTRIBUTE_UNUSED, but it is clearly used
     both here and via loop->num_nodes below; the stale attribute has
     been dropped.  Scan every phi node of every block in the loop body;
     the first vector-typed phi result decides the answer.  */
  for (i = 0; i < loop->num_nodes; i++)
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
	goto end;

  res = false;
 end:
  /* BBS was allocated by get_loop_body_in_dom_order; release it on
     both paths.  */
  free (bbs);
  return res;
}

/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, insert it to the REDUCTION_LIST.  */

static void
build_new_reduction (reduction_info_table_type *reduction_list,
		     gimple reduc_stmt, gphi *phi)
{
  reduction_info **slot;
  struct reduction_info *new_reduction;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Detected reduction. reduction stmt is: \n");
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  /* XCNEW zero-initializes; fields not set here (field, init, new_phi,
     keep_res, ...) are filled in by the later codegen phases.  */
  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
  slot = reduction_list->find_slot (new_reduction, INSERT);
  *slot = new_reduction;
}

/* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */

int
set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
{
  struct reduction_info *const red = *slot;
  /* The uid doubles as the lookup key for reduction_phi ().  */
  gimple_set_uid (red->reduc_phi, red->reduc_version);
  /* Nonzero return continues the traversal.  */
  return 1;
}

/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.
*/

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
{
  gphi_iterator gsi;
  loop_vec_info simple_loop_info;

  /* Borrow the vectorizer's loop analysis; may be NULL if the loop form
     is not suitable, in which case no reductions are recorded.  */
  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      /* A header phi that is not a simple induction variable is a
	 reduction candidate.  */
      if (!simple_iv (loop, loop, res, &iv, true)
	  && simple_loop_info)
	{
	  gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
							   phi, true,
							   &double_reduc);
	  /* Double reductions (nested-loop reductions) are not handled.  */
	  if (reduc_stmt && !double_reduc)
	    build_new_reduction (reduction_list, reduc_stmt, phi);
	}
    }
  destroy_loop_vec_info (simple_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
}

/* Try to initialize NITER for code generation part.  */

static bool
try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
{
  edge exit = single_dom_exit (loop);

  gcc_assert (exit);

  /* We need to know # of iterations, and there should be no uses of values
     defined inside loop outside of it, unless the values are invariants of
     the loop.  */
  if (!number_of_iterations_exit (loop, exit, niter, false))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: number of iterations not known\n");
      return false;
    }

  return true;
}

/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.
*/ 2049 2050static bool 2051try_create_reduction_list (loop_p loop, 2052 reduction_info_table_type *reduction_list) 2053{ 2054 edge exit = single_dom_exit (loop); 2055 gphi_iterator gsi; 2056 2057 gcc_assert (exit); 2058 2059 gather_scalar_reductions (loop, reduction_list); 2060 2061 2062 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi)) 2063 { 2064 gphi *phi = gsi.phi (); 2065 struct reduction_info *red; 2066 imm_use_iterator imm_iter; 2067 use_operand_p use_p; 2068 gimple reduc_phi; 2069 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit); 2070 2071 if (!virtual_operand_p (val)) 2072 { 2073 if (dump_file && (dump_flags & TDF_DETAILS)) 2074 { 2075 fprintf (dump_file, "phi is "); 2076 print_gimple_stmt (dump_file, phi, 0, 0); 2077 fprintf (dump_file, "arg of phi to exit: value "); 2078 print_generic_expr (dump_file, val, 0); 2079 fprintf (dump_file, " used outside loop\n"); 2080 fprintf (dump_file, 2081 " checking if it a part of reduction pattern: \n"); 2082 } 2083 if (reduction_list->elements () == 0) 2084 { 2085 if (dump_file && (dump_flags & TDF_DETAILS)) 2086 fprintf (dump_file, 2087 " FAILED: it is not a part of reduction.\n"); 2088 return false; 2089 } 2090 reduc_phi = NULL; 2091 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val) 2092 { 2093 if (!gimple_debug_bind_p (USE_STMT (use_p)) 2094 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))) 2095 { 2096 reduc_phi = USE_STMT (use_p); 2097 break; 2098 } 2099 } 2100 red = reduction_phi (reduction_list, reduc_phi); 2101 if (red == NULL) 2102 { 2103 if (dump_file && (dump_flags & TDF_DETAILS)) 2104 fprintf (dump_file, 2105 " FAILED: it is not a part of reduction.\n"); 2106 return false; 2107 } 2108 if (dump_file && (dump_flags & TDF_DETAILS)) 2109 { 2110 fprintf (dump_file, "reduction phi is "); 2111 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0); 2112 fprintf (dump_file, "reduction stmt is "); 2113 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0); 2114 } 2115 } 2116 } 2117 2118 /* 
The iterations of the loop may communicate only through bivs whose 2119 iteration space can be distributed efficiently. */ 2120 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi)) 2121 { 2122 gphi *phi = gsi.phi (); 2123 tree def = PHI_RESULT (phi); 2124 affine_iv iv; 2125 2126 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true)) 2127 { 2128 struct reduction_info *red; 2129 2130 red = reduction_phi (reduction_list, phi); 2131 if (red == NULL) 2132 { 2133 if (dump_file && (dump_flags & TDF_DETAILS)) 2134 fprintf (dump_file, 2135 " FAILED: scalar dependency between iterations\n"); 2136 return false; 2137 } 2138 } 2139 } 2140 2141 2142 return true; 2143} 2144 2145/* Detect parallel loops and generate parallel code using libgomp 2146 primitives. Returns true if some loop was parallelized, false 2147 otherwise. */ 2148 2149static bool 2150parallelize_loops (void) 2151{ 2152 unsigned n_threads = flag_tree_parallelize_loops; 2153 bool changed = false; 2154 struct loop *loop; 2155 struct tree_niter_desc niter_desc; 2156 struct obstack parloop_obstack; 2157 HOST_WIDE_INT estimated; 2158 source_location loop_loc; 2159 2160 /* Do not parallelize loops in the functions created by parallelization. */ 2161 if (parallelized_function_p (cfun->decl)) 2162 return false; 2163 if (cfun->has_nonlocal_label) 2164 return false; 2165 2166 gcc_obstack_init (&parloop_obstack); 2167 reduction_info_table_type reduction_list (10); 2168 init_stmt_vec_info_vec (); 2169 2170 FOR_EACH_LOOP (loop, 0) 2171 { 2172 reduction_list.empty (); 2173 if (dump_file && (dump_flags & TDF_DETAILS)) 2174 { 2175 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num); 2176 if (loop->inner) 2177 fprintf (dump_file, "loop %d is not innermost\n",loop->num); 2178 else 2179 fprintf (dump_file, "loop %d is innermost\n",loop->num); 2180 } 2181 2182 /* If we use autopar in graphite pass, we use its marked dependency 2183 checking results. 
*/ 2184 if (flag_loop_parallelize_all && !loop->can_be_parallel) 2185 { 2186 if (dump_file && (dump_flags & TDF_DETAILS)) 2187 fprintf (dump_file, "loop is not parallel according to graphite\n"); 2188 continue; 2189 } 2190 2191 if (!single_dom_exit (loop)) 2192 { 2193 2194 if (dump_file && (dump_flags & TDF_DETAILS)) 2195 fprintf (dump_file, "loop is !single_dom_exit\n"); 2196 2197 continue; 2198 } 2199 2200 if (/* And of course, the loop must be parallelizable. */ 2201 !can_duplicate_loop_p (loop) 2202 || loop_has_blocks_with_irreducible_flag (loop) 2203 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP) 2204 /* FIXME: the check for vector phi nodes could be removed. */ 2205 || loop_has_vector_phi_nodes (loop)) 2206 continue; 2207 2208 estimated = estimated_stmt_executions_int (loop); 2209 if (estimated == -1) 2210 estimated = max_stmt_executions_int (loop); 2211 /* FIXME: Bypass this check as graphite doesn't update the 2212 count and frequency correctly now. */ 2213 if (!flag_loop_parallelize_all 2214 && ((estimated != -1 2215 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD) 2216 /* Do not bother with loops in cold areas. 
*/ 2217 || optimize_loop_nest_for_size_p (loop))) 2218 continue; 2219 2220 if (!try_get_loop_niter (loop, &niter_desc)) 2221 continue; 2222 2223 if (!try_create_reduction_list (loop, &reduction_list)) 2224 continue; 2225 2226 if (!flag_loop_parallelize_all 2227 && !loop_parallel_p (loop, &parloop_obstack)) 2228 continue; 2229 2230 changed = true; 2231 if (dump_file && (dump_flags & TDF_DETAILS)) 2232 { 2233 if (loop->inner) 2234 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index); 2235 else 2236 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index); 2237 loop_loc = find_loop_location (loop); 2238 if (loop_loc != UNKNOWN_LOCATION) 2239 fprintf (dump_file, "\nloop at %s:%d: ", 2240 LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc)); 2241 } 2242 gen_parallel_loop (loop, &reduction_list, 2243 n_threads, &niter_desc); 2244 } 2245 2246 free_stmt_vec_info_vec (); 2247 obstack_free (&parloop_obstack, NULL); 2248 2249 /* Parallelization will cause new function calls to be inserted through 2250 which local variables will escape. Reset the points-to solution 2251 for ESCAPED. */ 2252 if (changed) 2253 pt_solution_reset (&cfun->gimple_df->escaped); 2254 2255 return changed; 2256} 2257 2258/* Parallelization. 
*/ 2259 2260namespace { 2261 2262const pass_data pass_data_parallelize_loops = 2263{ 2264 GIMPLE_PASS, /* type */ 2265 "parloops", /* name */ 2266 OPTGROUP_LOOP, /* optinfo_flags */ 2267 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */ 2268 ( PROP_cfg | PROP_ssa ), /* properties_required */ 2269 0, /* properties_provided */ 2270 0, /* properties_destroyed */ 2271 0, /* todo_flags_start */ 2272 0, /* todo_flags_finish */ 2273}; 2274 2275class pass_parallelize_loops : public gimple_opt_pass 2276{ 2277public: 2278 pass_parallelize_loops (gcc::context *ctxt) 2279 : gimple_opt_pass (pass_data_parallelize_loops, ctxt) 2280 {} 2281 2282 /* opt_pass methods: */ 2283 virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; } 2284 virtual unsigned int execute (function *); 2285 2286}; // class pass_parallelize_loops 2287 2288unsigned 2289pass_parallelize_loops::execute (function *fun) 2290{ 2291 if (number_of_loops (fun) <= 1) 2292 return 0; 2293 2294 if (parallelize_loops ()) 2295 { 2296 fun->curr_properties &= ~(PROP_gimple_eomp); 2297 return TODO_update_ssa; 2298 } 2299 2300 return 0; 2301} 2302 2303} // anon namespace 2304 2305gimple_opt_pass * 2306make_pass_parallelize_loops (gcc::context *ctxt) 2307{ 2308 return new pass_parallelize_loops (ctxt); 2309} 2310