1/* Loop invariant motion.
2   Copyright (C) 2003-2015 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "tm.h"
24#include "hash-set.h"
25#include "machmode.h"
26#include "vec.h"
27#include "double-int.h"
28#include "input.h"
29#include "alias.h"
30#include "symtab.h"
31#include "wide-int.h"
32#include "inchash.h"
33#include "tree.h"
34#include "fold-const.h"
35#include "tm_p.h"
36#include "predict.h"
37#include "hard-reg-set.h"
38#include "input.h"
39#include "function.h"
40#include "dominance.h"
41#include "cfg.h"
42#include "cfganal.h"
43#include "basic-block.h"
44#include "gimple-pretty-print.h"
45#include "hash-map.h"
46#include "hash-table.h"
47#include "tree-ssa-alias.h"
48#include "internal-fn.h"
49#include "tree-eh.h"
50#include "gimple-expr.h"
51#include "is-a.h"
52#include "gimple.h"
53#include "gimplify.h"
54#include "gimple-iterator.h"
55#include "gimple-ssa.h"
56#include "tree-cfg.h"
57#include "tree-phinodes.h"
58#include "ssa-iterators.h"
59#include "stringpool.h"
60#include "tree-ssanames.h"
61#include "tree-ssa-loop-manip.h"
62#include "tree-ssa-loop.h"
63#include "tree-into-ssa.h"
64#include "cfgloop.h"
65#include "domwalk.h"
66#include "params.h"
67#include "tree-pass.h"
68#include "flags.h"
69#include "tree-affine.h"
70#include "tree-ssa-propagate.h"
71#include "trans-mem.h"
72#include "gimple-fold.h"
73
74/* TODO:  Support for predicated code motion.  I.e.
75
76   while (1)
77     {
78       if (cond)
79	 {
80	   a = inv;
81	   something;
82	 }
83     }
84
   Where COND and INV are invariants, but evaluating INV may trap or be
   invalid for some other reason if !COND.  This may be transformed to
87
88   if (cond)
89     a = inv;
90   while (1)
91     {
92       if (cond)
93	 something;
94     }  */
95
96/* The auxiliary data kept for each statement.  */
97
98struct lim_aux_data
99{
  struct loop *max_loop;	/* The outermost loop in which the statement
				   is invariant.  */

  struct loop *tgt_loop;	/* The loop out of which we want to move the
				   invariant.  */

  struct loop *always_executed_in;
				/* The outermost loop for which we are sure
				   the statement is executed if the loop
				   is entered.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  vec<gimple> depends;		/* Vector of statements that must also be
				   hoisted out of the loop when this statement
				   is hoisted; i.e. those that define the
				   operands of the statement and are inside of
				   the MAX_LOOP loop.  */
119};
120
121/* Maps statements to their lim_aux_data.  */
122
123static hash_map<gimple, lim_aux_data *> *lim_aux_data_map;
124
125/* Description of a memory reference location.  */
126
127typedef struct mem_ref_loc
128{
129  tree *ref;			/* The reference itself.  */
  gimple stmt;			/* The statement in which it occurs.  */
131} *mem_ref_loc_p;
132
133
134/* Description of a memory reference.  */
135
136typedef struct im_mem_ref
137{
138  unsigned id;			/* ID assigned to the memory reference
139				   (its index in memory_accesses.refs_list)  */
140  hashval_t hash;		/* Its hash value.  */
141
142  /* The memory access itself and associated caching of alias-oracle
143     query meta-data.  */
144  ao_ref mem;
145
  bitmap stored;		/* The set of loops in which this memory
				   location is stored to.  */
  vec<mem_ref_loc>		accesses_in_loop;
				/* The locations of the accesses.  Vector
				   sorted by the postorder number of the
				   loop containing each access.  */

  /* The following sets are computed on demand.  We keep both the set and
     its complement, so that we know whether the information was
     already computed or not.  */
  bitmap_head indep_loop;	/* The set of loops in which the memory
				   reference is independent, meaning:
				   If it is stored in the loop, this store
				     is independent of all other loads and
				     stores.
				   If it is only loaded, then it is independent
				     of all stores in the loop.  */
162  bitmap_head dep_loop;		/* The complement of INDEP_LOOP.  */
163} *mem_ref_p;
164
165/* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
166   to record (in)dependence against stores in the loop and its subloops, the
167   second to record (in)dependence against all references in the loop
168   and its subloops.  */
169#define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
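/* For instance, for the loop with number 3 this yields bit 6 for the
   stores-only set (STOREDP false) and bit 7 for the all-references set
   (STOREDP true).  */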
170
171/* Mem_ref hashtable helpers.  */
172
173struct mem_ref_hasher : typed_noop_remove <im_mem_ref>
174{
175  typedef im_mem_ref value_type;
176  typedef tree_node compare_type;
177  static inline hashval_t hash (const value_type *);
178  static inline bool equal (const value_type *, const compare_type *);
179};
180
181/* A hash function for struct im_mem_ref object OBJ.  */
182
183inline hashval_t
184mem_ref_hasher::hash (const value_type *mem)
185{
186  return mem->hash;
187}
188
189/* An equality function for struct im_mem_ref object MEM1 with
190   memory reference OBJ2.  */
191
192inline bool
193mem_ref_hasher::equal (const value_type *mem1, const compare_type *obj2)
194{
195  return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
196}
197
198
199/* Description of memory accesses in loops.  */
200
201static struct
202{
203  /* The hash table of memory references accessed in loops.  */
204  hash_table<mem_ref_hasher> *refs;
205
206  /* The list of memory references.  */
207  vec<mem_ref_p> refs_list;
208
209  /* The set of memory references accessed in each loop.  */
210  vec<bitmap_head> refs_in_loop;
211
212  /* The set of memory references stored in each loop.  */
213  vec<bitmap_head> refs_stored_in_loop;
214
  /* The set of memory references stored in each loop, including subloops.  */
216  vec<bitmap_head> all_refs_stored_in_loop;
217
218  /* Cache for expanding memory addresses.  */
219  hash_map<tree, name_expansion *> *ttae_cache;
220} memory_accesses;
221
222/* Obstack for the bitmaps in the above data structures.  */
223static bitmap_obstack lim_bitmap_obstack;
224static obstack mem_ref_obstack;
225
226static bool ref_indep_loop_p (struct loop *, mem_ref_p);
227
228/* Minimum cost of an expensive expression.  */
229#define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
230
231/* The outermost loop for which execution of the header guarantees that the
232   block will be executed.  */
233#define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
234#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
235
236/* ID of the shared unanalyzable mem.  */
237#define UNANALYZABLE_MEM_ID 0
238
239/* Whether the reference was analyzable.  */
240#define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
241
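/* Allocates zeroed lim_aux_data for STMT, records it in LIM_AUX_DATA_MAP
   and returns it.  */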
242static struct lim_aux_data *
243init_lim_data (gimple stmt)
244{
245  lim_aux_data *p = XCNEW (struct lim_aux_data);
246  lim_aux_data_map->put (stmt, p);
247
248  return p;
249}
250
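/* Returns the lim_aux_data recorded for STMT, or NULL if there is none.  */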
251static struct lim_aux_data *
252get_lim_data (gimple stmt)
253{
254  lim_aux_data **p = lim_aux_data_map->get (stmt);
255  if (!p)
256    return NULL;
257
258  return *p;
259}
260
261/* Releases the memory occupied by DATA.  */
262
263static void
264free_lim_aux_data (struct lim_aux_data *data)
265{
266  data->depends.release ();
267  free (data);
268}
269
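/* Releases the lim_aux_data recorded for STMT, if any, and resets its
   map slot to NULL.  */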
270static void
271clear_lim_data (gimple stmt)
272{
273  lim_aux_data **p = lim_aux_data_map->get (stmt);
274  if (!p)
275    return;
276
277  free_lim_aux_data (*p);
278  *p = NULL;
279}
280
281
282/* The possibilities of statement movement.  */
283enum move_pos
284  {
285    MOVE_IMPOSSIBLE,		/* No movement -- side effect expression.  */
    MOVE_PRESERVE_EXECUTION,	/* Must not cause a non-executed statement
				   to become executed -- memory accesses, ... */
288    MOVE_POSSIBLE		/* Unlimited movement.  */
289  };
290
291
292/* If it is possible to hoist the statement STMT unconditionally,
293   returns MOVE_POSSIBLE.
294   If it is possible to hoist the statement STMT, but we must avoid making
295   it executed if it would not be executed in the original program (e.g.
296   because it may trap), return MOVE_PRESERVE_EXECUTION.
297   Otherwise return MOVE_IMPOSSIBLE.  */
298
299enum move_pos
300movement_possibility (gimple stmt)
301{
302  tree lhs;
303  enum move_pos ret = MOVE_POSSIBLE;
304
305  if (flag_unswitch_loops
306      && gimple_code (stmt) == GIMPLE_COND)
307    {
308      /* If we perform unswitching, force the operands of the invariant
309	 condition to be moved out of the loop.  */
310      return MOVE_POSSIBLE;
311    }
312
313  if (gimple_code (stmt) == GIMPLE_PHI
314      && gimple_phi_num_args (stmt) <= 2
315      && !virtual_operand_p (gimple_phi_result (stmt))
316      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
317    return MOVE_POSSIBLE;
318
319  if (gimple_get_lhs (stmt) == NULL_TREE)
320    return MOVE_IMPOSSIBLE;
321
322  if (gimple_vdef (stmt))
323    return MOVE_IMPOSSIBLE;
324
325  if (stmt_ends_bb_p (stmt)
326      || gimple_has_volatile_ops (stmt)
327      || gimple_has_side_effects (stmt)
328      || stmt_could_throw_p (stmt))
329    return MOVE_IMPOSSIBLE;
330
331  if (is_gimple_call (stmt))
332    {
      /* While a pure or const call is guaranteed to have no side effects, we
334	 cannot move it arbitrarily.  Consider code like
335
336	 char *s = something ();
337
338	 while (1)
339	   {
340	     if (s)
341	       t = strlen (s);
342	     else
343	       t = 0;
344	   }
345
346	 Here the strlen call cannot be moved out of the loop, even though
347	 s is invariant.  In addition to possibly creating a call with
348	 invalid arguments, moving out a function call that is not executed
349	 may cause performance regressions in case the call is costly and
350	 not executed at all.  */
351      ret = MOVE_PRESERVE_EXECUTION;
352      lhs = gimple_call_lhs (stmt);
353    }
354  else if (is_gimple_assign (stmt))
355    lhs = gimple_assign_lhs (stmt);
356  else
357    return MOVE_IMPOSSIBLE;
358
359  if (TREE_CODE (lhs) == SSA_NAME
360      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
361    return MOVE_IMPOSSIBLE;
362
363  if (TREE_CODE (lhs) != SSA_NAME
364      || gimple_could_trap_p (stmt))
365    return MOVE_PRESERVE_EXECUTION;
366
367  /* Non local loads in a transaction cannot be hoisted out.  Well,
368     unless the load happens on every path out of the loop, but we
369     don't take this into account yet.  */
370  if (flag_tm
371      && gimple_in_transaction (stmt)
372      && gimple_assign_single_p (stmt))
373    {
374      tree rhs = gimple_assign_rhs1 (stmt);
375      if (DECL_P (rhs) && is_global_var (rhs))
376	{
377	  if (dump_file)
378	    {
379	      fprintf (dump_file, "Cannot hoist conditional load of ");
380	      print_generic_expr (dump_file, rhs, TDF_SLIM);
381	      fprintf (dump_file, " because it is in a transaction.\n");
382	    }
383	  return MOVE_IMPOSSIBLE;
384	}
385    }
386
387  return ret;
388}
389
/* Suppose that operand DEF is used inside LOOP.  Returns the outermost
   loop to which we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in which the value
   of DEF is invariant.  */
394
395static struct loop *
396outermost_invariant_loop (tree def, struct loop *loop)
397{
398  gimple def_stmt;
399  basic_block def_bb;
400  struct loop *max_loop;
401  struct lim_aux_data *lim_data;
402
403  if (!def)
404    return superloop_at_depth (loop, 1);
405
406  if (TREE_CODE (def) != SSA_NAME)
407    {
408      gcc_assert (is_gimple_min_invariant (def));
409      return superloop_at_depth (loop, 1);
410    }
411
412  def_stmt = SSA_NAME_DEF_STMT (def);
413  def_bb = gimple_bb (def_stmt);
414  if (!def_bb)
415    return superloop_at_depth (loop, 1);
416
417  max_loop = find_common_loop (loop, def_bb->loop_father);
418
419  lim_data = get_lim_data (def_stmt);
420  if (lim_data != NULL && lim_data->max_loop != NULL)
421    max_loop = find_common_loop (max_loop,
422				 loop_outer (lim_data->max_loop));
423  if (max_loop == loop)
424    return NULL;
425  max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
426
427  return max_loop;
428}
429
430/* DATA is a structure containing information associated with a statement
431   inside LOOP.  DEF is one of the operands of this statement.
432
   Find the outermost loop enclosing LOOP in which the value of DEF is
   invariant and record this in the DATA->max_loop field.  If DEF itself is
   defined inside this loop as well (i.e. we need to hoist it out of the loop
   if we want to hoist the statement represented by DATA), record the
   statement in which DEF is defined in the DATA->depends list.  Additionally,
   if ADD_COST is true, add the cost of the computation of DEF to DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return true.  */
441
442static bool
443add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
444		bool add_cost)
445{
446  gimple def_stmt = SSA_NAME_DEF_STMT (def);
447  basic_block def_bb = gimple_bb (def_stmt);
448  struct loop *max_loop;
449  struct lim_aux_data *def_data;
450
451  if (!def_bb)
452    return true;
453
454  max_loop = outermost_invariant_loop (def, loop);
455  if (!max_loop)
456    return false;
457
458  if (flow_loop_nested_p (data->max_loop, max_loop))
459    data->max_loop = max_loop;
460
461  def_data = get_lim_data (def_stmt);
462  if (!def_data)
463    return true;
464
465  if (add_cost
466      /* Only add the cost if the statement defining DEF is inside LOOP,
467	 i.e. if it is likely that by moving the invariants dependent
468	 on it, we will be able to avoid creating a new register for
469	 it (since it will be only used in these dependent invariants).  */
470      && def_bb->loop_father == loop)
471    data->cost += def_data->cost;
472
473  data->depends.safe_push (def_stmt);
474
475  return true;
476}
477
478/* Returns an estimate for a cost of statement STMT.  The values here
479   are just ad-hoc constants, similar to costs for inlining.  */
480
481static unsigned
482stmt_cost (gimple stmt)
483{
484  /* Always try to create possibilities for unswitching.  */
485  if (gimple_code (stmt) == GIMPLE_COND
486      || gimple_code (stmt) == GIMPLE_PHI)
487    return LIM_EXPENSIVE;
488
489  /* We should be hoisting calls if possible.  */
490  if (is_gimple_call (stmt))
491    {
492      tree fndecl;
493
494      /* Unless the call is a builtin_constant_p; this always folds to a
495	 constant, so moving it is useless.  */
496      fndecl = gimple_call_fndecl (stmt);
497      if (fndecl
498	  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
499	  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
500	return 0;
501
502      return LIM_EXPENSIVE;
503    }
504
505  /* Hoisting memory references out should almost surely be a win.  */
506  if (gimple_references_memory_p (stmt))
507    return LIM_EXPENSIVE;
508
509  if (gimple_code (stmt) != GIMPLE_ASSIGN)
510    return 1;
511
512  switch (gimple_assign_rhs_code (stmt))
513    {
514    case MULT_EXPR:
515    case WIDEN_MULT_EXPR:
516    case WIDEN_MULT_PLUS_EXPR:
517    case WIDEN_MULT_MINUS_EXPR:
518    case DOT_PROD_EXPR:
519    case FMA_EXPR:
520    case TRUNC_DIV_EXPR:
521    case CEIL_DIV_EXPR:
522    case FLOOR_DIV_EXPR:
523    case ROUND_DIV_EXPR:
524    case EXACT_DIV_EXPR:
525    case CEIL_MOD_EXPR:
526    case FLOOR_MOD_EXPR:
527    case ROUND_MOD_EXPR:
528    case TRUNC_MOD_EXPR:
529    case RDIV_EXPR:
530      /* Division and multiplication are usually expensive.  */
531      return LIM_EXPENSIVE;
532
533    case LSHIFT_EXPR:
534    case RSHIFT_EXPR:
535    case WIDEN_LSHIFT_EXPR:
536    case LROTATE_EXPR:
537    case RROTATE_EXPR:
538      /* Shifts and rotates are usually expensive.  */
539      return LIM_EXPENSIVE;
540
541    case CONSTRUCTOR:
542      /* Make vector construction cost proportional to the number
543         of elements.  */
544      return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
545
546    case SSA_NAME:
547    case PAREN_EXPR:
548      /* Whether or not something is wrapped inside a PAREN_EXPR
549         should not change move cost.  Nor should an intermediate
550	 unpropagated SSA name copy.  */
551      return 0;
552
553    default:
554      return 1;
555    }
556}
557
/* Finds the outermost loop between OUTER and LOOP in which the memory
   reference REF is independent.  If REF is not independent in LOOP, NULL is
   returned instead.  */
561
562static struct loop *
563outermost_indep_loop (struct loop *outer, struct loop *loop, mem_ref_p ref)
564{
565  struct loop *aloop;
566
567  if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
568    return NULL;
569
570  for (aloop = outer;
571       aloop != loop;
572       aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
573    if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
574	&& ref_indep_loop_p (aloop, ref))
575      return aloop;
576
577  if (ref_indep_loop_p (loop, ref))
578    return loop;
579  else
580    return NULL;
581}
582
583/* If there is a simple load or store to a memory reference in STMT, returns
584   the location of the memory reference, and sets IS_STORE according to whether
585   it is a store or load.  Otherwise, returns NULL.  */
586
587static tree *
588simple_mem_ref_in_stmt (gimple stmt, bool *is_store)
589{
590  tree *lhs, *rhs;
591
592  /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
593  if (!gimple_assign_single_p (stmt))
594    return NULL;
595
596  lhs = gimple_assign_lhs_ptr (stmt);
597  rhs = gimple_assign_rhs1_ptr (stmt);
598
599  if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
600    {
601      *is_store = false;
602      return rhs;
603    }
604  else if (gimple_vdef (stmt)
605	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
606    {
607      *is_store = true;
608      return lhs;
609    }
610  else
611    return NULL;
612}
613
614/* Returns the memory reference contained in STMT.  */
615
616static mem_ref_p
617mem_ref_in_stmt (gimple stmt)
618{
619  bool store;
620  tree *mem = simple_mem_ref_in_stmt (stmt, &store);
621  hashval_t hash;
622  mem_ref_p ref;
623
624  if (!mem)
625    return NULL;
626  gcc_assert (!store);
627
628  hash = iterative_hash_expr (*mem, 0);
629  ref = memory_accesses.refs->find_with_hash (*mem, hash);
630
631  gcc_assert (ref != NULL);
632  return ref;
633}
634
635/* From a controlling predicate in DOM determine the arguments from
636   the PHI node PHI that are chosen if the predicate evaluates to
637   true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
638   they are non-NULL.  Returns true if the arguments can be determined,
   otherwise returns false.  */
640
641static bool
642extract_true_false_args_from_phi (basic_block dom, gphi *phi,
643				  tree *true_arg_p, tree *false_arg_p)
644{
645  basic_block bb = gimple_bb (phi);
646  edge true_edge, false_edge, tem;
647  tree arg0 = NULL_TREE, arg1 = NULL_TREE;
648
649  /* We have to verify that one edge into the PHI node is dominated
650     by the true edge of the predicate block and the other edge
651     dominated by the false edge.  This ensures that the PHI argument
652     we are going to take is completely determined by the path we
653     take from the predicate block.
654     We can only use BB dominance checks below if the destination of
655     the true/false edges are dominated by their edge, thus only
656     have a single predecessor.  */
657  extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
658  tem = EDGE_PRED (bb, 0);
659  if (tem == true_edge
660      || (single_pred_p (true_edge->dest)
661	  && (tem->src == true_edge->dest
662	      || dominated_by_p (CDI_DOMINATORS,
663				 tem->src, true_edge->dest))))
664    arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
665  else if (tem == false_edge
666	   || (single_pred_p (false_edge->dest)
667	       && (tem->src == false_edge->dest
668		   || dominated_by_p (CDI_DOMINATORS,
669				      tem->src, false_edge->dest))))
670    arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
671  else
672    return false;
673  tem = EDGE_PRED (bb, 1);
674  if (tem == true_edge
675      || (single_pred_p (true_edge->dest)
676	  && (tem->src == true_edge->dest
677	      || dominated_by_p (CDI_DOMINATORS,
678				 tem->src, true_edge->dest))))
679    arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
680  else if (tem == false_edge
681	   || (single_pred_p (false_edge->dest)
682	       && (tem->src == false_edge->dest
683		   || dominated_by_p (CDI_DOMINATORS,
684				      tem->src, false_edge->dest))))
685    arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
686  else
687    return false;
688  if (!arg0 || !arg1)
689    return false;
690
691  if (true_arg_p)
692    *true_arg_p = arg0;
693  if (false_arg_p)
694    *false_arg_p = arg1;
695
696  return true;
697}
698
/* Determine the outermost loop to which it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in which the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose the loop so that
   we preserve whether STMT is executed.  The function also fills in other
   related information in LIM_DATA (STMT).
705
706   The function returns false if STMT cannot be hoisted outside of the loop it
707   is defined in, and true otherwise.  */
708
709static bool
710determine_max_movement (gimple stmt, bool must_preserve_exec)
711{
712  basic_block bb = gimple_bb (stmt);
713  struct loop *loop = bb->loop_father;
714  struct loop *level;
715  struct lim_aux_data *lim_data = get_lim_data (stmt);
716  tree val;
717  ssa_op_iter iter;
718
719  if (must_preserve_exec)
720    level = ALWAYS_EXECUTED_IN (bb);
721  else
722    level = superloop_at_depth (loop, 1);
723  lim_data->max_loop = level;
724
725  if (gphi *phi = dyn_cast <gphi *> (stmt))
726    {
727      use_operand_p use_p;
728      unsigned min_cost = UINT_MAX;
729      unsigned total_cost = 0;
730      struct lim_aux_data *def_data;
731
732      /* We will end up promoting dependencies to be unconditionally
733	 evaluated.  For this reason the PHI cost (and thus the
734	 cost we remove from the loop by doing the invariant motion)
735	 is that of the cheapest PHI argument dependency chain.  */
736      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
737	{
738	  val = USE_FROM_PTR (use_p);
739
740	  if (TREE_CODE (val) != SSA_NAME)
741	    {
	      /* Assign cost 1 to constants.  */
743	      min_cost = MIN (min_cost, 1);
744	      total_cost += 1;
745	      continue;
746	    }
747	  if (!add_dependency (val, lim_data, loop, false))
748	    return false;
749
750	  gimple def_stmt = SSA_NAME_DEF_STMT (val);
751	  if (gimple_bb (def_stmt)
752	      && gimple_bb (def_stmt)->loop_father == loop)
753	    {
754	      def_data = get_lim_data (def_stmt);
755	      if (def_data)
756		{
757		  min_cost = MIN (min_cost, def_data->cost);
758		  total_cost += def_data->cost;
759		}
760	    }
761	}
762
763      min_cost = MIN (min_cost, total_cost);
764      lim_data->cost += min_cost;
765
766      if (gimple_phi_num_args (phi) > 1)
767	{
768	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
769	  gimple cond;
770	  if (gsi_end_p (gsi_last_bb (dom)))
771	    return false;
772	  cond = gsi_stmt (gsi_last_bb (dom));
773	  if (gimple_code (cond) != GIMPLE_COND)
774	    return false;
775	  /* Verify that this is an extended form of a diamond and
776	     the PHI arguments are completely controlled by the
777	     predicate in DOM.  */
778	  if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
779	    return false;
780
781	  /* Fold in dependencies and cost of the condition.  */
782	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
783	    {
784	      if (!add_dependency (val, lim_data, loop, false))
785		return false;
786	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
787	      if (def_data)
788		total_cost += def_data->cost;
789	    }
790
	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative, just claim we are not invariant in this case.
	     We are also not sure whether the control-flow inside the
	     loop will vanish.  */
796	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
797	      && !(min_cost != 0
798		   && total_cost / min_cost <= 2))
799	    return false;
800
801	  /* Assume that the control-flow in the loop will vanish.
802	     ???  We should verify this and not artificially increase
803	     the cost if that is not the case.  */
804	  lim_data->cost += stmt_cost (stmt);
805	}
806
807      return true;
808    }
809  else
810    FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
811      if (!add_dependency (val, lim_data, loop, true))
812	return false;
813
814  if (gimple_vuse (stmt))
815    {
816      mem_ref_p ref = mem_ref_in_stmt (stmt);
817
818      if (ref)
819	{
820	  lim_data->max_loop
821		  = outermost_indep_loop (lim_data->max_loop, loop, ref);
822	  if (!lim_data->max_loop)
823	    return false;
824	}
825      else
826	{
827	  if ((val = gimple_vuse (stmt)) != NULL_TREE)
828	    {
829	      if (!add_dependency (val, lim_data, loop, false))
830		return false;
831	    }
832	}
833    }
834
835  lim_data->cost += stmt_cost (stmt);
836
837  return true;
838}
839
840/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
841   and that one of the operands of this statement is computed by STMT.
842   Ensure that STMT (together with all the statements that define its
843   operands) is hoisted at least out of the loop LEVEL.  */
844
845static void
846set_level (gimple stmt, struct loop *orig_loop, struct loop *level)
847{
848  struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
849  struct lim_aux_data *lim_data;
850  gimple dep_stmt;
851  unsigned i;
852
853  stmt_loop = find_common_loop (orig_loop, stmt_loop);
854  lim_data = get_lim_data (stmt);
855  if (lim_data != NULL && lim_data->tgt_loop != NULL)
856    stmt_loop = find_common_loop (stmt_loop,
857				  loop_outer (lim_data->tgt_loop));
858  if (flow_loop_nested_p (stmt_loop, level))
859    return;
860
861  gcc_assert (level == lim_data->max_loop
862	      || flow_loop_nested_p (lim_data->max_loop, level));
863
864  lim_data->tgt_loop = level;
865  FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
866    set_level (dep_stmt, orig_loop, level);
867}
868
/* Determines an outermost loop from which we want to hoist the statement
   STMT.  For now we choose the outermost possible loop.  TODO -- use
   profiling information to set it more sanely.  */
872
873static void
874set_profitable_level (gimple stmt)
875{
876  set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
877}
878
879/* Returns true if STMT is a call that has side effects.  */
880
881static bool
882nonpure_call_p (gimple stmt)
883{
884  if (gimple_code (stmt) != GIMPLE_CALL)
885    return false;
886
887  return gimple_has_side_effects (stmt);
888}
889
890/* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */
891
892static gimple
893rewrite_reciprocal (gimple_stmt_iterator *bsi)
894{
895  gassign *stmt, *stmt1, *stmt2;
896  tree name, lhs, type;
897  tree real_one;
898  gimple_stmt_iterator gsi;
899
900  stmt = as_a <gassign *> (gsi_stmt (*bsi));
901  lhs = gimple_assign_lhs (stmt);
902  type = TREE_TYPE (lhs);
903
904  real_one = build_one_cst (type);
905
906  name = make_temp_ssa_name (type, NULL, "reciptmp");
907  stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
908			       gimple_assign_rhs2 (stmt));
909  stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
910			       gimple_assign_rhs1 (stmt));
911
912  /* Replace division stmt with reciprocal and multiply stmts.
913     The multiply stmt is not invariant, so update iterator
914     and avoid rescanning.  */
915  gsi = *bsi;
916  gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
917  gsi_replace (&gsi, stmt2, true);
918
919  /* Continue processing with invariant reciprocal statement.  */
920  return stmt1;
921}
922
923/* Check if the pattern at *BSI is a bittest of the form
924   (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */
925
926static gimple
927rewrite_bittest (gimple_stmt_iterator *bsi)
928{
929  gassign *stmt;
930  gimple stmt1;
931  gassign *stmt2;
932  gimple use_stmt;
933  gcond *cond_stmt;
934  tree lhs, name, t, a, b;
935  use_operand_p use;
936
937  stmt = as_a <gassign *> (gsi_stmt (*bsi));
938  lhs = gimple_assign_lhs (stmt);
939
940  /* Verify that the single use of lhs is a comparison against zero.  */
941  if (TREE_CODE (lhs) != SSA_NAME
942      || !single_imm_use (lhs, &use, &use_stmt))
943    return stmt;
944  cond_stmt = dyn_cast <gcond *> (use_stmt);
945  if (!cond_stmt)
946    return stmt;
947  if (gimple_cond_lhs (cond_stmt) != lhs
948      || (gimple_cond_code (cond_stmt) != NE_EXPR
949	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
950      || !integer_zerop (gimple_cond_rhs (cond_stmt)))
951    return stmt;
952
953  /* Get at the operands of the shift.  The rhs is TMP1 & 1.  */
954  stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
955  if (gimple_code (stmt1) != GIMPLE_ASSIGN)
956    return stmt;
957
  /* There may be a conversion in between, possibly inserted by fold.  */
959  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
960    {
961      t = gimple_assign_rhs1 (stmt1);
962      if (TREE_CODE (t) != SSA_NAME
963	  || !has_single_use (t))
964	return stmt;
965      stmt1 = SSA_NAME_DEF_STMT (t);
966      if (gimple_code (stmt1) != GIMPLE_ASSIGN)
967	return stmt;
968    }
969
970  /* Verify that B is loop invariant but A is not.  Verify that with
971     all the stmt walking we are still in the same loop.  */
972  if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
973      || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
974    return stmt;
975
976  a = gimple_assign_rhs1 (stmt1);
977  b = gimple_assign_rhs2 (stmt1);
978
979  if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
980      && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
981    {
982      gimple_stmt_iterator rsi;
983
984      /* 1 << B */
985      t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
986		       build_int_cst (TREE_TYPE (a), 1), b);
987      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
988      stmt1 = gimple_build_assign (name, t);
989
990      /* A & (1 << B) */
991      t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
992      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
993      stmt2 = gimple_build_assign (name, t);
994
995      /* Replace the SSA_NAME we compare against zero.  Adjust
996	 the type of zero accordingly.  */
997      SET_USE (use, name);
998      gimple_cond_set_rhs (cond_stmt,
999			   build_int_cst_type (TREE_TYPE (name),
1000					       0));
1001
1002      /* Don't use gsi_replace here, none of the new assignments sets
1003	 the variable originally set in stmt.  Move bsi to stmt1, and
1004	 then remove the original stmt, so that we get a chance to
1005	 retain debug info for it.  */
1006      rsi = *bsi;
1007      gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1008      gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
1009      gsi_remove (&rsi, true);
1010
1011      return stmt1;
1012    }
1013
1014  return stmt;
1015}
1016
/* For each statement determines the outermost loop in which it is invariant,
   the statements on whose motion it depends and the cost of the computation.
   This information is stored in the LIM_DATA structure associated with
   each statement.  */
1021class invariantness_dom_walker : public dom_walker
1022{
1023public:
1024  invariantness_dom_walker (cdi_direction direction)
1025    : dom_walker (direction) {}
1026
1027  virtual void before_dom_children (basic_block);
1028};
1029
/* Determine the outermost loops in which statements in basic block BB are
   invariant, and record them in the LIM_DATA associated with the statements.
1032   Callback for dom_walker.  */
1033
1034void
1035invariantness_dom_walker::before_dom_children (basic_block bb)
1036{
1037  enum move_pos pos;
1038  gimple_stmt_iterator bsi;
1039  gimple stmt;
1040  bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1041  struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
1042  struct lim_aux_data *lim_data;
1043
1044  if (!loop_outer (bb->loop_father))
1045    return;
1046
1047  if (dump_file && (dump_flags & TDF_DETAILS))
1048    fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1049	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1050
  /* Look at PHI nodes, but only if there are at most two.
1052     ???  We could relax this further by post-processing the inserted
1053     code and transforming adjacent cond-exprs with the same predicate
1054     to control flow again.  */
1055  bsi = gsi_start_phis (bb);
1056  if (!gsi_end_p (bsi)
1057      && ((gsi_next (&bsi), gsi_end_p (bsi))
1058	  || (gsi_next (&bsi), gsi_end_p (bsi))))
1059    for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1060      {
1061	stmt = gsi_stmt (bsi);
1062
1063	pos = movement_possibility (stmt);
1064	if (pos == MOVE_IMPOSSIBLE)
1065	  continue;
1066
1067	lim_data = init_lim_data (stmt);
1068	lim_data->always_executed_in = outermost;
1069
1070	if (!determine_max_movement (stmt, false))
1071	  {
1072	    lim_data->max_loop = NULL;
1073	    continue;
1074	  }
1075
1076	if (dump_file && (dump_flags & TDF_DETAILS))
1077	  {
1078	    print_gimple_stmt (dump_file, stmt, 2, 0);
1079	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1080		     loop_depth (lim_data->max_loop),
1081		     lim_data->cost);
1082	  }
1083
1084	if (lim_data->cost >= LIM_EXPENSIVE)
1085	  set_profitable_level (stmt);
1086      }
1087
1088  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1089    {
1090      stmt = gsi_stmt (bsi);
1091
1092      pos = movement_possibility (stmt);
1093      if (pos == MOVE_IMPOSSIBLE)
1094	{
1095	  if (nonpure_call_p (stmt))
1096	    {
1097	      maybe_never = true;
1098	      outermost = NULL;
1099	    }
1100	  /* Make sure to note always_executed_in for stores to make
1101	     store-motion work.  */
1102	  else if (stmt_makes_single_store (stmt))
1103	    {
1104	      struct lim_aux_data *lim_data = init_lim_data (stmt);
1105	      lim_data->always_executed_in = outermost;
1106	    }
1107	  continue;
1108	}
1109
1110      if (is_gimple_assign (stmt)
1111	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1112	      == GIMPLE_BINARY_RHS))
1113	{
1114	  tree op0 = gimple_assign_rhs1 (stmt);
1115	  tree op1 = gimple_assign_rhs2 (stmt);
1116	  struct loop *ol1 = outermost_invariant_loop (op1,
1117					loop_containing_stmt (stmt));
1118
1119	  /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1120	     to be hoisted out of loop, saving expensive divide.  */
1121	  if (pos == MOVE_POSSIBLE
1122	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1123	      && flag_unsafe_math_optimizations
1124	      && !flag_trapping_math
1125	      && ol1 != NULL
1126	      && outermost_invariant_loop (op0, ol1) == NULL)
1127	    stmt = rewrite_reciprocal (&bsi);
1128
1129	  /* If the shift count is invariant, convert (A >> B) & 1 to
1130	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
1131	     saving an expensive shift.  */
1132	  if (pos == MOVE_POSSIBLE
1133	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1134	      && integer_onep (op1)
1135	      && TREE_CODE (op0) == SSA_NAME
1136	      && has_single_use (op0))
1137	    stmt = rewrite_bittest (&bsi);
1138	}
1139
1140      lim_data = init_lim_data (stmt);
1141      lim_data->always_executed_in = outermost;
1142
1143      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1144	continue;
1145
1146      if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1147	{
1148	  lim_data->max_loop = NULL;
1149	  continue;
1150	}
1151
1152      if (dump_file && (dump_flags & TDF_DETAILS))
1153	{
1154	  print_gimple_stmt (dump_file, stmt, 2, 0);
1155	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1156		   loop_depth (lim_data->max_loop),
1157		   lim_data->cost);
1158	}
1159
1160      if (lim_data->cost >= LIM_EXPENSIVE)
1161	set_profitable_level (stmt);
1162    }
1163}
1164
1165class move_computations_dom_walker : public dom_walker
1166{
1167public:
1168  move_computations_dom_walker (cdi_direction direction)
1169    : dom_walker (direction), todo_ (0) {}
1170
1171  virtual void before_dom_children (basic_block);
1172
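  /* TODO_* flags accumulated while moving statements, e.g. TODO_cleanup_cfg
     when a PHI node is turned into a COND_EXPR.  */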
1173  unsigned int todo_;
1174};
1175
1176/* Hoist the statements in basic block BB out of the loops prescribed by
1177   data stored in LIM_DATA structures associated with each statement.  Callback
1178   for walk_dominator_tree.  */
1179
1180void
1181move_computations_dom_walker::before_dom_children (basic_block bb)
1182{
1183  struct loop *level;
1184  unsigned cost = 0;
1185  struct lim_aux_data *lim_data;
1186
1187  if (!loop_outer (bb->loop_father))
1188    return;
1189
1190  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1191    {
1192      gassign *new_stmt;
1193      gphi *stmt = bsi.phi ();
1194
1195      lim_data = get_lim_data (stmt);
1196      if (lim_data == NULL)
1197	{
1198	  gsi_next (&bsi);
1199	  continue;
1200	}
1201
1202      cost = lim_data->cost;
1203      level = lim_data->tgt_loop;
1204      clear_lim_data (stmt);
1205
1206      if (!level)
1207	{
1208	  gsi_next (&bsi);
1209	  continue;
1210	}
1211
1212      if (dump_file && (dump_flags & TDF_DETAILS))
1213	{
1214	  fprintf (dump_file, "Moving PHI node\n");
1215	  print_gimple_stmt (dump_file, stmt, 0, 0);
1216	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1217		   cost, level->num);
1218	}
1219
1220      if (gimple_phi_num_args (stmt) == 1)
1221	{
1222	  tree arg = PHI_ARG_DEF (stmt, 0);
1223	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1224					  TREE_CODE (arg), arg);
1225	}
1226      else
1227	{
1228	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1229	  gimple cond = gsi_stmt (gsi_last_bb (dom));
1230	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1231	  /* Get the PHI arguments corresponding to the true and false
1232	     edges of COND.  */
1233	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1234	  gcc_assert (arg0 && arg1);
1235	  t = build2 (gimple_cond_code (cond), boolean_type_node,
1236		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1237	  new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1238					  COND_EXPR, t, arg0, arg1);
1239	  todo_ |= TODO_cleanup_cfg;
1240	}
1241      if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1242	  && (!ALWAYS_EXECUTED_IN (bb)
1243	      || (ALWAYS_EXECUTED_IN (bb) != level
1244		  && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1245	{
1246	  tree lhs = gimple_assign_lhs (new_stmt);
1247	  SSA_NAME_RANGE_INFO (lhs) = NULL;
1248	  SSA_NAME_ANTI_RANGE_P (lhs) = 0;
1249	}
1250      gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1251      remove_phi_node (&bsi, false);
1252    }
1253
1254  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1255    {
1256      edge e;
1257
1258      gimple stmt = gsi_stmt (bsi);
1259
1260      lim_data = get_lim_data (stmt);
1261      if (lim_data == NULL)
1262	{
1263	  gsi_next (&bsi);
1264	  continue;
1265	}
1266
1267      cost = lim_data->cost;
1268      level = lim_data->tgt_loop;
1269      clear_lim_data (stmt);
1270
1271      if (!level)
1272	{
1273	  gsi_next (&bsi);
1274	  continue;
1275	}
1276
      /* We do not really want to move conditionals out of the loop; we just
	 placed them here to force their operands to be moved if necessary.  */
1279      if (gimple_code (stmt) == GIMPLE_COND)
1280	continue;
1281
1282      if (dump_file && (dump_flags & TDF_DETAILS))
1283	{
1284	  fprintf (dump_file, "Moving statement\n");
1285	  print_gimple_stmt (dump_file, stmt, 0, 0);
1286	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1287		   cost, level->num);
1288	}
1289
1290      e = loop_preheader_edge (level);
1291      gcc_assert (!gimple_vdef (stmt));
1292      if (gimple_vuse (stmt))
1293	{
1294	  /* The new VUSE is the one from the virtual PHI in the loop
1295	     header or the one already present.  */
1296	  gphi_iterator gsi2;
1297	  for (gsi2 = gsi_start_phis (e->dest);
1298	       !gsi_end_p (gsi2); gsi_next (&gsi2))
1299	    {
1300	      gphi *phi = gsi2.phi ();
1301	      if (virtual_operand_p (gimple_phi_result (phi)))
1302		{
1303		  gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1304		  break;
1305		}
1306	    }
1307	}
1308      gsi_remove (&bsi, false);
1309      if (gimple_has_lhs (stmt)
1310	  && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1311	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1312	  && (!ALWAYS_EXECUTED_IN (bb)
1313	      || !(ALWAYS_EXECUTED_IN (bb) == level
1314		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1315	{
1316	  tree lhs = gimple_get_lhs (stmt);
1317	  SSA_NAME_RANGE_INFO (lhs) = NULL;
1318	  SSA_NAME_ANTI_RANGE_P (lhs) = 0;
1319	}
      /* In case this is a stmt that is not unconditionally executed
	 when the target loop header is executed and the stmt may
	 invoke undefined integer or pointer overflow, rewrite it to
	 unsigned arithmetic.  */
1324      if (is_gimple_assign (stmt)
1325	  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1326	  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1327	  && arith_code_with_undefined_signed_overflow
1328	       (gimple_assign_rhs_code (stmt))
1329	  && (!ALWAYS_EXECUTED_IN (bb)
1330	      || !(ALWAYS_EXECUTED_IN (bb) == level
1331		   || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1332	gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1333      else
1334	gsi_insert_on_edge (e, stmt);
1335    }
1336}
1337
1338/* Hoist the statements out of the loops prescribed by data stored in
   LIM_DATA structures associated with each statement.  */
1340
1341static unsigned int
1342move_computations (void)
1343{
1344  move_computations_dom_walker walker (CDI_DOMINATORS);
1345  walker.walk (cfun->cfg->x_entry_block_ptr);
1346
1347  gsi_commit_edge_inserts ();
1348  if (need_ssa_update_p (cfun))
1349    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1350
1351  return walker.todo_;
1352}
1353
1354/* Checks whether the statement defining variable *INDEX can be hoisted
1355   out of the loop passed in DATA.  Callback for for_each_index.  */
1356
1357static bool
1358may_move_till (tree ref, tree *index, void *data)
1359{
1360  struct loop *loop = (struct loop *) data, *max_loop;
1361
  /* If REF is an array reference, check also that the step and the lower
     bound are invariant in LOOP.  */
1364  if (TREE_CODE (ref) == ARRAY_REF)
1365    {
1366      tree step = TREE_OPERAND (ref, 3);
1367      tree lbound = TREE_OPERAND (ref, 2);
1368
1369      max_loop = outermost_invariant_loop (step, loop);
1370      if (!max_loop)
1371	return false;
1372
1373      max_loop = outermost_invariant_loop (lbound, loop);
1374      if (!max_loop)
1375	return false;
1376    }
1377
1378  max_loop = outermost_invariant_loop (*index, loop);
1379  if (!max_loop)
1380    return false;
1381
1382  return true;
1383}
1384
/* If OP is an SSA_NAME, force the statement that defines it to be
   moved out of the LOOP.  ORIG_LOOP is the loop in which OP is used.  */
1387
1388static void
1389force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1390{
1391  gimple stmt;
1392
1393  if (!op
1394      || is_gimple_min_invariant (op))
1395    return;
1396
1397  gcc_assert (TREE_CODE (op) == SSA_NAME);
1398
1399  stmt = SSA_NAME_DEF_STMT (op);
1400  if (gimple_nop_p (stmt))
1401    return;
1402
1403  set_level (stmt, orig_loop, loop);
1404}
1405
/* Forces statements defining invariants in REF (and *INDEX) to be moved out
   of the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback
   for for_each_index.  */
1409
1410struct fmt_data
1411{
1412  struct loop *loop;
1413  struct loop *orig_loop;
1414};
1415
1416static bool
1417force_move_till (tree ref, tree *index, void *data)
1418{
1419  struct fmt_data *fmt_data = (struct fmt_data *) data;
1420
1421  if (TREE_CODE (ref) == ARRAY_REF)
1422    {
1423      tree step = TREE_OPERAND (ref, 3);
1424      tree lbound = TREE_OPERAND (ref, 2);
1425
1426      force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1427      force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1428    }
1429
1430  force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1431
1432  return true;
1433}
1434
/* A function to free the mem_ref object MEM.  */
1436
1437static void
1438memref_free (struct im_mem_ref *mem)
1439{
1440  mem->accesses_in_loop.release ();
1441}
1442
1443/* Allocates and returns a memory reference description for MEM whose hash
1444   value is HASH and id is ID.  */
1445
1446static mem_ref_p
1447mem_ref_alloc (tree mem, unsigned hash, unsigned id)
1448{
1449  mem_ref_p ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
1450  ao_ref_init (&ref->mem, mem);
1451  ref->id = id;
1452  ref->hash = hash;
1453  ref->stored = NULL;
1454  bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
1455  bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1456  ref->accesses_in_loop.create (1);
1457
1458  return ref;
1459}
1460
/* Records memory reference location *LOC to the memory reference
   description REF.  The reference occurs in statement STMT.  */
1463
1464static void
1465record_mem_ref_loc (mem_ref_p ref, gimple stmt, tree *loc)
1466{
1467  mem_ref_loc aref;
1468  aref.stmt = stmt;
1469  aref.ref = loc;
1470  ref->accesses_in_loop.safe_push (aref);
1471}
1472
/* Set the LOOP bit in REF's stored bitmap, allocating the bitmap if
   necessary.  Return whether the bit changed.  */
1475
1476static bool
1477set_ref_stored_in_loop (mem_ref_p ref, struct loop *loop)
1478{
1479  if (!ref->stored)
1480    ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1481  return bitmap_set_bit (ref->stored, loop->num);
1482}
1483
1484/* Marks reference REF as stored in LOOP.  */
1485
1486static void
1487mark_ref_stored (mem_ref_p ref, struct loop *loop)
1488{
1489  while (loop != current_loops->tree_root
1490	 && set_ref_stored_in_loop (ref, loop))
1491    loop = loop_outer (loop);
1492}
1493
1494/* Gathers memory references in statement STMT in LOOP, storing the
1495   information about them in the memory_accesses structure.  Marks
1496   the vops accessed through unrecognized statements there as
1497   well.  */
1498
1499static void
1500gather_mem_refs_stmt (struct loop *loop, gimple stmt)
1501{
1502  tree *mem = NULL;
1503  hashval_t hash;
1504  im_mem_ref **slot;
1505  mem_ref_p ref;
1506  bool is_stored;
1507  unsigned id;
1508
1509  if (!gimple_vuse (stmt))
1510    return;
1511
1512  mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1513  if (!mem)
1514    {
1515      /* We use the shared mem_ref for all unanalyzable refs.  */
1516      id = UNANALYZABLE_MEM_ID;
1517      ref = memory_accesses.refs_list[id];
1518      if (dump_file && (dump_flags & TDF_DETAILS))
1519	{
1520	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1521	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1522	}
1523      is_stored = gimple_vdef (stmt);
1524    }
1525  else
1526    {
1527      hash = iterative_hash_expr (*mem, 0);
1528      slot = memory_accesses.refs->find_slot_with_hash (*mem, hash, INSERT);
1529      if (*slot)
1530	{
1531	  ref = (mem_ref_p) *slot;
1532	  id = ref->id;
1533	}
1534      else
1535	{
1536	  id = memory_accesses.refs_list.length ();
1537	  ref = mem_ref_alloc (*mem, hash, id);
1538	  memory_accesses.refs_list.safe_push (ref);
1539	  *slot = ref;
1540
1541	  if (dump_file && (dump_flags & TDF_DETAILS))
1542	    {
1543	      fprintf (dump_file, "Memory reference %u: ", id);
1544	      print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1545	      fprintf (dump_file, "\n");
1546	    }
1547	}
1548
1549      record_mem_ref_loc (ref, stmt, mem);
1550    }
1551  bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
1552  if (is_stored)
1553    {
1554      bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1555      mark_ref_stored (ref, loop);
1556    }
1557  return;
1558}
1559
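/* Postorder numbers of loops, indexed by loop->num.  Used by the qsort and
   bsearch callbacks below to order basic blocks and reference locations by
   the loop containing them.  */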
1560static unsigned *bb_loop_postorder;
1561
/* qsort comparison function to sort blocks by the postorder number of their
   loop father.  */
1563
1564static int
1565sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
1566{
1567  basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
1568  basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
1569  struct loop *loop1 = bb1->loop_father;
1570  struct loop *loop2 = bb2->loop_father;
1571  if (loop1->num == loop2->num)
1572    return 0;
1573  return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1574}
1575
/* qsort comparison function to sort reference locations by the postorder
   number of their loop father.  */
1577
1578static int
1579sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
1580{
1581  mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
1582  mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
1583  struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1584  struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1585  if (loop1->num == loop2->num)
1586    return 0;
1587  return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1588}
1589
1590/* Gathers memory references in loops.  */
1591
1592static void
1593analyze_memory_references (void)
1594{
1595  gimple_stmt_iterator bsi;
1596  basic_block bb, *bbs;
1597  struct loop *loop, *outer;
1598  unsigned i, n;
1599
  /* Collect all basic blocks in loops and sort them by their
     loop's postorder number.  */
1602  i = 0;
1603  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1604  FOR_EACH_BB_FN (bb, cfun)
1605    if (bb->loop_father != current_loops->tree_root)
1606      bbs[i++] = bb;
1607  n = i;
1608  qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);
1609
1610  /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1611     That results in better locality for all the bitmaps.  */
1612  for (i = 0; i < n; ++i)
1613    {
1614      basic_block bb = bbs[i];
1615      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1616        gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1617    }
1618
  /* Sort the location list of gathered memory references by their
     loop postorder number.  */
1621  im_mem_ref *ref;
1622  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1623    ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);
1624
1625  free (bbs);
1626//  free (bb_loop_postorder);
1627
1628  /* Propagate the information about accessed memory references up
1629     the loop hierarchy.  */
1630  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1631    {
1632      /* Finalize the overall touched references (including subloops).  */
1633      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1634		       &memory_accesses.refs_stored_in_loop[loop->num]);
1635
1636      /* Propagate the information about accessed memory references up
1637	 the loop hierarchy.  */
1638      outer = loop_outer (loop);
1639      if (outer == current_loops->tree_root)
1640	continue;
1641
1642      bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1643		       &memory_accesses.all_refs_stored_in_loop[loop->num]);
1644    }
1645}
1646
1647/* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
1648   tree_to_aff_combination_expand.  */
1649
1650static bool
1651mem_refs_may_alias_p (mem_ref_p mem1, mem_ref_p mem2,
1652		      hash_map<tree, name_expansion *> **ttae_cache)
1653{
1654  /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1655     object and their offset differ in such a way that the locations cannot
1656     overlap, then they cannot alias.  */
1657  widest_int size1, size2;
1658  aff_tree off1, off2;
1659
1660  /* Perform basic offset and type-based disambiguation.  */
1661  if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
1662    return false;
1663
1664  /* The expansion of addresses may be a bit expensive, thus we only do
1665     the check at -O2 and higher optimization levels.  */
1666  if (optimize < 2)
1667    return true;
1668
1669  get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1670  get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1671  aff_combination_expand (&off1, ttae_cache);
1672  aff_combination_expand (&off2, ttae_cache);
1673  aff_combination_scale (&off1, -1);
1674  aff_combination_add (&off2, &off1);
1675
1676  if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1677    return false;
1678
1679  return true;
1680}
1681
1682/* Compare function for bsearch searching for reference locations
1683   in a loop.  */
1684
1685static int
1686find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
1687{
1688  struct loop *loop = (struct loop *)const_cast<void *>(loop_);
1689  mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1690  struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1691  if (loop->num  == loc_loop->num
1692      || flow_loop_nested_p (loop, loc_loop))
1693    return 0;
1694  return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1695	  ? -1 : 1);
1696}
1697
1698/* Iterates over all locations of REF in LOOP and its subloops calling
1699   fn.operator() with the location as argument.  When that operator
1700   returns true the iteration is stopped and true is returned.
1701   Otherwise false is returned.  */
1702
1703template <typename FN>
1704static bool
1705for_all_locs_in_loop (struct loop *loop, mem_ref_p ref, FN fn)
1706{
1707  unsigned i;
1708  mem_ref_loc_p loc;
1709
1710  /* Search for the cluster of locs in the accesses_in_loop vector
     which is sorted by the postorder index of the loop father.  */
1712  loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
1713  if (!loc)
1714    return false;
1715
1716  /* We have found one location inside loop or its sub-loops.  Iterate
1717     both forward and backward to cover the whole cluster.  */
1718  i = loc - ref->accesses_in_loop.address ();
1719  while (i > 0)
1720    {
1721      --i;
1722      mem_ref_loc_p l = &ref->accesses_in_loop[i];
1723      if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1724	break;
1725      if (fn (l))
1726	return true;
1727    }
1728  for (i = loc - ref->accesses_in_loop.address ();
1729       i < ref->accesses_in_loop.length (); ++i)
1730    {
1731      mem_ref_loc_p l = &ref->accesses_in_loop[i];
1732      if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1733	break;
1734      if (fn (l))
1735	return true;
1736    }
1737
1738  return false;
1739}
1740
1741/* Rewrites location LOC by TMP_VAR.  */
1742
1743struct rewrite_mem_ref_loc
1744{
1745  rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1746  bool operator () (mem_ref_loc_p loc);
1747  tree tmp_var;
1748};
1749
1750bool
1751rewrite_mem_ref_loc::operator () (mem_ref_loc_p loc)
1752{
1753  *loc->ref = tmp_var;
1754  update_stmt (loc->stmt);
1755  return false;
1756}
1757
1758/* Rewrites all references to REF in LOOP by variable TMP_VAR.  */
1759
1760static void
1761rewrite_mem_refs (struct loop *loop, mem_ref_p ref, tree tmp_var)
1762{
1763  for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1764}
1765
1766/* Stores the first reference location in LOCP.  */
1767
1768struct first_mem_ref_loc_1
1769{
1770  first_mem_ref_loc_1 (mem_ref_loc_p *locp_) : locp (locp_) {}
1771  bool operator () (mem_ref_loc_p loc);
1772  mem_ref_loc_p *locp;
1773};
1774
1775bool
1776first_mem_ref_loc_1::operator () (mem_ref_loc_p loc)
1777{
1778  *locp = loc;
1779  return true;
1780}
1781
1782/* Returns the first reference location to REF in LOOP.  */
1783
1784static mem_ref_loc_p
1785first_mem_ref_loc (struct loop *loop, mem_ref_p ref)
1786{
1787  mem_ref_loc_p locp = NULL;
1788  for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1789  return locp;
1790}
1791
1792struct prev_flag_edges {
1793  /* Edge to insert new flag comparison code.  */
1794  edge append_cond_position;
1795
1796  /* Edge for fall through from previous flag comparison.  */
1797  edge last_cond_fallthru;
1798};
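
/* For illustration (a sketch of the resulting control flow, not the exact
   emitted GIMPLE): when two flag-guarded stores are emitted on the same
   exit edge, the second comparison is chained at APPEND_COND_POSITION and
   the false edge of the previous comparison is redirected to it, so the
   exit ends up looking like

     if (lsm_flag1)
       MEM1 = lsm1;
     if (lsm_flag2)
       MEM2 = lsm2;

   which preserves the original order of the stores.  */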
1799
1800/* Helper function for execute_sm.  Emit code to store TMP_VAR into
1801   MEM along edge EX.
1802
1803   The store is only done if MEM has changed.  We do this so no
1804   changes to MEM occur on code paths that did not originally store
1805   into it.
1806
1807   The common case for execute_sm will transform:
1808
1809     for (...) {
1810       if (foo)
1811         stuff;
1812       else
1813         MEM = TMP_VAR;
1814     }
1815
1816   into:
1817
1818     lsm = MEM;
1819     for (...) {
1820       if (foo)
1821         stuff;
1822       else
1823         lsm = TMP_VAR;
1824     }
1825     MEM = lsm;
1826
1827  This function will generate:
1828
1829     lsm = MEM;
1830
1831     lsm_flag = false;
1832     ...
1833     for (...) {
1834       if (foo)
1835         stuff;
1836       else {
1837         lsm = TMP_VAR;
1838         lsm_flag = true;
1839       }
1840     }
1841     if (lsm_flag)	<--
1842       MEM = lsm;	<--
1843*/
1844
1845static void
1846execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag)
1847{
1848  basic_block new_bb, then_bb, old_dest;
1849  bool loop_has_only_one_exit;
1850  edge then_old_edge, orig_ex = ex;
1851  gimple_stmt_iterator gsi;
1852  gimple stmt;
1853  struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1854  bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1855
1856  /* ?? Insert store after previous store if applicable.  See note
1857     below.  */
1858  if (prev_edges)
1859    ex = prev_edges->append_cond_position;
1860
1861  loop_has_only_one_exit = single_pred_p (ex->dest);
1862
1863  if (loop_has_only_one_exit)
1864    ex = split_block_after_labels (ex->dest);
1865  else
1866    {
1867      for (gphi_iterator gpi = gsi_start_phis (ex->dest);
1868	   !gsi_end_p (gpi); gsi_next (&gpi))
1869	{
1870	  gphi *phi = gpi.phi ();
1871	  if (virtual_operand_p (gimple_phi_result (phi)))
1872	    continue;
1873
1874	  /* When the destination has a non-virtual PHI node with multiple
1875	     predecessors make sure we preserve the PHI structure by
1876	     forcing a forwarder block so that hoisting of that PHI will
1877	     still work.  */
1878	  split_edge (ex);
1879	  break;
1880	}
1881    }
1882
1883  old_dest = ex->dest;
1884  new_bb = split_edge (ex);
1885  then_bb = create_empty_bb (new_bb);
1886  if (irr)
1887    then_bb->flags = BB_IRREDUCIBLE_LOOP;
1888  add_bb_to_loop (then_bb, new_bb->loop_father);
1889
1890  gsi = gsi_start_bb (new_bb);
1891  stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1892			    NULL_TREE, NULL_TREE);
1893  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1894
1895  gsi = gsi_start_bb (then_bb);
1896  /* Insert actual store.  */
1897  stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1898  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1899
1900  make_edge (new_bb, then_bb,
1901	     EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1902  make_edge (new_bb, old_dest,
1903	     EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1904  then_old_edge = make_edge (then_bb, old_dest,
1905			     EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1906
1907  set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
1908
1909  if (prev_edges)
1910    {
1911      basic_block prevbb = prev_edges->last_cond_fallthru->src;
1912      redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
1913      set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
1914      set_immediate_dominator (CDI_DOMINATORS, old_dest,
1915			       recompute_dominator (CDI_DOMINATORS, old_dest));
1916    }
1917
1918  /* ?? Because stores may alias, they must happen in the exact
1919     sequence they originally happened.  Save the position right after
1920     the (_lsm) store we just created so we can continue appending after
1921     it and maintain the original order.  */
1922  {
1923    struct prev_flag_edges *p;
1924
1925    if (orig_ex->aux)
1926      orig_ex->aux = NULL;
1927    alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
1928    p = (struct prev_flag_edges *) orig_ex->aux;
1929    p->append_cond_position = then_old_edge;
1930    p->last_cond_fallthru = find_edge (new_bb, old_dest);
1931    orig_ex->aux = (void *) p;
1932  }
1933
1934  if (!loop_has_only_one_exit)
1935    for (gphi_iterator gpi = gsi_start_phis (old_dest);
1936	 !gsi_end_p (gpi); gsi_next (&gpi))
1937      {
1938	gphi *phi = gpi.phi ();
1939	unsigned i;
1940
1941	for (i = 0; i < gimple_phi_num_args (phi); i++)
1942	  if (gimple_phi_arg_edge (phi, i)->src == new_bb)
1943	    {
1944	      tree arg = gimple_phi_arg_def (phi, i);
1945	      add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
1946	      update_stmt (phi);
1947	    }
1948      }
1949  /* Remove the original fall through edge.  This was the
1950     single_succ_edge (new_bb).  */
1951  EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU;
1952}
1953
/* At each location where REF is stored, set FLAG to record the store.  */
1955
1956struct sm_set_flag_if_changed
1957{
1958  sm_set_flag_if_changed (tree flag_) : flag (flag_) {}
1959  bool operator () (mem_ref_loc_p loc);
1960  tree flag;
1961};
1962
1963bool
1964sm_set_flag_if_changed::operator () (mem_ref_loc_p loc)
1965{
1966  /* Only set the flag for writes.  */
1967  if (is_gimple_assign (loc->stmt)
1968      && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
1969    {
1970      gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
1971      gimple stmt = gimple_build_assign (flag, boolean_true_node);
1972      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1973    }
1974  return false;
1975}
1976
1977/* Helper function for execute_sm.  On every location where REF is
1978   set, set an appropriate flag indicating the store.  */
1979
1980static tree
1981execute_sm_if_changed_flag_set (struct loop *loop, mem_ref_p ref)
1982{
1983  tree flag;
1984  char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
1985  flag = create_tmp_reg (boolean_type_node, str);
1986  for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag));
1987  return flag;
1988}
1989
1990/* Executes store motion of memory reference REF from LOOP.
1991   Exits from the LOOP are stored in EXITS.  The initialization of the
1992   temporary variable is put to the preheader of the loop, and assignments
1993   to the reference from the temporary variable are emitted to exits.  */
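/* If the loop preheader is inside a transaction or if store data races
   are not allowed (PARAM_ALLOW_STORE_DATA_RACES), the stores on the exits
   are instead guarded by a flag that is set only on the paths that
   originally stored to the reference; see execute_sm_if_changed.  */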
1994
1995static void
1996execute_sm (struct loop *loop, vec<edge> exits, mem_ref_p ref)
1997{
1998  tree tmp_var, store_flag = NULL_TREE;
1999  unsigned i;
2000  gassign *load;
2001  struct fmt_data fmt_data;
2002  edge ex;
2003  struct lim_aux_data *lim_data;
2004  bool multi_threaded_model_p = false;
2005  gimple_stmt_iterator gsi;
2006
2007  if (dump_file && (dump_flags & TDF_DETAILS))
2008    {
2009      fprintf (dump_file, "Executing store motion of ");
2010      print_generic_expr (dump_file, ref->mem.ref, 0);
2011      fprintf (dump_file, " from loop %d\n", loop->num);
2012    }
2013
2014  tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
2015			    get_lsm_tmp_name (ref->mem.ref, ~0));
2016
2017  fmt_data.loop = loop;
2018  fmt_data.orig_loop = loop;
2019  for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
2020
2021  if (bb_in_transaction (loop_preheader_edge (loop)->src)
2022      || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
2023    multi_threaded_model_p = true;
2024
2025  if (multi_threaded_model_p)
2026    store_flag = execute_sm_if_changed_flag_set (loop, ref);
2027
2028  rewrite_mem_refs (loop, ref, tmp_var);
2029
  /* Emit the load code at the position of the first access to REF in
     the loop, so that we are sure it will be processed by
     move_computations after all its dependencies and end up in the
     loop preheader.  */
2033  gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2034
2035  /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2036     load altogether, since the store is predicated by a flag.  We
     could do the load only if it was originally in the loop.  */
2038  load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
2039  lim_data = init_lim_data (load);
2040  lim_data->max_loop = loop;
2041  lim_data->tgt_loop = loop;
2042  gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2043
2044  if (multi_threaded_model_p)
2045    {
2046      load = gimple_build_assign (store_flag, boolean_false_node);
2047      lim_data = init_lim_data (load);
2048      lim_data->max_loop = loop;
2049      lim_data->tgt_loop = loop;
2050      gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2051    }
2052
2053  /* Sink the store to every exit from the loop.  */
2054  FOR_EACH_VEC_ELT (exits, i, ex)
2055    if (!multi_threaded_model_p)
2056      {
2057	gassign *store;
2058	store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
2059	gsi_insert_on_edge (ex, store);
2060      }
2061    else
2062      execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag);
2063}
2064
2065/* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
2066   edges of the LOOP.  */
2067
2068static void
2069hoist_memory_references (struct loop *loop, bitmap mem_refs,
2070			 vec<edge> exits)
2071{
2072  mem_ref_p ref;
2073  unsigned  i;
2074  bitmap_iterator bi;
2075
2076  EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2077    {
2078      ref = memory_accesses.refs_list[i];
2079      execute_sm (loop, exits, ref);
2080    }
2081}
2082
2083struct ref_always_accessed
2084{
2085  ref_always_accessed (struct loop *loop_, bool stored_p_)
2086      : loop (loop_), stored_p (stored_p_) {}
2087  bool operator () (mem_ref_loc_p loc);
2088  struct loop *loop;
2089  bool stored_p;
2090};
2091
2092bool
2093ref_always_accessed::operator () (mem_ref_loc_p loc)
2094{
2095  struct loop *must_exec;
2096
2097  if (!get_lim_data (loc->stmt))
2098    return false;
2099
  /* If we require an always executed store, make sure the statement
     stores to the reference.  */
2102  if (stored_p)
2103    {
2104      tree lhs = gimple_get_lhs (loc->stmt);
2105      if (!lhs
2106	  || lhs != *loc->ref)
2107	return false;
2108    }
2109
2110  must_exec = get_lim_data (loc->stmt)->always_executed_in;
2111  if (!must_exec)
2112    return false;
2113
2114  if (must_exec == loop
2115      || flow_loop_nested_p (must_exec, loop))
2116    return true;
2117
2118  return false;
2119}
2120
/* Returns true if REF is always accessed in LOOP.  If STORED_P is true,
   require that REF is always stored to in LOOP.  */
2123
2124static bool
2125ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
2126{
2127  return for_all_locs_in_loop (loop, ref,
2128			       ref_always_accessed (loop, stored_p));
2129}
2130
2131/* Returns true if REF1 and REF2 are independent.  */
2132
2133static bool
2134refs_independent_p (mem_ref_p ref1, mem_ref_p ref2)
2135{
2136  if (ref1 == ref2)
2137    return true;
2138
2139  if (dump_file && (dump_flags & TDF_DETAILS))
2140    fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2141	     ref1->id, ref2->id);
2142
2143  if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
2144    {
2145      if (dump_file && (dump_flags & TDF_DETAILS))
2146	fprintf (dump_file, "dependent.\n");
2147      return false;
2148    }
2149  else
2150    {
2151      if (dump_file && (dump_flags & TDF_DETAILS))
2152	fprintf (dump_file, "independent.\n");
2153      return true;
2154    }
2155}
2156
2157/* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
2158   and its super-loops.  */
2159
2160static void
2161record_dep_loop (struct loop *loop, mem_ref_p ref, bool stored_p)
2162{
2163  /* We can propagate dependent-in-loop bits up the loop
2164     hierarchy to all outer loops.  */
2165  while (loop != current_loops->tree_root
2166	 && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2167    loop = loop_outer (loop);
2168}
2169
/* Returns true if REF is independent of all other memory references in
2171   LOOP.  */
2172
2173static bool
2174ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref, bool stored_p)
2175{
2176  bitmap refs_to_check;
2177  unsigned i;
2178  bitmap_iterator bi;
2179  mem_ref_p aref;
2180
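  /* A reference that is stored to in LOOP has to be independent of all
     other references in the loop, while a reference that is only read
     needs to be independent of the stores only.  */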
2181  if (stored_p)
2182    refs_to_check = &memory_accesses.refs_in_loop[loop->num];
2183  else
2184    refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2185
2186  if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
2187    return false;
2188
2189  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2190    {
2191      aref = memory_accesses.refs_list[i];
2192      if (!refs_independent_p (ref, aref))
2193	return false;
2194    }
2195
2196  return true;
2197}
2198
/* Returns true if REF is independent of all other memory references in
2200   LOOP.  Wrapper over ref_indep_loop_p_1, caching its results.  */
2201
2202static bool
2203ref_indep_loop_p_2 (struct loop *loop, mem_ref_p ref, bool stored_p)
2204{
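  /* If REF itself is stored to in LOOP, conflicting loads matter as well,
     so query its dependence against all references, not just against the
     stores.  */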
2205  stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
2206
2207  if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2208    return true;
2209  if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2210    return false;
2211
2212  struct loop *inner = loop->inner;
2213  while (inner)
2214    {
2215      if (!ref_indep_loop_p_2 (inner, ref, stored_p))
2216	return false;
2217      inner = inner->next;
2218    }
2219
2220  bool indep_p = ref_indep_loop_p_1 (loop, ref, stored_p);
2221
2222  if (dump_file && (dump_flags & TDF_DETAILS))
2223    fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
2224	     ref->id, loop->num, indep_p ? "independent" : "dependent");
2225
2226  /* Record the computed result in the cache.  */
2227  if (indep_p)
2228    {
2229      if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
2230	  && stored_p)
2231	{
	  /* If it's independent of all refs then it's independent of
	     stores, too.  */
2234	  bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
2235	}
2236    }
2237  else
2238    {
2239      record_dep_loop (loop, ref, stored_p);
2240      if (!stored_p)
2241	{
	  /* If it's dependent on stores it's dependent on all refs,
	     too.  */
2244	  record_dep_loop (loop, ref, true);
2245	}
2246    }
2247
2248  return indep_p;
2249}
2250
/* Returns true if REF is independent of all other memory references in
2252   LOOP.  */
2253
2254static bool
2255ref_indep_loop_p (struct loop *loop, mem_ref_p ref)
2256{
2257  gcc_checking_assert (MEM_ANALYZABLE (ref));
2258
2259  return ref_indep_loop_p_2 (loop, ref, false);
2260}
2261
2262/* Returns true if we can perform store motion of REF from LOOP.  */
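/* For illustration (a hypothetical example, not taken from any testcase):
   in

     for (i = 0; i < n; i++)
       *q = *q + p[i];

   the store to *q can be moved out of the loop when q is loop invariant,
   *q is a non-volatile scalar access that cannot throw, the access is
   executed on every iteration (or cannot trap), and *q does not alias the
   other memory references in the loop, here p[i].  */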
2263
2264static bool
2265can_sm_ref_p (struct loop *loop, mem_ref_p ref)
2266{
2267  tree base;
2268
2269  /* Can't hoist unanalyzable refs.  */
2270  if (!MEM_ANALYZABLE (ref))
2271    return false;
2272
2273  /* It should be movable.  */
2274  if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2275      || TREE_THIS_VOLATILE (ref->mem.ref)
2276      || !for_each_index (&ref->mem.ref, may_move_till, loop))
2277    return false;
2278
  /* If it can throw, fail; we do not properly update EH info.  */
2280  if (tree_could_throw_p (ref->mem.ref))
2281    return false;
2282
2283  /* If it can trap, it must be always executed in LOOP.
2284     Readonly memory locations may trap when storing to them, but
2285     tree_could_trap_p is a predicate for rvalues, so check that
2286     explicitly.  */
2287  base = get_base_address (ref->mem.ref);
2288  if ((tree_could_trap_p (ref->mem.ref)
2289       || (DECL_P (base) && TREE_READONLY (base)))
2290      && !ref_always_accessed_p (loop, ref, true))
2291    return false;
2292
2293  /* And it must be independent on all other memory references
2294     in LOOP.  */
2295  if (!ref_indep_loop_p (loop, ref))
2296    return false;
2297
2298  return true;
2299}
2300
/* Marks in REFS_TO_SM the references in LOOP for which store motion
   should be performed.  SM_EXECUTED is the set of references for which
   store motion was already performed in one of the outer loops.  */
2304
2305static void
2306find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2307{
2308  bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2309  unsigned i;
2310  bitmap_iterator bi;
2311  mem_ref_p ref;
2312
2313  EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2314    {
2315      ref = memory_accesses.refs_list[i];
2316      if (can_sm_ref_p (loop, ref))
2317	bitmap_set_bit (refs_to_sm, i);
2318    }
2319}
2320
2321/* Checks whether LOOP (with exits stored in EXITS array) is suitable
   for a store motion optimization (i.e. whether we can insert statements
   on its exits).  */
2324
2325static bool
2326loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
2327		      vec<edge> exits)
2328{
2329  unsigned i;
2330  edge ex;
2331
2332  FOR_EACH_VEC_ELT (exits, i, ex)
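    /* Abnormal and EH edges cannot have code inserted on them.  */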
2333    if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2334      return false;
2335
2336  return true;
2337}
2338
2339/* Try to perform store motion for all memory references modified inside
   LOOP.  SM_EXECUTED is the bitmap of the memory references for which
   store motion was already executed in one of the outer loops.  */
2342
2343static void
2344store_motion_loop (struct loop *loop, bitmap sm_executed)
2345{
2346  vec<edge> exits = get_loop_exit_edges (loop);
2347  struct loop *subloop;
2348  bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2349
2350  if (loop_suitable_for_sm (loop, exits))
2351    {
2352      find_refs_for_sm (loop, sm_executed, sm_in_loop);
2353      hoist_memory_references (loop, sm_in_loop, exits);
2354    }
2355  exits.release ();
2356
2357  bitmap_ior_into (sm_executed, sm_in_loop);
2358  for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2359    store_motion_loop (subloop, sm_executed);
2360  bitmap_and_compl_into (sm_executed, sm_in_loop);
2361  BITMAP_FREE (sm_in_loop);
2362}
2363
2364/* Try to perform store motion for all memory references modified inside
2365   loops.  */
2366
2367static void
2368store_motion (void)
2369{
2370  struct loop *loop;
2371  bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
2372
2373  for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2374    store_motion_loop (loop, sm_executed);
2375
2376  BITMAP_FREE (sm_executed);
2377  gsi_commit_edge_inserts ();
2378}
2379
2380/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
   for each such basic block bb records the outermost loop for which execution
2382   of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
2383   blocks that contain a nonpure call.  */
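/* For example (a sketch with hypothetical statements A, B and condition C):
   in

     while (1)
       {
	 a;
	 if (c)
	   break;
	 b;
       }

   the loop header (containing A and the exit test) is trivially always
   executed when the loop is entered, while the block containing B is not
   marked, since the loop may be left before reaching it.  */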
2384
2385static void
2386fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call)
2387{
2388  basic_block bb = NULL, *bbs, last = NULL;
2389  unsigned i;
2390  edge e;
2391  struct loop *inn_loop = loop;
2392
2393  if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2394    {
2395      bbs = get_loop_body_in_dom_order (loop);
2396
2397      for (i = 0; i < loop->num_nodes; i++)
2398	{
2399	  edge_iterator ei;
2400	  bb = bbs[i];
2401
2402	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2403	    last = bb;
2404
2405	  if (bitmap_bit_p (contains_call, bb->index))
2406	    break;
2407
2408	  FOR_EACH_EDGE (e, ei, bb->succs)
2409	    if (!flow_bb_inside_loop_p (loop, e->dest))
2410	      break;
2411	  if (e)
2412	    break;
2413
2414	  /* A loop might be infinite (TODO use simple loop analysis
2415	     to disprove this if possible).  */
2416	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
2417	    break;
2418
2419	  if (!flow_bb_inside_loop_p (inn_loop, bb))
2420	    break;
2421
2422	  if (bb->loop_father->header == bb)
2423	    {
2424	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2425		break;
2426
2427	      /* In a loop that is always entered we may proceed anyway.
2428		 But record that we entered it and stop once we leave it.  */
2429	      inn_loop = bb->loop_father;
2430	    }
2431	}
2432
2433      while (1)
2434	{
2435	  SET_ALWAYS_EXECUTED_IN (last, loop);
2436	  if (last == loop->header)
2437	    break;
2438	  last = get_immediate_dominator (CDI_DOMINATORS, last);
2439	}
2440
2441      free (bbs);
2442    }
2443
2444  for (loop = loop->inner; loop; loop = loop->next)
2445    fill_always_executed_in_1 (loop, contains_call);
2446}
2447
2448/* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
   for each such basic block bb records the outermost loop for which execution
2450   of its header implies execution of bb.  */
2451
2452static void
2453fill_always_executed_in (void)
2454{
2455  sbitmap contains_call = sbitmap_alloc (last_basic_block_for_fn (cfun));
2456  basic_block bb;
2457  struct loop *loop;
2458
2459  bitmap_clear (contains_call);
2460  FOR_EACH_BB_FN (bb, cfun)
2461    {
2462      gimple_stmt_iterator gsi;
2463      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2464	{
2465	  if (nonpure_call_p (gsi_stmt (gsi)))
2466	    break;
2467	}
2468
2469      if (!gsi_end_p (gsi))
2470	bitmap_set_bit (contains_call, bb->index);
2471    }
2472
2473  for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
2474    fill_always_executed_in_1 (loop, contains_call);
2475
2476  sbitmap_free (contains_call);
2477}
2478
2479
2480/* Compute the global information needed by the loop invariant motion pass.  */
2481
2482static void
2483tree_ssa_lim_initialize (void)
2484{
2485  struct loop *loop;
2486  unsigned i;
2487
2488  bitmap_obstack_initialize (&lim_bitmap_obstack);
2489  gcc_obstack_init (&mem_ref_obstack);
2490  lim_aux_data_map = new hash_map<gimple, lim_aux_data *>;
2491
2492  if (flag_tm)
2493    compute_transaction_bits ();
2494
2495  alloc_aux_for_edges (0);
2496
2497  memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
2498  memory_accesses.refs_list.create (100);
2499  /* Allocate a special, unanalyzable mem-ref with ID zero.  */
2500  memory_accesses.refs_list.quick_push
2501    (mem_ref_alloc (error_mark_node, 0, UNANALYZABLE_MEM_ID));
2502
2503  memory_accesses.refs_in_loop.create (number_of_loops (cfun));
2504  memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
2505  memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
2506  memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2507  memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
2508  memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2509
2510  for (i = 0; i < number_of_loops (cfun); i++)
2511    {
2512      bitmap_initialize (&memory_accesses.refs_in_loop[i],
2513			 &lim_bitmap_obstack);
2514      bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
2515			 &lim_bitmap_obstack);
2516      bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
2517			 &lim_bitmap_obstack);
2518    }
2519
2520  memory_accesses.ttae_cache = NULL;
2521
2522  /* Initialize bb_loop_postorder with a mapping from loop->num to
2523     its postorder index.  */
2524  i = 0;
2525  bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
2526  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2527    bb_loop_postorder[loop->num] = i++;
2528}
2529
2530/* Cleans up after the invariant motion pass.  */
2531
2532static void
2533tree_ssa_lim_finalize (void)
2534{
2535  basic_block bb;
2536  unsigned i;
2537  mem_ref_p ref;
2538
2539  free_aux_for_edges ();
2540
2541  FOR_EACH_BB_FN (bb, cfun)
2542    SET_ALWAYS_EXECUTED_IN (bb, NULL);
2543
2544  bitmap_obstack_release (&lim_bitmap_obstack);
2545  delete lim_aux_data_map;
2546
2547  delete memory_accesses.refs;
2548  memory_accesses.refs = NULL;
2549
2550  FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
2551    memref_free (ref);
2552  memory_accesses.refs_list.release ();
2553  obstack_free (&mem_ref_obstack, NULL);
2554
2555  memory_accesses.refs_in_loop.release ();
2556  memory_accesses.refs_stored_in_loop.release ();
2557  memory_accesses.all_refs_stored_in_loop.release ();
2558
2559  if (memory_accesses.ttae_cache)
2560    free_affine_expand_cache (&memory_accesses.ttae_cache);
2561
2562  free (bb_loop_postorder);
2563}
2564
2565/* Moves invariants from loops.  Only "expensive" invariants are moved out --
   i.e. those that are likely to be a win regardless of register pressure.  */
2567
2568unsigned int
2569tree_ssa_lim (void)
2570{
2571  unsigned int todo;
2572
2573  tree_ssa_lim_initialize ();
2574
2575  /* Gathers information about memory accesses in the loops.  */
2576  analyze_memory_references ();
2577
2578  /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
2579  fill_always_executed_in ();
2580
  /* For each statement determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
2583  invariantness_dom_walker (CDI_DOMINATORS)
2584    .walk (cfun->cfg->x_entry_block_ptr);
2585
2586  /* Execute store motion.  Force the necessary invariants to be moved
2587     out of the loops as well.  */
2588  store_motion ();
2589
2590  /* Move the expressions that are expensive enough.  */
2591  todo = move_computations ();
2592
2593  tree_ssa_lim_finalize ();
2594
2595  return todo;
2596}
2597
2598/* Loop invariant motion pass.  */
2599
2600namespace {
2601
2602const pass_data pass_data_lim =
2603{
2604  GIMPLE_PASS, /* type */
2605  "lim", /* name */
2606  OPTGROUP_LOOP, /* optinfo_flags */
2607  TV_LIM, /* tv_id */
2608  PROP_cfg, /* properties_required */
2609  0, /* properties_provided */
2610  0, /* properties_destroyed */
2611  0, /* todo_flags_start */
2612  0, /* todo_flags_finish */
2613};
2614
2615class pass_lim : public gimple_opt_pass
2616{
2617public:
2618  pass_lim (gcc::context *ctxt)
2619    : gimple_opt_pass (pass_data_lim, ctxt)
2620  {}
2621
2622  /* opt_pass methods: */
2623  opt_pass * clone () { return new pass_lim (m_ctxt); }
2624  virtual bool gate (function *) { return flag_tree_loop_im != 0; }
2625  virtual unsigned int execute (function *);
2626
2627}; // class pass_lim
2628
2629unsigned int
2630pass_lim::execute (function *fun)
2631{
2632  if (number_of_loops (fun) <= 1)
2633    return 0;
2634
2635  return tree_ssa_lim ();
2636}
2637
2638} // anon namespace
2639
2640gimple_opt_pass *
2641make_pass_lim (gcc::context *ctxt)
2642{
2643  return new pass_lim (ctxt);
2644}
2645
2646
2647